"""Neural Network Model for CTF Competition.

This module implements a deep neural network for predicting stock returns
using cross-sectional features. It supports both pooled and stock-wise training
modes, with optional retraining over time intervals.

The model uses a 2-layer deep neural network with dropout regularization
and early stopping to prevent overfitting.

BUG FIX #1 (2025-11-18):
------------------------
WHAT WENT WRONG: Model execution failed with error: 'DataFrame' object has no attribute 'to_list'
    - Called `features.to_list()` assuming `features` was a pandas Series
    - The pipeline passes `features` as a DataFrame, which has no
      list-conversion method, so the call raised an AttributeError

WHY IT WENT WRONG: Wrong method called on the wrong type
    - pandas Series provides `tolist()` (with `to_list()` as an alias since
      pandas 0.24); pandas DataFrame provides neither, and numpy arrays only
      provide `tolist()`
    - Common mistake when switching between pandas and other libraries
      (e.g., a Polars Series uses `to_list()`)

HOW IT WAS FIXED: Changed to proper DataFrame handling
    - standardize(): added DataFrame detection and proper column extraction
    - main(): added the same DataFrame handling before the features are used
    - If features is a DataFrame with a "features" column, extract the names
      via features['features'].tolist()
    - Each row in the DataFrame contains a single feature name (string)
    - All rows are extracted as a list of feature names
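
    Illustrative extraction (hypothetical feature names):
        features = pd.DataFrame({'features': ['ret_1m', 'be_me']})
        feature_list = features['features'].tolist()  # -> ['ret_1m', 'be_me']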

BUG FIX #2 (2025-11-18):
------------------------
WHAT WENT WRONG: Pipeline failed with error: 'unable to find column "features"'
    - Test script passed but actual pipeline failed
    - Test script was converting features DataFrame to a Series
    - Pipeline provides features as a DataFrame with a "features" column

WHY IT WENT WRONG: Test-production parity issue
    - The pipeline follows the pattern from benchmark code (e.g., ipca_pf.py, elastic_net_pooled.py)
    - Pipeline signature: main(chars: DataFrame, features: DataFrame, daily_ret: DataFrame)
    - Features DataFrame structure: 10 rows × 1 column, each row contains one feature name
    - Test script was incorrectly extracting this to a Series before passing to main()

HOW IT WAS FIXED: Updated validation script and model to handle the DataFrame features parameter
    - validate_nn_model.py: keep features as a DataFrame (matching the pipeline)
    - nn_model.py standardize(): extract feature names from the DataFrame rows
    - nn_model.py main(): extract feature names before use
    - Ensures test-production parity by using the exact pipeline data format
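
    Pipeline-format sketch (feature names are hypothetical; the features
    DataFrame is passed to main() unchanged, exactly as the pipeline sends it):
        features = pd.DataFrame({'features': ['ret_1m', 'be_me']})
        pf = main(chars, features, daily_ret)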

Functions:
    standardize: Cross-sectionally standardize features by month
    train_and_predict_neural_network_with_modes: Main training orchestrator
    train_neural_network_with_early_stopping: Core model training logic
    main: Entry point that coordinates data processing and prediction
"""

import numpy as np
import pandas as pd
import polars as pl
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


def standardize(df, features):
    """Cross-sectionally standardize features by month (eom).

    For each feature and each month, compute mean and std across all stocks,
    then standardize: (x - mean) / std. Missing values, infinities, and NaNs
    are replaced with the cross-sectional mean before standardization.

    Args:
        df: pandas DataFrame with columns ['id', 'eom', 'eom_ret', 'ctff_test',
            'ret_exc_lead1m'] plus feature columns
        features: pandas Series or list of feature column names to standardize

    Returns:
        pandas DataFrame with standardized features and original id/date columns
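
    Example:
        A minimal sketch with toy data (values approximate; the polars std
        used here has ddof=1, so the std of [1.0, 3.0] is sqrt(2)):

            df = pd.DataFrame({
                'id': [1, 2], 'eom': ['2020-01-31', '2020-01-31'],
                'eom_ret': ['2020-02-29', '2020-02-29'],
                'ctff_test': [False, True],
                'ret_exc_lead1m': [0.01, -0.02], 'x1': [1.0, 3.0]})
            standardize(df, ['x1'])['x1']  # -> approx [-0.71, 0.71]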
    """
    # Extract feature list from DataFrame (pipeline provides DataFrame with "features" column)
    # Each row in the DataFrame is a feature name (string)
    if isinstance(features, pd.DataFrame):
        if 'features' in features.columns:
            # Extract all rows as a list of feature names
            features = features['features'].tolist()
        else:
            # Fallback: assume single column
            features = features.iloc[:, 0].tolist()
    else:
        # Already a Series or list
        features = features.tolist() if hasattr(features, 'tolist') else list(features)
    df = pl.from_pandas(df)
    target_variable = ['ret_exc_lead1m']
    id_cols = ['id', 'eom', 'eom_ret', 'ctff_test']
    df = df.select(id_cols + features + target_variable)

    for feature in features:
        # Compute cross-sectional mean and std for this month
        df = df.with_columns(
            pl.col(feature).mean().over('eom').alias('mean_temp')
        ).with_columns(
            pl.when(pl.col('mean_temp').is_null())
            .then(pl.lit(0))
            .otherwise(pl.col('mean_temp'))
            .alias('mean_temp')
        )
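        # (the monthly mean is null only when the feature is entirely null in
        # that month; fall back to 0 so the imputation below stays defined)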
        df = df.with_columns(pl.col(feature).std().over('eom').alias('std_temp'))

        # Replace NaN/inf values with cross-sectional mean before standardizing
        df = df.with_columns([
            pl.when(
                pl.col(feature).is_nan() |
                pl.col(feature).is_null() |
                (pl.col(feature) == float('inf')) |
                (pl.col(feature) == float('-inf'))
            )
            .then(pl.col('mean_temp'))
            .otherwise(pl.col(feature))
            .alias(feature)
        ])

        # Standardize: (x - mean) / std
        df = df.with_columns(
            ((pl.col(feature) - pl.col('mean_temp')) / pl.col('std_temp')).alias(feature)
        )

        # Replace any remaining NaN/inf with 0 (e.g., if std=0)
        df = df.with_columns([
            pl.when(
                pl.col(feature).is_nan() |
                pl.col(feature).is_null() |
                (pl.col(feature) == float('inf')) |
                (pl.col(feature) == float('-inf'))
            )
            .then(pl.lit(0))
            .otherwise(pl.col(feature))
            .alias(feature)
        ])

        df = df.drop(['mean_temp', 'std_temp'])

    df = df.to_pandas()

    return df


def train_and_predict_neural_network_with_modes(
        df, feature_columns, target_variable, model_type='simple',
        training_mode='pooled', epochs=50, batch_size=32, days_interval=5,
        patience=5):
    """Train neural network and generate predictions with different training modes.

    Args:
        df: pandas DataFrame with features, target, and 'ctff_test' flag
        feature_columns: list of feature column names to use as predictors
        target_variable: str, name of target column ('ret_exc_lead1m')
        model_type: str, 'simple' (train once) or 'retrain' (retrain at intervals)
        training_mode: str, 'pooled' (train on all stocks) or 'stockwise' (per stock)
        epochs: int, maximum training epochs (early stopping may stop sooner)
        batch_size: int, batch size for gradient descent
        days_interval: int, for 'retrain' mode, number of dates between retraining
        patience: int, early stopping patience (epochs without improvement)

    Returns:
        pandas DataFrame with columns ['eom', 'id', 'actual_return', 'predicted_return']
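
    Example:
        A minimal sketch (feature names are hypothetical; df must contain the
        feature columns, the target, and 'ctff_test'):

            preds = train_and_predict_neural_network_with_modes(
                df, ['x1', 'x2'], 'ret_exc_lead1m',
                model_type='simple', training_mode='pooled', epochs=10)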
    """
    predictions_list = []

    # Split into train and test sets based on the boolean 'ctff_test' flag
    train_data = df[~df['ctff_test']]
    test_data = df[df['ctff_test']]

    print(f"Number of train samples: {len(train_data)}, Number of test samples: {len(test_data)}")

    if training_mode == 'pooled':
        final_predictions_df, _ = train_neural_network_with_early_stopping(
            train_data, test_data, feature_columns, target_variable,
            model_type, epochs, batch_size, days_interval, patience)
        predictions_list.append(final_predictions_df)

    elif training_mode == 'stockwise':
        stock_ids = train_data['id'].unique()

        for stock_id in stock_ids:
            stock_train_data = train_data[train_data['id'] == stock_id]
            stock_test_data = test_data[test_data['id'] == stock_id]

            if len(stock_test_data) == 0:
                continue

            stock_predictions, _ = train_neural_network_with_early_stopping(
                stock_train_data, stock_test_data, feature_columns,
                target_variable, model_type, epochs, batch_size,
                days_interval, patience)
            predictions_list.append(stock_predictions)

    final_output_df = pd.concat(predictions_list, ignore_index=True)
    return final_output_df


def train_neural_network_with_early_stopping(
        train_data, test_data, feature_columns, target_variable,
        model_type='simple', epochs=1, batch_size=32, days_interval=5,
        patience=5):
    """Build and train a 2-layer neural network with early stopping.

    Model architecture:
    - Layer 1: 192 units, ReLU activation, Dropout(0.4)
    - Layer 2: 192 units, ReLU activation, Dropout(0.1)
    - Output: 1 unit, linear activation (regression)
    - Optimizer: Adam(lr=0.0001)
    - Loss: Mean Squared Error
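
    With 10 input features, for example, the layers hold 10*192 + 192 = 2,112
    parameters (layer 1), 192*192 + 192 = 37,056 (layer 2), and 192*1 + 1 =
    193 (output), roughly 39.4k weights in total.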

    Args:
        train_data: pandas DataFrame with training examples
        test_data: pandas DataFrame with test examples
        feature_columns: list of feature column names
        target_variable: str, target column name
        model_type: str, 'simple' or 'retrain'
        epochs: int, maximum epochs
        batch_size: int, batch size
        days_interval: int, for 'retrain' mode
        patience: int, early stopping patience

    Returns:
        Tuple of (predictions_df, trained_model)
        - predictions_df: DataFrame with ['eom', 'id', 'actual_return', 'predicted_return']
        - trained_model: Keras Sequential model
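
    Example:
        A minimal sketch (train_df and test_df are hypothetical frames that
        hold the feature columns plus the target and metadata columns):

            preds_df, model = train_neural_network_with_early_stopping(
                train_df, test_df, ['x1', 'x2'], 'ret_exc_lead1m', epochs=10)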
    """
    predictors_train = train_data[feature_columns].values
    returns_train = train_data[target_variable].values

    predictors_test = test_data[feature_columns].values
    actual_returns_test = test_data[target_variable].values.flatten()

    # Note: features should already be standardized by the standardize() function.
    # If not, uncomment the StandardScaler code below (also requires
    # `from sklearn.preprocessing import StandardScaler`):
    # scaler = StandardScaler()
    # predictors_train = scaler.fit_transform(predictors_train)
    # predictors_test = scaler.transform(predictors_test)

    # Handle any potential NaNs or Infs in features (safety check)
    predictors_train = np.nan_to_num(predictors_train, nan=0.0, posinf=0.0, neginf=0.0)
    predictors_test = np.nan_to_num(predictors_test, nan=0.0, posinf=0.0, neginf=0.0)

    # Get the number of input features
    n_cols = predictors_train.shape[1]

    # Build the neural network model
    # Architecture: 192 -> 192 -> 1 with dropout regularization
    model = Sequential()
    model.add(Dense(
        192,
        activation='relu',
        input_shape=(n_cols,),
        kernel_initializer=RandomNormal(mean=0.0, stddev=0.01)
    ))
    model.add(Dropout(0.4))
    model.add(Dense(
        192,
        activation='relu',
        kernel_initializer=RandomNormal(mean=0.0, stddev=0.01)
    ))
    model.add(Dropout(0.1))
    model.add(Dense(
        1,
        activation='linear',
        kernel_initializer=RandomNormal(mean=0.0, stddev=0.01)
    ))
    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='mean_squared_error',
        metrics=['mean_absolute_error']
    )

    # Implement Early Stopping
    early_stopping = EarlyStopping(
        monitor='val_loss',  # Monitor validation loss
        patience=patience,  # Number of epochs to wait before stopping
        restore_best_weights=True,  # Restore the best weights after stopping
        verbose=1
    )

    if model_type == 'simple':
        # Train the neural network on the full train data once with early stopping
        history = model.fit(
            predictors_train, returns_train,
            epochs=epochs,
            batch_size=batch_size,
            verbose=1,
            validation_split=0.2,  # Use 20% of the training data for validation
            callbacks=[early_stopping]  # Add EarlyStopping as a callback
        )

        # Predict on all test data in one go
        stock_predictions = model.predict(predictors_test).flatten()

    elif model_type == 'retrain':
        stock_predictions = []
        current_train_data = predictors_train.copy()
        current_train_labels = returns_train.copy()

        # Sort the test data by date and realign the actual returns so that
        # the interval predictions (generated in date order) match the output
        # rows assembled at the end of this function
        test_data = test_data.sort_values(by='eom')
        actual_returns_test = test_data[target_variable].values.flatten()
        unique_dates = test_data['eom'].unique()

        # Iterate over the dates in intervals of `days_interval`
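        # e.g. with 12 unique dates and days_interval=5 the loop processes
        # dates[0:5], dates[5:10], then dates[10:12]; after each interval the
        # realized test rows are folded into the training set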
        for i in range(0, len(unique_dates), days_interval):
            interval_dates = unique_dates[i:i + days_interval]
            date_group = test_data[test_data['eom'].isin(interval_dates)]

            predictors_test_interval = date_group[feature_columns].values
            # predictors_test_interval = scaler.transform(predictors_test_interval)
            actual_returns_test_interval = date_group[target_variable].values

            # Train the model with Early Stopping
            model.fit(
                current_train_data, current_train_labels,
                epochs=epochs,
                batch_size=batch_size,
                verbose=1,
                validation_split=0.2,
                callbacks=[early_stopping]
            )
            interval_predictions = model.predict(predictors_test_interval).flatten()

            # Store the predictions
            stock_predictions.extend(interval_predictions)

            current_train_data = np.vstack([current_train_data, predictors_test_interval])
            current_train_labels = np.append(current_train_labels, actual_returns_test_interval)

    # Combine predictions with actual returns, eom, and stock ID
    stock_output = pd.DataFrame({
        'eom': test_data['eom'],
        'id': test_data['id'],
        'actual_return': actual_returns_test,
        'predicted_return': stock_predictions
    })

    return stock_output, model


def main(chars, features, daily_ret):
    """Main entry point for neural network model.

    This function coordinates the full pipeline:
    1. Standardize features cross-sectionally by month
    2. Train neural network with early stopping
    3. Generate predictions on test set
    4. Convert predictions to portfolio weights via cross-sectional ranking

    Args:
        chars: pandas DataFrame with characteristics (features + target + metadata)
        features: pandas DataFrame with 'features' column, Series, or list of feature column names
        daily_ret: pandas DataFrame with daily returns (not used in current implementation)

    Returns:
        pandas DataFrame with columns ['id', 'eom', 'w'] representing portfolio weights
        where 'w' is the rank-based weight (rank / count) for each stock in each month
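
    Example:
        A minimal sketch with pipeline-format inputs (feature names are
        hypothetical; daily_ret is unused by the current implementation):

            features = pd.DataFrame({'features': ['x1', 'x2']})
            pf = main(chars, features, daily_ret=None)
            # pf holds one weight per (id, eom) pair in columns id, eom, w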
    """
    df = chars
    target_variable = 'ret_exc_lead1m'

    # Extract feature list from DataFrame (pipeline provides DataFrame with "features" column)
    # Each row in the DataFrame is a feature name (string)
    if isinstance(features, pd.DataFrame):
        if 'features' in features.columns:
            # Extract all rows as a list of feature names
            feature_columns = features['features'].tolist()
        else:
            feature_columns = features.iloc[:, 0].tolist()
    else:
        feature_columns = features.tolist() if hasattr(features, 'tolist') else list(features)

    # Standardize features cross-sectionally by month (pass the extracted
    # list so standardize() does not have to re-detect the input format)
    df = standardize(df=df, features=feature_columns)

    # Train model and generate predictions
    model_output = train_and_predict_neural_network_with_modes(
        df,
        feature_columns,
        target_variable,
        model_type='simple',
        training_mode='pooled',
        epochs=1,
        batch_size=32,
        days_interval=100,
        patience=5
    )

    # Convert predictions to portfolio weights via cross-sectional ranking
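    # Example: three stocks in one month ranked 1, 2, 3 by predicted return
    # receive weights 1/3, 2/3, and 3/3, so higher forecasts get larger weights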
    pf = (
        model_output
        .assign(rank=lambda x: x.groupby('eom')['predicted_return'].rank(method='first'))
        .assign(count=lambda x: x.groupby('eom')['rank'].transform('count'))
        .assign(w=lambda x: x['rank'] / x['count'])
        [['id', 'eom', 'w']]
    )

    return pf


if __name__ == '__main__':
    # load_data() and export_data() are assumed to be supplied by the
    # competition pipeline at runtime; they are not defined in this module.
    # daily_ret is unused by the current implementation (see main()).
    (features, chars) = load_data()
    pf = main(chars, features, daily_ret=None)
    export_data(pf)