"""
Ridge Tilt Portfolio

CTF Admin Modifications (2026-02-16):
--------------------------------------
1. Added [CTF-DEBUG] progress statements
   Reason: Enable HPC job monitoring with start/end timestamps and phase tracking.

2. Fixed 'eom' column access in the groupby apply function
   Reason: Recent pandas versions deprecate (and newer ones drop) access to the
           grouping column inside groupby("eom").apply(). Changed to use g.name
           (the group key) to reconstruct the eom column in the output.
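
   Illustrative sketch of the g.name access (toy, hypothetical values; not
   part of the pipeline):

       import pandas as pd

       df = pd.DataFrame({"eom": ["2024-01-31", "2024-01-31", "2024-02-29"],
                          "pred": [0.1, 0.3, 0.2]})
       out = df.groupby("eom", group_keys=False).apply(
           lambda g: pd.DataFrame({"eom": g.name, "pred": g["pred"].values})
       )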
"""

import time
import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge

def main(chars: pd.DataFrame,
         features: pd.DataFrame,
         daily_ret: pd.DataFrame) -> pd.DataFrame:

    # CTF-FIX: Add start timestamp for HPC monitoring
    print(f"[CTF-DEBUG] Starting main() at {time.strftime('%Y-%m-%d %H:%M:%S')}", flush=True)
    start_time = time.time()

    # ------------------------------------------------------------
    # Setup
    # ------------------------------------------------------------
    chars = chars.copy()
    chars["eom"] = pd.to_datetime(chars["eom"])

    # extract feature list (pipeline provides DataFrame with 'features' column)
    if "features" in features.columns:
        feature_cols = features["features"].tolist()
    else:
        feature_cols = features.iloc[:, 0].tolist()

    y_col = "ret_exc_lead1m"

    data = chars[["id", "eom", "ctff_test"] + feature_cols + [y_col]].copy()
    print(f"[CTF-DEBUG] Using {len(feature_cols)} features", flush=True)

    # ------------------------------------------------------------
    # Cross-sectional rank standardization
    # ------------------------------------------------------------
    print("[CTF-DEBUG] Cross-sectional rank standardization...", flush=True)
    def rank_to_half(x):
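        """Map a month's non-missing values to evenly spaced ranks in [-0.5, 0.5].

        Missing values are left at 0.0, the center of the scaled range.
        """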
        mask = x.notna()
        n = mask.sum()
        if n <= 1:
            return pd.Series(np.zeros(len(x)), index=x.index)
        r = x[mask].rank(method="average")
        scaled = (r - 1) / (n - 1) - 0.5
        out = pd.Series(np.zeros(len(x)), index=x.index)
        out.loc[mask] = scaled.values
        return out

    for col in feature_cols:
        data[col] = data.groupby("eom")[col].transform(rank_to_half)

    # ------------------------------------------------------------
    # Train / Test split
    # ------------------------------------------------------------
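    # Rows flagged ctff_test form the held-out evaluation sample; training rows
    # must have a non-missing lead return.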
    train = data[data["ctff_test"].eq(False)].dropna(subset=[y_col]).copy()
    test  = data[data["ctff_test"].eq(True)].copy()
    print(f"[CTF-DEBUG] Train: {len(train)} rows, Test: {len(test)} rows", flush=True)

    train = train.sort_values("eom")

    # ------------------------------------------------------------
    # Internal time-based tuning (inside training sample only)
    # ------------------------------------------------------------
    print("[CTF-DEBUG] Hyperparameter tuning (30 alphas)...", flush=True)
    unique_train_dates = train["eom"].sort_values().unique()
    split_idx = int(len(unique_train_dates) * 0.8)
    val_start_date = unique_train_dates[split_idx]

    train_tr = train[train["eom"] < val_start_date]
    train_val = train[train["eom"] >= val_start_date]

    x_tr = train_tr[feature_cols]
    y_tr = train_tr[y_col]

    x_val = train_val[feature_cols]
    y_val = train_val[y_col]

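    # 30 candidate ridge penalties, log-spaced between 1e-3 and 1e6.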
    ridge_grid = np.logspace(-3, 6, 30)

    best_alpha = None
    best_mse = np.inf

    for a in ridge_grid:
        model = Ridge(alpha=a)
        model.fit(x_tr, y_tr)
        pred_val = model.predict(x_val)
        mse = np.mean((y_val - pred_val) ** 2)
        if mse < best_mse:
            best_mse = mse
            best_alpha = a

    # ------------------------------------------------------------
    # Final model on full training sample
    # ------------------------------------------------------------
    print(f"[CTF-DEBUG] Best alpha: {best_alpha:.6f}, training final model...", flush=True)
    ridge = Ridge(alpha=best_alpha)
    ridge.fit(train[feature_cols], train[y_col])

    print("[CTF-DEBUG] Generating predictions...", flush=True)
    test = test.copy()
    test["pred"] = ridge.predict(test[feature_cols])

    # ------------------------------------------------------------
    # Long-only continuous weighting
    # shift by cross-sectional minimum
    # ------------------------------------------------------------
    print("[CTF-DEBUG] Building portfolio weights...", flush=True)
    def build_weights_long_only(g):
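        """Turn one month's predictions into non-negative weights that sum to one.

        Predictions are shifted so the monthly minimum is zero, then normalized by
        the shifted sum; if the month's predictions are (near-)identical, fall back
        to equal weights.
        """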
        signal = g["pred"] - g["pred"].min()
        denom = signal.sum()

        if denom <= 1e-12:
            w = pd.Series(1.0 / len(g), index=g.index)
        else:
            w = signal / denom

        # CTF-FIX: Use g.name (the group key) for eom; relying on the grouping
        # column inside apply is deprecated in recent pandas.
        return pd.DataFrame({
            "id": g["id"].values,
            "eom": g.name,  # The groupby key (eom value for this group)
            "w": w.values
        })

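    # Build weights month by month; group_keys=False keeps the result flat
    # (no extra eom level added to the index).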
    weights = (
        test
        .groupby("eom", group_keys=False)
        .apply(build_weights_long_only)
        .reset_index(drop=True)
    )

    # Safety: remove potential NaN
    weights["w"] = weights["w"].fillna(0.0)

    # CTF-FIX: Add completion summary for HPC monitoring
    elapsed = time.time() - start_time
    print(f"[CTF-DEBUG] Completed main() in {elapsed:.1f}s", flush=True)
    print(f"[CTF-DEBUG] Output: {len(weights)} rows, {weights['eom'].nunique()} unique months", flush=True)

    return weights[["id", "eom", "w"]]