import pandas as pd


def main(chars: pd.DataFrame, features: pd.DataFrame, daily_ret: pd.DataFrame) -> pd.DataFrame:
    """
    Build a market-cap weighted index over the test period.

    Within each month (`eom`), every stock's weight is its `market_equity`
    divided by the month's total `market_equity`, so weights sum to 1 per month.
    Rows outside the test period, and rows whose `market_equity` is missing,
    zero or negative, are excluded before weights are computed.

    Args:
        chars: Stock characteristics (ctff_chars.parquet). Must provide the
            columns `id`, `eom`, `market_equity` and `ctff_test`.
        features: Computed features (ctff_features.parquet). Not used by this
            strategy; accepted so the signature matches the pipeline's call.
        daily_ret: Historical daily returns (ctff_daily_ret.parquet). Not used
            by this strategy; accepted for the same reason.

    Returns:
        DataFrame with columns: id (int64), eom, w (float64). Rows keep the
        index labels they had in `chars`.

    Raises:
        KeyError: If `chars` lacks one of the required columns.

    CTF Admin Modifications (2026-03-07):
    --------------------------------------
    1. Added [CTF-DEBUG] progress statements at start/end of main()
       Reason: HPC jobs can run for hours; debug output helps monitor progress

    2. Added explicit type casting for output columns (id->int, w->float)
       Reason: Ensures consistent output types for pipeline processing
    """
    print("[CTF-DEBUG] Starting main()", flush=True)

    # Restrict to the test period. The comparison (rather than using the column
    # directly as a mask) keeps missing flags out of the selection.
    in_test = chars['ctff_test'] == True  # noqa: E712
    universe = chars.loc[in_test, ['id', 'eom', 'market_equity']]

    # Keep only rows with a known, strictly positive market equity.
    market_equity = universe['market_equity']
    universe = universe.loc[market_equity.notna() & (market_equity > 0)]

    # Broadcast each month's total back onto its rows with the built-in 'sum'
    # transform; this avoids calling a Python lambda once per month.
    month_total = universe.groupby('eom')['market_equity'].transform('sum')
    weights = universe['market_equity'] / month_total

    # assign() works on a fresh copy, so no column is ever written onto a
    # filtered slice (which is what triggers SettingWithCopyWarning).
    # 'int64' is spelled out because plain `int` is 32-bit on some platforms.
    result = universe[['id', 'eom']].assign(
        id=universe['id'].astype('int64'),
        w=weights.astype(float),
    )

    print(f"[CTF-DEBUG] Completed: {len(result)} rows, {result['eom'].nunique()} months", flush=True)
    return result


# Ad-hoc local run (ignored by CTF pipeline)
if __name__ == "__main__":
    # Read the three parquet inputs from the working directory, in order.
    chars, features, daily_ret = (
        pd.read_parquet(path)
        for path in (
            'ctff_chars.parquet',
            'ctff_features.parquet',
            'ctff_daily_ret.parquet',
        )
    )

    # Compute the portfolio weights and persist them without the row index.
    pf = main(chars, features, daily_ret)
    pf.to_csv('output.csv', index=False)

    # Quick summary of what was written.
    print(f"Output shape: {pf.shape}")
    month_count = pf['eom'].nunique()
    print(f"Months: {month_count}")
    avg_stocks_per_month = pf.groupby('eom').size().mean()
    print(f"Stocks per month (avg): {avg_stocks_per_month:.0f}")
    preview = pf.head(10)
    print(preview)
