"""
Inverse Volatility Weighting Strategy

CTF Admin Modifications (2026-03-16):
--------------------------------------
1. Added .drop_nulls(subset=['w']) before returning weights
   Reason: Stocks with zero realized volatility produce NaN weights
   (1/0=inf, inf/sum=NaN). Pipeline CSV validation failed with
   "Weight value '' is not numeric" for 10 affected rows.
"""
import pandas as pd
import polars as pl
import numpy as np

def inv_vol(chars: pd.DataFrame, daily_ret: pd.DataFrame, window: int = 21) -> pl.DataFrame:
    """Compute inverse-volatility portfolio weights for each month-end.

    For every (id, eom) row of ``chars`` flagged with ``ctff_test == 1``, the
    stock's trailing realized volatility (rolling standard deviation of
    ``ret_exc`` over ``window`` daily rows, last value available in the same
    calendar month) is looked up. Missing volatilities are replaced by the
    cross-sectional median of that ``eom``. Weights are ``1 / vol`` normalised
    to sum to one within each ``eom``.

    Args:
        chars: Characteristics data; columns ``id``, ``eom`` and ``ctff_test``
            are read. ``eom`` must be a temporal column (``.dt.truncate`` is
            applied to it).
        daily_ret: Daily returns; columns ``id``, ``date`` and ``ret_exc`` are
            read. ``date`` must be a temporal column.
        window: Number of daily rows in the rolling volatility window.

    Returns:
        A Polars DataFrame with columns ``id``, ``eom`` and ``w``. Rows whose
        weight cannot be computed (volatility that is zero, non-finite or
        still missing after the median fill) are omitted.
    """
    # Trailing realized vol per stock, matched to each month-end
    vol = (
        pl.from_pandas(daily_ret).lazy()
        .sort(['id', 'date'])
        .with_columns(
            vol=pl.col('ret_exc').rolling_std(window_size=window).over('id'),
            ym=pl.col('date').dt.truncate('1mo'),
        )
        .group_by(['id', 'ym']).agg(pl.col('vol').last())
    )

    # A volatility is usable only if it is finite, strictly positive and its
    # reciprocal is finite. Anything else (0 -> inf, NaN, subnormal overflow)
    # is masked to null so that it is excluded from the normalising sum;
    # otherwise a single zero-vol stock would make the denominator infinite,
    # turning its own weight into NaN and every other weight in that month
    # into 0.
    inverse = 1 / pl.col('vol')
    usable = pl.col('vol').is_finite() & (pl.col('vol') > 0) & inverse.is_finite()

    df = (
        pl.from_pandas(chars).lazy()
        .filter(pl.col('ctff_test').eq(1))
        .select(['id', 'eom'])
        .with_columns(ym=pl.col('eom').dt.truncate('1mo'))
        .join(vol, on=['id', 'ym'], how='left')
        # Stocks without a matched volatility get the month's median.
        .with_columns(vol=pl.col('vol').fill_null(pl.col('vol').median().over('eom')))
        .with_columns(inv_vol=pl.when(usable).then(inverse))
        .with_columns(
            w=pl.col('inv_vol') / pl.col('inv_vol').sum().over('eom')
        )
        .select(['id', 'eom', 'w'])
        # Polars distinguishes null from NaN and drop_nulls() keeps NaN, so
        # filter on finiteness: a null predicate also drops the row, which
        # removes null, NaN and infinite weights in one step.
        .filter(pl.col('w').is_finite())
        .collect()
    )
    return df

    

def main(chars: pd.DataFrame, features: pd.DataFrame, daily_ret: pd.DataFrame) -> pd.DataFrame:
    """
    Entry point: compute inverse-volatility portfolio weights.

    Delegates to ``inv_vol`` with its default window and converts the
    resulting Polars frame to pandas.

    Args:
        chars (pd.DataFrame): Characteristics data.
        features (pd.DataFrame): Feature names; accepted as part of the
            signature but not used by this strategy.
        daily_ret (pd.DataFrame): Daily returns data.
    Returns:
        pd.DataFrame: DataFrame with columns 'id', 'eom', and 'w'.
    """
    weights = inv_vol(chars, daily_ret)
    return weights.to_pandas()

# Disabled local-run harness: the `if False` guard means this never executes.
# Flip the guard to run the strategy against local parquet files and write
# the resulting weights to output.csv.
if False:
    chars, features, daily_ret = (
        pd.read_parquet(path)
        for path in (
            'data/ctff_chars.parquet',
            'data/ctff_features.parquet',
            'data/ctff_daily_ret.parquet',
        )
    )
    output = main(chars=chars, features=features, daily_ret=daily_ret)
    output.to_csv('output.csv', index=False)


if __name__ == "__main__":
    # NOTE(review): load_data and export_data are neither defined nor imported
    # in the visible part of this file — presumably injected by the execution
    # harness; confirm before running this module directly.
    # The loader's result is unpacked as (features, chars, daily_ret), which is
    # a different order from main()'s parameters, so pass them by keyword.
    features, chars, daily_ret = load_data()
    portfolio = main(chars=chars, features=features, daily_ret=daily_ret)
    export_data(portfolio)
