This guide shows how to use Python or R to get data for the Common Task Framework-inspired competition proposed by Hellum, Jensen, Kelly, and Pedersen (2025).
We'll extract the following three tables from the WRDS database:
contrib_global_factor.ctff_features
contrib_global_factor.ctff_chars
contrib_global_factor.ctff_daily_ret
Install packages (uv/pip/conda all fine):
uv add pandas sqlalchemy psycopg2-binary keyring
Store WRDS password securely (replace WRDS_USERNAME
and WRDS_PASSWORD
):
import keyring
keyring.set_password("wrds", "WRDS_USERNAME", "WRDS_PASSWORD")
Install packages:
install.packages(c("DBI", "RPostgres", "keyring"))
Store WRDS password securely (replace WRDS_USERNAME
):
keyring::key_set(service = "wrds", username = "WRDS_USERNAME")
import keyring
import pandas as pd
from sqlalchemy import create_engine, text
# --- Credentials from OS keychain (keyring) ---
creds = keyring.get_credential("wrds", None)
if creds is None:
raise RuntimeError(
"""No WRDS credentials stored.
Run: keyring.set_password('wrds', 'WRDS_USERNAME', 'WRDS_PASSWORD')."""
)
wrds_un, wrds_pw = creds.username, creds.password
# --- WRDS Postgres connection (SSL required) ---
engine = create_engine(
f"postgresql+psycopg2://{wrds_un}:{wrds_pw}"
"@wrds-pgdata.wharton.upenn.edu:9737/wrds?sslmode=require"
)
# --- Helper: simple fetch ---
def wrds_fetch(sql: str) -> pd.DataFrame:
with engine.connect() as conn:
return pd.read_sql_query(text(sql), conn)
# --- Download tables ---
ctff_features = wrds_fetch("SELECT * FROM contrib_global_factor.ctff_features;")
ctff_chars = wrds_fetch("SELECT * FROM contrib_global_factor.ctff_chars;")
ctff_daily_ret = wrds_fetch("SELECT * FROM contrib_global_factor.ctff_daily_ret;")
# --- Save locally ---
# For example, we use (requires pyarrow package):
# ctff_features.to_parquet("data/raw/ctff_features.parquet", index=False)
# ctff_chars.to_parquet("data/raw/ctff_chars.parquet", index=False)
# ctff_daily_ret.to_parquet("data/raw/ctff_daily_ret.parquet", index=False)
Memory issues? If memory is tight for, e.g., ctff_chars
, fetch in chunks:
with engine.connect() as conn:
parts = pd.read_sql_query(
text("SELECT * FROM contrib_global_factor.ctff_chars;"),
conn,
chunksize=500_000,
)
ctff_chars = pd.concat(parts, ignore_index=True)
# --- Libraries ---
library(keyring)
library(DBI)
library(RPostgres)
# --- Credentials from OS keychain (keyring) ---
if (nrow(key_list("wrds"))==0) {
stop(
"No WRDS credentials stored.\n",
"Run: keyring::key_set(service = 'wrds', username = 'WRDS_USERNAME')."
)
}
wrds_un <- key_list(service = "wrds")$username[1]
wrds_pw <- key_get("wrds", wrds_un)
# --- Connect to WRDS (SSL required) ---
con <- dbConnect(
RPostgres::Postgres(),
host = "wrds-pgdata.wharton.upenn.edu",
port = 9737,
dbname = "wrds",
sslmode = "require",
user = wrds_un,
password = wrds_pw
)
on.exit(dbDisconnect(con), add = TRUE)
# --- Helper: simple fetch ---
wrds_fetch <- function(sql) DBI::dbGetQuery(con, sql)
# --- Download tables ---
ctff_features <- wrds_fetch("SELECT * FROM contrib_global_factor.ctff_features;")
ctff_chars <- wrds_fetch("SELECT * FROM contrib_global_factor.ctff_chars;")
ctff_daily_ret <- wrds_fetch("SELECT * FROM contrib_global_factor.ctff_daily_ret;")
# --- Save locally ---
# For example, we use (requires arrow package):
# arrow::write_parquet(ctff_features, "data/raw/ctff_features.parquet")
# arrow::write_parquet(ctff_chars, "data/raw/ctff_chars.parquet")
# arrow::write_parquet(ctff_daily_ret, "data/raw/ctff_daily_ret.parquet")