Build a Stock Factor Backtest Framework with yfinance and pandas
Build a Stock Factor Backtest Framework with yfinance and pandas
Factor investing is one of the most well-researched areas in quantitative finance. Here's how to build a factor backtest framework using free data from yfinance.
Architecture
Data Layer (yfinance) → Factor Engine (pandas) → Portfolio Builder → Performance Analyzer
Step 1: Data Collection
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
def get_universe_data(
    symbols: list,
    period: str = "2y"
) -> pd.DataFrame:
    """Download price data for a stock universe and return it in long format.

    Args:
        symbols: Ticker symbols to request from ``yf.download``.
        period: Look-back window forwarded to ``yf.download`` (e.g. ``"2y"``).

    Returns:
        A long-format DataFrame with ``close``, ``volume`` and ``symbol``
        columns, one row per symbol and observation. The index of the
        downloaded data is turned into a regular column by ``reset_index``.

    Raises:
        ValueError: If no symbol produced any row with a close price.
    """
    data = yf.download(
        symbols,
        period=period,
        group_by='ticker',
        auto_adjust=True,
        threads=True,
    )

    # NOTE(review): some yfinance versions appear to return flat (non
    # multi-level) columns when a single ticker is requested - confirm against
    # the installed version. In that layout the frame itself is that ticker's
    # data, so wrap it to keep the per-symbol lookup below uniform.
    if len(symbols) == 1 and not isinstance(data.columns, pd.MultiIndex):
        data = {symbols[0]: data}

    # Reshape to long format: one frame per symbol, stacked at the end.
    frames = []
    for symbol in symbols:
        # Only the lookup can fail for a missing ticker, so keep the try
        # body limited to it.
        try:
            prices = data[symbol][['Close', 'Volume']].copy()
        except (KeyError, TypeError):
            continue

        prices.columns = ['close', 'volume']
        # Rows without a close price carry no information and would otherwise
        # be counted as loaded data for this symbol.
        prices = prices.dropna(subset=['close'])
        if prices.empty:
            continue

        frames.append(prices.assign(symbol=symbol))

    if not frames:
        # pd.concat([]) would raise a generic "No objects to concatenate";
        # raise the same exception type with an actionable message instead.
        raise ValueError(
            f"No price data could be loaded for symbols: {list(symbols)}"
        )

    return pd.concat(frames).reset_index()
# Demo universe: twenty large-cap names taken from the S&P 500.
symbols = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN',
    'NVDA', 'META', 'TSLA', 'JPM',
    'V', 'JNJ', 'WMT', 'PG',
    'UNH', 'MA', 'HD', 'DIS',
    'BAC', 'ADBE', 'CRM', 'NFLX',
]

# Pull two years of history for the whole universe in one call.
universe = get_universe_data(symbols, period='2y')

# Report how much data came back and for how many distinct tickers.
print(
    f"Loaded {len(universe)} rows for "
    f"{universe['symbol'].nunique()} stocks"
)
Step 2: Factor Calculation
def calculate_factors(df: pd.DataFrame) -> pd.DataFrame:
    """Calculate momentum, mean reversion, and volatility factors.

    Every factor is computed independently per ``symbol`` from that symbol's
    rows in their existing order, so rows are expected to be chronological
    within each symbol.

    Args:
        df: Long-format frame with at least ``symbol``, ``close`` and
            ``volume`` columns. It is not modified.

    Returns:
        A copy of ``df`` with four extra columns, restricted to rows where
        every column is non-null:

        * ``momentum_12m`` - 252-row return, lagged by 21 rows.
        * ``reversal_1m`` - 21-row return.
        * ``volatility`` - 60-row rolling std of 1-row returns, scaled by
          ``sqrt(252)``.
        * ``volume_momentum`` - 20-row mean volume over 60-row mean volume.

        An empty input yields an empty frame that still carries the four
        factor columns, so the output schema does not depend on the data.
    """
    result = df.copy()

    if result.empty:
        # Without any symbol there is nothing to group; still create the
        # factor columns so downstream column lookups do not fail.
        for column in (
            'momentum_12m', 'reversal_1m', 'volatility', 'volume_momentum'
        ):
            result[column] = np.nan
        return result.dropna()

    # One groupby pass per source column instead of rescanning the whole
    # frame with a boolean mask for every symbol.
    close_by_symbol = result.groupby('symbol')['close']
    volume_by_symbol = result.groupby('symbol')['volume']

    # Momentum: 12-month return (skip last month)
    result['momentum_12m'] = close_by_symbol.transform(
        lambda prices: prices.pct_change(252).shift(21)
    )

    # Short-term reversal: 1-month return
    result['reversal_1m'] = close_by_symbol.transform(
        lambda prices: prices.pct_change(21)
    )

    # Volatility: 60-day realized vol, annualized with sqrt(252)
    result['volatility'] = close_by_symbol.transform(
        lambda prices: prices.pct_change().rolling(60).std() * np.sqrt(252)
    )

    # Volume momentum: 20d vs 60d average volume
    result['volume_momentum'] = volume_by_symbol.transform(
        lambda vol: vol.rolling(20).mean() / vol.rolling(60).mean()
    )

    # Rows inside each symbol's warm-up window have NaN factors; drop them.
    return result.dropna()
# Add the factor columns to the downloaded universe; rows that lack any
# factor value are dropped by calculate_factors.
factors = calculate_factors(universe)
Step 3: Portfolio Construction
def _resolve_rebalance_offset(rebalance_freq):
    """Convert a frequency alias into a pandas offset across pandas versions."""
    to_offset = pd.tseries.frequencies.to_offset
    # Newer pandas spells the period-end aliases with a trailing "E" and
    # warns on (or rejects) the legacy spelling; older pandas only knows the
    # legacy one. Prefer the modern alias and fall back to what was passed.
    modern_aliases = {'M': 'ME', 'Q': 'QE', 'Y': 'YE', 'A': 'YE'}
    modern = (
        modern_aliases.get(rebalance_freq)
        if isinstance(rebalance_freq, str) else None
    )
    if modern is not None:
        try:
            return to_offset(modern)
        except ValueError:
            pass
    return to_offset(rebalance_freq)


def build_factor_portfolio(
    df: pd.DataFrame,
    factor: str,
    n_long: int = 5,
    n_short: int = 5,
    rebalance_freq: str = 'M',
) -> pd.DataFrame:
    """Build long-short portfolio based on factor ranking.

    On each rebalance date the symbols are ranked by their latest ``factor``
    value; the ``n_long`` highest are held long and the ``n_short`` lowest
    are held short, equally weighted, until the next rebalance date.

    Args:
        df: Long-format frame with ``Date``, ``symbol``, ``close`` and the
            ``factor`` column.
        factor: Name of the column to rank on.
        n_long: Number of top-ranked symbols in the long leg.
        n_short: Number of bottom-ranked symbols in the short leg.
        rebalance_freq: pandas frequency alias that defines the rebalance
            dates and the length of each holding period.

    Returns:
        One row per rebalance date that has a following holding period, with
        columns ``date``, ``long_return``, ``short_return``, ``ls_return``,
        ``longs`` and ``shorts``. The columns are present even when no period
        qualifies.
    """
    result_columns = [
        'date', 'long_return', 'short_return', 'ls_return', 'longs', 'shorts',
    ]
    if df.empty:
        return pd.DataFrame(columns=result_columns)

    offset = _resolve_rebalance_offset(rebalance_freq)

    # "Latest value per symbol" below relies on chronological row order.
    data = df.sort_values('Date', kind='stable')

    # Rebalance dates are the bin labels of the requested frequency.
    dates = data.groupby(pd.Grouper(key='Date', freq=offset)).size().index

    portfolio_returns = []
    for date in dates:
        # Latest known row per symbol on or before the rebalance date, so
        # the ranking uses no information from the holding period.
        snapshot = data[data['Date'] <= date].groupby('symbol').last()
        if len(snapshot) < n_long + n_short:
            continue

        # Long the highest factor values, short the lowest.
        scores = snapshot[factor]
        longs = scores.nlargest(n_long).index.tolist()
        shorts = scores.nsmallest(n_short).index.tolist()

        # Hold until the next rebalance anchor rather than a fixed calendar
        # month, so consecutive holding periods tile without gaps (e.g.
        # Feb 28 -> Mar 31 for month-end) and follow rebalance_freq.
        period_end = date + offset
        holding = data[(data['Date'] > date) & (data['Date'] <= period_end)]
        if holding.empty:
            continue

        # Measure each return from the close at the rebalance date to the
        # last close in the holding period; anchoring on the first close
        # inside the period would drop that first day's move.
        exit_close = holding.groupby('symbol')['close'].last()
        period_returns = exit_close / snapshot['close'] - 1

        # Symbols without prices in the holding period are NaN here and are
        # skipped by mean().
        long_ret = period_returns.reindex(longs).mean()
        short_ret = period_returns.reindex(shorts).mean()

        portfolio_returns.append({
            'date': date,
            'long_return': long_ret,
            'short_return': short_ret,
            'ls_return': long_ret - short_ret,
            'longs': longs,
            'shorts': shorts,
        })

    return pd.DataFrame(portfolio_returns, columns=result_columns)
# Run the long-short backtest on the 12-month momentum factor.
momentum_results = build_factor_portfolio(
    factors,
    'momentum_12m',
    n_long=5,
    n_short=5,
)

# Compound the per-period long-short returns into one cumulative figure.
total_return = momentum_results['ls_return'].add(1).prod() - 1

# Mean over standard deviation of the per-period returns, scaled by sqrt(12).
sharpe = (
    momentum_results['ls_return'].mean()
    / momentum_results['ls_return'].std()
    * np.sqrt(12)
)

print(f"Momentum L/S Total Return: {total_return:.1%}")
print(f"Momentum L/S Sharpe: {sharpe:.2f}")
Factor Performance Summary
| Factor | Typical Sharpe | Best For |
|---|---|---|
| Momentum (12-1) | 0.3-0.7 | Trend-following |
| Short-term Reversal | 0.2-0.5 | Mean reversion |
| Low Volatility | 0.3-0.6 | Defensive |
| Volume Momentum | 0.1-0.4 | Liquidity signals |
This factor framework forms the analytical backbone behind strategy verification on ClawDUX, where uploaded strategies are evaluated against standard factor benchmarks to ensure genuine alpha generation.
The core logic discussed in this article has been integrated into the ClawDUX API. Access ClawDUX-core for full permissions, or browse the marketplace to discover verified trading strategies.