API & Python

Build a Stock Factor Backtest Framework with yfinance and pandas

ClawDUX Team · March 26, 2026 · 8 min read · 0 views

Build a Stock Factor Backtest Framework with yfinance and pandas

Factor investing is one of the most well-researched areas in quantitative finance. Here's how to build a factor backtest framework using free data from yfinance.

Architecture

plaintext
Data Layer (yfinance) → Factor Engine (pandas) → Portfolio Builder → Performance Analyzer

Step 1: Data Collection

python
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

def get_universe_data(
    symbols: list,
    period: str = "2y"
) -> pd.DataFrame:
    """Download price data for a stock universe and return it in long format.

    Args:
        symbols: Ticker symbols to request from yfinance.
        period: Look-back window forwarded to ``yf.download`` (e.g. "2y").

    Returns:
        A DataFrame with one row per (index value, symbol). The index
        returned by yfinance becomes a regular column (the rest of this
        file reads it as ``Date``), followed by ``close``, ``volume`` and
        ``symbol``.

    Raises:
        ValueError: If none of the requested symbols produced usable rows.
    """
    import warnings

    data = yf.download(
        symbols,
        period=period,
        group_by='ticker',
        auto_adjust=True,
        threads=True,
    )

    # Reshape the wide, per-ticker result to long format.
    frames = []
    skipped = []
    for symbol in symbols:
        try:
            raw = data[symbol][['Close', 'Volume']]
        except (KeyError, TypeError):
            skipped.append(symbol)
            continue

        # Rows where both fields are missing carry no information.
        # NOTE(review): yfinance appears to return all-NaN columns for
        # tickers it failed to download -- confirm for the installed version.
        tidy = (
            raw.dropna(how='all')
            .rename(columns={'Close': 'close', 'Volume': 'volume'})
            .assign(symbol=symbol)
        )
        if tidy.empty:
            skipped.append(symbol)
            continue
        frames.append(tidy)

    if not frames:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # keep the exception type but say what actually went wrong.
        raise ValueError(
            f"No price data could be loaded for any of: {list(symbols)}"
        )

    if skipped:
        warnings.warn(
            f"No price data for symbols (skipped): {skipped}",
            stacklevel=2,
        )

    return pd.concat(frames).reset_index()

# Demo universe: a 20-ticker subset of the S&P 500
symbols = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'NVDA', 'META', 'TSLA', 'JPM', 'V', 'JNJ',
    'WMT', 'PG', 'UNH', 'MA', 'HD', 'DIS', 'BAC', 'ADBE', 'CRM', 'NFLX',
]

# Pull two years of history and report how much data came back.
universe = get_universe_data(symbols, period='2y')
loaded_rows = len(universe)
loaded_stocks = universe['symbol'].nunique()
print(f"Loaded {loaded_rows} rows for {loaded_stocks} stocks")

Step 2: Factor Calculation

python
def calculate_factors(df: pd.DataFrame) -> pd.DataFrame:
    """Calculate momentum, reversal, volatility and volume factors per symbol.

    All windows are counted in rows within each symbol, so the input is
    expected to hold one row per trading day per symbol in chronological
    order (which is how ``pd.concat`` of per-ticker frames arrives here).

    Args:
        df: Long-format frame with at least ``symbol``, ``close`` and
            ``volume`` columns. It is not modified.

    Returns:
        A copy of ``df`` with four extra columns, restricted to rows where
        every column is populated:

        * ``momentum_12m``: 252-row return, lagged by 21 rows.
        * ``reversal_1m``: 21-row return.
        * ``volatility``: 60-row standard deviation of 1-row returns,
          scaled by ``sqrt(252)``.
        * ``volume_momentum``: 20-row mean volume over 60-row mean volume.

        An empty input yields an empty frame that still has these columns.
    """
    factor_columns = [
        'momentum_12m', 'reversal_1m', 'volatility', 'volume_momentum',
    ]
    result = df.copy()

    if result.empty:
        # Without any symbol there is nothing to transform, but callers
        # still look the factor columns up by name.
        for column in factor_columns:
            result[column] = pd.Series(index=result.index, dtype='float64')
        return result.dropna()

    # One grouping pass instead of re-scanning the frame with a boolean
    # mask for every symbol; transform() keeps each value on its own row.
    by_symbol = result.groupby('symbol', sort=False)
    close = by_symbol['close']
    volume = by_symbol['volume']

    # Momentum: 12-month return (skip last month)
    result['momentum_12m'] = close.transform(
        lambda prices: prices.pct_change(252).shift(21)
    )

    # Short-term reversal: 1-month return
    result['reversal_1m'] = close.transform(
        lambda prices: prices.pct_change(21)
    )

    # Volatility: 60-day realized vol, annualized with sqrt(252)
    result['volatility'] = close.transform(
        lambda prices: prices.pct_change().rolling(60).std() * np.sqrt(252)
    )

    # Volume momentum: 20d vs 60d average volume
    result['volume_momentum'] = volume.transform(
        lambda vol: vol.rolling(20).mean() / vol.rolling(60).mean()
    )

    # Warm-up rows (incomplete look-back windows) contain NaN and are removed.
    return result.dropna()

# Attach the factor columns to the downloaded universe (warm-up rows dropped).
factors = calculate_factors(df=universe)

Step 3: Portfolio Construction

python
def _resolve_rebalance_offset(freq):
    """Turn a frequency alias into a pandas offset across pandas versions."""
    from pandas.tseries.frequencies import to_offset

    if not isinstance(freq, str):
        return freq

    # pandas 2.2 renamed the period-end aliases; try the new spelling first
    # and fall back to the one the caller supplied on older versions.
    renamed = {
        'M': 'ME', 'Q': 'QE', 'Y': 'YE', 'A': 'YE',
        'BM': 'BME', 'BQ': 'BQE', 'BY': 'BYE', 'BA': 'BYE',
    }
    preferred = renamed.get(freq, freq)
    try:
        return to_offset(preferred)
    except ValueError:
        return to_offset(freq)


def _mean_holding_return(window, members):
    """Average first-to-last close return of ``members`` inside ``window``."""
    held = window.loc[window['symbol'].isin(members)]
    return (
        held.groupby('symbol')['close']
        .apply(lambda closes: closes.iloc[-1] / closes.iloc[0] - 1)
        .mean()
    )


def build_factor_portfolio(
    df: pd.DataFrame,
    factor: str,
    n_long: int = 5,
    n_short: int = 5,
    rebalance_freq: str = 'M',
) -> pd.DataFrame:
    """Build an equal-weight long-short portfolio from a factor ranking.

    On every rebalance date the latest factor value of each symbol is
    ranked; the ``n_long`` highest values are bought and the ``n_short``
    lowest are sold. The position is held until the next rebalance date,
    so the holding period always matches ``rebalance_freq``.

    Each symbol's period return is measured from its first close inside
    the holding window to its last close inside it.

    Args:
        df: Long-format frame with ``Date``, ``symbol``, ``close`` and the
            ``factor`` column. Row order does not matter; it is not modified.
        factor: Name of the column to rank on.
        n_long: Number of symbols in the long leg.
        n_short: Number of symbols in the short leg.
        rebalance_freq: pandas frequency alias for the rebalance schedule.
            Both the legacy (``'M'``) and current (``'ME'``) spellings of
            the period-end aliases are accepted.

    Returns:
        One row per completed holding period with the columns ``date``
        (rebalance date), ``long_return``, ``short_return``, ``ls_return``
        (long minus short), ``longs`` and ``shorts`` (lists of symbols).
        The columns are present even when there are no rows.
    """
    columns = [
        'date', 'long_return', 'short_return',
        'ls_return', 'longs', 'shorts',
    ]
    if df.empty:
        return pd.DataFrame(columns=columns)

    # "Latest value" and "first/last close" below rely on chronological
    # order; a stable sort keeps same-day rows in their original order.
    df = df.sort_values('Date', kind='mergesort')
    row_dates = df['Date']

    # Bin labels of the rebalance schedule (empty bins are included).
    offset = _resolve_rebalance_offset(rebalance_freq)
    dates = df.groupby(pd.Grouper(key='Date', freq=offset)).size().index

    portfolio_returns = []

    # Pair every rebalance date with the following one: that interval is
    # the holding period. The final date has no successor and is skipped.
    for date, next_date in zip(dates[:-1], dates[1:]):
        # Latest factor value per symbol known on the rebalance date
        snapshot = (
            df.loc[row_dates <= date].groupby('symbol')[factor].last()
        )
        if len(snapshot) < n_long + n_short:
            continue

        # Long the highest factor values, short the lowest
        longs = snapshot.nlargest(n_long).index.tolist()
        shorts = snapshot.nsmallest(n_short).index.tolist()

        holding = df.loc[(row_dates > date) & (row_dates <= next_date)]
        if holding.empty:
            continue

        long_ret = _mean_holding_return(holding, longs)
        short_ret = _mean_holding_return(holding, shorts)

        portfolio_returns.append({
            'date': date,
            'long_return': long_ret,
            'short_return': short_ret,
            'ls_return': long_ret - short_ret,
            'longs': longs,
            'shorts': shorts,
        })

    return pd.DataFrame(portfolio_returns, columns=columns)

# Backtest momentum factor: long the 5 strongest names, short the 5 weakest
momentum_results = build_factor_portfolio(
    factors, 'momentum_12m', n_long=5, n_short=5
)

if momentum_results.empty:
    # Happens when no holding period could be evaluated, e.g. the price
    # history is shorter than the factor warm-up. Report it instead of
    # failing on a missing 'ls_return' column.
    total_return = float('nan')
    sharpe = float('nan')
    print("Momentum L/S: no completed rebalance periods to evaluate")
else:
    ls_returns = momentum_results['ls_return']

    # Compound the per-period long-short returns.
    total_return = (1 + ls_returns).prod() - 1

    # sqrt(12) annualizes a monthly series, matching the default monthly
    # rebalance of build_factor_portfolio.
    sharpe = ls_returns.mean() / ls_returns.std() * np.sqrt(12)

    print(f"Momentum L/S Total Return: {total_return:.1%}")
    print(f"Momentum L/S Sharpe: {sharpe:.2f}")

Factor Performance Summary

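| Factor | Typical Sharpe | Best For |
| --- | --- | --- |
| Momentum (12-1) | 0.3-0.7 | Trend-following |
| Short-term Reversal | 0.2-0.5 | Mean reversion |
| Low Volatility | 0.3-0.6 | Defensive |
| Volume Momentum | 0.1-0.4 | Liquidity signals |

Note: `build_factor_portfolio` goes long the highest factor values, so negate `reversal_1m` and `volatility` before ranking to test the reversal and low-volatility effects.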

This factor framework forms the analytical backbone behind strategy verification on ClawDUX, where uploaded strategies are evaluated against standard factor benchmarks to ensure genuine alpha generation.

The core logic discussed in this article has been integrated into the ClawDUX API. Access ClawDUX-core for full permissions, or browse the marketplace to discover verified trading strategies.

#yfinance #pandas #backtesting #factor-investing #stocks

Related Articles