Source code for mktlib.data._gbm

from __future__ import annotations

import math

import polars as pl
from polars_sdist import sample_normal


def _gbm_price_expr(log_base: float, mu_dt: float, sigma_sqrt_dt: float) -> pl.Expr:
    """GBM price expression. Assumes columns ``step`` and ``z`` exist."""
    return (
        pl.when(pl.col("step") == 0)
        .then(0.0)
        .otherwise(pl.col("z") * sigma_sqrt_dt + mu_dt)
        .cum_sum()
        .add(log_base)
        .exp()
        .alias("price")
    )


[docs] def geometric_brownian_motion( n: int, base_price: float = 100.0, drift: float = 0.0, volatility: float = 0.01, dt: float = 1 / 252, seed: int | None = None, ) -> pl.DataFrame: r"""Geometric Brownian motion price path. Simulates the SDE :math:`dS = \mu S \, dt + \sigma S \, dW` using the exact log-space solution derived via Itô's lemma. Parameters ---------- n Number of time steps to generate. base_price Initial price :math:`S_0`. drift Annualised expected return :math:`\mu`. volatility Annualised volatility :math:`\sigma`. dt Time step size in annualised units. Defaults to ``1/252`` (one trading day). For sub-daily granularity, divide further — e.g. ``1/(252*6.5*3600)`` for one-second ticks during US equity hours. seed RNG seed for reproducibility. Returns ------- pl.DataFrame Columns ``step`` (int) and ``price`` (float). Notes ----- **Why log-space?** The SDE is multiplicative in *S*, so direct Euler-Maruyama discretisation introduces bias. Applying Itô's lemma to :math:`X = \ln S` yields an additive SDE:: dX = (μ − ½σ²) dt + σ dW whose discrete-time exact solution is:: ln S[i] = ln S[i−1] + (μ − ½σ²)·dt + σ·√dt·Z[i], Z ~ N(0,1) The implementation draws all *Z* values in one ``sample_normal`` call, computes log-increments, and uses ``cum_sum`` + ``exp`` to recover prices — no Python loop required. **Itô correction (−½σ²).** Because :math:`\exp` is convex, Jensen's inequality gives :math:`E[\exp(X)] > \exp(E[X])`. The −½σ² drift adjustment compensates for this so that the expected price path satisfies :math:`E[S(t)] = S_0 \exp(\mu t)`. **Distributional properties.** Log-returns :math:`\ln(S_{i}/S_{i-1})` are i.i.d. normal; prices :math:`S_i` are lognormally distributed. """ if n < 1: raise ValueError("n must be >= 1") log_base = math.log(base_price) mu_dt = (drift - 0.5 * volatility**2) * dt sigma_sqrt_dt = volatility * math.sqrt(dt) z = sample_normal(n, seed=seed) step = pl.arange(0, n, eager=True).alias("step") return ( pl.DataFrame({"step": step, "z": z}) .with_columns(_gbm_price_expr(log_base, mu_dt, sigma_sqrt_dt)) .select("step", "price") )