Source code for biogeme.expressions.panel_log_likelihood
from __future__ import annotations
import logging
import numpy as np
import pandas as pd
import pymc as pm
import pytensor.tensor as pt
from biogeme.constants import LOG_LIKE
from biogeme.database import ContiguousPanelMap, build_contiguous_panel_map
from .base_expressions import Expression, ExpressionOrNumeric
from .bayesian import Dimension, PymcModelBuilderType
from .individual_draws import individual_draws
logger = logging.getLogger(__name__)
[docs]
class PanelLogLikelihood(Expression):
    """
    Aggregate per-observation **log-probabilities** into per-individual log-likelihoods.

    This expression assumes its child evaluates, for a given dataframe, to a 1-D
    tensor of shape ``(obs,)`` containing the **log-probability** of each
    observation. It then sums these log-probabilities within each individual
    (panel) and returns a 1-D tensor of shape ``(indiv,)``.

    Notes
    -----
    - Intended for Bayesian estimation with PyMC, which operates in log-space.
    - The panel/individual id column name is read from ``self.panel_id`` and
      forwarded as ``panel_column`` to ``build_contiguous_panel_map``. The
      constructor initialises it to ``None``; how ``None`` is interpreted is
      decided by ``build_contiguous_panel_map``.
    - A coord named :data:`Dimension.INDIVIDUALS` is created on the active PyMC
      model if it does not yet exist, to label the individuals' axis.

    :param child: Expression that returns per-observation **log-probabilities**
        when evaluated by the PyMC builder.
    """

    def __init__(self, child: ExpressionOrNumeric) -> None:
        super().__init__()
        if isinstance(child, Expression):
            self.child: Expression = child
        else:
            # Numeric constants are allowed by the Expression API; they will
            # typically be wrapped upstream. We keep type hints explicit.
            self.child = child  # type: ignore[assignment]
        # NOTE(review): presumably flags the draws found in the child as
        # per-individual draws -- confirm against ``individual_draws``.
        individual_draws(expr=self.child)
        self.children.append(self.child)
        # Name of the dataframe column identifying the individual; assigned by
        # the caller after construction.
        self.panel_id = None

    def deep_flat_copy(self) -> "PanelLogLikelihood":
        """
        Return a deep/flat copy of the expression.

        The ``panel_id`` of the original is carried over, so that the copy
        aggregates observations with the same panel column.

        :return: A structurally independent copy whose child is a deep/flat copy.
        """
        copy_child: Expression = self.child.deep_flat_copy()
        duplicate = PanelLogLikelihood(child=copy_child)
        duplicate.panel_id = self.panel_id
        return duplicate

    def __str__(self) -> str:
        """Human-readable representation."""
        return f"PanelLogLikelihood({self.child})"

    def __repr__(self) -> str:
        """Unambiguous representation for debugging."""
        return f"PanelLogLikelihood({repr(self.child)})"

    def recursive_construct_pymc_model_builder(self) -> PymcModelBuilderType:
        """
        Build a PyMC evaluation closure that returns per-individual log-likelihoods,
        using a precomputed ContiguousPanelMap to aggregate rows belonging to the
        same individual via cumulative-sum + index differences.

        The closure must be called inside an active PyMC model context: it
        registers two ``pm.Deterministic`` variables (``f"{LOG_LIKE}_obs"`` and
        ``f"{LOG_LIKE}_panel"``) and, if needed, the
        :data:`Dimension.INDIVIDUALS` coord.

        :return: A function mapping a dataframe to a 1-D tensor with one
            log-likelihood value per individual.
        """
        child_builder: PymcModelBuilderType = (
            self.child.recursive_construct_pymc_model_builder()
        )

        def builder(dataframe: pd.DataFrame) -> pt.TensorVariable:
            # Per-observation log-likelihood (shape: (N_obs,))
            logp_obs: pt.TensorVariable = child_builder(dataframe=dataframe)
            pm.Deterministic(f"{LOG_LIKE}_obs", logp_obs, dims=Dimension.OBS)

            # Build the panel map; validate contiguity
            panel_map: ContiguousPanelMap = build_contiguous_panel_map(
                dataframe, panel_column=self.panel_id
            )

            # The individuals' axis must be a known coord before a variable
            # can be registered with ``dims=Dimension.INDIVIDUALS``. Create it
            # only when absent so that a coord supplied by the caller is
            # never overridden.
            model = pm.modelcontext(None)
            if Dimension.INDIVIDUALS not in model.coords:
                model.add_coord(Dimension.INDIVIDUALS, values=panel_map.unique_ids)

            # Segment-sum via cumsum + differences at indptr
            # indptr is length K+1 with [start_0, start_1, ..., N]
            indptr_pt = pt.as_tensor_variable(
                panel_map.indptr.astype(np.int64)
            )  # (K+1,)

            # cumsum over observations; pad a leading zero
            # NOTE(review): a -inf log-probability makes every later entry of
            # the cumulative sum -inf, so all subsequent individuals evaluate
            # to (-inf) - (-inf) = NaN instead of their own value.
            s = pt.cumsum(logp_obs)  # (N_obs,)
            # make a scalar zero with the same dtype as logp_obs
            zero = pt.zeros_like(logp_obs[:1]).sum()  # scalar 0.0, dtype matches
            s_pad = pt.concatenate([zero[None], s])  # (N_obs + 1,)

            # ll_indiv[k] = s_pad[indptr[k+1]] - s_pad[indptr[k]]
            ll_indiv = s_pad[indptr_pt[1:]] - s_pad[indptr_pt[:-1]]  # (K,)
            pm.Deterministic(f"{LOG_LIKE}_panel", ll_indiv, dims=Dimension.INDIVIDUALS)
            return ll_indiv

        return builder