Source code for biogeme.tools.derivatives

from __future__ import annotations

import logging
from typing import NamedTuple, TYPE_CHECKING

import numpy as np
from biogeme.floating_point import SQRT_EPS
from biogeme.function_output import FunctionOutput, NamedFunctionOutput
from tabulate import tabulate

if TYPE_CHECKING:
    from biogeme.calculator import CallableExpression

logger = logging.getLogger(__name__)


[docs] class CheckDerivativesResults(NamedTuple): function: float analytical_gradient: np.ndarray analytical_hessian: np.ndarray finite_differences_gradient: np.ndarray finite_differences_hessian: np.ndarray errors_gradient: np.ndarray errors_hessian: np.ndarray
[docs] def findiff_g(the_function: CallableExpression, x: np.ndarray) -> np.ndarray: """Calculates the gradient of a function :math:`f` using finite differences :param the_function: A function object that takes a vector as an argument, and returns a tuple. The first element of the tuple is the value of the function :math:`f`. The other elements are not used. :param x: argument of the function :return: numpy vector, same dimension as x, containing the gradient calculated by finite differences. """ x = x.astype(float) tau = SQRT_EPS n = len(x) g = np.zeros(n) f = the_function(x, gradient=False, hessian=False, bhhh=False).function for i in range(n): xi = x.item(i) xp = x.copy() if abs(xi) >= 1: s = tau * xi elif xi >= 0: s = tau else: s = -tau xp[i] = xi + s fp = the_function(xp, gradient=False, hessian=False, bhhh=False).function g[i] = (fp - f) / s return g
[docs] def findiff_h( the_function: CallableExpression, x: np.ndarray, ) -> np.ndarray: """Calculates the hessian of a function :math:`f` using finite differences :param the_function: A function object that takes a vector as an argument, and returns a tuple. The first element of the tuple is the value of the function :math:`f`, and the second is the gradient of the function. The other elements are not used. :param x: argument of the function :return: numpy matrix containing the hessian calculated by finite differences. """ tau = SQRT_EPS n = len(x) h = np.zeros((n, n)) the_function_output: FunctionOutput | NamedFunctionOutput = the_function( x, gradient=True, hessian=False, bhhh=False ) if isinstance(the_function_output, NamedFunctionOutput): the_function_output = the_function_output.function_output g = the_function_output.gradient eye = np.eye(n, n) for i in range(n): xi = x.item(i) if abs(xi) >= 1: s = tau * xi elif xi >= 0: s = tau else: s = -tau ei = eye[i] the_function_output: FunctionOutput | NamedFunctionOutput = the_function( x + s * ei, gradient=True, hessian=False, bhhh=False ) if isinstance(the_function_output, NamedFunctionOutput): the_function_output = the_function_output.function_output gp = the_function_output.gradient h[:, i] = (gp - g).flatten() / s return h
[docs] def check_derivatives( the_function: CallableExpression, x: np.ndarray, names: list[str] | None = None, logg: bool | None = False, ) -> CheckDerivativesResults: """Verifies the analytical derivatives of a function by comparing them with finite difference approximations. :param the_function: A function object that takes a vector as an argument, and returns a tuple: - The first element of the tuple is the value of the function :math:`f`, - the second is the gradient of the function, - the third is the hessian. :param x: arguments of the function :param names: the names of the entries of x (for reporting). :param logg: if True, messages will be displayed. :return: tuple f, g, h, gdiff, hdiff where - f is the value of the function at x, - g is the analytical gradient, - h is the analytical hessian, - gdiff is the difference between the analytical gradient and the finite difference approximation - hdiff is the difference between the analytical hessian and the finite difference approximation """ x = np.array(x, dtype=float) the_function_output: FunctionOutput = the_function( x, gradient=True, hessian=True, bhhh=False ) # if isinstance(the_function_output, NamedFunctionOutput): # the_function_output = the_function_output.function_output g_num = findiff_g(the_function, x) gdiff = the_function_output.gradient - g_num if logg: headers = ['x', 'Gradient', 'FinDiff', 'Difference'] if names is None: names = [f'x[{i}]' for i in range(len(x))] rows = [ [ f'{names[k]}', f'{the_function_output.gradient[k]:+E}', f'{g_num[k]:+E}', f'{v:+E}', ] for k, v in enumerate(gdiff) ] logger.info('Comparing first derivatives') logger.info(tabulate(rows, headers=headers, tablefmt='plain')) h_num = findiff_h(the_function, x) hdiff = the_function_output.hessian - h_num if logg: headers = ['Row', 'Col', 'Hessian', 'FinDiff', 'Difference'] rows = [ [ names[row], names[col], f'{the_function_output.hessian[row, col]:+E}', f'{h_num[row, col]:+E}', f'{hdiff[row, col]:+E}', ] for col in range(len(hdiff)) for row in range(len(hdiff)) ] logger.info('Comparing second derivatives') logger.info(tabulate(rows, headers=headers, tablefmt='plain')) return CheckDerivativesResults( function=the_function_output.function, analytical_gradient=the_function_output.gradient, analytical_hessian=the_function_output.hessian, finite_differences_gradient=g_num, finite_differences_hessian=h_num, errors_gradient=gdiff, errors_hessian=hdiff, )