from __future__ import annotations
import logging
from typing import NamedTuple, TYPE_CHECKING
import numpy as np
from biogeme.floating_point import SQRT_EPS
from biogeme.function_output import FunctionOutput, NamedFunctionOutput
from tabulate import tabulate
if TYPE_CHECKING:
from biogeme.calculator import CallableExpression
logger = logging.getLogger(__name__)
class CheckDerivativesResults(NamedTuple):
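    """Outcome of a derivative check: the function value, the analytical
    gradient and hessian, their finite difference approximations, and the
    differences (errors) between the two."""
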
function: float
analytical_gradient: np.ndarray
analytical_hessian: np.ndarray
finite_differences_gradient: np.ndarray
finite_differences_hessian: np.ndarray
errors_gradient: np.ndarray
errors_hessian: np.ndarray
def findiff_g(the_function: CallableExpression, x: np.ndarray) -> np.ndarray:
"""Calculates the gradient of a function :math:`f` using finite differences
:param the_function: A function object that takes a vector as an
argument, and returns a tuple. The first
element of the tuple is the value of the
function :math:`f`. The other elements are not
used.
:param x: argument of the function
:return: numpy vector, same dimension as x, containing the gradient
calculated by finite differences.
"""
    x = x.astype(float)
    tau = SQRT_EPS
    n = len(x)
    g = np.zeros(n)
    f = the_function(x, gradient=False, hessian=False, bhhh=False).function
    for i in range(n):
        xi = x.item(i)
        xp = x.copy()
        # Scale the step with the magnitude of x[i] when |x[i]| >= 1;
        # otherwise use the fixed step tau, with the sign of x[i].
        if abs(xi) >= 1:
            s = tau * xi
        elif xi >= 0:
            s = tau
        else:
            s = -tau
        xp[i] = xi + s
        fp = the_function(xp, gradient=False, hessian=False, bhhh=False).function
        # Forward difference approximation of the i-th partial derivative.
        g[i] = (fp - f) / s
    return g
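
# A minimal usage sketch (illustrative only, not part of the module). Any
# callable following the CallableExpression protocol can be checked; the
# stand-in below is hypothetical and only exposes the attributes actually
# read by findiff_g and findiff_h (``function`` and ``gradient``):
#
#     from collections import namedtuple
#
#     _Out = namedtuple('_Out', 'function gradient hessian bhhh')
#
#     def _toy(x, gradient=False, hessian=False, bhhh=False):
#         value = float(x[0] ** 2 + 3.0 * x[1])
#         grad = np.array([2.0 * x[0], 3.0]) if gradient else None
#         return _Out(value, grad, None, None)
#
#     findiff_g(_toy, np.array([1.0, 2.0]))  # close to the exact [2.0, 3.0]
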
def findiff_h(
the_function: CallableExpression,
x: np.ndarray,
) -> np.ndarray:
"""Calculates the hessian of a function :math:`f` using finite differences
:param the_function: A function object that takes a vector as an
argument, and returns a tuple. The first
element of the tuple is the value of the
function :math:`f`, and the second is the
gradient of the function. The other elements
are not used.
:param x: argument of the function
:return: numpy matrix containing the hessian calculated by
finite differences.
"""
    tau = SQRT_EPS
    n = len(x)
    h = np.zeros((n, n))
    # Analytical gradient at the reference point x.
    the_function_output: FunctionOutput | NamedFunctionOutput = the_function(
        x, gradient=True, hessian=False, bhhh=False
    )
    if isinstance(the_function_output, NamedFunctionOutput):
        the_function_output = the_function_output.function_output
    g = the_function_output.gradient
    eye = np.eye(n)
    for i in range(n):
        xi = x.item(i)
        # Scale the step with the magnitude of x[i] when |x[i]| >= 1;
        # otherwise use the fixed step tau, with the sign of x[i].
        if abs(xi) >= 1:
            s = tau * xi
        elif xi >= 0:
            s = tau
        else:
            s = -tau
        ei = eye[i]
        the_function_output = the_function(
            x + s * ei, gradient=True, hessian=False, bhhh=False
        )
        if isinstance(the_function_output, NamedFunctionOutput):
            the_function_output = the_function_output.function_output
        gp = the_function_output.gradient
        # Forward difference of the gradient gives column i of the hessian.
        h[:, i] = (gp - g).flatten() / s
    return h
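
# Continuing the sketch above (illustrative only): the same hypothetical
# stand-in can be used to approximate the hessian, which is exactly
# [[2, 0], [0, 0]] for that toy function:
#
#     findiff_h(_toy, np.array([1.0, 2.0]))  # close to [[2.0, 0.0], [0.0, 0.0]]
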
def check_derivatives(
the_function: CallableExpression,
x: np.ndarray,
names: list[str] | None = None,
    logg: bool = False,
) -> CheckDerivativesResults:
"""Verifies the analytical derivatives of a function by comparing
them with finite difference approximations.
:param the_function: A function object that takes a vector as an argument,
and returns a tuple:
- The first element of the tuple is the value of the
function :math:`f`,
- the second is the gradient of the function,
- the third is the hessian.
:param x: arguments of the function
:param names: the names of the entries of x (for reporting).
:param logg: if True, messages will be displayed.
:return: tuple f, g, h, gdiff, hdiff where
- f is the value of the function at x,
- g is the analytical gradient,
- h is the analytical hessian,
- gdiff is the difference between the analytical gradient
and the finite difference approximation
- hdiff is the difference between the analytical hessian
and the finite difference approximation
"""
x = np.array(x, dtype=float)
the_function_output: FunctionOutput = the_function(
x, gradient=True, hessian=True, bhhh=False
)
g_num = findiff_g(the_function, x)
gdiff = the_function_output.gradient - g_num
if logg:
headers = ['x', 'Gradient', 'FinDiff', 'Difference']
if names is None:
names = [f'x[{i}]' for i in range(len(x))]
rows = [
[
f'{names[k]}',
f'{the_function_output.gradient[k]:+E}',
f'{g_num[k]:+E}',
f'{v:+E}',
]
for k, v in enumerate(gdiff)
]
logger.info('Comparing first derivatives')
logger.info(tabulate(rows, headers=headers, tablefmt='plain'))
h_num = findiff_h(the_function, x)
hdiff = the_function_output.hessian - h_num
if logg:
headers = ['Row', 'Col', 'Hessian', 'FinDiff', 'Difference']
rows = [
[
names[row],
names[col],
f'{the_function_output.hessian[row, col]:+E}',
f'{h_num[row, col]:+E}',
f'{hdiff[row, col]:+E}',
]
for col in range(len(hdiff))
for row in range(len(hdiff))
]
logger.info('Comparing second derivatives')
logger.info(tabulate(rows, headers=headers, tablefmt='plain'))
return CheckDerivativesResults(
function=the_function_output.function,
analytical_gradient=the_function_output.gradient,
analytical_hessian=the_function_output.hessian,
finite_differences_gradient=g_num,
finite_differences_hessian=h_num,
errors_gradient=gdiff,
errors_hessian=hdiff,
)
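

if __name__ == '__main__':
    # Self-contained demonstration (a sketch, not part of the library API):
    # check the derivatives of a toy quadratic function. The callable below
    # is a hypothetical stand-in for a CallableExpression; it only needs to
    # expose the attributes read above (function, gradient, hessian).
    from collections import namedtuple

    _Output = namedtuple('_Output', 'function gradient hessian bhhh')

    def _toy_function(x, gradient=False, hessian=False, bhhh=False):
        # f(x) = x0^2 + x0*x1 + 2*x1^2
        value = float(x[0] ** 2 + x[0] * x[1] + 2.0 * x[1] ** 2)
        grad = np.array([2.0 * x[0] + x[1], x[0] + 4.0 * x[1]]) if gradient else None
        hess = np.array([[2.0, 1.0], [1.0, 4.0]]) if hessian else None
        return _Output(value, grad, hess, None)

    logging.basicConfig(level=logging.INFO)
    results = check_derivatives(
        _toy_function, np.array([1.0, -1.0]), names=['x0', 'x1'], logg=True
    )
    print('Max gradient error:', np.max(np.abs(results.errors_gradient)))
    print('Max hessian error:', np.max(np.abs(results.errors_hessian)))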