# Source code for biogeme.algorithms

"""
Optimization algorithms.

:author: Michel Bierlaire
:date: Sun Apr  5 16:48:54 2020

"""

# There seems to be a bug in PyLint.
# pylint: disable=invalid-unary-operand-type, no-member

# Too constraining
# pylint: disable=invalid-name
# pylint: disable=too-many-lines, too-many-locals
# pylint: disable=too-many-arguments, too-many-branches
# pylint: disable=too-many-statements, too-many-return-statements
# pylint: disable=bare-except

from abc import abstractmethod
import numpy as np
import scipy.linalg as la
import biogeme.exceptions as excep
import biogeme.messaging as msg

logger = msg.bioMessage()

[docs]class functionToMinimize:
"""This is an abstract class. The actual function to minimize
must be implemented in a concrete class deriving from this one.

"""

[docs]    @abstractmethod
def setVariables(self, x):

"""Set the values of the variables for which the function
has to be calculated.

:param x: values
:type x: numpy.array
"""

[docs]    @abstractmethod
def f(self, batch=None):
"""Calculate the value of the function

:param batch: for data driven functions (such as a log
likelikood function), it is possible to
approximate the value of the function using a
sample of the data called a batch. This argument
is a value between 0 and 1 representing the
percentage of the data that should be used for
thre random batch. If None, the full data set is
used. Default: None pass
:type batch: float

:return: value of the function
:rtype: float
"""

[docs]    @abstractmethod
def f_g(self, batch=None):
"""Calculate the value of the function and the gradient

:param batch: for data driven functions (such as a log
likelikood function), it is possible to
approximate the value of the function using a
sample of the data called a batch. This argument
is a value between 0 and 1 representing the
percentage of the data that should be used for
the random batch. If None, the full data set is
used. Default: None pass
:type batch: float

:return: value of the function and the gradient
:rtype: tuple float, numpy.array
"""

[docs]    @abstractmethod
def f_g_h(self, batch=None):
"""Calculate the value of the function, the gradient and the Hessian

:param batch: for data driven functions (such as a log
likelikood function), it is possible to
approximate the value of the function using a
sample of the data called a batch. This argument
is a value between 0 and 1 representing the
percentage of the data that should be used for
the random batch. If None, the full data set is
used. Default: None pass
:type batch: float

:return: value of the function, the gradient and the Hessian
:rtype: tuple float, numpy.array, numpy.array
"""

[docs]    @abstractmethod
def f_g_bhhh(self, batch=None):
"""Calculate the value of the function, the gradient and
the BHHH matrix

:param batch: for data driven functions (such as a log
likelikood function), it is possible to
approximate the value of the function using a
sample of the data called a batch. This argument
is a value between 0 and 1 representing the
percentage of the data that should be used for
the random batch. If None, the full data set is
used. Default: None pass
:type batch: float

:return: value of the function, the gradient and the BHHH
:rtype: tuple float, numpy.array, numpy.array
"""

[docs]class bioBounds:
"""This class is designed for the management of simple bound constraints"""

[docs]    def __init__(self, b):
"""

:param b: list of tuples (ell,u) containing the lower and
upper bounds for each free parameter.

:type b: list(tuple)

:raises biogeme.exceptions.biogemeError: if the bounds are incompatible

"""

def noneToMinusInfinity(x):
if x is None:
return -np.inf

return x

def noneToPlusInfinity(x):
if x is None:
return np.inf

return x

self.bounds = b
"""list of tuples (ell,u) containing the lower and upper bounds for
each free parameter.
"""

self.n = len(b) #: number of optimization variables

self.lowerBounds = [noneToMinusInfinity(bb[0]) for bb in b]
""" List of lower bounds """

self.upperBounds = [noneToPlusInfinity(bb[1]) for bb in b]
""" List of upper bounds """

wrongBounds = np.array(
[
bb[0] > bb[1]
for bb in b
if bb[0] is not None and bb[1] is not None
]
)
if wrongBounds.any():
errorMsg = (
f'Incompatible bounds for indice(s) '
f'{np.nonzero(wrongBounds)[0]}: '
f'{[b[i] for i in np.nonzero(wrongBounds)[0]]}'
)
raise excep.biogemeError(errorMsg)

def __str__(self):
return self.bounds.__str__()

def __repr__(self):
return self.bounds.__repr__()

[docs]    def project(self, x):

"""Project a point onto the feasible domain defined by the bounds.

:param x: point to project
:type x: numpy.array

:return: projected point
:rtype: numpy.array

:raises biogeme.exceptions.biogemeError: if the dimensions
are inconsistent
"""

if len(x) != self.n:
raise excep.biogemeError(
f'Incompatible size: {len(x)}' f' and {self.n}'
)

y = x
for i in range(self.n):
if self.lowerBounds[i] is not None and x[i] < self.lowerBounds[i]:
y[i] = self.lowerBounds[i]
if self.upperBounds[i] is not None and x[i] > self.upperBounds[i]:
y[i] = self.upperBounds[i]
return y

[docs]    def intersect(self, otherBounds):
"""
Create a bounds object representing the intersection of two regions.

:param otherBounds: other bound object that must be intersected.
:type otherBounds: class bioBounds

:return:  bound object, intersection of the two.
:rtype: class bioBounds

:raises biogeme.exceptions.biogemeError: if the dimensions
are inconsistent
"""

if otherBounds.n != self.n:
raise excep.biogemeError(
f'Incompatible size: {otherBounds.n} ' f'and {self.n}'
)

newBounds = [
(
np.maximum(self.lowerBounds[i], otherBounds.lowerBounds[i]),
np.minimum(self.upperBounds[i], otherBounds.upperBounds[i]),
)
for i in range(self.n)
]

# Note that an exception with be raised at the creation of the
# new 'bioBounds' object is the bounds are incompatible.

result = bioBounds(newBounds)
return result

[docs]    def intersectionWithTrustRegion(self, x, delta):
"""Create a bioBounds object representing the intersection
between the feasible domain and the trust region.

:param x: center of the trust region
:type x: numpy.array

:param delta: radius of the tust region (infinity norm)
:type delta: float

:return: intersection between the feasible region and the trus region
:rtype: class bioBounds

:raises biogeme.exceptions.biogemeError: if the dimensions
are inconsistent
"""
if len(x) != self.n:
raise excep.biogemeError(
f'Incompatible size: {len(x)}' f' and {self.n}'
)

trustRegion = bioBounds([(xk - delta, xk + delta) for xk in x])
return self.intersect(trustRegion)

[docs]    def subspace(self, selectedVariables):
"""Generate a bioBounds object for selected variables

:param selectedVariables: boolean vector. If an entry is True,
the corresponding variables is considered.
:type selectedVariables: numpy.array(bool)

:return: bound object
:rtype: class bioBounds

:raises biogeme.exceptions.biogemeError: if the dimensions
are inconsistent
"""
if len(selectedVariables) != self.n:
raise excep.biogemeError(
f'Incompatible size: ' f'{len(selectedVariables)} and {self.n}'
)

return bioBounds(
[i for i, j in zip(self.bounds, selectedVariables) if j]
)

[docs]    def feasible(self, x):
"""Check if point verifies the bound constraints

:param x: point to project
:type x: numpy.array

:return: True if x is feasible, False otherwise.
:rtype: bool

:raises biogeme.exceptions.biogemeError: if the dimensions
are inconsistent
"""
if len(x) != self.n:
raise excep.biogemeError(
f'Incompatible size: ' f'{len(x)} and {self.n}'
)
for i in range(self.n):
if (
self.lowerBounds[i] is not None
and self.lowerBounds[i] - x[i] > np.finfo(float).eps
):
return False
if (
self.upperBounds[i] is not None
and x[i] - self.upperBounds[i] > np.finfo(float).eps
):
return False
return True

[docs]    def maximumStep(self, x, d):
"""Calculates the maximum step that can be performed
along a direction while staying feasible.

:param x: reference point
:type x: numpy.array

:param d: direction
:type d: numpy.array

:return: the largest alpha such that x + alpha * d is feasible
and the list of indices achieving this value.
:rtype: float, int

:raises biogeme.exceptions.biogemeError: if the point is infeasible

"""

if not self.feasible(x):
raise excep.biogemeError(f'Infeasible point: {x}')

alpha = np.array(
[
(self.upperBounds[i] - x[i]) / d[i]
if d[i] > np.finfo(float).eps
else (self.lowerBounds[i] - x[i]) / d[i]
if d[i] < -np.finfo(float).eps
else np.inf
for i in range(self.n)
]
)
m = np.amin(alpha)
return m, np.where(alpha == m)[0]

[docs]    def activity(self, x, epsilon=np.finfo(float).eps):

"""Determines the activity status of each variable.

:param x: point for which the activity must be determined.
:type x: numpy.array

:param epsilon: a bound is considered active if the distance
to it is less rhan epsilon.
:type epsilon: float

:return: a vector, same length as x, where each entry reports
the activity of the corresponding variable:

- 0 if no bound is active
- -1 if the lower bound is active
- 1 if the upper bound is active

:rtype: numpy.array

:raises biogeme.exceptions.biogemeError: if the vector x is
not feasible

:raises biogeme.exceptions.biogemeError: if the dimensions
of x and bounds do not match.

"""
if len(x) != self.n:
raise excep.biogemeError(
f'Incompatible size: {len(x)}' f' and {self.n}'
)
if not self.feasible(x):
raise excep.biogemeError(
f'{x} is not feasible for the ' f'bounds {self.bounds}'
)

activity = np.zeros_like(x, dtype=int)
for i in range(self.n):
if (
self.lowerBounds[i] != -np.inf
and x[i] - self.lowerBounds[i] <= epsilon
):
activity[i] = -1
elif (
self.upperBounds[i] != np.inf
and self.upperBounds[i] - x[i] <= epsilon
):
activity[i] = 1
return activity

[docs]    def breakpoints(self, x, d):
"""Projects the direction d, starting from x,
on the intersection of the bound constraints

:param x: current point
:type x: numpy.array

:param d: search direction
:type d: numpy.array

:return: list of tuple (index, value), where index is the index
of the variable, and value the value of the corresponding
breakpoint.
:rtype: list(tuple(int,float))

:raises biogeme.exceptions.biogemeError: if the dimensions
are inconsistent

:raises biogeme.exceptions.biogemeError: if x is infeasible

"""
if len(d) != self.n:
raise excep.biogemeError(
f'Incompatible size: {self.n}' f' and {len(d)}'
)
bp = [
(self.upperBounds[i] - x[i]) / d[i]
if d[i] > np.finfo(float).eps
else (self.lowerBounds[i] - x[i]) / d[i]
if d[i] < -np.finfo(float).eps
else 0
for i in range(self.n)
]

if any(b < 0 for b in bp):
raise excep.biogemeError('Infeasible point')

return sorted(enumerate(bp), key=lambda x: x[1])

[docs]    def generalizedCauchyPoint(self, xk, gk, H, direction):
"""Implementation of Step 2 of the Specific Algorithm by
Conn et al. (1988)_.

.. _Conn et al. (1988): https://www.ams.org/journals/mcom/1988-50-182/S0025-5718-1988-0929544-3/S0025-5718-1988-0929544-3.pdf

The quadratic model is defined as

.. math:: m(x) = f(x_k) + (x - x_k)^T g_k +
\\frac{1}{2} (x-x_k)^T H (x-x_k).

:param xk: current point
:type xk: numpy.array. Dimension n.

:param gk: vector g involved in the quadratic model definition.
:type gk: numpy.array. Dimension n.

:param H: matrix H involved in the quadratic model definition.
:type H: numpy.array. Dimension n x n.

:param direction: direction along which the GCP is searched.
:type direction: numpy.array. Dimension n.

:return: generalized Cauchy point based on inexact line search.
:rtype: numpy.array. Dimension n.

:raises biogeme.exceptions.biogemeError: if the dimensions are
inconsistent
:raises biogeme.exceptions.biogemeError: if xk is infeasible
"""

if len(xk) != self.n:
raise excep.biogemeError(
f'Incompatible size: {len(xk)}' f' and {self.n}'
)
if len(gk) != self.n:
raise excep.biogemeError(
f'Incompatible size: {len(gk)}' f' and {self.n}'
)
if H.shape[0] != self.n or H.shape[1] != self.n:
raise excep.biogemeError(
f'Incompatible size: {H.shape}' f' and {self.n}'
)

if not self.feasible(xk):
raise excep.biogemeError('Infeasible iterate')

x = xk
g = gk - H @ xk
d = direction

n = len(xk)

J = set()

fprime = np.inner(gk, d)

if fprime >= 0:
if n <= 10:
logger.warning(
f'GCP: direction {d} is not a descent '
f'direction at {x}.'
)
else:
logger.warning('GCP: not a descent direction.')

fsecond = np.inner(d, H @ d)

while len(J) < self.n:
delta_t, ind = self.maximumStep(x, d)
# J is the set of active variables
J.update(ind)

# Test whether the GCP has been found
ratio = -fprime / fsecond
if fsecond > 0 and 0 < ratio < delta_t:
x = x + ratio * d
return x

# Update line derivatives
bd = np.zeros_like(d)
bd[ind] = d[ind]
b = H @ bd

# In theory, x + delta_t * d must be feasible. However,
# there may be some numerical problem. Therefore, we
# project it on the feasible domain to make sure to obtain
# a feasible point.
x = self.project(x + delta_t * d)

dg = np.sum([d[i] * g[i] for i in ind])
fprime += delta_t * fsecond - np.inner(b, x) - dg
fsecond += np.inner(b, bd - 2 * d)
d[ind] = 0.0

if fprime >= 0:
return x

return x

[docs]def schnabelEskow(
A,
tau=np.finfo(np.float64).eps ** 0.3333,
taubar=np.finfo(np.float64).eps ** 0.6666,
mu=0.1,
):

"""Modified Cholesky factorization by Schnabel and Eskow (1999)_.

.. _Schnabel and Eskow (1999): https://doi.org/10.1137/s105262349833266x

If the matrix is 'safely' positive definite, the output is the
classical Cholesky factor. If not, the diagonal elements are
inflated in order to make it positive definite. The factor :math:L
is such that :math:A + E = PLL^TP^T, where :math:E is a diagonal
matrix contaninig the terms added to the diagonal, :math:P is a
permutation matrix, and :math:L is w lower triangular matrix.

:param A: matrix to factorize. Must be square and symmetric.
:type A: numpy.array
:param tau: tolerance factor.
Default: :math:\\varepsilon^{\\frac{1}{3}}.
See Schnabel and Eskow (1999)_
:type tau: float
:param taubar: tolerance factor.
Default: :math:\\varepsilon^{\\frac{2}{3}}.
See Schnabel and Eskow (1999)_
:type taubar: float
:param mu: tolerance factor.
Default: 0.1.  See Schnabel and Eskow (1999)_
:type mu: float

:return: tuple :math:L, :math:E, :math:P,
where :math:A + E = PLL^TP^T.
:rtype: numpy.array, numpy.array, numpy.array

:raises biogeme.exceptions.biogemeError: if the matrix A is not square.
:raises biogeme.exceptions.biogemeError: if the matrix A is not symmetric.
"""

def pivot(j):
A[j, j] = np.sqrt(A[j, j])
for i in range(j + 1, dim):
A[j, i] = A[i, j] = A[i, j] / A[j, j]
A[i, j + 1 : i + 1] -= A[i, j] * A[j + 1 : i + 1, j]
A[j + 1 : i + 1, i] = A[i, j + 1 : i + 1]

def permute(i, j):
A[[i, j]] = A[[j, i]]
E[[i, j]] = E[[j, i]]
A[:, [i, j]] = A[:, [j, i]]
P[:, [i, j]] = P[:, [j, i]]

A = A.astype(np.float64)
dim = A.shape[0]
if A.shape[1] != dim:
raise excep.biogemeError('The matrix must be square')

if not np.all(np.abs(A - A.T) < np.sqrt(np.finfo(np.float64).eps)):
raise excep.biogemeError('The matrix must be symmetric')

E = np.zeros(dim, dtype=np.float64)
P = np.identity(dim)
phaseOne = True
gamma = abs(A.diagonal()).max()
j = 0
while j < dim and phaseOne is True:
a_max = A.diagonal()[j:].max()
a_min = A.diagonal()[j:].min()
if a_max < taubar * gamma or a_min < -mu * a_max:
phaseOne = False
break

# Pivot on maximum diagonal of remaining submatrix
i = j + np.argmax(A.diagonal()[j:])
if i != j:
# Switch rows and columns of i and j of A
permute(i, j)
if j < dim - 1 and (
(
A.diagonal()[j + 1 :] - A[j + 1 :, j] ** 2 / A.diagonal()[j]
).min()
< -mu * gamma
):
phaseOne = False  # go to phase two
else:
# perform jth iteration of factorization
pivot(j)
j += 1

# Phase two, A not positive-definite
if not phaseOne:
if j == dim - 1:
E[-1] = delta = -A[-1, -1] + max(
tau * (-A[-1, -1]) / (1 - tau), taubar * gamma
)
A[-1, -1] += delta
A[-1, -1] = np.sqrt(A[-1, -1])
else:
deltaPrev = 0.0
g = np.zeros(dim)
k = j - 1  # k = number of iterations performed in phase one
# Calculate lower Gerschgorin bounds of A[k+1]
for i in range(k + 1, dim):
g[i] = (
A[i, i]
- abs(A[i, k + 1 : i]).sum()
- abs(A[i + 1 : dim, i]).sum()
)
# Modified Cholesky Decomposition
for j in range(k + 1, dim - 2):
# Pivot on maximum lower Gerschgorin bound estimate
i = j + np.argmax(g[j:])
if i != j:
# Switch rows and columns of i and j of A
permute(i, j)
# Calculate E[j, j] and add to diagonal
norm_j = abs(A[j + 1 : dim, j]).sum()
E[j] = delta = max(
0, -A[j, j] + max(norm_j, taubar * gamma), deltaPrev
)
if delta > 0:
A[j, j] += delta
deltaPrev = delta  # deltaPrev will contain E_inf
# Update Gerschgorin bound estimates
if A[j, j] != norm_j:
temp = 1.0 - norm_j / A[j, j]
g[j + 1 :] += abs(A[j + 1 :, j]) * temp
# perform jth iteration of factorization
pivot(j)

# Final 2 by 2 submatrix
e = np.linalg.eigvalsh(A[-2:, -2:])
e.sort()
E[-2] = E[-1] = delta = max(
0,
-e[0] + max(tau * (e[1] - e[0]) / (1 - tau), taubar * gamma),
deltaPrev,
)
if delta > 0:
A[-2, -2] += delta
A[-1, -1] += delta
deltaPrev = delta
A[-2, -2] = np.sqrt(A[-2, -2])  # overwrites A[-2, -2]
A[-1, -2] = A[-1, -2] / A[-2, -2]  # overwrites A[-1, -2]
A[-2, -1] = A[-1, -2]
# overwrites A[-1, -1]
A[-1, -1] = np.sqrt(A[-1, -1] - A[-1, -2] * A[-1, -2])

return np.tril(A), np.diag(P @ E), P

[docs]def lineSearch(fct, x, f, g, d, alpha0=1.0, beta1=1.0e-4, beta2=0.99, lbd=2.0):
"""
Calculate a step along a direction that satisfies both Wolfe conditions

:param fct: object to calculate the objective function and its derivatives.
:type fct: optimization.functionToMinimize

:param x: current iterate.
:type x: numpy.array

:param f: value of the objective function at the current iterate.
:type f: float

:param g: value of the gradient at the current iterate.
:type g: numpy.array

:param d: descent direction.
:type d: numpy.array

:param alpha0: first step to test.
:type alpha0: float

:param beta1: parameter of the first Wolfe condition.
:type beta1: float

:param beta2: parameter of the second Wolfe condition.
:type beta2: float

:param lbd: expansion factor for a short step.
:type lbd: float

:return: a step verifing both Wolfe conditions
:rtype: float

:raises biogeme.exceptions.biogemeError: if lbd :math:\\leq 1
:raises biogeme.exceptions.biogemeError: if alpha0 :math:\\leq 0
:raises biogeme.exceptions.biogemeError: if beta1 :math:\\geq beta2
:raises biogeme.exceptions.biogemeError: if d is not a descent
direction

"""
if lbd <= 1:
raise excep.biogemeError(f'lambda is {lbd} and must be > 1')
if alpha0 <= 0:
raise excep.biogemeError(f'alpha0 is {alpha0} and must be > 0')
if beta1 >= beta2:
errorMsg = (
f'Incompatible Wolfe cond. parameters: '
f'beta1= {beta1} is greater than '
f'beta2={beta2}'
)
raise excep.biogemeError(errorMsg)

nfev = 1
deriv = np.inner(g, d)

if deriv >= 0:
raise excep.biogemeError(f'd is not a descent direction: {deriv} >= 0')

alpha = alpha0
alphal = 0
alphar = np.finfo(np.float128).max
finished = False
while not finished:
xnew = x + alpha * d
fct.setVariables(xnew)
fnew, gnew = fct.f_g()
nfev += 1
finished = True
# First Wolfe condition violated?
if fnew > f + alpha * beta1 * deriv:
alphar = alpha
alpha = (alphal + alphar) / 2.0
finished = False
elif np.inner(gnew, d) < beta2 * deriv:
alphal = alpha
if alphar == np.finfo(np.float128).max:
alpha = lbd * alpha
else:
alpha = (alphal + alphar) / 2.0
finished = False
return alpha, nfev

[docs]def relativeGradient(x, f, g, typx, typf):
"""Calculates the relative gradients.

It is typically used as stopping criterion.

:param x: current iterate.
:type x: numpy.array
:param f: value of f(x)
:type f: float
:param g: :math:\\nabla f(x), gradient of f at x
:type g: numpy.array
:param typx: typical value for x.
:type typx: numpy.array
:param typf: typical value for f.
:type typf: float

.. math:: \\max_{i=1,\\ldots,n}\\frac{(\\nabla f(x))_i
\\max(x_i,\\text{typx}_i)}
{\\max(|f(x)|, \\text{typf})}

:rtype: float
"""
[
g[i] * max(abs(x[i]), typx[i]) / max(abs(f), typf)
for i in range(len(x))
]
)
if np.isfinite(result):
return result

return np.finfo(float).max

[docs]def newtonLineSearch(
fct, x0, eps=np.finfo(np.float64).eps ** 0.3333, maxiter=100
):
"""
Newton method with inexact line search (Wolfe conditions)

:param fct: object to calculate the objective function and its derivatives.
:type fct: optimization.functionToMinimize

:param x0: starting point
:type x0: numpy.array

:param eps: the algorithm stops when this precision is reached.
Default: :math:\\varepsilon^{\\frac{1}{3}}
:type eps: float

:param maxiter: the algorithm stops if this number of iterations
is reached. Defaut: 100
:type maxiter: int

:return: x, messages

- x is the solution generated by the algorithm,
- messages is a dictionary describing information about the lagorithm

:rtype: numpay.array, dict(str:object)

"""

xk = x0
fct.setVariables(xk)
f, g, H = fct.f_g_h()
nfev = 1
ngev = 1
nhev = 1
typx = np.ones(np.asarray(xk).shape)
typf = max(np.abs(f), 1.0)
relgrad = relativeGradient(xk, f, g, typx, typf)
if relgrad <= eps:
message = f'Relative gradient = {relgrad:.3g} <= {eps:.2g}'
messages = {
'Algorithm': 'Unconstrained Newton with line search',
'Number of iterations': 0,
'Number of function evaluations': nfev,
'Number of gradient evaluations': ngev,
'Number of hessian evaluations': nhev,
'Cause of termination': message,
}
return xk, messages

k = 0
cont = True
while cont:
L, _, P = schnabelEskow(H)
y3 = -P.T @ g
y2 = la.solve_triangular(L, y3, lower=True)
y1 = la.solve_triangular(L.T, y2, lower=False)
d = P @ y1
alpha, nfls = lineSearch(fct, xk, f, g, d)
nfev += nfls
ngev += nfls
xk = xk + alpha * d
fct.setVariables(xk)
f, g, H = fct.f_g_h()
nfev += 1
ngev += 1
nhev += 1
k += 1
typf = max(np.abs(f), 1.0)
relgrad = relativeGradient(xk, f, g, typx, typf)
if relgrad <= eps:
message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
cont = False
if k == maxiter:
message = f'Maximum number of iterations reached: {maxiter}'
cont = False
logger.detailed(
)

messages = {
'Algorithm': 'Unconstrained Newton with line search',
'Number of iterations': k,
'Number of function evaluations': nfev,
'Number of gradient evaluations': ngev,
'Number of hessian evaluations': nhev,
'Cause of termination': message,
}

return xk, messages

[docs]def trustRegionIntersection(dc, d, delta):
"""Calculates the intersection with the boundary of the trust region.

Consider a trust region of radius :math:\\delta, centered at
:math:\\hat{x}. Let :math:x_c be in the trust region, and
:math:d_c = x_c - \\hat{x}, so that :math:\\|d_c\\| \\leq
\\delta. Let :math:x_d be out of the trust region, and
:math:d_d = x_d - \\hat{x}, so that :math:\\|d_d\\| \\geq
\\delta.  We calculate :math:\\lambda such that

.. math:: \\| d_c + \\lambda (d_d - d_c)\\| = \\delta

:param dc: xc - xhat.
:type dc: numpy.array
:param d: dd - dc.
:type d: numpy.array
:param delta: radius of the trust region.
:type delta: float

:return: :math:\\lambda such that :math:\\| d_c +
\\lambda (d_d - d_c)\\| = \\delta

:rtype: float

"""
a = np.inner(d, d)
b = 2 * np.inner(dc, d)
c = np.inner(dc, dc) - delta ** 2
discriminant = b * b - 4.0 * a * c
return (-b + np.sqrt(discriminant)) / (2 * a)

[docs]def cauchyNewtonDogleg(g, H):
"""Calculate the Cauchy, the Newton and the dogleg points.

The Cauchy point is defined as

.. math:: d_c = - \\frac{\\nabla f(x)^T \\nabla f(x)}
{\\nabla f(x)^T \\nabla^2 f(x)
\\nabla f(x)} \\nabla f(x)

The Newton point :math:d_n verifies Newton equation:

.. math:: H_s d_n = - \\nabla f(x)

where :math:H_s is a positive definite matrix generated with the
method by Schnabel and Eskow (1999)_.

The Dogleg point is

.. math:: d_d = \\eta d_n

where

.. math:: \\eta = 0.2 + 0.8 \\frac{\\alpha^2}{\\beta |\\nabla f(x)^T d_n|}

and :math:\\alpha= \\nabla f(x)^T \\nabla f(x),
:math:\\beta=\\nabla f(x)^T \\nabla^2 f(x)\\nabla f(x).

:param g: gradient :math:\\nabla f(x)

:type g: numpy.array

:param H: hessian :math:\\nabla^2 f(x)

:type H: numpy.array

:return: tuple with Cauchy point, Newton point, Dogleg point
:rtype: numpy.array, numpy.array, numpy.array

:raises biogeme.exceptions.biogemeError: if the quadratic model is
not convex.

"""
alpha = np.inner(g, g)
beta = np.inner(g, H @ g)
dc = -(alpha / beta) * g
L, E, P = schnabelEskow(H)
negativeEigenvalue = E < 0
if np.any(negativeEigenvalue):
print(E)
raise excep.biogemeError(
'The dogleg method requires a ' 'convex optimization problem.'
)

y3 = -P.T @ g
y2 = la.solve_triangular(L, y3, lower=True)
y1 = la.solve_triangular(L.T, y2, lower=False)
dn = P @ y1
eta = 0.2 + (0.8 * alpha * alpha / (beta * abs(np.inner(g, dn))))
return dc, dn, eta * dn

[docs]def dogleg(g, H, delta):
"""
Find an approximation of the trust region subproblem using
the dogleg method

:param g: gradient of the quadratic model.
:type g: numpy.array
:param H: hessian of the quadratic model.
:type H: numpy.array
:param delta: radius of the trust region.
:type delta: float

:return: d, diagnostic where

- d is an approximate solution of the trust region subproblem
- diagnostic is the nature of the solution:

* -2 if negative curvature along Newton direction
* -1 if negative curvature along Cauchy direction
(i.e. along the gradient)
* 1 if partial Cauchy step
* 2 if Newton step
* 3 if partial Newton step
* 4 if Dogleg

:rtype: numpy.array, int
"""

dc, dn, dl = cauchyNewtonDogleg(g, H)

# Check if the model is convex along the gradient direction

alpha = np.inner(g, g)
beta = np.inner(g, H @ g)
if beta <= 0:
dstar = -delta * g / np.sqrt(alpha)
return dstar, -1

# Compute the Cauchy point

normdc = alpha * np.sqrt(alpha) / beta
if normdc >= delta:
# The Cauchy point is outside the trust
# region. We move along the Cauchy
# direction until the border of the trust
# region.

dstar = (delta / normdc) * dc
return dstar, 1

# Compute Newton point

normdn = la.norm(dn)

# Check the convexity of the model along Newton direction

if np.inner(dn, H @ dn) <= 0.0:
# Return the Cauchy point
return dc, -2

if normdn <= delta:
# Newton point is inside the trust region
return dn, 2

# Compute the dogleg point

eta = 0.2 + (0.8 * alpha * alpha / (beta * abs(np.inner(g, dn))))

partieldn = eta * la.norm(dn)

if partieldn <= delta:
# Dogleg point is inside the trust region
dstar = (delta / normdn) * dn
return dstar, 3

# Between Cauchy and dogleg
nu = dl - dc
lbd = trustRegionIntersection(dc, nu, delta)
dstar = dc + lbd * nu
return dstar, 4

[docs]def truncatedConjugateGradient(g, H, delta):
"""Find an approximation of the trust region subproblem using the
truncated conjugate gradient method

:param g: gradient of the quadratic model.
:type g: numpy.array
:param H: hessian of the quadrartic model.
:type H: numpy.array
:param delta: radius of the trust region.
:type delta: float

:return: d, diagnostic, where

- d is the approximate solution of the trust region subproblem,
- diagnostic is the nature of the solution:

* 1 for convergence,
* 2 if out of the trust region,
* 3 if negative curvature detected.
* 4 if a numerical problem has been encountered

:rtype: numpy.array, int

"""
tol = 1.0e-6
n = len(g)
xk = np.zeros(n)
gk = g
dk = -gk
for _ in range(n):
try:
curv = np.inner(dk, H @ dk)
if curv <= 0:
# Negative curvature has been detected
diagnostic = 3
a = np.inner(dk, dk)
b = 2 * np.inner(xk, dk)
c = np.inner(xk, xk) - delta * delta
rho = b * b - 4 * a * c
step = xk + ((-b + np.sqrt(rho)) / (2 * a)) * dk
return step, diagnostic
alphak = -np.inner(dk, gk) / curv
xkp1 = xk + alphak * dk
if np.isnan(xkp1).any() or la.norm(xkp1) > delta:
# Out of the trust region
diagnostic = 2
a = np.inner(dk, dk)
b = 2 * np.inner(xk, dk)
c = np.inner(xk, xk) - delta * delta
rho = b * b - 4 * a * c
step = xk + ((-b + np.sqrt(rho)) / (2 * a)) * dk
return step, diagnostic
xk = xkp1
gkp1 = H @ xk + g
betak = np.inner(gkp1, gkp1) / np.inner(gk, gk)
dk = -gkp1 + betak * dk
gk = gkp1
if la.norm(gkp1) <= tol:
diagnostic = 1
step = xk
return step, diagnostic
except ValueError:
# Numerical problem
diagnostic = 4
a = np.inner(dk, dk)
b = 2 * np.inner(xk, dk)
c = np.inner(xk, xk) - delta * delta
rho = b * b - 4 * a * c
step = xk + ((-b + np.sqrt(rho)) / (2 * a)) * dk
return step, diagnostic
diagnostic = 1
step = xk
return step, diagnostic

[docs]def newtonTrustRegion(
fct,
x0,
delta0=1.0,
eps=np.finfo(np.float64).eps ** 0.3333,
dl=False,
maxiter=1000,
eta1=0.01,
eta2=0.9,
):
"""Newton method with trust region

:param fct: object to calculate the objective function and its derivatives.
:type fct: optimization.functionToMinimize

:param x0: starting point
:type x0: numpy.array

:param delta0: initial radius of the trust region. Default: 100.
:type delta0: float

:param eps: the algorithm stops when this precision is reached.
Default: :math:\\varepsilon^{\\frac{1}{3}}
:type eps: float

:param dl: If True, the Dogleg method is used to solve the
trut region subproblem. If False, the truncated conjugate
gradient is used. Default: False.
:type dl: bool

:param maxiter: the algorithm stops if this number of iterations
is reached. Default: 1000.

:type maxiter: int

:param eta1: threshold for failed iterations. Default: 0.01.
:type eta1: float

:param eta2: threshold for very successful iterations. Default 0.9.
:type eta2: float

:return: tuple x, messages, where

- x is the solution found,
- messages is a dictionary reporting various aspects
related to the run of the algorithm.

:rtype: numpy.array, dict(str:object)

"""

k = 0
xk = x0
fct.setVariables(xk)
f, g, H = fct.f_g_h()
nfev = 1
ngev = 1
nhev = 1
typx = np.ones(np.asarray(xk).shape)
typf = max(np.abs(f), 1.0)
relgrad = relativeGradient(xk, f, g, typx, typf)
if relgrad <= eps:
message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
messages = {
'Algorithm': 'Unconstrained Newton with trust region',
'Cause of termination': message,
'Number of iterations': 0,
'Number of function evaluations': nfev,
'Number of gradient evaluations': ngev,
'Number of hessian evaluations': nhev,
}
return xk, messages
delta = delta0
nfev = 0
cont = True
maxDelta = np.finfo(float).max
minDelta = np.finfo(float).eps
rho = 0.0
while cont:
k += 1
if dl:
step, _ = dogleg(g, H, delta)
else:
step, _ = truncatedConjugateGradient(g, H, delta)
xc = xk + step
fct.setVariables(xc)
# Calculate the value of the function
fc = fct.f()
nfev += 1
num = f - fc
denom = -np.inner(step, g) - 0.5 * np.inner(step, H @ step)
rho = num / denom
if rho < eta1:
# Failure: reduce the trust region
delta = la.norm(step) / 2.0
status = '-'
else:
# Candidate accepted
fc, gc, Hc = fct.f_g_h()
nfev += 1
ngev += 1
nhev += 1
xk = xc
f = fc
g = gc
H = Hc
if rho >= eta2:
# Enlarge the trust region
delta = min(2 * delta, maxDelta)
status = '++'
else:
status = '+'
relgrad = relativeGradient(xk, f, g, typx, typf)
if relgrad <= eps:
message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
cont = False
if delta <= minDelta:
message = f'Trust region is too small: {delta}'
cont = False
if k == maxiter:
message = f'Maximum number of iterations reached: {maxiter}'
cont = False
logger.detailed(
f'delta={delta:6.2g} '
f'rho={rho:6.2g} {status}'
)

messages = {
'Algorithm': 'Unconstrained Newton with trust region',
'Cause of termination': message,
'Number of iterations': k,
'Number of function evaluations': nfev,
'Number of gradient evaluations': ngev,
'Number of hessian evaluations': nhev,
}

return xk, messages

[docs]def bfgs(H, d, y):
"""Update the BFGS matrix. Formula (13.12) of Bierlaire (2015)_
where the method proposed by Powell (1977)_ is applied

.. _Bierlaire (2015): http://optimizationprinciplesalgorithms.com/
.. _Powell (1977): https://link.springer.com/content/pdf/10.1007/BFb0067703.pdf

:param H: current approximation of the inverse of the Hessian
:type H: numpy.array (2D)

:param d: difference between two consecutive iterates.
:type d: numpy.array (1D)

:param y: difference between two consecutive gradients.
:type y: numpy.array (1D)

:return: updated approximation of the inverse of the Hessian.
:rtype: numpy.array (2D)

"""
Hd = H @ d
dHd = np.inner(d, Hd)
denom = np.inner(d, y)
if denom >= 0.2 * dHd:
eta = y
else:
theta = 0.8 * dHd / (dHd - denom)
eta = theta * y + (1 - theta) * Hd

return H - np.outer(Hd, Hd) / dHd + np.outer(eta, eta) / np.inner(d, eta)

[docs]def inverseBfgs(Hinv, d, y):
"""Update the inverse BFGS matrix. Formula (13.13) of Bierlaire (2015)_

.. _Bierlaire (2015): http://optimizationprinciplesalgorithms.com/

:param Hinv: current approximation of the inverse of the Hessian
:type Hinv: numpy.array (2D)

:param d: difference between two consecutive iterates.
:type d: numpy.array (1D)

:param y: difference between two consecutive gradients.
:type y: numpy.array (1D)

:return: updated approximation of the inverse of the Hessian.
:rtype: numpy.array (2D)
"""
n = len(d)

denom = np.inner(d, y)
if denom <= 0.0:
logger.warning(f"Unable to perform BFGS update as d'y = {denom} <= 0")
return Hinv
dy = np.outer(d, y)
yd = np.outer(y, d)
dd = np.outer(d, d)
eye = np.identity(n)
return ((eye - (dy / denom)) @ Hinv @ (eye - (yd / denom))) + dd / denom

[docs]def bfgsLineSearch(
fct,
x0,
initBfgs=None,
eps=np.finfo(np.float64).eps ** 0.3333,
maxiter=1000,
):
"""BFGS method with inexact line search (Wolfe conditions)

:param fct: object to calculate the objective function and its derivatives.
:type fct: optimization.functionToMinimize

:param x0: starting point
:type x0: numpy.array

:param initBfgs: matrix used to initialize BFGS. If None, the
identity matrix is used. Default: None.
:type initBfgs: numpy.array

:param eps: the algorithm stops when this precision is reached.
Default: :math:\\varepsilon^{\\frac{1}{3}}
:type eps: float

:param maxiter: the algorithm stops if this number of iterations
is reached. Default: 1000
:type maxiter: int

:return: tuple x, messages, where

- x is the solution found,

- messages is a dictionary reporting various aspects
related to the run of the algorithm.

:rtype: numpy.array, dict(str:object)

:raises biogeme.exceptions.biogemeError: if the dimensions of the
matrix initBfgs do not match the length of x0.

"""

n = len(x0)
xk = x0
fct.setVariables(xk)
f, g = fct.f_g()
nfev = 1
ngev = 1
if initBfgs is None:
Hinv = np.identity(n)
else:
if initBfgs.shape != (n, n):
errorMsg = (
f'BFGS must be initialized with a {n}x{n} '
f'matrix and not a {initBfgs.shape[0]}'
'x{initBfgs.shape[1]} matrix.'
)
raise excep.biogemeError(errorMsg)
Hinv = initBfgs
typx = np.ones(np.asarray(xk).shape)
typf = max(np.abs(f), 1.0)
relgrad = relativeGradient(xk, f, g, typx, typf)
if relgrad <= eps:
message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
messages = {
'Algorithm': 'Inverse BFGS with line search',
'Cause of termination': message,
'Number of iterations': 0,
'Number of function evaluations': nfev,
'Number of gradient evaluations': ngev,
}
return xk, messages
k = 0
nfev = 0
cont = True
while cont:
d = -Hinv @ g
alpha, nfls = lineSearch(fct, xk, f, g, d)
nfev += nfls
delta = alpha * d
xk = xk + delta
gprev = g
fct.setVariables(xk)
f, g = fct.f_g()
nfev += 1
ngev += 1
Hinv = inverseBfgs(Hinv, delta, g - gprev)
nfev += 1
k += 1
relgrad = relativeGradient(xk, f, g, typx, typf)
if relgrad <= eps:
message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
cont = False
if k == maxiter:
message = f'Maximum number of iterations reached: {maxiter}'
cont = False
logger.detailed(
)
messages = {
'Algorithm': 'Inverse BFGS with line search',
'Cause of termination': message,
'Number of iterations': k,
'Number of function evaluations': nfev,
'Number of gradient evaluations': ngev,
}

return xk, messages

[docs]def bfgsTrustRegion(
fct,
x0,
initBfgs=None,
delta0=1.0,
eps=np.finfo(np.float64).eps ** 0.3333,
dl=False,
maxiter=1000,
eta1=0.01,
eta2=0.9,
):
"""BFGS method with trust region

:param fct: object to calculate the objective function and its derivatives.
:type fct: optimization.functionToMinimize

:param x0: starting point
:type x0: numpy.array

:param initBfgs: matrix used to initialize BFGS. If None, the
identity matrix is used. Default: None.
:type initBfgs: numpy.array

:param delta0: initial radius of the trust region. Default: 100.
:type delta0: float

:param eps: the algorithm stops when this precision is reached.
Default: :math:\\varepsilon^{\\frac{1}{3}}
:type eps: float

:param dl: If True, the Dogleg method is used to solve the
trut region subproblem. If False, the truncated conjugate
gradient is used. Default: False.
:type dl: bool

:param maxiter: the algorithm stops if this number of iterations
is reached. Default: 1000.
:type maxiter: int

:param eta1: threshold for failed iterations. Default: 0.01.
:type eta1: float

:param eta2: threshold for very successful iterations. Default 0.9.
:type eta2: float

:return: tuple x, messages, where

- x is the solution found,

- messages is a dictionary reporting various aspects
related to the run of the algorithm.

:rtype: numpy.array, dict(str:object)

:raises biogeme.exceptions.biogemeError: if the dimensions of the
matrix initBfgs do not match the length of x0.

"""
k = 0
xk = x0
n = len(x0)
fct.setVariables(xk)
f, g = fct.f_g()
nfev = 1
ngev = 1
if initBfgs is None:
H = np.identity(n)
else:
if initBfgs.shape != (n, n):
errorMsg = (
f'BFGS must be initialized with a {n}x{n} '
f'matrix and not a {initBfgs.shape[0]}'
f'x{initBfgs.shape[1]} matrix.'
)
raise excep.biogemeError(errorMsg)
H = initBfgs
typx = np.ones(np.asarray(xk).shape)
typf = max(np.abs(f), 1.0)
relgrad = relativeGradient(xk, f, g, typx, typf)
if relgrad <= eps:
message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
messages = {
'Algorithm': 'BFGS with trust region',
'Cause of termination': message,
'Number of iterations': 0,
'Number of function evaluations': nfev,
'Number of gradient evaluations': ngev,
}
return xk, messages
delta = delta0
cont = True
maxDelta = np.finfo(float).max
minDelta = np.finfo(float).eps
rho = 0.0
while cont:
k += 1
if dl:
step, _ = dogleg(g, H, delta)
else:
step, _ = truncatedConjugateGradient(g, H, delta)
xc = xk + step
fct.setVariables(xc)
# Calculate the value of the function
fc = fct.f()
nfev += 1
if fc >= f:
delta = la.norm(step) / 2.0
status = '-'
else:
num = f - fc
denom = -np.inner(step, g) - 0.5 * np.inner(step, H @ step)
rho = num / denom
if rho < eta1:
# Failure: reduce the trust region
delta = la.norm(step) / 2.0
status = '-'
else:
# Candidate accepted
fc, gc = fct.f_g()
nfev += 1
ngev += 1
d = xc - xk
y = gc - g
xk = xc
f = fc
g = gc
H = bfgs(H, d, y)
if rho >= eta2:
# Enlarge the trust region
delta = min(2 * delta, maxDelta)
status = '++'
else:
status = '+'
relgrad = relativeGradient(xk, f, g, typx, typf)
if relgrad <= eps:
message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
cont = False
if delta <= minDelta:
message = f'Trust region is too small: {delta}'
cont = False
if k == maxiter:
message = f'Maximum number of iterations reached: {maxiter}'
cont = False
logger.detailed(
f' delta={delta:6.2g} '
f'rho={rho:6.2g} {status}'
)

messages = {
'Algorithm': 'BFGS with trust region',
'Cause of termination': message,
'Number of iterations': k,
'Number of function evaluations': nfev,
'Number of gradient evaluations': ngev,
}

return xk, messages

xk,
gk,
Hk,
delta,
bounds,
infeasibleIterate=False,
tol=np.finfo(np.float64).eps ** 0.3333,
):
"""Find an approximation of the solution of the trust region
subproblem using the truncated conjugate gradient method within
the subspace of free variables. Free variables are those
corresponding to inactive constraints at the generalized Cauchy
point.

:param xk: current iterate.
:type xk: numpy.array

:param gk: gradient of the quadratic model.
:type gk: numpy.array

:param Hk: hessian of the quadrartic model.
:type Hk: numpy.array

:param delta: radius of the trust region.
:type delta: float

:param bounds: bounds on the variables.
:type bounds: class bioBounds

:param infeasibleIterate: if True, the algorithm may continue
until termination.  The result will then
be projected on the feasible domain.  If
False, the algorithm stops as soon as an
infeasible iterate is generated.
Default: False.

:type infeasibleIterate: bool

:param tol: tolerance used for stopping criterion.
:type tol: float

:return: d, diagnostic, where

- d is the approximate solution of the trust region subproblem,
- diagnostic is the nature of the solution:

* 1 for convergence,
* 2 if out of the trust region,
* 3 if negative curvature detected.
* 4 if a numerical problem has been encountered

:rtype: numpy.array, int

:raises biogeme.exceptions.biogemeError: if the dimensions are inconsistent

"""

if np.isnan(xk).any():
raise excep.biogemeError(f'Invalid xk: {xk}')

if np.isnan(gk).any():
raise excep.biogemeError(f'Invalid gk: {gk}')

if np.isnan(Hk).any():
raise excep.biogemeError(f'Invalid Hk: {Hk}')

# First, we calculate the intersection between the trust region on
# the bounds. The trust region is also a bound constraint (based
# on infinity norm) of radius 'delta', centered at xk.

intersection = bounds.intersectionWithTrustRegion(xk, delta)

# Then, we calculate the generalized Cauchy point
gcp = intersection.generalizedCauchyPoint(xk, gk, Hk, -gk)

x = gcp
r = -gk - Hk @ (x - xk)

etak = tol

activityStatus = intersection.activity(gcp)
freeVariables = activityStatus == 0

if not freeVariables.any():
return gcp, 1

xbar = x[freeVariables]
rbar = r[freeVariables]

# Extract the  bounds for the free variables
boundsBar = intersection.subspace(freeVariables)
Bbar = Hk[freeVariables][:, freeVariables]
pbar = np.zeros_like(rbar)
rho1 = 1
rho2 = np.inner(rbar, rbar)

while rho2 >= etak * etak:
try:
beta = rho2 / rho1
pbar = rbar + beta * pbar
ybar = Bbar @ pbar

if boundsBar.feasible(xbar):
feasible = True
alpha1, _ = boundsBar.maximumStep(xbar, pbar)
else:
feasible = False

if np.inner(pbar, ybar) <= 0:
# Negative curvature has been detected.
if feasible:
x[freeVariables] = xbar + alpha1 * pbar
else:
x[freeVariables] = xbar
return bounds.project(x), 3

alpha2 = rho2 / np.inner(pbar, ybar)
if not infeasibleIterate and alpha2 > alpha1:
# Infeasible iterate
x[freeVariables] = xbar + alpha1 * pbar
return x, 2

xbar = xbar + alpha2 * pbar
rbar = rbar - alpha2 * ybar
rho1 = rho2
rho2 = np.inner(rbar, rbar)

except ValueError:
# Numerical problem detected. Return the current value of x
logger.warning('Numerical problem in conjugate gradient algorithm')
x[freeVariables] = xbar
if infeasibleIterate:
return bounds.project(x), 4
return x, 4

x[freeVariables] = xbar
if infeasibleIterate:
return bounds.project(x), 4
return x, 1

[docs]def simpleBoundsNewtonAlgorithm(
fct,
bounds,
x0,
proportionTrueHessian=1.0,
delta0=1.0,
tol=np.finfo(np.float64).eps ** 0.3333,
steptol=1.0e-5,
cgtol=np.finfo(np.float64).eps ** 0.3333,
maxiter=1000,
eta1=0.01,
eta2=0.9,
enlargingFactor=10,
):
"""Trust region algorithm for problems with simple bounds

:param fct: object to calculate the objective function and its derivatives.
:type fct: optimization.functionToMinimize

:param bounds: bounds on the variables
:type bounds: class bounds

:param x0: starting point
:type x0: numpy.array

:param proportionTrueHessian: proportion of the iterations where
the true hessian is calculated. When
not, the BFGS update is used. If
1.0, it is used for all
iterations. If 0.0, it is not used
at all.
:type proportionTrueHessian: float

:param infeasibleConjugateGradient: if True, the conjugate
gradient algorithm may generate until
termination.  The result will then be
projected on the feasible domain.  If
False, the algorithm stops as soon as an
infeasible iterate is generated.
Default: False.

:param delta0: initial radius of the trust region. Default: 100.
:type delta0: float

:param tol: the algorithm stops when this precision is reached.
Default: :math:\\varepsilon^{\\frac{1}{3}}
:type tol: float

:param steptol: the algorithm stops when the relative change in x
is below this threshold. Basically, if p significant
digits of x are needed, steptol should be set to
1.0e-p.  Default: :math:10^{-5}

:type steptol: float

:param cgtol: the conjugate gradient algorithm stops when this
precision is reached.  Default:
:math:\\varepsilon^{\\frac{1}{3}}

:type cgtol: float

:param maxiter: the algorithm stops if this number of iterations
is reached. Default: 1000.

:type maxiter: int

:param eta1: threshold for failed iterations. Default: 0.01.
:type eta1: float

:param eta2: threshold for very successful iterations. Default 0.9.
:type eta2: float

:param enlargingFactor: if an iteration is very successful, the
radius of the trust region is multiplied
by this factor. Default 10.
:type enlargingFactor: float

:return: x, messages

- x is the solution generated by the algorithm,
- messages is a dictionary describing information about the lagorithm

:rtype: numpay.array, dict(str:object)

:raises biogeme.exceptions.biogemeError: if the dimensions of the
matrix initBfgs do not match the length of x0.

"""
if len(x0) != bounds.n:
raise excep.biogemeError(
f'Incompatible size:' f' {len(x0)} and {len(bounds)}'
)

if not bounds.feasible(x0):
logger.warning(
'Initial point not feasible. '
'It will be projected onto the feasible domain.'
)

numberOfTrueHessian = 0
numberOfMatrices = 0

if proportionTrueHessian == 1.0:
algo = 'Newton with trust region for simple bound constraints'
elif proportionTrueHessian == 0.0:
algo = 'BFGS with trust region for simple bound constraints'
else:
algo = (
f'Hybrid Newton [{100*proportionTrueHessian}%] with trust '
f'region for simple bound constraints'
)

k = 0
xk = bounds.project(x0)
fct.setVariables(xk)
nfev = 0
ngev = 0
nhev = 0
if proportionTrueHessian > 0:
f, g, H = fct.f_g_h()
nfev += 1
ngev += 1
nhev += 1

numberOfTrueHessian += 1
numberOfMatrices += 1
# If there is a numerical problem with the Hessian, we use BFGS instead
if np.isnan(H).any() or np.linalg.norm(H) > 1.0e100:
logger.warning(
f'Numerical problem with the second derivative'
f' matrix at the starting point. Norm = '
f'{np.linalg.norm(H)}. Replaced by the '
f'identity matrix.'
)
H = np.eye(len(xk))
else:
f, g = fct.f_g()
nfev += 1
ngev += 1
H = np.eye(len(xk))
numberOfMatrices += 1

projectedGradient = bounds.project(xk - g) - xk
typx = np.ones(np.asarray(xk).shape)
typf = max(np.abs(f), 1.0)
relchange = None
if relgrad <= tol:
message = f'Relative gradient = {relgrad:.2g} <= {tol:.2g}'
messages = {
'Algorithm': algo,
'Number of iterations': 0,
'Number of function evaluations': nfev,
'Number of gradient evaluations': ngev,
'Number of hessian evaluations': nhev,
'Cause of termination': message,
}
return xk, messages

delta = delta0
cont = True
maxDelta = np.finfo(float).max
minDelta = np.finfo(float).eps
rho = 0.0
while cont:
k += 1

# Solve the quandratic problem in the subspace defined by the GCP

xc, _ = truncatedConjugateGradientSubspace(
xk, g, H, delta, bounds, infeasibleConjugateGradient, cgtol
)

if np.isnan(xc).any():
delta = delta / 2.0
status = '-'
else:
fct.setVariables(xc)
fc = fct.f()
nfev += 1

num = f - fc
step = xc - xk
denom = -np.inner(step, g) - 0.5 * np.inner(step, H @ step)
rho = num / denom

if rho < eta1:
# Failure: reduce the trust region
delta = min(delta / 2.0, la.norm(step, np.inf) / 2.0)
status = '-'
else:
# Candidate accepted
if (
proportionTrueHessian > 0
and float(numberOfTrueHessian) / float(numberOfMatrices)
<= proportionTrueHessian
):

try:
fc, gc, Hc = fct.f_g_h()
nfev += 1
ngev += 1
nhev += 1
numberOfTrueHessian += 1
numberOfMatrices += 1
# If there is a numerical problem with the
# Hessian, we apply BFGS
if np.linalg.norm(Hc) > 1.0e100:
logger.warning(
f'Numerical problem with the second '
f'derivative matrix. '
f'Norm = {np.linalg.norm(Hc)}. '
f'Replaced by the identity matrix.'
)
y = gc - g
Hc = bfgs(H, step, y)
fghCalculated = True
except excep.biogemeError:
# Failure: reduce the trust region
delta = min(delta / 2.0, la.norm(step, np.inf) / 2.0)
status = '-'
fghCalculated = False
else:
try:
fc, gc = fct.f_g()
nfev += 1
ngev += 1
y = gc - g
Hc = bfgs(H, step, y)
numberOfMatrices += 1
fghCalculated = True
except excep.biogemeError:
# Failure: reduce the trust region
delta = min(delta / 2.0, la.norm(step, np.inf) / 2.0)
status = '-'
fghCalculated = False
if fghCalculated:
nfev += 1
xpred = xk
xk = xc
f = fc
g = gc
H = Hc
if rho >= eta2:
# Enlarge the trust region
delta = min(enlargingFactor * delta, maxDelta)
status = '++'
else:
status = '+'

projectedGradient = bounds.project(xk - g) - xk
xk, f, projectedGradient, typx, typf
)
if relgrad <= tol:
message = (
)
cont = False
relchange = relativeChange(xk, xpred, typx)
if relchange <= steptol:
message = (
f'Relative change = '
f'{relchange:.3g} <= {steptol:.2g}'
)
cont = False
if delta <= minDelta:
message = f'Trust region is too small: {delta}'
cont = False
if k == maxiter:
message = f'Maximum number of iterations reached: {maxiter}'
cont = False
if relchange is None:
logger.detailed(
f'{k} f={f:10.7g} projected '
f'delta={delta:6.2g} rho={rho:6.2g} {status}'
)
else:
logger.detailed(
f'{k} f={f:10.7g} projected '
f'rel. change={relchange:6.2g} '
f'delta={delta:6.2g} rho={rho:6.2g} {status}'
)
if numberOfMatrices != 0:
actualProp = 100 * float(numberOfTrueHessian) / float(numberOfMatrices)
else:
actualProp = 0
m = f'Proportion of Hessian calculation: ' f'{actualProp}%'
logger.detailed(m)

messages = {
'Algorithm': algo,
'Proportion analytical hessian': f'{actualProp:.1f}%',
'Relative change': relchange,
'Number of iterations': k,
'Number of function evaluations': nfev,
'Number of gradient evaluations': ngev,
'Number of hessian evaluations': nhev,
'Cause of termination': message,
}

return xk, messages

[docs]def relativeChange(x, xpred, typx):
"""Calculates the relative change.

It is typically used as stopping criterion.

:param x: current iterate.
:type x: numpy.array

:param xpred: previous iterate.
:type xpred: numpy.array

:param typx: typical value for x.
:type typx: numpy.array

:return: relative change:

.. math:: \\max_i \\frac{|(x_k)_i - (x_{k-1})_i|}{\\max(|(x_k)_i|,
\\text{typx}_i)}

:rtype: float

"""

relx = np.array(
[abs(x[i] - xpred[i]) / max(abs(x[i]), typx[i]) for i in range(len(x))]
)
result = relx.max()
if np.isfinite(result):
return result

return np.finfo(float).max