Source code for biogeme.results_processing.recycle_pickle

"""
Recycle old pickle files and transform them into YAML files

Michel Bierlaire
Wed Oct 2 06:43:33 2024
"""

import pickle
from datetime import timedelta
from typing import Any

import numpy as np

from .raw_estimation_results import RawEstimationResults, serialize_to_yaml
from ..exceptions import BiogemeError


class RawResults:
    """Class containing the raw results from the estimation.

    In this module, it is the class returned by ``BiogemeUnpickler`` in
    place of ``biogeme.results.rawResults``. Every attribute is
    initialized to ``None`` by the constructor.
    """

    def __init__(self):
        """Constructor: create all the attributes, each set to ``None``."""
        # Model identification
        self.modelName = self.userNotes = None
        # Parameters
        self.nparam = self.betaValues = self.betaNames = None
        # Reference log likelihoods
        self.initLogLike = self.nullLogLike = None
        # Estimates, final log likelihood and derivatives
        self.betas = self.logLike = None
        self.g = self.H = self.bhhh = None
        # Data
        self.dataname = self.sampleSize = self.numberOfObservations = None
        # Draws
        self.monte_carlo = self.numberOfDraws = self.typesOfDraws = None
        self.excludedData = self.drawsProcessingTime = None
        # Optimization
        self.gradientNorm = self.optimizationMessages = self.convergence = None
        # Output file names
        self.htmlFileName = self.F12FileName = None
        self.latexFileName = self.pickleFileName = None
        # Bootstrap and second order table
        self.bootstrap = self.bootstrap_time = self.secondOrderTable = None
class Beta:
    """Class gathering the information related to the parameters of the model.

    In this module, it is the class returned by ``BiogemeUnpickler`` in
    place of ``biogeme.results.beta``. Every attribute is initialized to
    ``None`` by the constructor.
    """

    def __init__(self):
        """Constructor: create all the attributes, each set to ``None``."""
        # Name, value and bounds
        self.name = self.value = None
        self.lb = self.ub = None
        # Statistics
        self.stdErr = self.tTest = self.pValue = None
        # Robust statistics
        self.robust_stdErr = self.robust_tTest = self.robust_pValue = None
        # Bootstrap statistics
        self.bootstrap_stdErr = None
        self.bootstrap_tTest = None
        self.bootstrap_pValue = None
class BiogemeUnpickler(pickle.Unpickler):
    """Unpickler substituting local classes for two ``biogeme.results`` classes."""

    def find_class(self, module, name):
        """Return the class that the pickle stream refers to.

        References to ``biogeme.results.beta`` and
        ``biogeme.results.rawResults`` are answered with ``Beta`` and
        ``RawResults``, respectively. Any other reference is resolved by
        the standard unpickler.

        :param module: name of the module recorded in the pickle stream
        :param name: name of the class recorded in the pickle stream
        :return: the class to be used to rebuild the object
        """
        # Anything outside "biogeme.results" is none of our business.
        if module != 'biogeme.results':
            return super().find_class(module, name)
        if name == 'beta':
            return Beta
        if name == 'rawResults':
            return RawResults
        # Other names of "biogeme.results" are resolved as usual.
        return super().find_class(module, name)
def read_pickle_biogeme_3_2_14(filename: str) -> RawEstimationResults:
    """Read estimation results from a pickle file, using the reader for
    version 3.2.14.

    The file is loaded with the standard ``pickle.load``. The attributes of
    the unpickled object are then copied into a ``RawEstimationResults``,
    numpy arrays being converted into plain lists.

    :param filename: name of the pickle file
    :return: raw estimation results
    :raise AttributeError: if the unpickled object does not have one of the
        attributes read by this function.
    """
    # SECURITY: unpickling can execute arbitrary code. This function must
    # only be used on pickle files coming from a trusted source.
    with open(file=filename, mode='rb') as file:
        pickled_results = pickle.load(file=file)

    betas = pickled_results.betas

    # The arrays stored in the optimization messages are replaced in place
    # by plain lists. Only existing keys are reassigned, which is safe while
    # iterating.
    optimization_messages: dict[str, Any] = pickled_results.optimizationMessages
    for key, value in optimization_messages.items():
        if isinstance(value, np.ndarray):
            optimization_messages[key] = value.tolist()

    # The bootstrap attribute is None when no bootstrap sample is stored.
    # Calling tolist() on None would raise an AttributeError.
    raw_bootstrap = pickled_results.bootstrap
    bootstrap: list[list[float]] | None = (
        None if raw_bootstrap is None else raw_bootstrap.tolist()
    )

    # The bootstrap time is optional.
    bootstrap_time: timedelta | None = getattr(
        pickled_results, 'bootstrap_time', None
    )

    return RawEstimationResults(
        model_name=pickled_results.modelName,
        user_notes=pickled_results.userNotes,
        beta_names=[beta.name for beta in betas],
        beta_values=[beta.value for beta in betas],
        lower_bounds=[beta.lb for beta in betas],
        upper_bounds=[beta.ub for beta in betas],
        gradient=pickled_results.g.tolist(),
        hessian=pickled_results.H.tolist(),
        bhhh=pickled_results.bhhh.tolist(),
        null_log_likelihood=pickled_results.nullLogLike,
        initial_log_likelihood=pickled_results.initLogLike,
        final_log_likelihood=pickled_results.logLike,
        data_name=pickled_results.dataname,
        sample_size=pickled_results.sampleSize,
        number_of_observations=pickled_results.numberOfObservations,
        monte_carlo=pickled_results.monte_carlo,
        number_of_draws=pickled_results.numberOfDraws,
        types_of_draws=pickled_results.typesOfDraws,
        number_of_excluded_data=pickled_results.excludedData,
        draws_processing_time=pickled_results.drawsProcessingTime,
        optimization_messages=optimization_messages,
        convergence=pickled_results.convergence,
        bootstrap=bootstrap,
        bootstrap_time=bootstrap_time,
    )
def read_pickle_biogeme_3_2_13(filename: str) -> RawEstimationResults:
    """Read estimation results from a pickle file, using the reader for
    version 3.2.13.

    The file is loaded with ``BiogemeUnpickler``. The attributes of the
    unpickled object are then copied into a ``RawEstimationResults``, numpy
    arrays being converted into plain lists.

    :param filename: name of the pickle file
    :return: raw estimation results
    :raise AttributeError: if the unpickled object does not have one of the
        attributes read by this function.
    """
    # SECURITY: unpickling can execute arbitrary code. This function must
    # only be used on pickle files coming from a trusted source.
    with open(file=filename, mode='rb') as file:
        pickled_results = BiogemeUnpickler(file=file).load()

    betas = pickled_results.betas

    # The arrays stored in the optimization messages are replaced in place
    # by plain lists. Only existing keys are reassigned, which is safe while
    # iterating.
    optimization_messages: dict[str, Any] = pickled_results.optimizationMessages
    for key, value in optimization_messages.items():
        if isinstance(value, np.ndarray):
            optimization_messages[key] = value.tolist()

    # The bootstrap attribute is None when no bootstrap sample is stored.
    # Calling tolist() on None would raise an AttributeError, and the file
    # would be wrongly rejected by this reader.
    raw_bootstrap = pickled_results.bootstrap
    bootstrap: list[list[float]] | None = (
        None if raw_bootstrap is None else raw_bootstrap.tolist()
    )

    # The bootstrap time is optional.
    bootstrap_time: timedelta | None = getattr(
        pickled_results, 'bootstrap_time', None
    )

    return RawEstimationResults(
        model_name=pickled_results.modelName,
        user_notes=pickled_results.userNotes,
        beta_names=[beta.name for beta in betas],
        beta_values=[beta.value for beta in betas],
        lower_bounds=[beta.lb for beta in betas],
        upper_bounds=[beta.ub for beta in betas],
        gradient=pickled_results.g.tolist(),
        hessian=pickled_results.H.tolist(),
        bhhh=pickled_results.bhhh.tolist(),
        null_log_likelihood=pickled_results.nullLogLike,
        initial_log_likelihood=pickled_results.initLogLike,
        final_log_likelihood=pickled_results.logLike,
        data_name=pickled_results.dataname,
        sample_size=pickled_results.sampleSize,
        number_of_observations=pickled_results.numberOfObservations,
        monte_carlo=pickled_results.monteCarlo,
        number_of_draws=pickled_results.numberOfDraws,
        types_of_draws=pickled_results.typesOfDraws,
        number_of_excluded_data=pickled_results.excludedData,
        draws_processing_time=pickled_results.drawsProcessingTime,
        optimization_messages=optimization_messages,
        convergence=pickled_results.convergence,
        bootstrap=bootstrap,
        bootstrap_time=bootstrap_time,
    )
def read_pickle_biogeme_3_2_12(filename: str) -> RawEstimationResults:
    """Read estimation results from a pickle file, using the reader for
    version 3.2.12.

    The file is loaded with ``BiogemeUnpickler``. The attributes of the
    unpickled object are then copied into a ``RawEstimationResults``, numpy
    arrays and numpy floats being converted into plain Python objects.

    :param filename: name of the pickle file
    :return: raw estimation results
    :raise AttributeError: if the unpickled object does not have one of the
        attributes read by this function.
    """
    # SECURITY: unpickling can execute arbitrary code. This function must
    # only be used on pickle files coming from a trusted source.
    with open(file=filename, mode='rb') as file:
        pickled_results = BiogemeUnpickler(file=file).load()

    betas = pickled_results.betas

    # The numpy values stored in the optimization messages are replaced in
    # place by plain Python objects. Only existing keys are reassigned, which
    # is safe while iterating.
    optimization_messages: dict[str, Any] = pickled_results.optimizationMessages
    for key, value in optimization_messages.items():
        if isinstance(value, (np.ndarray, np.float64, np.float32)):
            optimization_messages[key] = value.tolist()

    # No convergence flag is read from the file by this reader: the results
    # are always reported as converged.
    convergence: bool = True

    # The bootstrap attribute is None when no bootstrap sample is stored.
    raw_bootstrap = pickled_results.bootstrap
    bootstrap: list[list[float]] | None = (
        None if raw_bootstrap is None else raw_bootstrap.tolist()
    )

    # The bootstrap time is optional.
    bootstrap_time: timedelta | None = getattr(
        pickled_results, 'bootstrap_time', None
    )

    return RawEstimationResults(
        model_name=pickled_results.modelName,
        user_notes=pickled_results.userNotes,
        beta_names=[beta.name for beta in betas],
        beta_values=[beta.value for beta in betas],
        lower_bounds=[beta.lb for beta in betas],
        upper_bounds=[beta.ub for beta in betas],
        gradient=pickled_results.g.tolist(),
        hessian=pickled_results.H.tolist(),
        bhhh=pickled_results.bhhh.tolist(),
        null_log_likelihood=pickled_results.nullLogLike,
        initial_log_likelihood=pickled_results.initLogLike,
        final_log_likelihood=pickled_results.logLike,
        data_name=pickled_results.dataname,
        sample_size=pickled_results.sampleSize,
        number_of_observations=pickled_results.numberOfObservations,
        monte_carlo=pickled_results.monteCarlo,
        number_of_draws=pickled_results.numberOfDraws,
        types_of_draws=pickled_results.typesOfDraws,
        number_of_excluded_data=pickled_results.excludedData,
        draws_processing_time=pickled_results.drawsProcessingTime,
        optimization_messages=optimization_messages,
        convergence=convergence,
        bootstrap=bootstrap,
        bootstrap_time=bootstrap_time,
    )
def read_pickle_biogeme_3_2_11(filename: str) -> RawEstimationResults:
    """Read estimation results from a pickle file, using the reader for
    version 3.2.11.

    The work is entirely delegated to ``read_pickle_biogeme_3_2_12``.

    :param filename: name of the pickle file
    :return: raw estimation results
    """
    raw_estimation_results = read_pickle_biogeme_3_2_12(filename=filename)
    return raw_estimation_results
def read_pickle_biogeme_3_2_10(filename: str) -> RawEstimationResults:
    """Read estimation results from a pickle file, using the reader for
    version 3.2.10.

    The work is entirely delegated to ``read_pickle_biogeme_3_2_12``.

    :param filename: name of the pickle file
    :return: raw estimation results
    """
    raw_estimation_results = read_pickle_biogeme_3_2_12(filename=filename)
    return raw_estimation_results
def read_pickle_biogeme_3_2_8(filename: str) -> RawEstimationResults:
    """Read estimation results from a pickle file, using the reader for
    version 3.2.8.

    The work is entirely delegated to ``read_pickle_biogeme_3_2_12``.

    :param filename: name of the pickle file
    :return: raw estimation results
    """
    raw_estimation_results = read_pickle_biogeme_3_2_12(filename=filename)
    return raw_estimation_results
def read_pickle_biogeme_3_2_7(filename: str) -> RawEstimationResults:
    """Read estimation results from a pickle file, using the reader for
    version 3.2.7.

    The work is entirely delegated to ``read_pickle_biogeme_3_2_12``.

    :param filename: name of the pickle file
    :return: raw estimation results
    """
    raw_estimation_results = read_pickle_biogeme_3_2_12(filename=filename)
    return raw_estimation_results
def read_pickle_biogeme(filename: str) -> RawEstimationResults:
    """Read an old pickle file, when the version of Biogeme used to create it
    is unknown

    The version-specific readers are tried one after the other, from version
    3.2.14 down to version 3.2.7. A reader that raises an ``AttributeError``
    is considered not to match the format of the file, and the next one is
    tried. The result of the first reader that succeeds is returned.

    :param filename: name of the pickle file
    :return: raw estimation results
    :raise BiogemeError: if all readers raised an ``AttributeError``. The
        error raised by the last reader is attached as the cause.
    """
    readers = (
        read_pickle_biogeme_3_2_14,
        read_pickle_biogeme_3_2_13,
        read_pickle_biogeme_3_2_12,
        read_pickle_biogeme_3_2_11,
        read_pickle_biogeme_3_2_10,
        read_pickle_biogeme_3_2_8,
        read_pickle_biogeme_3_2_7,
    )
    # The exception variable of an "except" clause is deleted when the clause
    # ends, so the last failure is explicitly kept for the final report.
    last_error: AttributeError | None = None
    for reader in readers:
        try:
            return reader(filename=filename)
        except AttributeError as error:
            last_error = error

    error_msg = f'It was not possible to identify the format of the file {filename}'
    raise BiogemeError(error_msg) from last_error
def pickle_to_yaml(pickle_filename: str, yaml_filename: str) -> None:
    """Convert a pickle file containing estimation results into a YAML file.

    The pickle file is read with ``read_pickle_biogeme``, and the results are
    written with ``serialize_to_yaml``.

    :param pickle_filename: name of the input file
    :param yaml_filename: name of the output file
    """
    serialize_to_yaml(
        data=read_pickle_biogeme(filename=pickle_filename),
        filename=yaml_filename,
    )