"""MDCEV UtilitiesThis module provides helper functions for preparing and transformingdata specifically for MDCEV (Multiple Discrete Continuous Extreme Value) models.Michel BierlaireWed Mar 26 19:43:21 2025"""fromtypingimportIterableimportpandasaspdfrombiogeme.databaseimportDatabase
def mdcev_count(
    df: pd.DataFrame, list_of_columns: list[str], new_column: str
) -> pd.DataFrame:
    """Count, for each row, how many of the given columns are non-zero.

    In MDCEV terms this is the number of goods consumed by each observation.

    The count is computed with a vectorized comparison rather than a row-wise
    ``apply``, so no Python-level function is called per row.

    Note that a value is counted whenever it compares unequal to 0; a
    floating-point NaN therefore counts as non-zero.

    :param df: DataFrame containing the MDCEV data. It is modified in place:
        the column ``new_column`` is added (or overwritten if it exists).
    :param list_of_columns: Columns representing quantities of each good.
    :param new_column: Name of the output column to store the count.
    :return: The same DataFrame object, with the count column added.
    """
    # Boolean mask of non-zero entries, summed across the selected columns.
    non_zero_mask = df[list_of_columns] != 0
    df[new_column] = non_zero_mask.sum(axis=1)
    return df
def mdcev_row_split(
    df: pd.DataFrame, a_range: Iterable[int] | None = None
) -> list[Database]:
    """Split a DataFrame into a list of Database objects, one per row.

    Each selected row is wrapped in its own ``Database`` named ``row_<i>``,
    where ``<i>`` is the positional index of the row. The row is extracted
    with ``df.iloc[[i]]``, so each ``Database`` receives a one-row DataFrame.

    :param df: DataFrame to split.
    :param a_range: Optional positional row indices (0-based, as used by
        ``iloc``) to extract. Any iterable is accepted, including one-shot
        iterators and generators. If ``None``, every row is extracted.
    :return: List of Database objects, in the order given by ``a_range``.
    :raise IndexError: If any index in ``a_range`` is negative or not smaller
        than the number of rows. No Database is built in that case.
    """
    n_rows = len(df)
    if a_range is None:
        positions = list(range(n_rows))
    else:
        # Materialize once: the indices are traversed twice (validation, then
        # extraction), and a one-shot iterator would be exhausted by the
        # validation pass, silently producing an empty result.
        positions = list(a_range)
        if any(i < 0 or i >= n_rows for i in positions):
            raise IndexError("One or more indices in a_range are out of bounds.")
    return [
        Database(name=f'row_{i}', pandas_database=df.iloc[[i]]) for i in positions
    ]