# Source code for biogeme.data.optima

"""
.. _optima_data:

Data preparation for Optima
===========================

Prepare data for the Optima case study.

:author: Michel Bierlaire
:date: Wed Apr 12 20:52:37 2023

"""

import os

import pandas as pd
import biogeme.database as db
from biogeme.expressions import Variable

# Directory that contains this module (head of the split module path)
module_dir = os.path.split(__file__)[0]
# The data file is stored in the 'data' sub-directory next to this module
data_file_path = os.path.join(module_dir, 'data', 'optima.dat')


# %%
# Read the data
def read_data() -> db.Database:
    """Load the Optima data file and wrap it in a Biogeme database.

    The tab-separated file located at ``data_file_path`` is read with
    pandas. Rows whose ``Choice`` value is -1 are removed, and a
    ``normalized_weight`` column is added in which the weights are
    rescaled so that they sum up to the number of remaining rows.
    A series of derived variables is then defined in the database.

    :return: the database containing the filtered data and the
        derived variables.
    """
    observations = pd.read_csv(data_file_path, sep='\t')

    # Remove the observations where the chosen alternative is -1
    no_choice = observations['Choice'] == -1
    observations.drop(observations[no_choice].index, inplace=True)

    # Rescale the weights so that their sum equals the sample size
    total_weight = observations['Weight'].sum()
    sample_size = len(observations)
    observations['normalized_weight'] = (
        observations['Weight'] * sample_size / total_weight
    )

    database = db.Database(name=data_file_path, pandas_database=observations)

    # (name, expression) pairs, defined in the database in this order
    derived_variables = (
        ('ScaledIncome', CalculatedIncome / 1000),
        ('age_65_more', age >= 65),
        ('moreThanOneCar', NbCar > 1),
        ('moreThanOneBike', NbBicy > 1),
        ('individualHouse', HouseType == 1),
        ('male', Gender == 1),
        ('haveChildren', ((FamilSitu == 3) + (FamilSitu == 4)) > 0),
        ('haveGA', GenAbST == 1),
        ('highEducation', Education >= 6),
        ('childCenter', ((ResidChild == 1) + (ResidChild == 2)) > 0),
        ('childSuburb', ((ResidChild == 3) + (ResidChild == 4)) > 0),
        ('TimePT_scaled', TimePT / 200),
        ('TimeCar_scaled', TimeCar / 200),
        ('MarginalCostPT_scaled', MarginalCostPT / 10),
        ('CostCarCHF_scaled', CostCarCHF / 10),
        ('distance_km_scaled', distance_km / 5),
        ('PurpHWH', TripPurpose == 1),
        ('PurpOther', TripPurpose != 1),
    )
    for variable_name, expression in derived_variables:
        database.define_variable(variable_name, expression)

    return database
# %%
# Variables from the data
# One Variable object per name; each object carries the same name as
# the Python identifier it is bound to.

# Choice, trip attributes and identifiers
Choice = Variable('Choice')
TimePT = Variable('TimePT')
TimeCar = Variable('TimeCar')
MarginalCostPT = Variable('MarginalCostPT')
CostCarCHF = Variable('CostCarCHF')
distance_km = Variable('distance_km')
Gender = Variable('Gender')
OccupStat = Variable('OccupStat')
Weight = Variable('Weight')
ID = Variable('ID')
DestAct = Variable('DestAct')
NbTransf = Variable('NbTransf')
WalkingTimePT = Variable('WalkingTimePT')
WaitingTimePT = Variable('WaitingTimePT')
CostPT = Variable('CostPT')
CostCar = Variable('CostCar')

# Nb* counts and related household items
NbHousehold = Variable('NbHousehold')
NbChild = Variable('NbChild')
NbCar = Variable('NbCar')
NbMoto = Variable('NbMoto')
NbBicy = Variable('NbBicy')
NbBicyChild = Variable('NbBicyChild')
NbComp = Variable('NbComp')
NbTV = Variable('NbTV')
Internet = Variable('Internet')
NewsPaperSubs = Variable('NewsPaperSubs')
NbCellPhones = Variable('NbCellPhones')
NbSmartPhone = Variable('NbSmartPhone')
HouseType = Variable('HouseType')
OwnHouse = Variable('OwnHouse')
NbRoomsHouse = Variable('NbRoomsHouse')
YearsInHouse = Variable('YearsInHouse')

# Individual characteristics
Income = Variable('Income')
BirthYear = Variable('BirthYear')
Mothertongue = Variable('Mothertongue')
FamilSitu = Variable('FamilSitu')
SocioProfCat = Variable('SocioProfCat')
CalculatedIncome = Variable('CalculatedIncome')
Education = Variable('Education')

# *ST variables and car availability
HalfFareST = Variable('HalfFareST')
LineRelST = Variable('LineRelST')
GenAbST = Variable('GenAbST')
AreaRelST = Variable('AreaRelST')
OtherST = Variable('OtherST')
CarAvail = Variable('CarAvail')

# Numbered Envir* items
Envir01 = Variable('Envir01')
Envir02 = Variable('Envir02')
Envir03 = Variable('Envir03')
Envir04 = Variable('Envir04')
Envir05 = Variable('Envir05')
Envir06 = Variable('Envir06')

# Numbered Mobil* items
Mobil01 = Variable('Mobil01')
Mobil02 = Variable('Mobil02')
Mobil03 = Variable('Mobil03')
Mobil04 = Variable('Mobil04')
Mobil05 = Variable('Mobil05')
Mobil06 = Variable('Mobil06')
Mobil07 = Variable('Mobil07')
Mobil08 = Variable('Mobil08')
Mobil09 = Variable('Mobil09')
Mobil10 = Variable('Mobil10')
Mobil11 = Variable('Mobil11')
Mobil12 = Variable('Mobil12')
Mobil13 = Variable('Mobil13')
Mobil14 = Variable('Mobil14')
Mobil15 = Variable('Mobil15')
Mobil16 = Variable('Mobil16')
Mobil17 = Variable('Mobil17')
Mobil18 = Variable('Mobil18')
Mobil19 = Variable('Mobil19')
Mobil20 = Variable('Mobil20')
Mobil21 = Variable('Mobil21')
Mobil22 = Variable('Mobil22')
Mobil23 = Variable('Mobil23')
Mobil24 = Variable('Mobil24')
Mobil25 = Variable('Mobil25')
Mobil26 = Variable('Mobil26')
Mobil27 = Variable('Mobil27')

# Numbered ResidCh* items
ResidCh01 = Variable('ResidCh01')
ResidCh02 = Variable('ResidCh02')
ResidCh03 = Variable('ResidCh03')
ResidCh04 = Variable('ResidCh04')
ResidCh05 = Variable('ResidCh05')
ResidCh06 = Variable('ResidCh06')
ResidCh07 = Variable('ResidCh07')

# Numbered LifSty* items
LifSty01 = Variable('LifSty01')
LifSty02 = Variable('LifSty02')
LifSty03 = Variable('LifSty03')
LifSty04 = Variable('LifSty04')
LifSty05 = Variable('LifSty05')
LifSty06 = Variable('LifSty06')
LifSty07 = Variable('LifSty07')
LifSty08 = Variable('LifSty08')
LifSty09 = Variable('LifSty09')
LifSty10 = Variable('LifSty10')
LifSty11 = Variable('LifSty11')
LifSty12 = Variable('LifSty12')
LifSty13 = Variable('LifSty13')
LifSty14 = Variable('LifSty14')

# Remaining columns
TripPurpose = Variable('TripPurpose')
TypeCommune = Variable('TypeCommune')
UrbRur = Variable('UrbRur')
LangCode = Variable('LangCode')
ClassifCodeLine = Variable('ClassifCodeLine')
frequency = Variable('frequency')
ResidChild = Variable('ResidChild')
NbTrajects = Variable('NbTrajects')
FreqCarPar = Variable('FreqCarPar')
FreqTrainPar = Variable('FreqTrainPar')
FreqOtherPar = Variable('FreqOtherPar')
FreqTripHouseh = Variable('FreqTripHouseh')
Region = Variable('Region')
InVehicleTime = Variable('InVehicleTime')
ModeToSchool = Variable('ModeToSchool')
ReportedDuration = Variable('ReportedDuration')
CoderegionCAR = Variable('CoderegionCAR')
age = Variable('age')

# Names that read_data adds: the normalized weight column and the
# variables it defines in the database
normalized_weight = Variable('normalized_weight')
ScaledIncome = Variable('ScaledIncome')
age_65_more = Variable('age_65_more')
moreThanOneCar = Variable('moreThanOneCar')
moreThanOneBike = Variable('moreThanOneBike')
individualHouse = Variable('individualHouse')
male = Variable('male')
haveChildren = Variable('haveChildren')
haveGA = Variable('haveGA')
highEducation = Variable('highEducation')
childCenter = Variable('childCenter')
childSuburb = Variable('childSuburb')
TimePT_scaled = Variable('TimePT_scaled')
TimeCar_scaled = Variable('TimeCar_scaled')
MarginalCostPT_scaled = Variable('MarginalCostPT_scaled')
CostCarCHF_scaled = Variable('CostCarCHF_scaled')
distance_km_scaled = Variable('distance_km_scaled')
PurpHWH = Variable('PurpHWH')
PurpOther = Variable('PurpOther')