# Source code for biogeme.data.swissmetro

"""
.. _swissmetro_data:

Data preparation for Swissmetro
===============================

Data preparation and definition of the variables.

:author: Michel Bierlaire, EPFL
:date: Mon Mar  6 15:17:03 2023

"""

import pandas as pd
import biogeme.database as db
from biogeme.expressions import Variable

import os


def read_data() -> db.Database:
    """Read the Swissmetro data file and prepare the Biogeme database.

    The tab-separated file ``data/swissmetro.dat`` is looked up relative to
    the directory containing this module, loaded into a pandas data frame
    and wrapped in a database named ``'swissmetro'``. The exclusion
    expression ``CHOICE == 0`` is then passed to ``database.remove`` and
    the derived variables listed in the body are added to the database.

    :return: the database, after the exclusion filter has been applied and
        the derived variables have been defined.
    """
    # Resolve the data file relative to this module rather than the current
    # working directory, so the function can be called from anywhere.
    module_dir = os.path.dirname(__file__)
    data_file_path = os.path.join(module_dir, 'data', 'swissmetro.dat')

    # Read the data.
    df = pd.read_csv(data_file_path, sep='\t')
    database = db.Database('swissmetro', df)

    # Discard the observations flagged by the exclusion expression.
    # Alternative filter, kept for reference:
    # exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
    exclude = CHOICE == 0
    database.remove(exclude)

    # Definition of new variables.
    # SM_COST and TRAIN_COST are defined first because the corresponding
    # *_SCALED variables below are expressed in terms of them.
    database.define_variable('SM_COST', SM_CO * (GA == 0))
    database.define_variable('TRAIN_COST', TRAIN_CO * (GA == 0))
    database.define_variable('CAR_AV_SP', CAR_AV * (SP != 0))
    database.define_variable('TRAIN_AV_SP', TRAIN_AV * (SP != 0))

    # Scaled versions: each one is the source variable divided by 100.
    database.define_variable('TRAIN_TT_SCALED', TRAIN_TT / 100)
    database.define_variable('TRAIN_COST_SCALED', TRAIN_COST / 100)
    database.define_variable('SM_TT_SCALED', SM_TT / 100)
    database.define_variable('SM_COST_SCALED', SM_COST / 100)
    database.define_variable('CAR_TT_SCALED', CAR_TT / 100)
    database.define_variable('CAR_CO_SCALED', CAR_CO / 100)

    return database
# Definition of the variables.
#
# Each module-level name is a Biogeme ``Variable`` whose label is identical
# to the Python identifier, so model specifications can refer to the data
# symbolically.

# Variables that are not created by ``read_data``
# (presumably columns of the data file -- confirm against swissmetro.dat).
GROUP = Variable('GROUP')
SURVEY = Variable('SURVEY')
SP = Variable('SP')
ID = Variable('ID')
PURPOSE = Variable('PURPOSE')
FIRST = Variable('FIRST')
TICKET = Variable('TICKET')
WHO = Variable('WHO')
LUGGAGE = Variable('LUGGAGE')
AGE = Variable('AGE')
MALE = Variable('MALE')
INCOME = Variable('INCOME')
GA = Variable('GA')
ORIGIN = Variable('ORIGIN')
DEST = Variable('DEST')
TRAIN_AV = Variable('TRAIN_AV')
CAR_AV = Variable('CAR_AV')
SM_AV = Variable('SM_AV')
TRAIN_TT = Variable('TRAIN_TT')
TRAIN_CO = Variable('TRAIN_CO')
TRAIN_HE = Variable('TRAIN_HE')
SM_TT = Variable('SM_TT')
SM_CO = Variable('SM_CO')
SM_HE = Variable('SM_HE')
SM_SEATS = Variable('SM_SEATS')
CAR_TT = Variable('CAR_TT')
CAR_CO = Variable('CAR_CO')
CHOICE = Variable('CHOICE')

# Variables added to the database by ``read_data`` through
# ``database.define_variable``.
SM_COST = Variable('SM_COST')
TRAIN_COST = Variable('TRAIN_COST')
CAR_AV_SP = Variable('CAR_AV_SP')
TRAIN_AV_SP = Variable('TRAIN_AV_SP')
TRAIN_TT_SCALED = Variable('TRAIN_TT_SCALED')
TRAIN_COST_SCALED = Variable('TRAIN_COST_SCALED')
SM_TT_SCALED = Variable('SM_TT_SCALED')
SM_COST_SCALED = Variable('SM_COST_SCALED')
CAR_TT_SCALED = Variable('CAR_TT_SCALED')
CAR_CO_SCALED = Variable('CAR_CO_SCALED')