# Source code for biogeme.tools.pandas_to_latex
import math
from typing import Iterable
import numpy as np
import pandas as pd
def _add_trailing_zero(formatted_number: str) -> str:
"""Ensure there is at least one decimal digit."""
if not formatted_number:
return "0.0"
if "." in formatted_number:
if formatted_number[-1] == ".":
return f"{formatted_number}0"
return formatted_number
return f"{formatted_number}.0"
def _format_real_number(value: float) -> str:
"""Format a real number like Biogeme, for LaTeX tables.
- Use .3g formatting.
- Avoid losing the decimal point.
- Preserve scientific notation if used.
"""
formatted_value = f"{value:.3g}"
# If scientific notation is used, protect the mantissa
if "e" in formatted_value:
left, right = formatted_value.split("e")
return f"{_add_trailing_zero(left)}e{right}"
if "E" in formatted_value:
left, right = formatted_value.split("E")
return f"{_add_trailing_zero(left)}E{right}"
return _add_trailing_zero(formatted_value)
# [docs]
def dataframe_to_latex_decimal(
df: pd.DataFrame,
float_columns: Iterable[str] | None = None,
include_index: bool = True,
caption: str | None = None,
label: str | None = None,
) -> str:
"""Generate a LaTeX tabular with r@{.}l alignment for float columns.
Parameters
----------
df
Input DataFrame.
float_columns
Names of columns to treat as numeric with decimal alignment.
If None, all columns with float dtype are used.
include_index
If True, include the index as the first column (left aligned).
caption
Optional LaTeX caption (without \\caption{} wrapper).
label
Optional LaTeX label, used as \\label{...} if provided.
Returns
-------
latex
A LaTeX string with \\begin{tabular} ... \\end{tabular}.
"""
if float_columns is None:
float_columns = [
c for c in df.columns if np.issubdtype(df[c].dtype, np.floating)
]
float_columns = list(float_columns)
# Column alignment specification
col_specs: list[str] = []
if include_index:
col_specs.append("l")
for col in df.columns:
if col in float_columns:
col_specs.append("r@{.}l")
else:
col_specs.append("l")
col_spec_str = "".join(col_specs)
lines: list[str] = []
lines.append(f"\\begin{{tabular}}{{{col_spec_str}}}")
# Optional caption/label for a standalone table environment
if caption is not None or label is not None:
lines.append("\\hline")
if caption is not None:
lines.append(f"\\multicolumn{{{len(col_specs)}}}{{c}}{{{caption}}}\\\\")
if label is not None:
lines.append(
f"\\multicolumn{{{len(col_specs)}}}{{c}}{{\\label{{{label}}}}}\\\\"
)
lines.append("\\hline")
# Header row
header_cells: list[str] = []
if include_index:
header_cells.append("") # index column has no header
for col in df.columns:
safe_name = str(col).replace("_", r"\_")
if col in float_columns:
# Span the two decimal-aligned columns
header_cells.append(f"\\multicolumn{{2}}{{c}}{{{safe_name}}}")
else:
header_cells.append(safe_name)
lines.append(" & ".join(header_cells) + r" \\")
lines.append(r"\hline")
# Data rows
for idx, row in df.iterrows():
row_cells: list[str] = []
if include_index:
idx_str = str(idx).replace("_", r"\_")
row_cells.append(idx_str)
for col in df.columns:
val = row[col]
if col in float_columns:
if val is None or (isinstance(val, float) and math.isnan(val)):
# Empty numeric cell: keep both parts empty
row_cells.append("") # integer part
row_cells.append("") # fractional part
else:
formatted = _format_real_number(float(val))
# r@{.}l: replace the dot by '&' so we supply two cells
left_right = formatted.split(".", maxsplit=1)
if len(left_right) == 2:
left, right = left_right
else:
# Should not happen thanks to _format_real_number, but be safe
left, right = formatted, ""
row_cells.append(left)
row_cells.append(right)
else:
cell = str(val)
cell = cell.replace("_", r"\_")
row_cells.append(cell)
lines.append(" & ".join(row_cells) + r" \\")
lines.append(r"\end{tabular}")
return "\n".join(lines)