Source code for ogphl.macro_params

"""
This module uses data from World Bank WDI, the IMF, and UN ILO to find
values for parameters for the OG-PHL model that rely on macro data for
calibration.
"""

# imports
import datetime
from io import StringIO
from pathlib import Path

import numpy as np
import pandas as pd
import requests

# First and last year (both inclusive, used as a label slice) of the window
# over which get_macro_params averages annual growth in real GDP per capita.
# The window stops at 2019 so that COVID-era volatility does not drive the
# growth estimate.
GDP_GROWTH_START_YEAR = 2000
GDP_GROWTH_END_YEAR = 2019
# Number of years subtracted from the requested end year when picking the
# World Bank external-debt observation in get_macro_params.
# NOTE(review): presumably reflects the publication lag of the external debt
# series -- confirm against the data source.
EXTERNAL_DEBT_REPORTING_LAG_YEARS = 2
# HTTP headers sent with the ILOSTAT request in _get_ilo_gamma; they carry a
# browser-like User-Agent string.
# NOTE(review): presumably needed because the endpoint rejects the default
# client User-Agent -- confirm.
ILOSTAT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    )
}


def _fetch_wb_data(indicators, country_iso, start_year, end_year, source):
    """
    Fetch a set of World Bank indicators and return a single DataFrame.

    Args:
        indicators (dict): mapping of human-readable labels to indicator codes
        country_iso (str): ISO country code
        start_year (int): first year to request
        end_year (int): last year to request
        source (int): World Bank source ID

    Returns:
        pandas.DataFrame: DataFrame indexed by year/quarter label
    """
    if source == 2:
        date_range = f"{start_year}:{end_year}"
    elif source == 20:
        date_range = f"{start_year}Q1:{end_year}Q4"
    else:
        raise ValueError(f"Unsupported World Bank source: {source}")

    data_frames = []
    for label, indicator_code in indicators.items():
        response = requests.get(
            (
                "https://api.worldbank.org/v2/country/"
                f"{country_iso}/indicator/{indicator_code}"
            ),
            params={
                "date": date_range,
                "source": source,
                "format": "json",
                "per_page": 10000,
            },
            timeout=30,
        )
        response.raise_for_status()
        try:
            payload = response.json()
        except ValueError as exc:
            raise ValueError(
                f"Malformed World Bank response for {indicator_code}"
            ) from exc

        if (
            not isinstance(payload, list)
            or len(payload) < 2
            or not isinstance(payload[1], list)
            or not payload[1]
        ):
            raise ValueError(
                f"Empty or malformed World Bank response for {indicator_code}"
            )

        series_data = {}
        for row in payload[1]:
            date = row.get("date")
            if date is None:
                continue
            series_data[date] = row.get("value")

        if not series_data:
            raise ValueError(
                f"No dated observations in World Bank response for "
                f"{indicator_code}"
            )

        series = pd.Series(series_data, name=label)
        series = pd.to_numeric(series, errors="coerce")
        data_frames.append(series.to_frame())

    data = pd.concat(data_frames, axis=1)
    data.index.name = "year"
    return data.sort_index(ascending=False)


def _annual_index(data):
    """
    Convert a World Bank annual response index to integer years.
    """
    annual_data = data.copy()
    annual_data.index = pd.to_numeric(annual_data.index, errors="coerce")
    annual_data = annual_data.loc[annual_data.index.notna()]
    annual_data.index = annual_data.index.astype(int)
    return annual_data.sort_index()


def _latest_at_or_before(series, target_year, source_name):
    """
    Return the value for target_year or the latest nonmissing prior year.
    """
    valid = series.dropna()
    valid = valid.loc[valid.index <= int(target_year)]
    if valid.empty:
        raise ValueError(
            f"No complete {source_name} data available up to {target_year}"
        )

    selected_year = (
        int(target_year)
        if int(target_year) in valid.index
        else int(valid.index.max())
    )
    if selected_year != int(target_year):
        print(
            f"Warning: No {source_name} data for {target_year}. "
            f"Using last available year: {selected_year}"
        )
    return valid.loc[selected_year]


def _get_imf_macro_params(country_iso, target_year, data_path=None):
    """
    Fetch IMF GFS data and compute alpha_T and alpha_G.

    The IMF SDMX endpoint is queried for the ``GFS_SOO`` dataflow and the
    indicators G2_T, G24_T, G27_T and G271_T are extracted. The most recent
    year at or before ``target_year`` for which all four indicators are
    present is used:

        alpha_T = (G27_T - G271_T) / 100
        alpha_G = (G2_T - G24_T - G27_T) / 100

    NOTE(review): the indicator codes presumably denote expense (G2),
    interest (G24), social benefits (G27) and social security benefits
    (G271), reported in percent of GDP -- confirm against the IMF codelist.

    Args:
        country_iso (str): ISO alpha-3 country code
        target_year (int): preferred calibration year
        data_path (str | Path | None): optional path to save IMF CSV data

    Returns:
        dict: IMF-derived macro parameters, with keys ``"alpha_T"`` and
            ``"alpha_G"``, each a one-element list

    Raises:
        ValueError: if the response is empty or malformed, or if no year at
            or before ``target_year`` has all required indicators
    """
    required_indicators = {"G2_T", "G24_T", "G27_T", "G271_T"}
    required_columns = sorted(required_indicators)
    incomplete_message = (
        f"No complete IMF data available for {country_iso} "
        f"up to {target_year}"
    )
    data_path = Path(data_path) if data_path is not None else None
    response = requests.get(
        (
            "https://api.imf.org/external/sdmx/3.0/data/dataflow/"
            f"IMF.STA/GFS_SOO/12.0.0/"
            f"{country_iso}.S1311.G2M.*.POGDP_PT.A"
        ),
        timeout=30,
    )
    response.raise_for_status()

    # Decode the whole SDMX-JSON payload inside one guard so that any
    # structural surprise surfaces as the same ValueError rather than a
    # bare KeyError/IndexError from deep inside the loop.
    records = []
    try:
        payload = response.json()
        data = payload["data"]
        structure = data["structures"][0]
        data_set = data["dataSets"][0]
        series_dimensions = structure["dimensions"]["series"]
        observation_years = [
            value.get("id", value.get("value"))
            for value in structure["dimensions"]["observation"][0]["values"]
        ]
        for series_key, series in data_set["series"].items():
            # A series key is a colon-separated list of positions, one per
            # series dimension, pointing into that dimension's value list.
            dimension_indexes = [int(idx) for idx in series_key.split(":")]
            labels = {
                dim["id"]: dim["values"][idx]["id"]
                for dim, idx in zip(series_dimensions, dimension_indexes)
            }
            indicator = labels.get("INDICATOR")
            if indicator not in required_indicators:
                continue
            for observation_key, observation in series.get(
                "observations", {}
            ).items():
                value = observation[0]
                if value is None:
                    continue
                records.append(
                    {
                        "year": observation_years[int(observation_key)],
                        "indicator": indicator,
                        "value": float(value),
                        "country_iso": country_iso,
                        "sector": "S1311",
                        "dataset": "IMF.STA:GFS_SOO(12.0.0)",
                    }
                )
    except (
        ValueError,
        KeyError,
        IndexError,
        TypeError,
        AttributeError,
    ) as exc:
        raise ValueError(
            "Empty or malformed IMF response for GFS_SOO"
        ) from exc

    imf_data = pd.DataFrame(records)
    if imf_data.empty:
        raise ValueError("Empty or malformed IMF response for GFS_SOO")

    if data_path is not None:
        data_path.parent.mkdir(parents=True, exist_ok=True)
        imf_data.sort_values(["indicator", "year"]).to_csv(
            data_path, index=False
        )
        print(f"IMF data saved to {data_path}")

    imf_data["year"] = pd.to_numeric(imf_data["year"], errors="coerce")
    imf_data["value"] = pd.to_numeric(imf_data["value"], errors="coerce")
    imf_data = imf_data.dropna(subset=["year", "value"])
    if imf_data.empty:
        raise ValueError(incomplete_message)

    available = (
        imf_data.pivot_table(
            index="year",
            columns="indicator",
            values="value",
            aggfunc="first",
        )
        # An indicator with no observations has no column after the pivot.
        # Reindexing adds it as all-NaN so such years are filtered out as
        # incomplete instead of dropna raising KeyError.
        .reindex(columns=required_columns)
        .sort_index()
        .dropna(subset=required_columns)
    )
    available = available.loc[available.index <= int(target_year)]

    if available.empty:
        raise ValueError(incomplete_message)

    selected_year = (
        int(target_year)
        if int(target_year) in available.index
        else int(available.index.max())
    )
    if selected_year != int(target_year):
        print(
            f"Warning: No IMF data for {target_year}. "
            f"Using last available year: {selected_year}"
        )

    values = available.loc[selected_year]
    return {
        "alpha_T": [(values["G27_T"] - values["G271_T"]) / 100],
        "alpha_G": [
            (values["G2_T"] - values["G24_T"] - values["G27_T"]) / 100
        ],
    }


def _get_ilo_gamma(country_iso, start_year, target_year):
    """
    Download the ILOSTAT labor-share series and derive capital's share.

    The ``LAP_2GDP_NOC_RT_A`` indicator is requested as CSV from
    ``start_year`` onward. The value for ``target_year`` (or the latest
    earlier year with data) is taken as the labor share, which the code
    treats as a percentage.

    Args:
        country_iso (str): ISO country code passed as ``ref_area``
        start_year (int): first year to request
        target_year (int): preferred calibration year

    Returns:
        list: one-element list holding ``1 - labor_share / 100``
    """
    target = (
        "https://rplumber.ilo.org/data/indicator/"
        f"?id=LAP_2GDP_NOC_RT_A&ref_area={country_iso}"
        f"&timefrom={start_year}&type=both&format=.csv"
    )
    print("ILO data target = ", target)
    reply = requests.get(target, headers=ILOSTAT_HEADERS, timeout=30)
    reply.raise_for_status()

    # Keep only the year and value columns and force both to numeric;
    # anything unparsable becomes NaN and is dropped below.
    raw = pd.read_csv(StringIO(reply.text))
    numeric = raw[["time", "obs_value"]].apply(
        pd.to_numeric, errors="coerce"
    )
    clean = numeric.dropna(subset=["time", "obs_value"])

    share_by_year = clean.set_index("time")["obs_value"]
    labor_share = _latest_at_or_before(share_by_year, target_year, "ILOSTAT")
    capital_share = 1 - (labor_share / 100)
    return [capital_share]


def get_macro_params(
    data_start_date=datetime.datetime(1947, 1, 1),
    data_end_date=datetime.datetime(2023, 1, 1),
    country_iso="PHL",
    update_from_api=False,
    imf_data_path=None,
):
    """
    Compute values of parameters that are derived from macro data.

    When ``update_from_api`` is False no data is fetched and an empty
    dictionary is returned. Otherwise each data source is tried in turn;
    a failure in one source is reported on stdout and leaves that source's
    parameters out of the result without affecting the others.

    Parameters that may be set:
        * World Bank: ``initial_debt_ratio``, ``initial_foreign_debt_ratio``,
          ``zeta_D``, ``g_y_annual`` (set together or not at all)
        * ILOSTAT: ``gamma``
        * IMF: ``alpha_T``, ``alpha_G``
        * Regression on modelled yields: ``r_gov_shift``, ``r_gov_scale``

    Args:
        data_start_date (datetime): start date for data (only the year is
            used)
        data_end_date (datetime): end date for data (only the year is used)
        country_iso (str): ISO code for country
        update_from_api (bool): Set True to pull updated macro data
        imf_data_path (str | Path | None): optional path to save IMF CSV data

    Returns:
        macro_parameters (dict): dictionary of parameter values
    """
    macro_parameters = {}

    if not update_from_api:
        print("Not updating from World Bank API")
        print("Not updating from ILOSTAT API")
        print("Not updating alpha_T, alpha_G, r_gov_shift, r_gov_scale")
        return macro_parameters

    # ------------------------------------------------------------------
    # World Bank WDI
    # ------------------------------------------------------------------
    gdp_per_capita = "GDP per capita (constant 2015 US$)"
    ppg_debt = (
        "External debt stocks, public and publicly guaranteed (PPG) "
        "(DOD, current US$)"
    )
    total_debt = "External debt stocks, total (DOD, current US$)"
    debt_to_gni = r"External debt stocks (% of GNI)"
    wb_a_variable_dict = {
        gdp_per_capita: "NY.GDP.PCAP.KD",
        "Real GDP (constant 2015 US$)": "NY.GDP.MKTP.KD",
        "Nominal GDP (current US$)": "NY.GDP.MKTP.CD",
        "General government final consumption expenditure (current "
        "US$)": "NE.CON.GOVT.CD",
        ppg_debt: "DT.DOD.DPPG.CD",
        total_debt: "DT.DOD.DECT.CD",
        debt_to_gni: "DT.DOD.DECT.GN.ZS",
    }
    try:
        wb_data_a = _annual_index(
            _fetch_wb_data(
                wb_a_variable_dict,
                country_iso,
                data_start_date.year,
                data_end_date.year,
                source=2,
            )
        )
        # External debt as a share of GNI, scaled by the public and
        # publicly guaranteed fraction of total external debt.
        foreign_debt_ratio = (
            wb_data_a[debt_to_gni]
            * (wb_data_a[ppg_debt] / wb_data_a[total_debt])
            / 100
        )
        initial_foreign_debt_ratio = _latest_at_or_before(
            foreign_debt_ratio,
            data_end_date.year - EXTERNAL_DEBT_REPORTING_LAG_YEARS,
            "World Bank external debt",
        )
        # Use the pre-pandemic growth window to avoid COVID-era
        # volatility driving the steady-state productivity target.
        g_y_annual = (
            wb_data_a[gdp_per_capita]
            .loc[GDP_GROWTH_START_YEAR:GDP_GROWTH_END_YEAR]
            .pct_change()
            .mean()
        )
        wb_parameters = {
            # Gross national government debt as a share of GDP. This is a
            # documented baseline source value, not computed from the WDI
            # pull.
            "initial_debt_ratio": 0.60,
            "initial_foreign_debt_ratio": initial_foreign_debt_ratio,
            "zeta_D": [initial_foreign_debt_ratio],
            "g_y_annual": g_y_annual,
        }
    except Exception as exc:
        print(f"Failed to retrieve data from World Bank: {exc}")
        print("Will not update the following parameters:")
        print(
            "[initial_debt_ratio, initial_foreign_debt_ratio, zeta_D, "
            "g_y_annual]"
        )
    else:
        # Commit all World Bank parameters together so a late failure
        # cannot leave a partial update behind.
        macro_parameters.update(wb_parameters)
        print(
            f"initial_debt_ratio set from documented source: "
            f"{macro_parameters['initial_debt_ratio']}"
        )
        print(
            f"initial_foreign_debt_ratio updated from World Bank "
            f"API: {macro_parameters['initial_foreign_debt_ratio']}"
        )
        print(
            f"zeta_D updated from World Bank API: "
            f"{macro_parameters['zeta_D']}"
        )
        print(
            f"g_y_annual updated from World Bank API: "
            f"{macro_parameters['g_y_annual']}"
        )

    # ------------------------------------------------------------------
    # ILOSTAT
    # ------------------------------------------------------------------
    try:
        gamma = _get_ilo_gamma(
            country_iso,
            data_start_date.year,
            data_end_date.year,
        )
    except Exception as exc:
        print(f"Failed to retrieve data from ILOSTAT: {exc}")
        print("Will not update gamma")
    else:
        macro_parameters["gamma"] = gamma
        print(f"gamma updated from ILOSTAT API: {macro_parameters['gamma']}")

    # ------------------------------------------------------------------
    # IMF GFS
    # ------------------------------------------------------------------
    try:
        imf_parameters = _get_imf_macro_params(
            country_iso,
            data_end_date.year,
            data_path=imf_data_path,
        )
    except Exception as exc:
        print(f"Failed to retrieve data from IMF: {exc}")
        print("Will not update alpha_T, alpha_G")
    else:
        macro_parameters.update(imf_parameters)
        print(f"alpha_T updated from IMF data: {macro_parameters['alpha_T']}")
        print(f"alpha_G updated from IMF data: {macro_parameters['alpha_G']}")

    # ------------------------------------------------------------------
    # Estimate the discount on sovereign yields relative to private debt.
    # Follow the methodology in Li, Magud, Werner, Witte (2021), available
    # at:
    # https://www.imf.org/en/Publications/WP/Issues/2021/06/04/The-Long-Run-Impact-of-Sovereign-Yields-on-Corporate-Yields-in-Emerging-Markets-50224
    # Steps:
    # 1) Generate modelled corporate yields (corp_yhat) for a range of
    #    sovereign yields (sov_y) using the estimated equation in col 2 of
    #    table 8 (and figure 3).
    # 2) Estimate the OLS using sovereign yields as the dependent variable.
    # ------------------------------------------------------------------
    try:
        # Imported here so a missing statsmodels install only disables
        # this estimate instead of breaking the module import.
        import statsmodels.api as sm

        sov_y = np.arange(20, 120) / 10
        corp_yhat = 8.199 - (2.975 * sov_y) + (0.478 * sov_y**2)
        regressors = sm.add_constant(corp_yhat)
        res = sm.OLS(sov_y, regressors).fit()
        # First term is the constant and needs to be divided by 100 to
        # have the correct unit. Second term is the coefficient.
        r_gov_shift = [-res.params[0] / 100]
        r_gov_scale = [res.params[1]]
    except Exception as exc:
        print(f"Failed to compute r_gov_shift, r_gov_scale: {exc}")
        print("Will not update r_gov_shift, r_gov_scale")
    else:
        macro_parameters["r_gov_shift"] = r_gov_shift
        macro_parameters["r_gov_scale"] = r_gov_scale
        print(
            f"r_gov_shift updated from IMF data: "
            f"{macro_parameters['r_gov_shift']}"
        )
        print(
            f"r_gov_scale updated from IMF data: "
            f"{macro_parameters['r_gov_scale']}"
        )

    return macro_parameters