"""
This module uses data from World Bank WDI, World Bank Quarterly Public
Sector Debt (QPSD) database, the IMF, and UN ILO to find values for
parameters for the OG-ZAF model that rely on macro data for calibration.
"""
# imports
import pandas as pd
import numpy as np
import requests
import datetime
from io import StringIO
from pathlib import Path
def _fetch_wb_data(indicators, country_iso, start_year, end_year, source):
    """
    Fetch a set of World Bank indicators and return a single DataFrame.

    Each indicator is requested separately from the World Bank v2 REST
    API and becomes one numeric column of the result, named by its
    label.

    Args:
        indicators (dict): mapping of human-readable labels to indicator
            codes
        country_iso (str): ISO country code
        start_year (int): first year to request
        end_year (int): last year to request
        source (int): World Bank source ID. Only 2 (requested with an
            annual date range) and 20 (requested with a quarterly date
            range) are supported.

    Returns:
        pandas.DataFrame: DataFrame indexed by year/quarter label, in
            descending label order, with one column per indicator label

    Raises:
        ValueError: if `source` is unsupported, `indicators` is empty,
            or a response is empty, malformed, or has no dated
            observations
        requests.HTTPError: if the API answers with an error status
    """
    if source == 2:
        date_range = f"{start_year}:{end_year}"
    elif source == 20:
        date_range = f"{start_year}Q1:{end_year}Q4"
    else:
        raise ValueError(f"Unsupported World Bank source: {source}")
    if not indicators:
        # Fail with a clear message instead of an opaque pd.concat error.
        raise ValueError("No World Bank indicators requested")

    columns = []
    for label, indicator_code in indicators.items():
        response = requests.get(
            (
                "https://api.worldbank.org/v2/country/"
                f"{country_iso}/indicator/{indicator_code}"
            ),
            params={
                "date": date_range,
                "source": source,
                "format": "json",
                "per_page": 10000,
            },
            # Without a timeout a stalled connection would block forever.
            timeout=30,
        )
        response.raise_for_status()
        try:
            payload = response.json()
        except ValueError as exc:
            raise ValueError(
                f"Malformed World Bank response for {indicator_code}"
            ) from exc
        # The code expects a two-element list whose second element is the
        # non-empty list of observation rows.
        if (
            not isinstance(payload, list)
            or len(payload) < 2
            or not isinstance(payload[1], list)
            or not payload[1]
        ):
            raise ValueError(
                f"Empty or malformed World Bank response for {indicator_code}"
            )
        # Rows that are not mappings or that carry no date are skipped.
        series_data = {
            row["date"]: row.get("value")
            for row in payload[1]
            if isinstance(row, dict) and row.get("date") is not None
        }
        if not series_data:
            raise ValueError(
                f"No dated observations in World Bank response for {indicator_code}"
            )
        series = pd.to_numeric(
            pd.Series(series_data, name=label), errors="coerce"
        )
        columns.append(series.to_frame())

    data = pd.concat(columns, axis=1)
    data.index.name = "year"
    # Preserve descending time order used by the existing pct_change(-1) logic.
    return data.sort_index(ascending=False)
def _get_imf_macro_params(
    country_iso,
    target_year,
    data_path=None,
):
    """
    Fetch IMF GFS data and compute alpha_T and alpha_G.

    The GFS_SOO dataflow is requested from the IMF SDMX 3.0 API and the
    four indicators G2_T, G24_T, G27_T and G271_T are kept. The
    parameters are taken from `target_year` when all four indicators are
    available for it, otherwise from the most recent earlier year for
    which they all are:

        alpha_T = (G27_T - G271_T) / 100
        alpha_G = (G2_T - G24_T - G27_T) / 100

    Args:
        country_iso (str): ISO alpha-3 country code
        target_year (int): preferred calibration year
        data_path (str | Path | None): optional path to save IMF CSV data

    Returns:
        dict: IMF-derived macro parameters, with keys "alpha_T" and
            "alpha_G" each holding a one-element list

    Raises:
        ValueError: if the response is empty or malformed, or no year up
            to `target_year` has all required indicators
        requests.HTTPError: if the API answers with an error status
    """
    required_indicators = {"G2_T", "G24_T", "G27_T", "G271_T"}
    data_path = Path(data_path) if data_path is not None else None
    target_year = int(target_year)

    response = requests.get(
        (
            "https://api.imf.org/external/sdmx/3.0/data/dataflow/"
            f"IMF.STA/GFS_SOO/12.0.0/"
            f"{country_iso}.S1311.G2M.*.POGDP_PT.A"
        ),
        timeout=30,
    )
    response.raise_for_status()

    records = []
    # Decoding stays inside the try so that any structural surprise in the
    # payload is reported as the same ValueError, not a raw KeyError.
    try:
        payload = response.json()
        data = payload["data"]
        structure = data["structures"][0]
        data_set = data["dataSets"][0]
        series_dimensions = structure["dimensions"]["series"]
        observation_years = [
            value.get("id", value.get("value"))
            for value in structure["dimensions"]["observation"][0]["values"]
        ]
        for series_key, series in data_set["series"].items():
            # A series key is a ":"-separated list of positions into each
            # series dimension's list of values.
            dimension_indexes = [int(idx) for idx in series_key.split(":")]
            labels = {
                dim["id"]: dim["values"][idx]["id"]
                for dim, idx in zip(series_dimensions, dimension_indexes)
            }
            indicator = labels.get("INDICATOR")
            if indicator not in required_indicators:
                continue
            for observation_key, observation in series.get(
                "observations", {}
            ).items():
                value = observation[0]
                if value is None:
                    continue
                records.append(
                    {
                        "year": observation_years[int(observation_key)],
                        "indicator": indicator,
                        "value": float(value),
                        "country_iso": country_iso,
                        "sector": "S1311",
                        "dataset": "IMF.STA:GFS_SOO(12.0.0)",
                    }
                )
    except (
        ValueError,
        KeyError,
        IndexError,
        TypeError,
        AttributeError,
    ) as exc:
        raise ValueError(
            "Empty or malformed IMF response for GFS_SOO"
        ) from exc

    imf_data = pd.DataFrame(records)
    if imf_data.empty:
        raise ValueError("Empty or malformed IMF response for GFS_SOO")

    if data_path is not None:
        data_path.parent.mkdir(parents=True, exist_ok=True)
        imf_data.sort_values(["indicator", "year"]).to_csv(
            data_path, index=False
        )
        print(f"IMF data saved to {data_path}")

    imf_data["year"] = pd.to_numeric(imf_data["year"], errors="coerce")
    imf_data["value"] = pd.to_numeric(imf_data["value"], errors="coerce")
    imf_data = imf_data.dropna(subset=["year", "value"])

    indicator_columns = sorted(required_indicators)
    available = (
        imf_data.pivot_table(
            index="year", columns="indicator", values="value", aggfunc="first"
        )
        # An indicator absent from the whole response becomes an all-NaN
        # column, so it is handled as "no complete year" below rather
        # than raising KeyError in dropna.
        .reindex(columns=indicator_columns)
        .sort_index()
        .dropna(subset=indicator_columns)
    )
    available = available.loc[available.index <= target_year]
    if available.empty:
        raise ValueError(
            f"No complete IMF data available for {country_iso} up to {target_year}"
        )

    if target_year in available.index:
        selected_year = target_year
    else:
        selected_year = int(available.index.max())
        print(
            f"Warning: No IMF data for {target_year}. "
            f"Using last available year: {selected_year}"
        )

    values = available.loc[selected_year]
    return {
        "alpha_T": [(values["G27_T"] - values["G271_T"]) / 100],
        "alpha_G": [
            (values["G2_T"] - values["G24_T"] - values["G27_T"]) / 100
        ],
    }
# ("[docs]" here was presumably link text left over from the rendered documentation page; it is not code)
def get_macro_params(
    data_start_date=datetime.datetime(1947, 1, 1),
    data_end_date=datetime.datetime(2024, 12, 31),
    country_iso="ZAF",
    update_from_api=False,
    imf_data_year=None,
    imf_data_path=None,
):
    """
    Compute values of parameters that are derived from macro data.

    Every source is handled on a best-effort basis: if a source fails,
    a message is printed and the parameters it would have supplied are
    simply left out of the returned dictionary. A parameter is also left
    out when no usable observation exists for it.

    Args:
        data_start_date (datetime): start date for data
        data_end_date (datetime): end date for data
        country_iso (str): ISO code for country
        update_from_api (bool): if False, no source is queried and an
            empty dictionary is returned
        imf_data_year (int | None): IMF target year override. Defaults to
            data_end_date.year when None.
        imf_data_path (str | Path | None): optional path to save IMF CSV data

    Returns:
        macro_parameters (dict): dictionary of parameter values; may
            contain initial_debt_ratio, initial_foreign_debt_ratio,
            zeta_D, g_y_annual, gamma, alpha_T, alpha_G, r_gov_shift
            and r_gov_scale
    """
    # initialize a dictionary of parameters
    macro_parameters = {}

    # baseline date formatted for World Bank data
    baseline_YYYYQ = (
        f"{data_end_date.year}Q{pd.Timestamp(data_end_date).quarter}"
    )

    if not update_from_api:
        print("Not updating from World Bank API")
        print("Not updating from ILOSTAT API")
        print("Not updating alpha_T, alpha_G, r_gov_shift, r_gov_scale")
        return macro_parameters

    # ------------------------------------------------------------------
    # Retrieve data from the World Bank World Development Indicators.
    # ------------------------------------------------------------------
    # Dictionaries of variables and their corresponding World Bank codes
    # Annual data
    wb_a_variable_dict = {
        "GDP per capita (constant 2015 US$)": "NY.GDP.PCAP.KD",
        "Real GDP (constant 2015 US$)": "NY.GDP.MKTP.KD",
        "Nominal GDP (current US$)": "NY.GDP.MKTP.CD",
        "General government final consumption expenditure (current US$)": "NE.CON.GOVT.CD",
    }
    # Quarterly data
    wb_q_variable_dict = {
        "Gross PSD USD - domestic creditors": "DP.DOD.DECD.CR.PS.CD",
        "Gross PSD USD - external creditors": "DP.DOD.DECX.CR.PS.CD",
        "Gross PSD Gen Gov - percentage of GDP": "DP.DOD.DECT.CR.GG.Z1",
    }
    try:
        wb_data_a = _fetch_wb_data(
            wb_a_variable_dict,
            country_iso,
            data_start_date.year,
            data_end_date.year,
            source=2,
        )
        wb_data_q = _fetch_wb_data(
            wb_q_variable_dict,
            country_iso,
            data_start_date.year,
            data_end_date.year,
            source=20,
        )

        # Compute macro parameters from WB data
        debt_ratio = _latest_valid_value(
            wb_data_q["Gross PSD Gen Gov - percentage of GDP"] / 100,
            baseline_YYYYQ,
        )
        if debt_ratio is not None:
            macro_parameters["initial_debt_ratio"] = debt_ratio
            print(
                f"initial_debt_ratio updated from World Bank API: {macro_parameters['initial_debt_ratio']}"
            )

        # Compute initial_foreign_debt_ratio safely
        external_col = "Gross PSD USD - external creditors"
        domestic_col = "Gross PSD USD - domestic creditors"
        if (
            external_col in wb_data_q.columns
            and domestic_col in wb_data_q.columns
        ):
            total_debt = wb_data_q[domestic_col] + wb_data_q[external_col]
            # Avoid division by zero
            foreign_share = wb_data_q[external_col] / total_debt.replace(
                0, np.nan
            )
            foreign_debt_ratio = _latest_valid_value(
                foreign_share, baseline_YYYYQ
            )
            if foreign_debt_ratio is not None:
                macro_parameters["initial_foreign_debt_ratio"] = (
                    foreign_debt_ratio
                )
                print(
                    f"initial_foreign_debt_ratio updated from World Bank API: {macro_parameters['initial_foreign_debt_ratio']}"
                )
                # Since it's the same formula, zeta_D uses the same
                # calculated value
                macro_parameters["zeta_D"] = [foreign_debt_ratio]
                print(
                    f"zeta_D updated from World Bank API: {macro_parameters['zeta_D']}"
                )
        else:
            print(
                "Warning: Missing debt variables in World Bank data. Skipping update for initial_foreign_debt_ratio."
            )

        # Compute annual GDP growth safely
        gdp_pc_col = "GDP per capita (constant 2015 US$)"
        if gdp_pc_col in wb_data_a.columns:
            # Rows are in descending time order, so pct_change(-1)
            # compares each year with the row after it, i.e. the
            # previous year.
            g_y_series = wb_data_a[gdp_pc_col].pct_change(-1)
            if g_y_series.isna().all():
                print(
                    "Warning: No valid GDP per capita growth rates. Skipping update for g_y_annual."
                )
            else:
                macro_parameters["g_y_annual"] = g_y_series.mean()
                print(
                    f"g_y_annual updated from World Bank API: {macro_parameters['g_y_annual']}"
                )
        else:
            print(
                "Warning: Missing GDP per capita data in World Bank data. Skipping update for g_y_annual."
            )
    except Exception as exc:
        # Best-effort boundary: report the failure and carry on.
        print("Failed to retrieve data from World Bank")
        print(f"Reason: {exc}")
        print("Will not update the following parameters:")
        print(
            "[initial_debt_ratio, initial_foreign_debt_ratio, zeta_D, g_y]"
        )

    # ------------------------------------------------------------------
    # Retrieve income share data from the United Nations ILOSTAT Data API
    # (see https://rshiny.ilo.org/dataexplorer9/?lang=en)
    # The series code is SDG_1041_NOC_RT_A and gamma = 1 - obs_value / 100.
    # NOTE(review): the earlier notes disagreed on whether this series is
    # the labour or the capital share. SDG indicator 10.4.1 is presumably
    # the labour income share, which makes gamma capital's share of
    # income - confirm.
    # If this fails we will not update gamma in 'default_parameters.json'
    # ------------------------------------------------------------------
    try:
        target = (
            "https://rplumber.ilo.org/data/indicator/"
            + "?id=SDG_1041_NOC_RT_A"
            + "&ref_area="
            + str(country_iso)
            + "&timefrom="
            + str(data_start_date.year)
            + "&timeto="
            + str(data_end_date.year)
            + "&type=both&format=.csv"
        )
        # Add headers
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        print("Attempting to update gamma from ILOSTAT")
        response = requests.get(target, headers=headers, timeout=30)
        if response.status_code != 200:
            print(f"Error: Received status code {response.status_code}")
        else:
            print("Request successful.")
            df_temp = pd.read_csv(StringIO(response.text))
            ilo_data = df_temp[["time", "obs_value"]].dropna()
            ilo_data = ilo_data.loc[
                ilo_data["time"] <= data_end_date.year
            ].sort_values("time")
            if ilo_data.empty:
                print(
                    "Warning: No ILOSTAT observations available. Skipping update for gamma."
                )
            else:
                # Use the end year when present, otherwise the most
                # recent earlier observation, like the other sources.
                gamma_year = int(ilo_data["time"].iloc[-1])
                if gamma_year != data_end_date.year:
                    print(
                        f"Warning: No ILOSTAT data for {data_end_date.year}. "
                        f"Using last available year: {gamma_year}"
                    )
                share_pct = ilo_data["obs_value"].iloc[-1]
                # find gamma, capital's share of income
                macro_parameters["gamma"] = [1 - (share_pct / 100)]
                print(
                    f"gamma updated from ILOSTAT API: {macro_parameters['gamma']}"
                )
    except Exception as exc:
        print("Failed to retrieve data from ILOSTAT")
        print(f"Reason: {exc}")
        print("Will not update gamma")

    # ------------------------------------------------------------------
    # Calibrate parameters from IMF data
    # ------------------------------------------------------------------
    try:
        imf_year = (
            data_end_date.year if imf_data_year is None else imf_data_year
        )
        macro_parameters.update(
            _get_imf_macro_params(
                country_iso,
                imf_year,
                data_path=imf_data_path,
            )
        )
        print(
            f"alpha_T updated from IMF data: {macro_parameters['alpha_T']}"
        )
        print(
            f"alpha_G updated from IMF data: {macro_parameters['alpha_G']}"
        )
    except Exception as exc:
        print("Failed to retrieve data from IMF")
        print(f"Reason: {exc}")
        print("Will not update alpha_T, alpha_G")

    # ------------------------------------------------------------------
    # Estimate the discount on sovereign yields relative to private debt
    # Follow the methodology in Li, Magud, Werner, Witte (2021)
    # available at:
    # https://www.imf.org/en/Publications/WP/Issues/2021/06/04/The-Long-Run-Impact-of-Sovereign-Yields-on-Corporate-Yields-in-Emerging-Markets-50224
    # discussion is here: https://github.com/EAPD-DRB/OG-ZAF/issues/22
    # Steps:
    # 1) Generate modelled corporate yields (corp_yhat) for a range of
    #    sovereign yields (sov_y) using the estimated equation in col 2
    #    of table 8 (and figure 3).
    # 2) Estimate the OLS using sovereign yields as the dependent
    #    variable
    # ------------------------------------------------------------------
    try:
        # Imported here so that a missing statsmodels only skips this
        # step instead of breaking the module import.
        import statsmodels.api as sm

        # estimate r_gov_shift and r_gov_scale
        sov_y = np.arange(20, 120) / 10
        corp_yhat = 8.199 - (2.975 * sov_y) + (0.478 * sov_y**2)
        corp_yhat = sm.add_constant(corp_yhat)
        res = sm.OLS(sov_y, corp_yhat).fit()
        # First term is the constant and needs to be divided by 100 to have
        # the correct unit. Second term is the coefficient
        macro_parameters["r_gov_shift"] = [-res.params[0] / 100]
        macro_parameters["r_gov_scale"] = [res.params[1]]
        print(
            f"r_gov_shift updated from IMF data: {macro_parameters['r_gov_shift']}"
        )
        print(
            f"r_gov_scale updated from IMF data: {macro_parameters['r_gov_scale']}"
        )
    except Exception as exc:
        print("Failed to compute r_gov_shift, r_gov_scale")
        print(f"Reason: {exc}")
        print("Will not update r_gov_shift, r_gov_scale")

    return macro_parameters


def _latest_valid_value(series, baseline_label):
    """
    Return the value at `baseline_label`, else the most recent valid one.

    Args:
        series (pandas.Series): values indexed by period label
        baseline_label (str): preferred period label, e.g. "2024Q4"

    Returns:
        The value stored at `baseline_label` when it is present and not
        missing; otherwise the non-missing value with the greatest index
        label; or None when the series has no non-missing value.
    """
    value = series.get(baseline_label, None)
    if not pd.isna(value):
        return value
    valid = series.dropna()
    if valid.empty:
        print("Warning: No historical data available. Skipping update.")
        return None
    # Take the greatest label rather than the last position: the World
    # Bank frames are sorted newest-first, so the last position would be
    # the oldest observation.
    latest_label = valid.index.max()
    print(
        f"Warning: No data for {baseline_label}. Using last available quarter: {latest_label}"
    )
    return valid.loc[latest_label]