Source code for causal_agent.methods.difference_in_differences.diagnostics

"""Diagnostic functions for Difference-in-Differences method."""

import pandas as pd
import numpy as np
from typing import Dict, Any, Optional, List
import logging
import statsmodels.formula.api as smf 
from patsy import PatsyError


from .utils import create_post_indicator

logger = logging.getLogger(__name__)







def _patsy_quote(name: Any) -> str:
    """Return a patsy ``Q('...')`` reference for an arbitrary column name.

    patsy has no backtick quoting; ``Q`` is its mechanism for referring to
    columns whose names are not valid Python identifiers.
    """
    return f"Q({str(name)!r})"


def run_placebo_test(df: pd.DataFrame, time_var: str, group_var: str, outcome: str,
                     treated_unit_indicator: str, covariates: List[str],
                     treatment_period_start: Any,
                     placebo_period_start: Any) -> Dict[str, Any]:
    """Run a placebo-in-time test for a Difference-in-Differences design.

    A fake treatment start (``placebo_period_start``) is assigned before the
    real one and the DiD regression is re-estimated on the observations that
    precede the real treatment. The test passes when the placebo interaction
    term is not statistically significant (p > 0.10).

    Args:
        df: Original DataFrame. It is never modified; all work happens on a copy.
        time_var: Name of the time variable column.
        group_var: Name of the unit/group ID column (used for unit fixed
            effects and for clustering the standard errors).
        outcome: Name of the outcome variable column.
        treated_unit_indicator: Name of the binary treatment group indicator
            column (0/1).
        covariates: List of covariate column names (may be empty or None).
        treatment_period_start: The actual treatment start period.
        placebo_period_start: The fake treatment start period (must be before
            the actual start and comparable with it).

    Returns:
        Dictionary with keys ``passed`` (bool), ``effect_estimate`` and
        ``p_value`` (float, or None on failure), ``details`` (human-readable
        summary or error text) and ``error`` (None on success, otherwise the
        error message). Failures are reported through this dictionary rather
        than raised.
    """
    logger.info(f"Running placebo test assigning treatment start at {placebo_period_start}...")
    placebo_result: Dict[str, Any] = {
        "passed": False,
        "effect_estimate": None,
        "p_value": None,
        "details": "",
        "error": None,
    }

    # Validate the ordering of the two periods. Mixed types (e.g. str vs int)
    # make the comparison itself raise, so that is reported like any other
    # failure instead of escaping to the caller.
    try:
        placebo_not_earlier = bool(placebo_period_start >= treatment_period_start)
    except (TypeError, ValueError) as e:
        error_msg = (
            f"Cannot compare placebo period start {placebo_period_start!r} "
            f"with treatment period start {treatment_period_start!r}: {e}"
        )
        logger.error(error_msg)
        placebo_result["error"] = error_msg
        placebo_result["details"] = error_msg
        return placebo_result

    if placebo_not_earlier:
        error_msg = "Placebo period must be before the actual treatment period."
        logger.error(error_msg)
        placebo_result["error"] = error_msg
        placebo_result["details"] = error_msg
        return placebo_result

    try:
        covariate_cols = list(covariates) if covariates else []

        # Drop incomplete rows up front: the formula machinery would drop them
        # anyway, but the cluster vector passed to fit() is not re-aligned, so
        # leaving them in can make `groups` longer than the estimation sample.
        required_cols = list(dict.fromkeys(
            [outcome, treated_unit_indicator, time_var, group_var, *covariate_cols]
        ))
        df_placebo = df.dropna(subset=required_cols).copy()

        # Keep only observations before the real treatment. Otherwise the
        # placebo "post" window contains truly treated periods and the placebo
        # coefficient picks up the real treatment effect.
        # NOTE(review): assumes create_post_indicator returns one 0/1 (or
        # boolean) flag per row of the frame it is given - confirm in .utils.
        real_post = np.asarray(
            create_post_indicator(df_placebo, time_var, treatment_period_start)
        )
        df_placebo = df_placebo.loc[real_post == 0].copy()
        if df_placebo.empty:
            raise ValueError(
                "No observations before the actual treatment period; "
                "cannot run placebo test."
            )

        # Create placebo post and interaction terms
        post_placebo_col = 'post_placebo'
        interaction_placebo_col = 'did_interaction_placebo'
        df_placebo[post_placebo_col] = create_post_indicator(
            df_placebo, time_var, placebo_period_start
        )
        df_placebo[interaction_placebo_col] = (
            df_placebo[treated_unit_indicator] * df_placebo[post_placebo_col]
        )

        # Construct formula for placebo regression. User-supplied column names
        # are quoted with Q('...'); the two generated columns are plain
        # identifiers and stay unquoted so their coefficient names are exactly
        # the column names looked up below.
        rhs_terms = [
            _patsy_quote(treated_unit_indicator),
            post_placebo_col,
            interaction_placebo_col,
        ]
        rhs_terms.extend(_patsy_quote(c) for c in covariate_cols)
        # Include unit and time fixed effects
        rhs_terms.append(f"C({_patsy_quote(group_var)})")
        rhs_terms.append(f"C({_patsy_quote(time_var)})")
        formula = f"{_patsy_quote(outcome)} ~ {' + '.join(rhs_terms)}"
        logger.debug(f"Placebo test formula: {formula}")

        # Fit the placebo model with SEs clustered on the unit/group ID
        ols_model = smf.ols(formula=formula, data=df_placebo)
        results = ols_model.fit(cov_type='cluster',
                                cov_kwds={'groups': df_placebo[group_var]})

        # Check the significance of the placebo interaction term
        placebo_effect = float(results.params[interaction_placebo_col])
        placebo_p_value = float(results.pvalues[interaction_placebo_col])

        # Test passes if the placebo effect is not statistically significant.
        # A NaN p-value compares False, i.e. it is treated as not passed.
        passed_test = placebo_p_value > 0.10

        placebo_result["passed"] = passed_test
        placebo_result["effect_estimate"] = placebo_effect
        placebo_result["p_value"] = placebo_p_value
        placebo_result["details"] = f"Placebo treatment effect estimated at {placebo_effect:.4f} (p={placebo_p_value:.4f}). Test passed: {passed_test}."
        logger.info(placebo_result["details"])

    except Exception as e:
        # Diagnostics are best-effort: missing columns, formula errors and
        # estimation failures are all reported through the result dict.
        error_msg = f"Error during placebo test execution: {e}"
        logger.error(error_msg, exc_info=True)
        placebo_result["details"] = error_msg
        placebo_result["error"] = str(e)

    return placebo_result
# TODO: Add function for Event Study plot (plot_event_study).
# This would involve estimating effects for leads and lags around the treatment period.
# Add other diagnostic functions as needed (e.g., plot_event_study).