Chapter 9. Simultaneous Equations Models
Examples
import statsmodels.formula.api as smf
from linearmodels import IV2SLS
from wooldridge import *
Example 9.5 - Labor Supply for Married, Working Women
# Load the "mroz" data set through dataWoo.
df = dataWoo("mroz")

# OLS of hours on lwage and the remaining regressors; printed first so it
# can be compared with the 2SLS estimates that follow.
hours_ols_formula = 'hours ~ lwage + educ + age + kidslt6 + kidsge6 + nwifeinc'
hours_ols = smf.ols(hours_ols_formula, data=df).fit()
print(hours_ols.summary())

# Keep only the rows where lwage is not missing; later cells reuse df2.
df2 = df.dropna(subset=['lwage'])

# 2SLS for hours: the bracketed term marks lwage as endogenous with exper
# and expersq as its instruments.
hours_iv_formula = 'hours ~ 1+ [lwage~ exper + expersq] + educ + age + kidslt6 + kidsge6 + nwifeinc'
hours_iv_model = IV2SLS.from_formula(hours_iv_formula, data=df2)
ivreg = hours_iv_model.fit(cov_type='unadjusted')
print(ivreg)
OLS Regression Results
==============================================================================
Dep. Variable: hours R-squared: 0.067
Model: OLS Adj. R-squared: 0.054
Method: Least Squares F-statistic: 5.035
Date: Mon, 11 Dec 2023 Prob (F-statistic): 5.31e-05
Time: 22:33:00 Log-Likelihood: -3440.1
No. Observations: 428 AIC: 6894.
Df Residuals: 421 BIC: 6923.
Df Model: 6
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 2114.6973 340.131 6.217 0.000 1446.131 2783.263
lwage -17.4078 54.215 -0.321 0.748 -123.974 89.159
educ -14.4449 17.968 -0.804 0.422 -49.763 20.873
age -7.7300 5.529 -1.398 0.163 -18.599 3.139
kidslt6 -342.5048 100.006 -3.425 0.001 -539.078 -145.932
kidsge6 -115.0205 30.829 -3.731 0.000 -175.619 -54.422
nwifeinc -4.2458 3.656 -1.161 0.246 -11.432 2.940
==============================================================================
Omnibus: 27.115 Durbin-Watson: 2.012
Prob(Omnibus): 0.000 Jarque-Bera (JB): 39.894
Skew: 0.473 Prob(JB): 2.17e-09
Kurtosis: 4.158 Cond. No. 454.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
IV-2SLS Estimation Summary
==============================================================================
Dep. Variable: hours R-squared: -1.7732
Estimator: IV-2SLS Adj. R-squared: -1.8128
No. Observations: 428 F-statistic: 20.796
Date: Mon, Dec 11 2023 P-value (F-stat) 0.0020
Time: 22:33:00 Distribution: chi2(6)
Cov. Estimator: unadjusted
Parameter Estimates
==============================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
------------------------------------------------------------------------------
Intercept 2432.2 589.29 4.1273 0.0000 1277.2 3587.2
educ -177.45 57.665 -3.0772 0.0021 -290.47 -64.427
age -10.784 9.4987 -1.1353 0.2562 -29.401 7.8330
kidslt6 -210.83 175.48 -1.2015 0.2296 -554.77 133.10
kidsge6 -47.557 56.450 -0.8425 0.3995 -158.20 63.084
nwifeinc -9.2491 6.4279 -1.4389 0.1502 -21.848 3.3493
lwage 1544.8 476.79 3.2400 0.0012 610.32 2479.3
==============================================================================
Endogenous: lwage
Instruments: exper, expersq
Unadjusted Covariance (Homoskedastic)
Debiased: False
# Residuals of the 2SLS fit. The name `ivuhat` must stay as is: the formula
# below refers to it by name (it is not a column of df2).
ivuhat = ivreg.resids

# Regress those residuals on the exogenous regressors plus exper and expersq.
resid_formula = 'ivuhat ~ educ + age + kidslt6 + kidsge6 + nwifeinc + exper + expersq'
ureg = smf.ols(resid_formula, data=df2).fit()
print(ureg.summary())
OLS Regression Results
==============================================================================
Dep. Variable: ivuhat R-squared: 0.002
Model: OLS Adj. R-squared: -0.015
Method: Least Squares F-statistic: 0.1205
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.997
Time: 22:33:00 Log-Likelihood: -3672.8
No. Observations: 428 AIC: 7362.
Df Residuals: 420 BIC: 7394.
Df Model: 7
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 177.4856 621.684 0.285 0.775 -1044.513 1399.485
educ 0.3573 29.488 0.012 0.990 -57.605 58.319
age -3.4411 10.575 -0.325 0.745 -24.228 17.346
kidslt6 -8.5741 173.057 -0.050 0.961 -348.739 331.591
kidsge6 2.4540 54.476 0.045 0.964 -104.625 109.533
nwifeinc 0.8964 6.484 0.138 0.890 -11.848 13.641
exper -15.8841 26.118 -0.608 0.543 -67.223 35.455
expersq 0.6408 0.785 0.817 0.415 -0.902 2.183
==============================================================================
Omnibus: 108.503 Durbin-Watson: 2.074
Prob(Omnibus): 0.000 Jarque-Bera (JB): 682.336
Skew: 0.917 Prob(JB): 6.80e-149
Kurtosis: 8.907 Cond. No. 3.56e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.56e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
from scipy.stats import chi2

# LM statistic: number of observations times the R-squared of the residual
# regression stored in `ureg`.
LM1 = ureg.nobs * ureg.rsquared

# Upper-tail chi-square probability with 1 degree of freedom
# (two instruments for one endogenous regressor in the 2SLS formula).
p = chi2.sf(LM1, df=1)

print(f"LM = {round(LM1, 3)} and P = {round(p, 3)}")
LM = 0.858 and P = 0.354
# OLS of lwage on the exogenous regressors together with exper and expersq.
# Note this is fitted on df (not df2).
lwage_ols_formula = 'lwage ~ educ + age + kidslt6 + kidsge6 + nwifeinc + exper + expersq'
lwage_ols_model = smf.ols(lwage_ols_formula, data=df)
OLSwg = lwage_ols_model.fit()
print(OLSwg.summary())
OLS Regression Results
==============================================================================
Dep. Variable: lwage R-squared: 0.164
Model: OLS Adj. R-squared: 0.150
Method: Least Squares F-statistic: 11.78
Date: Mon, 11 Dec 2023 Prob (F-statistic): 1.02e-13
Time: 22:33:00 Log-Likelihood: -429.74
No. Observations: 428 AIC: 875.5
Df Residuals: 420 BIC: 908.0
Df Model: 7
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -0.3580 0.318 -1.125 0.261 -0.984 0.268
educ 0.0999 0.015 6.616 0.000 0.070 0.130
age -0.0035 0.005 -0.650 0.516 -0.014 0.007
kidslt6 -0.0559 0.089 -0.631 0.529 -0.230 0.118
kidsge6 -0.0176 0.028 -0.633 0.527 -0.072 0.037
nwifeinc 0.0057 0.003 1.715 0.087 -0.001 0.012
exper 0.0407 0.013 3.044 0.002 0.014 0.067
expersq -0.0007 0.000 -1.860 0.064 -0.002 4.24e-05
==============================================================================
Omnibus: 75.081 Durbin-Watson: 1.966
Prob(Omnibus): 0.000 Jarque-Bera (JB): 297.845
Skew: -0.715 Prob(JB): 2.11e-65
Kurtosis: 6.828 Cond. No. 3.56e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.56e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
# Joint restriction on the OLSwg fit: the coefficients on age, kidslt6,
# kidsge6 and nwifeinc are all zero.
hypotheses = '(age = kidslt6 = kidsge6=nwifeinc =0)'

# Run the F test for that restriction and show the result object.
f_test = OLSwg.f_test(r_matrix=hypotheses)
print(f_test)
<F test: F=0.9142137685830444, p=0.4554869680438597, df_denom=420, df_num=4>
# 2SLS for lwage: the bracketed term marks hours as endogenous with age,
# kidslt6, kidsge6 and nwifeinc as its instruments.
wage_iv_formula = 'lwage ~ 1+ [hours ~ age + kidslt6 + kidsge6 + nwifeinc] + educ + exper + expersq '
wage_iv_model = IV2SLS.from_formula(wage_iv_formula, data=df2)
wage_iv_fit = wage_iv_model.fit(cov_type='unadjusted')
print(wage_iv_fit)
IV-2SLS Estimation Summary
==============================================================================
Dep. Variable: lwage R-squared: 0.1112
Estimator: IV-2SLS Adj. R-squared: 0.1028
No. Observations: 428 F-statistic: 76.080
Date: Mon, Dec 11 2023 P-value (F-stat) 0.0000
Time: 22:33:00 Distribution: chi2(4)
Cov. Estimator: unadjusted
Parameter Estimates
==============================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
------------------------------------------------------------------------------
Intercept -0.6928 0.3048 -2.2729 0.0230 -1.2902 -0.0954
educ 0.1111 0.0152 7.2902 0.0000 0.0812 0.1410
exper 0.0326 0.0180 1.8182 0.0690 -0.0025 0.0678
expersq -0.0007 0.0004 -1.5374 0.1242 -0.0015 0.0002
hours 0.0002 0.0002 0.7509 0.4527 -0.0003 0.0006
==============================================================================
Endogenous: hours
Instruments: age, kidslt6, kidsge6, nwifeinc
Unadjusted Covariance (Homoskedastic)
Debiased: False
Example 9.6 - Nonlinear Labor Supply Function
# Squared terms built from df2 columns. They are kept as standalone Series
# (not added to df2) and are referenced by name in the formula below, so
# these variable names must not change.
lwagesq = df2['lwage'].pow(2)
educsq = df2['educ'].pow(2)
agesq = df2['age'].pow(2)
nwifeincsq = df2['nwifeinc'].pow(2)

# 2SLS for hours with lwage and lwagesq endogenous; instruments are exper,
# expersq and the squares of educ, age and nwifeinc.
nonlinear_iv_formula = 'hours ~ 1+ [lwage + lwagesq ~ exper + expersq + educsq+ agesq+ nwifeincsq] + educ + age + kidslt6 + kidsge6 + nwifeinc'
nonlinear_iv_model = IV2SLS.from_formula(nonlinear_iv_formula, data=df2)
iv2reg = nonlinear_iv_model.fit(cov_type='unadjusted')
print(iv2reg)
IV-2SLS Estimation Summary
==============================================================================
Dep. Variable: hours R-squared: -1.2003
Estimator: IV-2SLS Adj. R-squared: -1.2370
No. Observations: 428 F-statistic: 25.272
Date: Mon, Dec 11 2023 P-value (F-stat) 0.0007
Time: 22:33:00 Distribution: chi2(7)
Cov. Estimator: unadjusted
Parameter Estimates
==============================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
------------------------------------------------------------------------------
Intercept 1657.9 769.98 2.1532 0.0313 148.80 3167.1
educ -87.851 65.770 -1.3357 0.1816 -216.76 41.056
age -9.1423 8.4929 -1.0765 0.2817 -25.788 7.5035
kidslt6 -185.06 160.76 -1.1511 0.2497 -500.13 130.02
kidsge6 -58.189 49.634 -1.1724 0.2410 -155.47 39.092
nwifeinc -7.2334 5.7512 -1.2577 0.2085 -18.506 4.0388
lwage 1873.6 630.02 2.9739 0.0029 638.81 3108.4
lwagesq -437.29 346.79 -1.2610 0.2073 -1117.0 242.40
==============================================================================
Endogenous: lwage, lwagesq
Instruments: exper, expersq, educsq, agesq, nwifeincsq
Unadjusted Covariance (Homoskedastic)
Debiased: False
# Residuals of the nonlinear 2SLS fit. The name `uhat2` is referenced by the
# formula below, so it must stay as is.
uhat2 = iv2reg.resids

# Regress those residuals on the exogenous regressors and all five
# instruments (exper, expersq, educsq, agesq, nwifeincsq).
resid2_formula = 'uhat2 ~ educ + age + kidslt6 + kidsge6 + nwifeinc + exper + expersq + educsq + agesq + nwifeincsq'
resid2_model = smf.ols(resid2_formula, data=df2)
ureg2 = resid2_model.fit()
print(ureg2.summary())
OLS Regression Results
==============================================================================
Dep. Variable: uhat2 R-squared: 0.006
Model: OLS Adj. R-squared: -0.018
Method: Least Squares F-statistic: 0.2561
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.990
Time: 22:33:00 Log-Likelihood: -3622.4
No. Observations: 428 AIC: 7267.
Df Residuals: 417 BIC: 7311.
Df Model: 10
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -1172.0415 2162.256 -0.542 0.588 -5422.322 3078.239
educ 128.0985 184.034 0.696 0.487 -233.652 489.849
age 22.7941 87.724 0.260 0.795 -149.643 195.231
kidslt6 7.4512 156.743 0.048 0.962 -300.653 315.555
kidsge6 -1.1828 50.092 -0.024 0.981 -99.647 97.281
nwifeinc 5.7645 14.081 0.409 0.682 -21.914 33.443
exper -20.3693 24.307 -0.838 0.403 -68.148 27.410
expersq 0.8696 0.739 1.178 0.240 -0.582 2.321
educsq -5.0282 7.113 -0.707 0.480 -19.011 8.955
agesq -0.3232 1.028 -0.314 0.753 -2.344 1.698
nwifeincsq -0.0699 0.207 -0.338 0.736 -0.477 0.337
==============================================================================
Omnibus: 341.641 Durbin-Watson: 1.977
Prob(Omnibus): 0.000 Jarque-Bera (JB): 7740.086
Skew: 3.239 Prob(JB): 0.00
Kurtosis: 22.800 Cond. No. 7.80e+04
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 7.8e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
# LM statistic: number of observations times the R-squared of the residual
# regression stored in `ureg2`.
LM2 = ureg2.nobs * ureg2.rsquared

# Upper-tail chi-square probability with 3 degrees of freedom
# (five instruments for two endogenous regressors in the 2SLS formula).
p2 = chi2.sf(LM2, df=3)

print(f"LM2 = {round(LM2, 3)} and P = {round(p2, 3)}")
LM2 = 2.612 and P = 0.455