Chapter 10. Basic Regression Analysis with Time Series Data#
import numpy as np
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from wooldridge import *
Example 10.1. Static Phillips Curve#
# Static Phillips curve: OLS of the `inf` column on `unem`, restricted to
# observations with year earlier than 1997.
df = dataWoo("phillips")
df = df.loc[df["year"] < 1997]
# The formula spells the dependent variable as `df.inf`, i.e. an attribute of
# the `df` variable itself, so the filtered frame has to stay bound to `df`.
phillips_fit = smf.ols('df.inf ~ unem + 1', data=df).fit()
print(phillips_fit.summary())
OLS Regression Results
==============================================================================
Dep. Variable: df.inf R-squared: 0.053
Model: OLS Adj. R-squared: 0.033
Method: Least Squares F-statistic: 2.616
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.112
Time: 18:37:13 Log-Likelihood: -124.43
No. Observations: 49 AIC: 252.9
Df Residuals: 47 BIC: 256.6
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.4236 1.719 0.828 0.412 -2.035 4.882
unem 0.4676 0.289 1.617 0.112 -0.114 1.049
==============================================================================
Omnibus: 8.905 Durbin-Watson: 0.803
Prob(Omnibus): 0.012 Jarque-Bera (JB): 8.336
Skew: 0.979 Prob(JB): 0.0155
Kurtosis: 3.502 Cond. No. 23.5
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Example 10.2. Effects of Inflation and Deficits on Interest Rates#
# Interest-rate equation: i3 regressed on the `inf` and `def` columns of the
# intdef data set.
df = dataWoo("intdef")
# `def` is a Python keyword, so the formula reaches that column via
# df[("def")]; `inf` is likewise read from the frame as df.inf.
intdef_fit = smf.ols('i3 ~ df.inf + df[("def")] + 1', data=df).fit()
print(intdef_fit.summary())
OLS Regression Results
==============================================================================
Dep. Variable: i3 R-squared: 0.602
Model: OLS Adj. R-squared: 0.587
Method: Least Squares F-statistic: 40.09
Date: Mon, 11 Dec 2023 Prob (F-statistic): 2.48e-11
Time: 18:37:13 Log-Likelihood: -112.16
No. Observations: 56 AIC: 230.3
Df Residuals: 53 BIC: 236.4
Df Model: 2
Covariance Type: nonrobust
===============================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------
Intercept 1.7333 0.432 4.012 0.000 0.867 2.600
df.inf 0.6059 0.082 7.376 0.000 0.441 0.771
df[("def")] 0.5131 0.118 4.334 0.000 0.276 0.751
==============================================================================
Omnibus: 0.260 Durbin-Watson: 0.716
Prob(Omnibus): 0.878 Jarque-Bera (JB): 0.015
Skew: -0.028 Prob(JB): 0.992
Kurtosis: 3.058 Cond. No. 9.28
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Example 10.3. Puerto Rican Employment and the Minimum Wage#
# OLS of lprepop on lmincov and lusgnp using the prminwge data set.
df = dataWoo("prminwge")
prminwge_fit = smf.ols('lprepop ~ lmincov + lusgnp', data=df).fit()
print(prminwge_fit.summary())
OLS Regression Results
==============================================================================
Dep. Variable: lprepop R-squared: 0.660
Model: OLS Adj. R-squared: 0.641
Method: Least Squares F-statistic: 34.04
Date: Mon, 11 Dec 2023 Prob (F-statistic): 6.17e-09
Time: 18:37:13 Log-Likelihood: 57.376
No. Observations: 38 AIC: -108.8
Df Residuals: 35 BIC: -103.8
Df Model: 2
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -1.0544 0.765 -1.378 0.177 -2.608 0.499
lmincov -0.1544 0.065 -2.380 0.023 -0.286 -0.023
lusgnp -0.0122 0.089 -0.138 0.891 -0.192 0.168
==============================================================================
Omnibus: 0.079 Durbin-Watson: 0.340
Prob(Omnibus): 0.961 Jarque-Bera (JB): 0.084
Skew: 0.073 Prob(JB): 0.959
Kurtosis: 2.822 Cond. No. 676.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Example 10.4. Effects of Personal Exemption on Fertility Rates#
# Fertility equation on the fertil3 data: Model I uses pe, ww2 and pill;
# Model II adds the two lagged columns pe_1 and pe_2.
df = dataWoo("fertil3")
regf = smf.ols('gfr ~ pe + ww2 + pill', data=df).fit()
tsregf = smf.ols('gfr ~ pe + pe_1 + pe_2 + ww2 + pill', data=df).fit()

# Extra footer rows for the comparison table, each computed from a fitted result.
fit_stats = {
    'N': lambda res: "{0:d}".format(int(res.nobs)),
    'R2': lambda res: "{:.3f}".format(res.rsquared),
    'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
}
fertility_table = summary_col(
    [regf, tsregf],
    stars=True,
    float_format='%0.3f',
    model_names=['Model I\n(b/se)', 'Model II\n(b/se)'],
    info_dict=fit_stats,
)
print(fertility_table)
====================================
Model I Model II
(b/se) (b/se)
------------------------------------
Intercept 98.682*** 95.870***
(3.208) (3.282)
R-squared 0.473 0.499
R-squared Adj. 0.450 0.459
pe 0.083*** 0.073
(0.030) (0.126)
pe_1 -0.006
(0.156)
pe_2 0.034
(0.126)
pill -31.594*** -31.305***
(4.081) (3.982)
ww2 -24.238*** -22.126**
(7.458) (10.732)
N 72 70
R2 0.473 0.499
Adj.R2 0.450 0.459
====================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
# Joint F-test on the Model II fit (tsregf): the null hypothesis is that the
# coefficients on pe, pe_1 and pe_2 are all zero.
hypotheses = '(pe = pe_1 = pe_2 = 0)'
f_test = tsregf.f_test(hypotheses)
print(f_test)
<F test: F=3.972964046978578, p=0.011652005303125972, df_denom=64, df_num=3>
Example 10.5. Antidumping Filings and Chemical Imports#
# OLS of lchnimp on lchempi, lgas, lrtwex and the three filing dummies
# (befile6, affile6, afdec6) from the barium data set.
barium = dataWoo("barium")
antidumping_fit = smf.ols(
    'lchnimp ~lchempi + lgas + lrtwex + befile6 + affile6 + afdec6',
    data=barium,
).fit()
print(antidumping_fit.summary())
OLS Regression Results
==============================================================================
Dep. Variable: lchnimp R-squared: 0.305
Model: OLS Adj. R-squared: 0.271
Method: Least Squares F-statistic: 9.064
Date: Mon, 11 Dec 2023 Prob (F-statistic): 3.25e-08
Time: 18:37:13 Log-Likelihood: -114.79
No. Observations: 131 AIC: 243.6
Df Residuals: 124 BIC: 263.7
Df Model: 6
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -17.8030 21.045 -0.846 0.399 -59.458 23.852
lchempi 3.1172 0.479 6.505 0.000 2.169 4.066
lgas 0.1964 0.907 0.217 0.829 -1.598 1.991
lrtwex 0.9830 0.400 2.457 0.015 0.191 1.775
befile6 0.0596 0.261 0.228 0.820 -0.457 0.576
affile6 -0.0324 0.264 -0.123 0.903 -0.556 0.491
afdec6 -0.5652 0.286 -1.978 0.050 -1.131 0.001
==============================================================================
Omnibus: 9.160 Durbin-Watson: 1.458
Prob(Omnibus): 0.010 Jarque-Bera (JB): 9.978
Skew: -0.491 Prob(JB): 0.00681
Kurtosis: 3.930 Cond. No. 9.62e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 9.62e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
# Percentage effect implied by a coefficient of -0.5652 (the afdec6 estimate
# in the summary printed above): 100 * (exp(b) - 1). Left as a bare trailing
# expression so the notebook displays the value.
(np.exp(-0.5652) - 1) * 100
-43.17535139053659
Example 10.6. Election Outcomes and Economic Performance#
# Election-outcome regression on the fair data set, restricted to
# observations with year earlier than 1996.
df = dataWoo("fair")
# The formula reads inflation as `df.inf`, so the filtered frame must be
# rebound to `df` before fitting.
df = df.loc[df["year"] < 1996]
vote_fit = smf.ols(
    'demvote ~ partyWH + incum + partyWH:gnews + partyWH:df.inf +1',
    data=df,
).fit()
print(vote_fit.summary())
OLS Regression Results
==============================================================================
Dep. Variable: demvote R-squared: 0.663
Model: OLS Adj. R-squared: 0.573
Method: Least Squares F-statistic: 7.374
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.00172
Time: 18:37:13 Log-Likelihood: 34.586
No. Observations: 20 AIC: -59.17
Df Residuals: 15 BIC: -54.19
Df Model: 4
Covariance Type: nonrobust
==================================================================================
coef std err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------
Intercept 0.4811 0.012 39.228 0.000 0.455 0.507
partyWH -0.0435 0.040 -1.075 0.300 -0.130 0.043
incum 0.0544 0.023 2.323 0.035 0.004 0.104
partyWH:gnews 0.0108 0.004 2.628 0.019 0.002 0.020
partyWH:df.inf -0.0077 0.003 -2.365 0.032 -0.015 -0.001
==============================================================================
Omnibus: 3.025 Durbin-Watson: 2.171
Prob(Omnibus): 0.220 Jarque-Bera (JB): 2.074
Skew: 0.787 Prob(JB): 0.355
Kurtosis: 2.891 Cond. No. 33.0
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Example 10.7. Housing Investment and Prices#
# Housing investment on the hseinv data: Model I regresses linvpc on lprice
# only; Model II adds the trend column t.
df = dataWoo("hseinv")
reghi = smf.ols('linvpc ~ lprice', data=df).fit()
reghit = smf.ols('linvpc ~ lprice + t', data=df).fit()

# Extra footer rows for the comparison table, each computed from a fitted result.
fit_stats = {
    'N': lambda res: "{0:d}".format(int(res.nobs)),
    'R2': lambda res: "{:.3f}".format(res.rsquared),
    'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
}
housing_table = summary_col(
    [reghi, reghit],
    stars=True,
    float_format='%0.3f',
    model_names=['Model I\n(b/se)', 'Model II\n(b/se)'],
    info_dict=fit_stats,
)
print(housing_table)
==================================
Model I Model II
(b/se) (b/se)
----------------------------------
Intercept -0.550*** -0.913***
(0.043) (0.136)
R-squared 0.208 0.341
R-squared Adj. 0.189 0.307
lprice 1.241*** -0.381
(0.382) (0.679)
t 0.010***
(0.004)
N 42 42
R2 0.208 0.341
Adj.R2 0.189 0.307
==================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
Example 10.8. Fertility Equation#
# Fertility equation on the fertil3 data with time-trend terms: Model I adds
# t to pe, ww2 and pill; Model II additionally includes tsq.
df = dataWoo("fertil3")
regt = smf.ols('gfr ~ pe + ww2 + pill + t + 1', data=df).fit()
regtsq = smf.ols('gfr ~ pe + ww2 + pill + t + tsq', data=df).fit()

# Extra footer rows for the comparison table, each computed from a fitted result.
fit_stats = {
    'N': lambda res: "{0:d}".format(int(res.nobs)),
    'R2': lambda res: "{:.3f}".format(res.rsquared),
    'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
}
trend_table = summary_col(
    [regt, regtsq],
    stars=True,
    float_format='%0.3f',
    model_names=['Model I\n(b/se)', 'Model II\n(b/se)'],
    info_dict=fit_stats,
)
print(trend_table)
====================================
Model I Model II
(b/se) (b/se)
------------------------------------
Intercept 111.769*** 124.092***
(3.358) (4.361)
R-squared 0.662 0.727
R-squared Adj. 0.642 0.706
pe 0.279*** 0.348***
(0.040) (0.040)
pill 0.997 -10.120
(6.262) (6.336)
t -1.150*** -2.531***
(0.188) (0.389)
tsq 0.020***
(0.005)
ww2 -35.592*** -35.880***
(6.297) (5.708)
N 72 72
R2 0.662 0.727
Adj.R2 0.642 0.706
====================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
Example 10.9. Puerto Rican Employment#
# Same prminwge regression as Example 10.3 with the trend column t added.
prminwge = dataWoo("prminwge")
prminwge_trend_fit = smf.ols('lprepop ~ lmincov + lusgnp + t', data=prminwge).fit()
print(prminwge_trend_fit.summary())
OLS Regression Results
==============================================================================
Dep. Variable: lprepop R-squared: 0.847
Model: OLS Adj. R-squared: 0.834
Method: Least Squares F-statistic: 62.78
Date: Mon, 11 Dec 2023 Prob (F-statistic): 6.01e-14
Time: 18:37:13 Log-Likelihood: 72.532
No. Observations: 38 AIC: -137.1
Df Residuals: 34 BIC: -130.5
Df Model: 3
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -8.6963 1.296 -6.711 0.000 -11.330 -6.063
lmincov -0.1687 0.044 -3.813 0.001 -0.259 -0.079
lusgnp 1.0574 0.177 5.986 0.000 0.698 1.416
t -0.0324 0.005 -6.442 0.000 -0.043 -0.022
==============================================================================
Omnibus: 6.093 Durbin-Watson: 0.908
Prob(Omnibus): 0.048 Jarque-Bera (JB): 5.533
Skew: 0.935 Prob(JB): 0.0629
Kurtosis: 3.006 Cond. No. 4.99e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 4.99e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
Example 10.10. Housing Investment#
# Housing investment: linvpc regressed on lprice and the trend column t.
df = dataWoo("hseinv")
hseinv_trend_fit = smf.ols('linvpc ~ lprice + t', data=df).fit()
print(hseinv_trend_fit.summary())
OLS Regression Results
==============================================================================
Dep. Variable: linvpc R-squared: 0.341
Model: OLS Adj. R-squared: 0.307
Method: Least Squares F-statistic: 10.08
Date: Mon, 11 Dec 2023 Prob (F-statistic): 0.000296
Time: 18:37:13 Log-Likelihood: 23.459
No. Observations: 42 AIC: -40.92
Df Residuals: 39 BIC: -35.71
Df Model: 2
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -0.9131 0.136 -6.733 0.000 -1.187 -0.639
lprice -0.3810 0.679 -0.561 0.578 -1.754 0.992
t 0.0098 0.004 2.798 0.008 0.003 0.017
==============================================================================
Omnibus: 4.879 Durbin-Watson: 1.049
Prob(Omnibus): 0.087 Jarque-Bera (JB): 3.978
Skew: -0.446 Prob(JB): 0.137
Kurtosis: 4.216 Cond. No. 770.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Compare a regression of detrended linvpc (Model I) with the regression of
# linvpc itself (Model II), both on lprice and t, using the hseinv frame `df`.

# Step 1: remove the linear trend from linvpc.
uhat_reg = smf.ols('linvpc ~ t', data=df).fit()
# Use the fitted model's own residuals: they are indexed like the rows that
# were actually used, so they stay aligned with `df` even if rows were dropped,
# unlike subtracting the positional array returned by predict().
uhat = uhat_reg.resid

# Step 2: pass the detrended series to the formula as a real column instead of
# relying on the formula picking `uhat` up from the surrounding namespace.
# assign() returns a copy, so `df` itself is left unchanged.
detrend = smf.ols('uhat ~ lprice + t', data=df.assign(uhat=uhat)).fit()
trend = smf.ols('linvpc ~ lprice + t', data=df).fit()

# Side-by-side table with N, R2 and adjusted R2 appended as footer rows.
print(summary_col([detrend, trend], stars=True, float_format='%0.3f',
                  model_names=['Model I\n(b/se)', 'Model II\n(b/se)'],
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.3f}".format(x.rsquared),
                             'Adj.R2': lambda x: "{:.3f}".format(x.rsquared_adj)}))
================================
Model I Model II
(b/se) (b/se)
--------------------------------
Intercept -0.072 -0.913***
(0.136) (0.136)
lprice -0.381 -0.381
(0.679) (0.679)
t 0.002 0.010***
(0.004) (0.004)
R-squared 0.008 0.341
R-squared Adj. -0.043 0.307
N 42 42
R2 0.008 0.341
Adj.R2 -0.043 0.307
================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
Example 10.11. Effects of Antidumping Filings#
# Antidumping regression on the barium data with the monthly dummy columns
# feb through dec added to the Example 10.5 regressors.
barium = dataWoo("barium")
antid_month = smf.ols(
    # Adjacent literals concatenate into a single formula string.
    'lchnimp ~ lchempi + lgas + lrtwex + befile6 + affile6 + afdec6'
    ' + feb + mar + apr + may + jun + jul + aug + sep + oct + nov + dec'
    ' + 1',
    data=barium,
).fit()
print(antid_month.summary())
OLS Regression Results
==============================================================================
Dep. Variable: lchnimp R-squared: 0.358
Model: OLS Adj. R-squared: 0.262
Method: Least Squares F-statistic: 3.712
Date: Mon, 11 Dec 2023 Prob (F-statistic): 1.28e-05
Time: 18:37:14 Log-Likelihood: -109.54
No. Observations: 131 AIC: 255.1
Df Residuals: 113 BIC: 306.8
Df Model: 17
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 16.7788 32.429 0.517 0.606 -47.468 81.026
lchempi 3.2651 0.493 6.624 0.000 2.288 4.242
lgas -1.2781 1.389 -0.920 0.359 -4.030 1.474
lrtwex 0.6630 0.471 1.407 0.162 -0.271 1.597
befile6 0.1397 0.267 0.524 0.602 -0.389 0.668
affile6 0.0126 0.279 0.045 0.964 -0.539 0.565
afdec6 -0.5213 0.302 -1.726 0.087 -1.120 0.077
feb -0.4177 0.304 -1.372 0.173 -1.021 0.185
mar 0.0591 0.265 0.223 0.824 -0.465 0.584
apr -0.4515 0.268 -1.682 0.095 -0.983 0.080
may 0.0333 0.269 0.124 0.902 -0.500 0.567
jun -0.2063 0.269 -0.766 0.445 -0.740 0.327
jul 0.0038 0.279 0.014 0.989 -0.548 0.556
aug -0.1571 0.278 -0.565 0.573 -0.708 0.394
sep -0.1342 0.268 -0.501 0.617 -0.664 0.396
oct 0.0517 0.267 0.194 0.847 -0.477 0.580
nov -0.2463 0.263 -0.937 0.351 -0.767 0.274
dec 0.1328 0.271 0.489 0.626 -0.405 0.671
==============================================================================
Omnibus: 9.169 Durbin-Watson: 1.325
Prob(Omnibus): 0.010 Jarque-Bera (JB): 9.324
Skew: -0.540 Prob(JB): 0.00945
Kurtosis: 3.736 Cond. No. 1.47e+04
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.47e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
# Joint F-test on antid_month: the null hypothesis is that the coefficients on
# all eleven monthly dummies (feb through dec) are zero.
hypotheses = '(feb = mar = apr = may = jun = jul = aug = sep = oct = nov = dec= 0)'
f_test = antid_month.f_test(hypotheses)
print(f_test)
<F test: F=0.8559432867177492, p=0.585200849291337, df_denom=113, df_num=11>
# Antidumping regression on the barium data with the dummy columns
# spr, sum and fall in place of the monthly dummies.
barium = dataWoo("barium")
antid_season = smf.ols(
    'lchnimp ~ lchempi + lgas + lrtwex + befile6 + affile6 + afdec6 + spr + sum + fall + 1',
    data=barium,
).fit()
print(antid_season.summary())
OLS Regression Results
==============================================================================
Dep. Variable: lchnimp R-squared: 0.310
Model: OLS Adj. R-squared: 0.258
Method: Least Squares F-statistic: 6.032
Date: Mon, 11 Dec 2023 Prob (F-statistic): 5.79e-07
Time: 18:37:14 Log-Likelihood: -114.33
No. Observations: 131 AIC: 248.7
Df Residuals: 121 BIC: 277.4
Df Model: 9
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept -26.5219 23.297 -1.138 0.257 -72.645 19.602
lchempi 3.0779 0.486 6.331 0.000 2.116 4.040
lgas 0.5651 1.000 0.565 0.573 -1.415 2.545
lrtwex 1.1015 0.425 2.594 0.011 0.261 1.942
befile6 0.0767 0.265 0.289 0.773 -0.448 0.601
affile6 -0.0833 0.273 -0.305 0.761 -0.623 0.457
afdec6 -0.6212 0.295 -2.103 0.038 -1.206 -0.036
spr -0.0412 0.151 -0.273 0.786 -0.341 0.258
sum -0.1519 0.169 -0.897 0.371 -0.487 0.183
fall -0.0673 0.154 -0.436 0.664 -0.373 0.239
==============================================================================
Omnibus: 8.751 Durbin-Watson: 1.439
Prob(Omnibus): 0.013 Jarque-Bera (JB): 9.596
Skew: -0.466 Prob(JB): 0.00825
Kurtosis: 3.943 Cond. No. 1.06e+04
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.06e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
# Joint F-test on antid_season: the null hypothesis is that the coefficients on
# spr, sum and fall are all zero.
hypotheses = '(spr = sum = fall= 0)'
f_test = antid_season.f_test(hypotheses)
print(f_test)
<F test: F=0.28224471031401444, p=0.8381333029196857, df_denom=121, df_num=3>