Introductory Econometrics Using R

Also covered using Python and Stata

library(wooldridge)
library(stargazer)
library(AER)
library(dynlm)

Example 18.1. Housing Investment and Residential Price Inflation

tshseinv <- ts(hseinv)
reg1 <- dynlm(linvpc ~ t, data = tshseinv)
stargazer(reg1, no.space=TRUE, type="text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               linvpc           
## -----------------------------------------------
## t                            0.008***          
##                               (0.002)          
## Constant                     -0.841***         
##                               (0.045)          
## -----------------------------------------------
## Observations                    42             
## R2                             0.335           
## Adjusted R2                    0.319           
## Residual Std. Error       0.142 (df = 40)      
## F Statistic           20.190*** (df = 1; 40)   
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
u <- resid(reg1)
RationalDL <- dynlm( u ~ gprice + L(u, 1) +  L(gprice, 1), data = tshseinv)
GeometricDL <- dynlm( u ~ gprice + L(u, 1), data = tshseinv)

stargazer(GeometricDL, RationalDL, title="Table 18.1 Distributed Lag Models for Housing Investment: log(invpc)", column.labels=c("GeometricDL", "RationalDL"), no.space=TRUE, type="text")
## 
## Table 18.1 Distributed Lag Models for Housing Investment: log(invpc)
## =================================================================
##                                  Dependent variable:             
##                     ---------------------------------------------
##                                           u                      
##                          GeometricDL             RationalDL      
##                              (1)                    (2)          
## -----------------------------------------------------------------
## gprice                     3.095***               3.256***       
##                            (0.933)                (0.970)        
## L(u, 1)                    0.340**                0.547***       
##                            (0.132)                (0.152)        
## L(gprice, 1)                                     -2.936***       
##                                                   (0.973)        
## Constant                    -0.010                 0.006         
##                            (0.018)                (0.017)        
## -----------------------------------------------------------------
## Observations                  41                     40          
## R2                          0.407                  0.542         
## Adjusted R2                 0.375                  0.504         
## Residual Std. Error    0.111 (df = 38)        0.100 (df = 36)    
## F Statistic         13.022*** (df = 2; 38) 14.200*** (df = 3; 36)
## =================================================================
## Note:                                 *p<0.1; **p<0.05; ***p<0.01

Example 18.2. Unit Root Test for Three-Month T-Bill Rates

tsintqrt <- ts(intqrt)
uroot1 <- dynlm(cr3 ~ L(r3, 1), data = tsintqrt)
rho <- 1 + uroot1$coef[2]
rho
##  L(r3, 1) 
## 0.9092894
uroot2 <- dynlm(r3 ~ L(r3, 1), data = tsintqrt)
stargazer(uroot1, uroot2, no.space=TRUE, type="text")
## 
## ===========================================================
##                                    Dependent variable:     
##                                ----------------------------
##                                     cr3            r3      
##                                     (1)            (2)     
## -----------------------------------------------------------
## L(r3, 1)                          -0.091**      0.909***   
##                                   (0.037)        (0.037)   
## Constant                          0.625**        0.625**   
##                                   (0.261)        (0.261)   
## -----------------------------------------------------------
## Observations                        123            123     
## R2                                 0.048          0.836    
## Adjusted R2                        0.040          0.834    
## Residual Std. Error (df = 121)     1.228          1.228    
## F Statistic (df = 1; 121)         6.116**      614.595***  
## ===========================================================
## Note:                           *p<0.1; **p<0.05; ***p<0.01

Example 18.3. Unit Root Test for Annual U.S. Inflation

tsphillips <- ts(subset(phillips, phillips$year<1997))

uroot3 <- dynlm(cinf ~ inf_1 + L(cinf, 1) , data = tsphillips)
rho1 <- 1 + uroot3$coef[2]
rho1
##     inf_1 
## 0.6896748
reg2 <- dynlm(cinf ~ L(inf,1), data = tsphillips)
rho2 <- 1 + reg2$coef[2]
rho2
## L(inf, 1) 
## 0.6652586
stargazer(uroot3, reg2, no.space=TRUE, type="text")
## 
## ==============================================================
##                                Dependent variable:            
##                     ------------------------------------------
##                                        cinf                   
##                             (1)                   (2)         
## --------------------------------------------------------------
## inf_1                    -0.310***                            
##                           (0.103)                             
## L(cinf, 1)                 0.138                              
##                           (0.126)                             
## L(inf, 1)                                      -0.335***      
##                                                 (0.107)       
## Constant                  1.361**               1.277**       
##                           (0.517)               (0.558)       
## --------------------------------------------------------------
## Observations                 47                   48          
## R2                         0.172                 0.175        
## Adjusted R2                0.134                 0.158        
## Residual Std. Error   2.050 (df = 44)       2.356 (df = 46)   
## F Statistic         4.568** (df = 2; 44) 9.790*** (df = 1; 46)
## ==============================================================
## Note:                              *p<0.1; **p<0.05; ***p<0.01

Example 18.4. Unit Root in the Log of U.S. Real Gross Domestic Product

tsinven <- ts(inven)
gdp_reg <- dynlm(ggdp ~ trend(tsinven) + L(log(gdp), 1) + L(ggdp, 1), data = tsinven)
rho_g1 <- 1 + gdp_reg$coef[3]
rho_g1
## L(log(gdp), 1) 
##      0.7903792
gdp_reg2 <- dynlm(ggdp ~ L(log(gdp), 1) + L(ggdp, 1), data = tsinven)
rho_g2 <- 1 + gdp_reg2$coef[2]
rho_g2
## L(log(gdp), 1) 
##      0.9773125
stargazer(gdp_reg, gdp_reg2, no.space=TRUE, type="text")
## 
## ============================================================
##                               Dependent variable:           
##                     ----------------------------------------
##                                       ggdp                  
##                             (1)                  (2)        
## ------------------------------------------------------------
## trend(tsinven)            0.006**                           
##                           (0.003)                           
## L(log(gdp), 1)            -0.210**             -0.023*      
##                           (0.087)              (0.012)      
## L(ggdp, 1)                 0.264                0.167       
##                           (0.165)              (0.168)      
## Constant                  1.651**              0.215**      
##                           (0.666)              (0.100)      
## ------------------------------------------------------------
## Observations                 35                  35         
## R2                         0.268                0.156       
## Adjusted R2                0.197                0.103       
## Residual Std. Error   0.020 (df = 31)      0.021 (df = 32)  
## F Statistic         3.783** (df = 3; 31) 2.959* (df = 2; 32)
## ============================================================
## Note:                            *p<0.1; **p<0.05; ***p<0.01

Example 18.5. Cointegration between Fertility and Personal Exemption

tsfertil3 <- ts(fertil3)
freg1 <- dynlm(gfr ~ t + pe, data=tsfertil3 )
summary(freg1)
## 
## Time series regression with "ts" data:
## Start = 1, End = 72
## 
## Call:
## dynlm(formula = gfr ~ t + pe, data = tsfertil3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.659  -9.934   1.841  11.027  22.882 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 109.93016    3.47526  31.632  < 2e-16 ***
## t            -0.90519    0.10899  -8.305 5.53e-12 ***
## pe            0.18666    0.03463   5.391 9.23e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.2 on 69 degrees of freedom
## Multiple R-squared:  0.5002, Adjusted R-squared:  0.4857 
## F-statistic: 34.53 on 2 and 69 DF,  p-value: 4.064e-11
uf <- resid(freg1)

freg2 <- dynlm(cgfr ~ cpe, data=tsfertil3 ) # Regression in levels
summary(freg2)
## 
## Time series regression with "ts" data:
## Start = 2, End = 72
## 
## Call:
## dynlm(formula = cgfr ~ cpe, data = tsfertil3)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.980 -2.552 -0.377  1.866 14.854 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.78478    0.50204  -1.563    0.123
## cpe         -0.04268    0.02837  -1.504    0.137
## 
## Residual standard error: 4.221 on 69 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.03176,    Adjusted R-squared:  0.01773 
## F-statistic: 2.263 on 1 and 69 DF,  p-value: 0.137
Regression in levels with a single lag & time trend, manually
cu <- uf - lag(uf, -1)
freg3 <- dynlm(cu ~ L(u, 1) + L(cu, 1) + t, data=tsfertil3)
summary(freg3)
## 
## Time series regression with "ts" data:
## Start = 3, End = 43
## 
## Call:
## dynlm(formula = cu ~ L(u, 1) + L(cu, 1) + t, data = tsfertil3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.139  -2.752  -1.105   1.881  24.343 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.18791    2.49481  -1.278    0.209
## L(u, 1)     -3.18030    8.13220  -0.391    0.698
## L(cu, 1)     0.09839    0.16400   0.600    0.552
## t            0.13609    0.09699   1.403    0.169
## 
## Residual standard error: 7.145 on 37 degrees of freedom
## Multiple R-squared:  0.07913,    Adjusted R-squared:  0.004463 
## F-statistic:  1.06 on 3 and 37 DF,  p-value: 0.3779
First difference regression, with two lags (equation 11.27)
freg4 <- dynlm(cgfr ~ cpe + L(cpe, 1) +  L(cpe, 2), data = tsfertil3)
summary(freg4)
## 
## Time series regression with "ts" data:
## Start = 4, End = 72
## 
## Call:
## dynlm(formula = cgfr ~ cpe + L(cpe, 1) + L(cpe, 2), data = tsfertil3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.8307 -2.1842 -0.1912  1.8442 11.4506 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.96368    0.46776  -2.060  0.04339 *  
## cpe         -0.03620    0.02677  -1.352  0.18101    
## L(cpe, 1)   -0.01397    0.02755  -0.507  0.61385    
## L(cpe, 2)    0.10999    0.02688   4.092  0.00012 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.859 on 65 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2325, Adjusted R-squared:  0.1971 
## F-statistic: 6.563 on 3 and 65 DF,  p-value: 0.0006054

Example 18.6. Cointegrating Parameter for Interest Rates

tsintqrt <- ts(intqrt)
coreg1 <- dynlm(r6 ~ r3 + cr3 + L(cr3, 1) + L(cr3, 2) + L(cr3, -1) + L(cr3, -2), data = tsintqrt)
summary(coreg1)
## 
## Time series regression with "ts" data:
## Start = 4, End = 122
## 
## Call:
## dynlm(formula = r6 ~ r3 + cr3 + L(cr3, 1) + L(cr3, 2) + L(cr3, 
##     -1) + L(cr3, -2), data = tsintqrt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.95221 -0.13700 -0.02553  0.11817  0.99889 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.065146   0.056952   1.144  0.25512    
## r3           1.038171   0.008077 128.529  < 2e-16 ***
## cr3         -0.053123   0.019441  -2.733  0.00730 ** 
## L(cr3, 1)   -0.061137   0.019043  -3.210  0.00173 ** 
## L(cr3, 2)   -0.043778   0.018903  -2.316  0.02239 *  
## L(cr3, -1)  -0.003572   0.019122  -0.187  0.85215    
## L(cr3, -2)   0.012366   0.018970   0.652  0.51582    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2455 on 112 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.9942, Adjusted R-squared:  0.9938 
## F-statistic:  3176 on 6 and 112 DF,  p-value: < 2.2e-16
#Test serial correlation
uc <- resid(coreg1)
coreg2 <- dynlm(r6 ~ r3 + cr3 + L(cr3, 1) + L(cr3, 2) + L(cr3, -1) + L(cr3, -2) + L(uc, 1), data = tsintqrt)
summary(coreg2)
## 
## Time series regression with "ts" data:
## Start = 5, End = 122
## 
## Call:
## dynlm(formula = r6 ~ r3 + cr3 + L(cr3, 1) + L(cr3, 2) + L(cr3, 
##     -1) + L(cr3, -2) + L(uc, 1), data = tsintqrt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.94525 -0.12513 -0.03182  0.11479  1.01592 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.067599   0.057711   1.171  0.24399    
## r3           1.037809   0.008159 127.198  < 2e-16 ***
## cr3         -0.052997   0.019539  -2.712  0.00776 ** 
## L(cr3, 1)   -0.060406   0.019224  -3.142  0.00215 ** 
## L(cr3, 2)   -0.043814   0.018991  -2.307  0.02292 *  
## L(cr3, -1)  -0.003852   0.019213  -0.200  0.84148    
## L(cr3, -2)   0.010436   0.019203   0.543  0.58792    
## L(uc, 1)     0.103944   0.096191   1.081  0.28224    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2464 on 110 degrees of freedom
## Multiple R-squared:  0.9942, Adjusted R-squared:  0.9938 
## F-statistic:  2692 on 7 and 110 DF,  p-value: < 2.2e-16
summary(dynlm(uc ~ L(uc, 1)))
## 
## Time series regression with "ts" data:
## Start = 5, End = 122
## 
## Call:
## dynlm(formula = uc ~ L(uc, 1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9384 -0.1248 -0.0324  0.1139  1.0145 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.415e-05  2.209e-02   0.002    0.998
## L(uc, 1)    1.032e-01  9.340e-02   1.105    0.272
## 
## Residual standard error: 0.24 on 116 degrees of freedom
## Multiple R-squared:  0.01041,    Adjusted R-squared:  0.00188 
## F-statistic:  1.22 on 1 and 116 DF,  p-value: 0.2716
# Compare with Simple OLS
summary(lm(r6 ~ r3, data = intqrt))
## 
## Call:
## lm(formula = r6 ~ r3, data = intqrt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1379 -0.1583 -0.0251  0.1155  1.1803 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.135374   0.054867   2.467    0.015 *  
## r3          1.025899   0.007709 133.081   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2584 on 122 degrees of freedom
## Multiple R-squared:  0.9932, Adjusted R-squared:  0.9931 
## F-statistic: 1.771e+04 on 1 and 122 DF,  p-value: < 2.2e-16

Example 18.7. Error Correction Model for Holding Yields

tsintqrt <- ts(intqrt)
erreg <- dynlm(chy6 ~ L(chy3, 1) + L(hy6hy3_1, 1), data = tsintqrt)
stargazer(erreg, no.space=TRUE, type="text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                chy6            
## -----------------------------------------------
## L(chy3, 1)                   1.218***          
##                               (0.264)          
## L(hy6hy3_1, 1)               -0.840***         
##                               (0.244)          
## Constant                      0.090**          
##                               (0.043)          
## -----------------------------------------------
## Observations                    122            
## R2                             0.790           
## Adjusted R2                    0.786           
## Residual Std. Error      0.340 (df = 119)      
## F Statistic          223.789*** (df = 2; 119)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

Example 18.8. Forecasting the U.S. Unemployment Rate

tsphillips <- ts(phillips)
reg1 <- lm(unem ~ unem_1, data = tsphillips)
forreg1 <- dynlm(unem ~ unem_1 + inf_1, data = tsphillips)

forcast97 <- forreg1$coef[1] + forreg1$coef[2]*5.4 + forreg1$coef[3]*3 
forcast97 # Forcast of unemp for 1997
## (Intercept) 
##    5.352627
unem_1f <- phillips$unem_1 -  5.4
inf_1f <- phillips$inf_1 -  3
phillips2 <- (cbind (phillips, unem_1f, inf_1f))
reg2 <- lm(unem ~ unem_1f + inf_1f, data = phillips2)
reg2
## 
## Call:
## lm(formula = unem ~ unem_1f + inf_1f, data = phillips2)
## 
## Coefficients:
## (Intercept)      unem_1f       inf_1f  
##      5.3526       0.6495       0.1830
stargazer(reg1, forreg1, reg2, keep.stat=c("n","rsq", "adj.rsq"), no.space=TRUE, type="text")
## 
## ==========================================
##                   Dependent variable:     
##              -----------------------------
##                          unem             
##                 OLS     dynamic     OLS   
##                         linear            
##                 (1)       (2)       (3)   
## ------------------------------------------
## unem_1       0.742***  0.649***           
##               (0.089)   (0.078)           
## inf_1                  0.183***           
##                         (0.039)           
## unem_1f                          0.649*** 
##                                   (0.078) 
## inf_1f                           0.183*** 
##                                   (0.039) 
## Constant     1.490***  1.296***  5.353*** 
##               (0.520)   (0.441)   (0.119) 
## ------------------------------------------
## Observations    55        55        55    
## R2             0.566     0.696     0.696  
## Adjusted R2    0.558     0.685     0.685  
## ==========================================
## Note:          *p<0.1; **p<0.05; ***p<0.01

Example 18.9. Out-of-Sample Comparisons of Unemployment Forecasts

phillips2 <- subset(phillips, phillips$year<1997)
oreg <- lm(unem ~ unem_1, data = phillips2)
RMSE <- function(error) { sqrt(mean(error^2))}
MSE <- function(error) { mean(abs(error))}
RMSE(oreg$residuals)
## [1] 1.026491
MSE(oreg$residuals)
## [1] 0.8129182
summary(abs(resid(oreg)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.09675 0.29792 0.72397 0.81292 1.05564 2.82708
oreg2 <- lm(unem ~ unem_1 + inf_1, data = phillips2)
RMSE(oreg2$residuals)
## [1] 0.8549463
MSE(oreg2$residuals)
## [1] 0.6493123
summary(abs(resid(oreg2)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0124  0.2533  0.5500  0.6493  0.8548  2.1730
stargazer(oreg, oreg2, keep.stat=c("n","rsq", "adj.rsq"), no.space=TRUE, type="text")
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                          unem            
##                   (1)            (2)     
## -----------------------------------------
## unem_1          0.732***      0.647***   
##                 (0.097)        (0.084)   
## inf_1                         0.184***   
##                                (0.041)   
## Constant        1.572***       1.304**   
##                 (0.577)        (0.490)   
## -----------------------------------------
## Observations       48            48      
## R2               0.554          0.691    
## Adjusted R2      0.544          0.677    
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01

Example 18.10. Two-Year-Ahead Forecast for the Unemployment Rate

phillips2 <- subset(phillips, phillips$year<1997)
ifreg <- lm(inf ~ inf_1 , data = phillips2)
ifreg2 <- lm(inf ~ inf_1 + unem_1, data = phillips2)

stargazer(ifreg, ifreg2, keep.stat=c("n","rsq", "adj.rsq"), no.space=TRUE, type="text")
## 
## =========================================
##                  Dependent variable:     
##              ----------------------------
##                          inf             
##                   (1)            (2)     
## -----------------------------------------
## inf_1           0.665***      0.659***   
##                 (0.107)        (0.111)   
## unem_1                          0.057    
##                                (0.226)   
## Constant        1.277**         0.974    
##                 (0.558)        (1.320)   
## -----------------------------------------
## Observations       48            48      
## R2               0.457          0.457    
## Adjusted R2      0.445          0.433    
## =========================================
## Note:         *p<0.1; **p<0.05; ***p<0.01
inf96 <- subset(phillips2$inf,phillips2$year==1996)
inf97 <- ifreg$coef[1] + ifreg$coef[2]*inf96
mean(inf97)
## [1] 3.272426
oreg2 <- lm(unem ~ unem_1 + inf_1, data = phillips2)
unem96 <- subset(phillips2$unem,phillips2$year==1996)
unem97 <- oreg2$coef[1] + oreg2$coef[2]*unem96 + oreg2$coef[3]*inf96
unem98 <- (oreg2$coef[1] + oreg2$coef[2]*unem97 + oreg2$coef[3]*inf97)
mean(unem98)
## [1] 5.365136