Introductory Econometrics Using R

Also covered using Python and Stata

library(wooldridge)
library(stargazer)
library(lmtest)
library(car)
options(width=120)

Example 8.1. Log wage equation with Heteroskedasticity

marrmale <- (wage1$female ==0 & wage1$married == 1)
marrfem <- (wage1$female ==1 & wage1$married == 1)
singfem <- (wage1$female ==1 & wage1$married == 0)
singmale <- (wage1$female ==0 & wage1$married == 0)
wage1c <- cbind(wage1, marrmale, marrfem, singfem, singmale)
wage_hetr <- lm(lwage ~ marrmale + marrfem + singfem + educ  + exper + expersq + tenure + tenursq + 1, data=wage1c)
yhat <- predict(wage_hetr)
residuals <- resid(wage_hetr)
plot(yhat, residuals, xlab="fitted values", ylab="residuals")

wage_robust <- coeftest(wage_hetr, vcov = hccm )
stargazer(wage_hetr, wage_robust, no.space=TRUE, type="text")
## 
## =======================================================
##                             Dependent variable:        
##                     -----------------------------------
##                              lwage                     
##                               OLS           coefficient
##                                                test    
##                               (1)               (2)    
## -------------------------------------------------------
## marrmale                   0.213***          0.213***  
##                             (0.055)           (0.058)  
## marrfem                    -0.198***         -0.198*** 
##                             (0.058)           (0.060)  
## singfem                    -0.110**           -0.110*  
##                             (0.056)           (0.058)  
## educ                       0.079***          0.079***  
##                             (0.007)           (0.008)  
## exper                      0.027***          0.027***  
##                             (0.005)           (0.005)  
## expersq                    -0.001***         -0.001*** 
##                            (0.0001)          (0.0001)  
## tenure                     0.029***          0.029***  
##                             (0.007)           (0.007)  
## tenursq                    -0.001**           -0.001*  
##                            (0.0002)          (0.0003)  
## Constant                   0.321***          0.321***  
##                             (0.100)           (0.112)  
## -------------------------------------------------------
## Observations                  526                      
## R2                           0.461                     
## Adjusted R2                  0.453                     
## Residual Std. Error    0.393 (df = 517)                
## F Statistic         55.246*** (df = 8; 517)            
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01

Example 8.2. Heteroskedasticity-Robust F Statistic

gpa3b <- subset(gpa3, gpa3$spring==1) 
gpa_hetr <- lm(cumgpa  ~ sat + hsperc + tothrs + female + black + white + 1, data=gpa3b) 
yhat <- predict(gpa_hetr)
residuals <- resid(gpa_hetr)
plot(yhat, residuals, xlab="fitted values", ylab="residuals")

gpa_robust <- coeftest(gpa_hetr, vcov = hccm)
stargazer(gpa_hetr, gpa_robust, no.space=TRUE, type="text")
## 
## =======================================================
##                             Dependent variable:        
##                     -----------------------------------
##                             cumgpa                     
##                               OLS           coefficient
##                                                test    
##                               (1)               (2)    
## -------------------------------------------------------
## sat                        0.001***          0.001***  
##                            (0.0002)          (0.0002)  
## hsperc                     -0.009***         -0.009*** 
##                             (0.001)           (0.001)  
## tothrs                     0.003***          0.003***  
##                             (0.001)           (0.001)  
## female                     0.303***          0.303***  
##                             (0.059)           (0.060)  
## black                       -0.128            -0.128   
##                             (0.147)           (0.128)  
## white                       -0.059            -0.059   
##                             (0.141)           (0.120)  
## Constant                   1.470***          1.470***  
##                             (0.230)           (0.229)  
## -------------------------------------------------------
## Observations                  366                      
## R2                           0.401                     
## Adjusted R2                  0.391                     
## Residual Std. Error    0.469 (df = 359)                
## F Statistic         39.982*** (df = 6; 359)            
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01

Example 8.3. Heteroskedasticity-Robust LM Statistic

avgsensq <- (crime1$avgsen)**2
df <- cbind(crime1, avgsensq)
crime_hetr <- lm(narr86 ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
yhat <- predict(crime_hetr)
residuals <- resid(crime_hetr)
plot(yhat, residuals, xlab="fitted values", ylab="residuals")

crime_robust <- coeftest(crime_hetr, vcov = hccm)
stargazer(crime_hetr, crime_robust, no.space=TRUE, type="text")
## 
## ========================================================
##                             Dependent variable:         
##                     ------------------------------------
##                              narr86                     
##                               OLS            coefficient
##                                                 test    
##                               (1)                (2)    
## --------------------------------------------------------
## pcnv                       -0.136***          -0.136*** 
##                             (0.040)            (0.034)  
## avgsen                       0.018*            0.018*   
##                             (0.010)            (0.010)  
## avgsensq                    -0.001*           -0.001**  
##                             (0.0003)          (0.0002)  
## ptime86                    -0.039***          -0.039*** 
##                             (0.009)            (0.006)  
## qemp86                     -0.051***          -0.051*** 
##                             (0.014)            (0.014)  
## inc86                      -0.001***          -0.001*** 
##                             (0.0003)          (0.0002)  
## black                       0.325***          0.325***  
##                             (0.045)            (0.059)  
## hispan                      0.193***          0.193***  
##                             (0.040)            (0.040)  
## Constant                    0.567***          0.567***  
##                             (0.036)            (0.040)  
## --------------------------------------------------------
## Observations                 2,725                      
## R2                           0.073                      
## Adjusted R2                  0.070                      
## Residual Std. Error    0.828 (df = 2716)                
## F Statistic         26.655*** (df = 8; 2716)            
## ========================================================
## Note:                        *p<0.1; **p<0.05; ***p<0.01
LM Statistic - not-robust (See Section 5-2)
crime_hetr_r <- lm(narr86 ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)

residuals <- resid(crime_hetr_r)

resid_reg <- lm(residuals ~ pcnv + avgsen + avgsensq + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
stargazer(crime_hetr_r, resid_reg, no.space=TRUE, type="text")
## 
## =================================================================
##                                  Dependent variable:             
##                     ---------------------------------------------
##                              narr86               residuals      
##                               (1)                    (2)         
## -----------------------------------------------------------------
## pcnv                       -0.132***                -0.003       
##                             (0.040)                (0.040)       
## avgsen                                              0.018*       
##                                                    (0.010)       
## avgsensq                                           -0.001*       
##                                                    (0.0003)      
## ptime86                    -0.038***                -0.002       
##                             (0.008)                (0.009)       
## qemp86                     -0.051***                0.0005       
##                             (0.014)                (0.014)       
## inc86                      -0.001***               0.00001       
##                             (0.0003)               (0.0003)      
## black                       0.330***                -0.005       
##                             (0.045)                (0.045)       
## hispan                      0.195***                -0.002       
##                             (0.040)                (0.040)       
## Constant                    0.570***                -0.003       
##                             (0.036)                (0.036)       
## -----------------------------------------------------------------
## Observations                 2,725                  2,725        
## R2                           0.072                  0.001        
## Adjusted R2                  0.070                  -0.002       
## Residual Std. Error    0.829 (df = 2718)      0.828 (df = 2716)  
## F Statistic         34.946*** (df = 6; 2718) 0.432 (df = 8; 2716)
## =================================================================
## Note:                                 *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(resid_reg)*summary(resid_reg)$r.squared
LM
## [1] 3.462601
LM Statistic - robust
avgsen_reg <- lm(avgsen ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
resid_avg <- resid(avgsen_reg)*resid(crime_hetr_r)
avgsensq_reg <- lm(avgsensq ~ pcnv + ptime86 + qemp86 + inc86 + black + hispan + 1, data=df)
resid_avgsq <- resid(avgsensq_reg)*resid(crime_hetr_r)

one <- (df$avgsen>=0)
df2 <- cbind(resid_avg, resid_avgsq, as.data.frame(one))
lm_robust <- lm(one ~ resid_avg + resid_avgsq + 0, data=df2)
stargazer(lm_robust, no.space=TRUE, type="text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                 one            
## -----------------------------------------------
## resid_avg                     0.028**          
##                               (0.014)          
## resid_avgsq                   -0.001*          
##                               (0.001)          
## -----------------------------------------------
## Observations                   2,725           
## R2                             0.001           
## Adjusted R2                    0.001           
## Residual Std. Error      1.000 (df = 2723)     
## F Statistic            2.000 (df = 2; 2723)    
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(lm_robust)*summary(lm_robust)$r.squared
LM
## [1] 3.997085

Example 8.4. Heteroskedasticity in Housing Price Equations

hprice_hetr <- lm(price ~ lotsize + sqrft + bdrms + 1, data=hprice1)
uhat_sq <- resid(hprice_hetr)**2
uhat_sq_reg <- lm(uhat_sq ~ lotsize + sqrft + bdrms + 1, data=hprice1)
stargazer(uhat_sq_reg, no.space=TRUE, type="text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               uhat_sq          
## -----------------------------------------------
## lotsize                      0.202***          
##                               (0.071)          
## sqrft                          1.691           
##                               (1.464)          
## bdrms                        1,041.760         
##                              (996.381)         
## Constant                    -5,522.795*        
##                             (3,259.478)        
## -----------------------------------------------
## Observations                    88             
## R2                             0.160           
## Adjusted R2                    0.130           
## Residual Std. Error     6,616.646 (df = 84)    
## F Statistic            5.339*** (df = 3; 84)   
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(uhat_sq_reg)*summary(uhat_sq_reg)$r.squared
LM
## [1] 14.09239
hprice_hetr_log <- lm(lprice ~ llotsize + lsqrft + bdrms + 1, data=hprice1)
uhat_sq_log <- resid(hprice_hetr_log)**2
uhat_sq_reg_log <- lm(uhat_sq_log ~ llotsize + lsqrft + bdrms + 1, data=hprice1)
stargazer(uhat_sq_reg_log, no.space=TRUE, type="text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                             uhat_sq_log        
## -----------------------------------------------
## llotsize                      -0.007           
##                               (0.015)          
## lsqrft                        -0.063*          
##                               (0.037)          
## bdrms                          0.017           
##                               (0.011)          
## Constant                      0.510*           
##                               (0.258)          
## -----------------------------------------------
## Observations                    88             
## R2                             0.048           
## Adjusted R2                    0.014           
## Residual Std. Error       0.073 (df = 84)      
## F Statistic             1.412 (df = 3; 84)     
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(uhat_sq_reg_log)*summary(uhat_sq_reg_log)$r.squared
LM
## [1] 4.223248

Example 8.5. Special Form of the White Test

yhat <- predict(hprice_hetr_log)
yhat_sq <- yhat**2
special_reg <- lm(uhat_sq_log ~ yhat + yhat_sq + 1, data=hprice1)
stargazer(special_reg, no.space=TRUE, type="text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                             uhat_sq_log        
## -----------------------------------------------
## yhat                          -1.709           
##                               (1.163)          
## yhat_sq                        0.145           
##                               (0.101)          
## Constant                       5.047           
##                               (3.345)          
## -----------------------------------------------
## Observations                    88             
## R2                             0.039           
## Adjusted R2                    0.017           
## Residual Std. Error       0.073 (df = 85)      
## F Statistic             1.733 (df = 2; 85)     
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
LM <- nobs(special_reg)*summary(special_reg)$r.squared
LM
## [1] 3.447286

Example 8.6. Financial Wealth Equation (& Table 8.2)

k401ksubs <- subset(k401ksubs, k401ksubs$fsize==1)
age25sq <- (k401ksubs$age - 25)**2

OLS1 <- lm(nettfa ~ inc + 1, data=k401ksubs)
OLS1R <- coeftest(OLS1, vcov = hccm)
OLS2 <- lm(nettfa ~ inc + age25sq + male + e401k + 1, data=k401ksubs)
OLS2R <- coeftest(OLS2, vcov = hccm)

w <- 1/k401ksubs$inc
WLS1 <- lm(nettfa ~ inc + 1, weights=w, data=k401ksubs)
WLS2 <- lm(nettfa ~ inc + age25sq + male + e401k+ 1, weights=w, data=k401ksubs)
WLS3 <- lm(nettfa ~ inc + age25sq + male + e401k+ 1, weights=w, data=k401ksubs) # Table 8.2

stargazer(OLS1R, OLS2R, WLS1, WLS2, WLS3, type="text", column.labels=c("OLS1R", "OLS2R", "WLS1", "WLS2", "WLS3"), keep.stat=c("n","rsq", "adj.rsq"), no.space=TRUE, align = TRUE)
## 
## ==================================================================
##                               Dependent variable:                 
##              -----------------------------------------------------
##                                                nettfa             
##                   coefficient                    OLS              
##                      test                                         
##                OLS1R      OLS2R      WLS1       WLS2       WLS3   
##                 (1)        (2)        (3)       (4)        (5)    
## ------------------------------------------------------------------
## inc           0.821***   0.771***  0.787***   0.740***   0.740*** 
##               (0.104)    (0.100)    (0.063)   (0.064)    (0.064)  
## age25sq                  0.025***             0.018***   0.018*** 
##                          (0.004)              (0.002)    (0.002)  
## male                      2.478                1.841      1.841   
##                          (2.065)              (1.564)    (1.564)  
## e401k                    6.886***             5.188***   5.188*** 
##                          (2.292)              (1.703)    (1.703)  
## Constant     -10.571*** -20.985*** -9.581*** -16.703*** -16.703***
##               (2.551)    (3.520)    (1.653)   (1.958)    (1.958)  
## ------------------------------------------------------------------
## Observations                         2,017     2,017      2,017   
## R2                                   0.071     0.112      0.112   
## Adjusted R2                          0.070     0.110      0.110   
## ==================================================================
## Note:                                  *p<0.1; **p<0.05; ***p<0.01

Example 8.7. Demand for Cigarettes

OLS_smk <- lm(cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, data=smoke)
stargazer(OLS_smk, no.space=TRUE, single.row = TRUE,  type="text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                cigs            
## -----------------------------------------------
## lincome                    0.880 (0.728)       
## lcigpric                  -0.751 (5.773)       
## educ                     -0.501*** (0.167)     
## age                      0.771*** (0.160)      
## agesq                    -0.009*** (0.002)     
## restaurn                 -2.825** (1.112)      
## Constant                  -3.640 (24.079)      
## -----------------------------------------------
## Observations                    807            
## R2                             0.053           
## Adjusted R2                    0.046           
## Residual Std. Error      13.405 (df = 800)     
## F Statistic           7.423*** (df = 6; 800)   
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
luhat_sq <- log(resid(OLS_smk)**2) 
luhat_sq_reg <- lm(luhat_sq ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, data=smoke)
luhat_sq_hat <- exp(predict(luhat_sq_reg))
w = 1/luhat_sq_hat
GLS_smk <- lm(cigs ~ lincome + lcigpric + educ + age + agesq + restaurn + 1, weights = w, data=smoke)

stargazer(OLS_smk, GLS_smk, column.labels=c("OLS","GLS"), no.space=TRUE, type="text")
## 
## ===========================================================
##                                    Dependent variable:     
##                                ----------------------------
##                                            cigs            
##                                     OLS            GLS     
##                                     (1)            (2)     
## -----------------------------------------------------------
## lincome                            0.880        1.295***   
##                                   (0.728)        (0.437)   
## lcigpric                           -0.751        -2.940    
##                                   (5.773)        (4.460)   
## educ                             -0.501***      -0.463***  
##                                   (0.167)        (0.120)   
## age                               0.771***      0.482***   
##                                   (0.160)        (0.097)   
## agesq                            -0.009***      -0.006***  
##                                   (0.002)        (0.001)   
## restaurn                          -2.825**      -3.461***  
##                                   (1.112)        (0.796)   
## Constant                           -3.640         5.635    
##                                   (24.079)      (17.803)   
## -----------------------------------------------------------
## Observations                        807            807     
## R2                                 0.053          0.113    
## Adjusted R2                        0.046          0.107    
## Residual Std. Error (df = 800)     13.405         1.579    
## F Statistic (df = 6; 800)         7.423***      17.055***  
## ===========================================================
## Note:                           *p<0.1; **p<0.05; ***p<0.01

Example 8.8. Labor Force Participation of Married Women

mroz_hetr <- lm(inlf ~ nwifeinc + educ + exper + expersq + age + kidslt6 + kidsge6 + 1, data=mroz)
mroz_robust <- coeftest(mroz_hetr, vcov = hccm) 
stargazer(mroz_hetr, mroz_robust, column.labels=c("Herosced.","Robust"), no.space=TRUE, type="text")
## 
## =======================================================
##                             Dependent variable:        
##                     -----------------------------------
##                              inlf                      
##                               OLS           coefficient
##                                                test    
##                            Herosced.          Robust   
##                               (1)               (2)    
## -------------------------------------------------------
## nwifeinc                   -0.003**          -0.003**  
##                             (0.001)           (0.002)  
## educ                       0.038***          0.038***  
##                             (0.007)           (0.007)  
## exper                      0.039***          0.039***  
##                             (0.006)           (0.006)  
## expersq                    -0.001***         -0.001*** 
##                            (0.0002)          (0.0002)  
## age                        -0.016***         -0.016*** 
##                             (0.002)           (0.002)  
## kidslt6                    -0.262***         -0.262*** 
##                             (0.034)           (0.032)  
## kidsge6                      0.013             0.013   
##                             (0.013)           (0.014)  
## Constant                   0.586***          0.586***  
##                             (0.154)           (0.154)  
## -------------------------------------------------------
## Observations                  753                      
## R2                           0.264                     
## Adjusted R2                  0.257                     
## Residual Std. Error    0.427 (df = 745)                
## F Statistic         38.218*** (df = 7; 745)            
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01

Example 8.9. Determinants of Personal Computer Ownership

parcoll <- (gpa1$fathcoll==1 | gpa1$mothcoll==1)
gpa1 <- cbind(gpa1, parcoll)
gpa_hetr <- lm(PC ~ hsGPA + ACT + parcoll  + 1, data=gpa1)
gpa_robust <- coeftest(gpa_hetr, vcov = hccm) 

w <- 1/(predict(gpa_hetr) - (predict(gpa_hetr)**2))
gpa_gls = lm(PC ~ hsGPA + ACT + parcoll  + 1, weights = w, data=gpa1)

stargazer(gpa_hetr, gpa_robust, gpa_gls, column.labels=c("Herosced.","Robust", "GLS"), no.space=TRUE, type="text")
## 
## ============================================================
##                                     Dependent variable:     
##                                -----------------------------
##                                   PC                   PC   
##                                   OLS    coefficient   OLS  
##                                             test            
##                                Herosced.   Robust      GLS  
##                                   (1)        (2)       (3)  
## ------------------------------------------------------------
## hsGPA                            0.065      0.065     0.033 
##                                 (0.137)    (0.148)   (0.130)
## ACT                              0.001      0.001     0.004 
##                                 (0.015)    (0.017)   (0.015)
## parcoll                         0.221**    0.221**   0.215**
##                                 (0.093)    (0.090)   (0.086)
## Constant                        -0.0004    -0.0004    0.026 
##                                 (0.491)    (0.512)   (0.477)
## ------------------------------------------------------------
## Observations                      141                  141  
## R2                               0.042                0.046 
## Adjusted R2                      0.021                0.026 
## Residual Std. Error (df = 137)   0.486                1.016 
## F Statistic (df = 3; 137)        1.979               2.224* 
## ============================================================
## Note:                            *p<0.1; **p<0.05; ***p<0.01