II Econometric Analysis Using R

Also available in Stata and Python versions

Chapter 4 - Single-Equation and OLS Estimation

Example4.1

Load libraries

library(wooldridge)
library(stargazer)
library(AER)
library(lmtest)
library(haven)

Wage equation for married working women

# OLS fit of the log-wage equation on the mroz data.
lwage_hetr <- lm(
  lwage ~ exper + expersq + educ + age + kidslt6 + kidsge6,
  data = mroz
)
# Same coefficients, re-tested with the HC1 covariance matrix from vcovHC().
lwage_robust <- coeftest(lwage_hetr, vcovHC(lwage_hetr, type = "HC1"))
# Column (1) is the plain lm() fit, column (2) the HC1-based coefficient test.
# NOTE(review): the label "Hetrosced." is attached to the non-robust column;
# confirm the intended wording before changing it, because the label is echoed
# in the recorded table output.
stargazer(
  lwage_hetr, lwage_robust,
  type = "text",
  no.space = TRUE,
  column.labels = c("Hetrosced.", "Robust SE")
)
## 
## =======================================================
##                             Dependent variable:        
##                     -----------------------------------
##                              lwage                     
##                               OLS           coefficient
##                                                test    
##                           Hetrosced.         Robust SE 
##                               (1)               (2)    
## -------------------------------------------------------
## exper                      0.040***          0.040***  
##                             (0.013)           (0.015)  
## expersq                     -0.001*           -0.001*  
##                            (0.0004)          (0.0004)  
## educ                       0.108***          0.108***  
##                             (0.014)           (0.014)  
## age                         -0.001            -0.001   
##                             (0.005)           (0.006)  
## kidslt6                     -0.061            -0.061   
##                             (0.089)           (0.106)  
## kidsge6                     -0.015            -0.015   
##                             (0.028)           (0.029)  
## Constant                    -0.421            -0.421   
##                             (0.317)           (0.318)  
## -------------------------------------------------------
## Observations                  428                      
## R2                           0.158                     
## Adjusted R2                  0.146                     
## Residual Std. Error    0.668 (df = 421)                
## F Statistic         13.191*** (df = 6; 421)            
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01
# Joint F test that the coefficients on kidslt6, kidsge6 and age are all zero.
# No robust covariance matrix is passed in this call.
linearHypothesis(
  model = lwage_hetr,
  hypothesis.matrix = c("kidslt6=0", "kidsge6=0", "age=0")
)
## Linear hypothesis test
## 
## Hypothesis:
## kidslt6 = 0
## kidsge6 = 0
## age = 0
## 
## Model 1: restricted model
## Model 2: lwage ~ exper + expersq + educ + age + kidslt6 + kidsge6
## 
##   Res.Df    RSS Df Sum of Sq     F Pr(>F)
## 1    424 188.31                          
## 2    421 187.99  3   0.31751 0.237 0.8705

LM1: LM statistic (N times the R-squared of the auxiliary regression), p. 64

# Residuals of the restricted model (age, kidslt6 and kidsge6 left out).
uhat_rst <- resid(lm(lwage ~ exper + expersq + educ, data = mroz))
# Auxiliary regression of those residuals on the full regressor list, run on
# the rows whose wage is not NA.
uhat_reg <- lm(
  uhat_rst ~ exper + expersq + educ + age + kidslt6 + kidsge6,
  data = subset(mroz, !is.na(wage))
)
summary(uhat_reg)
## 
## Call:
## lm(formula = uhat_rst ~ exper + expersq + educ + age + kidslt6 + 
##     kidsge6, data = subset(mroz, !is.na(wage)))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.08183 -0.30631  0.04606  0.37161  2.35708 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  1.011e-01  3.169e-01   0.319    0.750
## exper       -1.747e-03  1.339e-02  -0.130    0.896
## expersq      2.996e-05  4.022e-04   0.074    0.941
## educ         3.423e-04  1.440e-02   0.024    0.981
## age         -1.465e-03  5.292e-03  -0.277    0.782
## kidslt6     -6.071e-02  8.876e-02  -0.684    0.494
## kidsge6     -1.459e-02  2.790e-02  -0.523    0.601
## 
## Residual standard error: 0.6682 on 421 degrees of freedom
## Multiple R-squared:  0.001686,   Adjusted R-squared:  -0.01254 
## F-statistic: 0.1185 on 6 and 421 DF,  p-value: 0.9942
# LM statistic: number of observations times the R-squared of uhat_reg.
LM <- nobs(uhat_reg) * summary(uhat_reg)[["r.squared"]]
print(LM)
## [1] 0.7216757
# Upper-tail chi-squared p-value; df = 3, one per restriction tested
# (age, kidslt6, kidsge6).
pchisq(LM, df = 3, lower.tail = FALSE)
## [1] 0.8680941

HOME

Example4.1+

LM2: heteroskedasticity-robust LM statistic, continued on p. 65

# Second LM statistic (LM2) for H0: the coefficients on age, kidslt6 and
# kidsge6 are all zero.
#
# Step 1: residuals u from the restricted model.
uhat_rst <- resid(lm(lwage ~ exper + expersq + educ, data = mroz))

# Step 2: the rows with an observed wage, subset once and reused below.
mroz_working <- subset(mroz, !is.na(wage))

# The products below multiply two residual vectors element by element. They
# come from separately filtered samples, so fail loudly if the row counts ever
# differ instead of letting R recycle the shorter vector.
stopifnot(length(uhat_rst) == nrow(mroz_working))

# Step 3: partial each excluded regressor out on the included regressors and
# multiply its residual by the restricted-model residual.
u_age <- resid(lm(age ~ educ + exper + expersq, data = mroz_working)) * uhat_rst
u_kidslt6 <- resid(lm(kidslt6 ~ educ + exper + expersq, data = mroz_working)) * uhat_rst
u_kidsge6 <- resid(lm(kidsge6 ~ educ + exper + expersq, data = mroz_working)) * uhat_rst

# Step 4: regress a column of 1s on the three products with no intercept
# (the "+ 0" term).
udata <- data.frame(u_age, u_kidslt6, u_kidsge6)
udata$one <- 1 # Generates a vector of 1s.
summary(one_r <- lm(one ~ u_age + u_kidslt6 + u_kidsge6 + 0, data=udata))
## 
## Call:
## lm(formula = one ~ u_age + u_kidslt6 + u_kidsge6 + 0, data = udata)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## 0.7818 0.9926 0.9996 1.0055 1.2620 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)
## u_age     -0.002512   0.010715  -0.234    0.815
## u_kidslt6 -0.090505   0.169209  -0.535    0.593
## u_kidsge6 -0.026719   0.060337  -0.443    0.658
## 
## Residual standard error: 1.003 on 425 degrees of freedom
## Multiple R-squared:  0.001196,   Adjusted R-squared:  -0.005854 
## F-statistic: 0.1696 on 3 and 425 DF,  p-value: 0.9169
# LM statistic: number of observations times the R-squared reported for the
# no-intercept regression one_r.
LM <- nobs(one_r) * summary(one_r)[["r.squared"]]
print(LM)
## [1] 0.5118801
# Upper-tail chi-squared p-value; df = 3, one per product regressor in one_r.
pchisq(LM, df = 3, lower.tail = FALSE)
## [1] 0.9162738

HOME

Example4.3

Using IQ as a Proxy for Ability

# Read nls80.dta from the working directory (the same data set can be obtained
# in Stata with the bcuse command).
nls80 <- read_dta("nls80.dta")
# Baseline log-wage equation; iq is not among the regressors here.
summary(
  lm(
    formula = lwage ~ exper + tenure + married + south + urban + black + educ,
    data = nls80
  )
)
## 
## Call:
## lm(formula = lwage ~ exper + tenure + married + south + urban + 
##     black + educ, data = nls80)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.98069 -0.21996  0.00707  0.24288  1.22822 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.395497   0.113225  47.653  < 2e-16 ***
## exper        0.014043   0.003185   4.409 1.16e-05 ***
## tenure       0.011747   0.002453   4.789 1.95e-06 ***
## married      0.199417   0.039050   5.107 3.98e-07 ***
## south       -0.090904   0.026249  -3.463 0.000558 ***
## urban        0.183912   0.026958   6.822 1.62e-11 ***
## black       -0.188350   0.037667  -5.000 6.84e-07 ***
## educ         0.065431   0.006250  10.468  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared:  0.2526, Adjusted R-squared:  0.2469 
## F-statistic: 44.75 on 7 and 927 DF,  p-value: < 2.2e-16
# Log-wage equation with iq added to the regressor list.
summary(
  lm(
    formula = lwage ~ exper + tenure + married + south + urban + black +
      educ + iq,
    data = nls80
  )
)
## 
## Call:
## lm(formula = lwage ~ exper + tenure + married + south + urban + 
##     black + educ + iq, data = nls80)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.01203 -0.22244  0.01017  0.22951  1.27478 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.1764392  0.1280006  40.441  < 2e-16 ***
## exper        0.0141458  0.0031651   4.469 8.82e-06 ***
## tenure       0.0113951  0.0024394   4.671 3.44e-06 ***
## married      0.1997644  0.0388025   5.148 3.21e-07 ***
## south       -0.0801695  0.0262529  -3.054 0.002325 ** 
## urban        0.1819463  0.0267929   6.791 1.99e-11 ***
## black       -0.1431253  0.0394925  -3.624 0.000306 ***
## educ         0.0544106  0.0069285   7.853 1.12e-14 ***
## iq           0.0035591  0.0009918   3.589 0.000350 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3632 on 926 degrees of freedom
## Multiple R-squared:  0.2628, Adjusted R-squared:  0.2564 
## F-statistic: 41.27 on 8 and 926 DF,  p-value: < 2.2e-16

HOME

Example4.4

Effects of Job Training Grants on Worker Productivity

# Regress lscrap on grant using only the jtrain rows with year equal to 1988.
summary(
  lm(
    formula = lscrap ~ grant,
    data = subset(jtrain, jtrain$year==1988)
  )
)
## 
## Call:
## lm(formula = lscrap ~ grant, data = subset(jtrain, jtrain$year == 
##     1988))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4043 -0.9536 -0.0465  0.9636  2.8103 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   0.4085     0.2406   1.698   0.0954 .
## grant         0.0566     0.4056   0.140   0.8895  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.423 on 52 degrees of freedom
##   (103 observations deleted due to missingness)
## Multiple R-squared:  0.0003744,  Adjusted R-squared:  -0.01885 
## F-statistic: 0.01948 on 1 and 52 DF,  p-value: 0.8895
# Same 1988 regression with lscrap_1 added as a regressor.
# NOTE(review): lscrap_1 is presumably the previous year's lscrap -- confirm
# against the jtrain documentation.
summary(
  lm(
    formula = lscrap ~ grant + lscrap_1,
    data = subset(jtrain, jtrain$year==1988)
  )
)
## 
## Call:
## lm(formula = lscrap ~ grant + lscrap_1, data = subset(jtrain, 
##     jtrain$year == 1988))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9146 -0.1763  0.0057  0.2308  1.5991 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.02124    0.08910   0.238   0.8126    
## grant       -0.25397    0.14703  -1.727   0.0902 .  
## lscrap_1     0.83116    0.04444  18.701   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5127 on 51 degrees of freedom
##   (103 observations deleted due to missingness)
## Multiple R-squared:  0.8728, Adjusted R-squared:  0.8678 
## F-statistic: 174.9 on 2 and 51 DF,  p-value: < 2.2e-16

HOME

Example4.5

Return to Education Depends on Ability

# Log-wage equation with an educ-by-iq interaction; educ*iq in the formula
# expands to educ + iq + educ:iq.
lwage_rg <- lm(
  lwage ~ exper + tenure + married + south + urban + black + educ*iq,
  data = nls80
)
summary(lwage_rg)
## 
## Call:
## lm(formula = lwage ~ exper + tenure + married + south + urban + 
##     black + educ * iq, data = nls80)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.00733 -0.21715  0.01177  0.23456  1.27305 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.6482483  0.5462963  10.339  < 2e-16 ***
## exper        0.0139072  0.0031768   4.378 1.34e-05 ***
## tenure       0.0113929  0.0024397   4.670 3.46e-06 ***
## married      0.2008658  0.0388267   5.173 2.82e-07 ***
## south       -0.0802354  0.0262560  -3.056 0.002308 ** 
## urban        0.1835758  0.0268586   6.835 1.49e-11 ***
## black       -0.1466989  0.0397013  -3.695 0.000233 ***
## educ         0.0184559  0.0410608   0.449 0.653192    
## iq          -0.0009418  0.0051625  -0.182 0.855289    
## educ:iq      0.0003399  0.0003826   0.888 0.374564    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3632 on 925 degrees of freedom
## Multiple R-squared:  0.2634, Adjusted R-squared:  0.2563 
## F-statistic: 36.76 on 9 and 925 DF,  p-value: < 2.2e-16
# Joint F test that the coefficients on educ:iq and iq are both zero.
linearHypothesis(
  model = lwage_rg,
  hypothesis.matrix = c("educ:iq =0", "iq=0")
)
## Linear hypothesis test
## 
## Hypothesis:
## educ:iq = 0
## iq = 0
## 
## Model 1: restricted model
## Model 2: lwage ~ exper + tenure + married + south + urban + black + educ * 
##     iq
## 
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1    927 123.82                                
## 2    925 122.02  2    1.8024 6.8318 0.001134 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

HOME