Also available in Stata and Python versions
Load libraries
library(wooldridge)
library(AER)
library(stargazer)
library(haven)
Testing for endogeneity of educ in the wage equation
# Keep only the women with an observed wage; every regression in this
# section is run on that sample.
df <- mroz[!is.na(mroz$wage), ]

# Reduced form: regress educ on the exogenous regressors and the three
# instruments (motheduc, fatheduc, huseduc).
OLS1 <- lm(
  educ ~ exper + expersq + motheduc + fatheduc + huseduc,
  data = df
)
summary(OLS1)
##
## Call:
## lm(formula = educ ~ exper + expersq + motheduc + fatheduc + huseduc,
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.6882 -1.1519 0.0097 1.0640 5.7302
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.5383110 0.4597824 12.046 < 2e-16 ***
## exper 0.0374977 0.0343102 1.093 0.275059
## expersq -0.0006002 0.0010261 -0.585 0.558899
## motheduc 0.1141532 0.0307835 3.708 0.000237 ***
## fatheduc 0.1060801 0.0295153 3.594 0.000364 ***
## huseduc 0.3752548 0.0296347 12.663 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.738 on 422 degrees of freedom
## Multiple R-squared: 0.4286, Adjusted R-squared: 0.4218
## F-statistic: 63.3 on 5 and 422 DF, p-value: < 2.2e-16
# Reduced-form residuals from OLS1; they enter the wage equation as an extra
# regressor, and the t statistic on v2 is the endogeneity test for educ.
v2 <- residuals(OLS1)
OLS2 <- lm(
  lwage ~ exper + expersq + educ + v2,
  data = df
)
summary(OLS2)
##
## Call:
## lm(formula = lwage ~ exper + expersq + educ + v2, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.05797 -0.29594 0.04984 0.37935 2.34204
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1868572 0.2835905 -0.659 0.51032
## exper 0.0430973 0.0131810 3.270 0.00116 **
## expersq -0.0008628 0.0003937 -2.192 0.02895 *
## educ 0.0803918 0.0216362 3.716 0.00023 ***
## v2 0.0471890 0.0285519 1.653 0.09912 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6651 on 423 degrees of freedom
## Multiple R-squared: 0.1622, Adjusted R-squared: 0.1543
## F-statistic: 20.48 on 4 and 423 DF, p-value: 1.944e-15
# Endogeneity test with two potentially endogenous terms (educ and
# educ x black) in the card data. Note: OLS1 and OLS2 are re-used here and
# overwrite the mroz models fitted above.

# (1) Wage equation by OLS.
OLS1 <- lm(
  lwage ~ educ*black + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)

# (2) Reduced form for educ; its residuals are v21.
OLS2 <- lm(
  educ ~ black*nearc4 + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)
v21 <- residuals(OLS2)

# (3) Reduced form for the interaction educ x black; its residuals are v22.
card[["b_educ"]] <- card$educ * card$black
OLS3 <- lm(
  b_educ ~ exper + expersq + black*nearc4 + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + reg668,
  data = card
)
v22 <- residuals(OLS3)

# (4) Wage equation augmented with both sets of reduced-form residuals.
# The doubled "+ +reg668" is kept verbatim: the unary plus does not change
# the model terms, and the printed model call further down echoes it.
OLS4 <- lm(
  lwage ~ v21 + v22 + educ*black + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + +reg668,
  data = card
)

stargazer(
  OLS1, OLS2, OLS3, OLS4,
  keep.stat = c("n", "rsq", "adj.rsq"),
  no.space = TRUE,
  type = "text"
)
##
## ====================================================
## Dependent variable:
## ---------------------------------------
## lwage educ b_educ lwage
## (1) (2) (3) (4)
## ----------------------------------------------------
## v21 -0.057
## (0.055)
## v22 0.007
## (0.039)
## educ 0.071*** 0.127**
## (0.004) (0.055)
## black -0.419*** -0.937*** 11.550*** -0.283
## (0.079) (0.148) (0.088) (0.487)
## nearc4 0.319*** -0.091
## (0.098) (0.058)
## exper 0.082*** -0.413*** 0.053*** 0.106***
## (0.007) (0.034) (0.020) (0.024)
## expersq -0.002*** 0.001 -0.008*** -0.002***
## (0.0003) (0.002) (0.001) (0.0005)
## smsa 0.134*** 0.402*** 0.195*** 0.111***
## (0.020) (0.105) (0.062) (0.030)
## smsa66 0.025 0.025 0.047 0.018
## (0.019) (0.106) (0.063) (0.021)
## south -0.144*** -0.052 -0.253*** -0.142***
## (0.026) (0.136) (0.080) (0.027)
## reg661 -0.122*** -0.210 0.162 -0.110***
## (0.039) (0.203) (0.120) (0.041)
## reg662 -0.023 -0.289* 0.006 -0.008
## (0.028) (0.147) (0.087) (0.032)
## reg663 0.023 -0.238* 0.086 0.038
## (0.027) (0.143) (0.085) (0.031)
## reg664 -0.067* -0.093 0.113 -0.060
## (0.036) (0.186) (0.110) (0.037)
## reg665 0.003 -0.483** 0.262** 0.034
## (0.036) (0.188) (0.112) (0.048)
## reg666 0.015 -0.513** 0.335*** 0.050
## (0.040) (0.210) (0.124) (0.054)
## reg667 -0.007 -0.427** 0.296** 0.022
## (0.039) (0.206) (0.122) (0.050)
## reg668 -0.176*** 0.314 0.100 -0.191***
## (0.046) (0.242) (0.143) (0.049)
## educ:black 0.018*** 0.011
## (0.006) (0.039)
## black:nearc4 0.003 0.875***
## (0.177) (0.105)
## Constant 4.807*** 16.849*** 0.095 3.845***
## (0.075) (0.215) (0.127) (0.931)
## ----------------------------------------------------
## Observations 3,010 3,010 3,010 3,010
## R2 0.302 0.477 0.952 0.302
## Adjusted R2 0.298 0.474 0.951 0.298
## ====================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Joint test that both reduced-form residuals have zero coefficients in OLS4.
linearHypothesis(
  model = OLS4,
  hypothesis.matrix = c("v21=0", "v22=0")
)
## Linear hypothesis test
##
## Hypothesis:
## v21 = 0
## v22 = 0
##
## Model 1: restricted model
## Model 2: lwage ~ v21 + v22 + educ * black + exper + expersq + smsa + smsa66 +
## south + reg661 + reg662 + reg663 + reg664 + reg665 + reg666 +
## reg667 + +reg668
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 2993 413.82
## 2 2991 413.67 2 0.15005 0.5425 0.5814
IV
# 2SLS for the card wage equation: educ and b_educ are instrumented by
# nearc4 and black:nearc4; all other regressors instrument themselves.
# The doubled "+ +reg668" is kept verbatim because the printed call echoes it.
IV1 <- ivreg(
  lwage ~ educ + b_educ + black + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + +reg668 |
    nearc4 + black:nearc4 + black + exper + expersq + smsa + smsa66 + south +
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + +reg668,
  data = card
)
summary(IV1)
##
## Call:
## ivreg(formula = lwage ~ educ + b_educ + black + exper + expersq +
## smsa + smsa66 + south + reg661 + reg662 + reg663 + reg664 +
## reg665 + reg666 + reg667 + +reg668 | nearc4 + black:nearc4 +
## black + exper + expersq + smsa + smsa66 + south + reg661 +
## reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + +reg668,
## data = card)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.84372 -0.24074 0.02335 0.25163 1.42490
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.8449897 0.9693451 3.967 7.46e-05 ***
## educ 0.1273557 0.0569582 2.236 0.025429 *
## b_educ 0.0109036 0.0403571 0.270 0.787042
## black -0.2827650 0.5064228 -0.558 0.576642
## exper 0.1059116 0.0251806 4.206 2.67e-05 ***
## expersq -0.0022406 0.0004823 -4.646 3.54e-06 ***
## smsa 0.1111555 0.0316396 3.513 0.000449 ***
## smsa66 0.0180009 0.0216221 0.833 0.405179
## south -0.1424762 0.0283768 -5.021 5.45e-07 ***
## reg661 -0.1103479 0.0427259 -2.583 0.009850 **
## reg662 -0.0081783 0.0330717 -0.247 0.804702
## reg663 0.0382413 0.0327227 1.169 0.242639
## reg664 -0.0600379 0.0382978 -1.568 0.117066
## reg665 0.0337805 0.0499262 0.677 0.498707
## reg666 0.0498975 0.0559401 0.892 0.372475
## reg667 0.0216942 0.0521928 0.416 0.677692
## reg668 -0.1908353 0.0505417 -3.776 0.000163 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.387 on 2993 degrees of freedom
## Multiple R-Squared: 0.2435, Adjusted R-squared: 0.2395
## Wald test: 48.15 on 16 and 2993 DF, p-value: < 2.2e-16
Overidentifying restriction in the wage equation
# 2SLS wage equation on mroz: educ is instrumented by motheduc, fatheduc and
# huseduc. IV1 is re-used here and overwrites the card model fitted above.
IV1 <- ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + motheduc + fatheduc + huseduc,
  data = mroz
)
summary(IV1)
##
## Call:
## ivreg(formula = lwage ~ educ + exper + expersq | exper + expersq +
## motheduc + fatheduc + huseduc, data = mroz)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.08378 -0.32135 0.03538 0.36934 2.35829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1868572 0.2853959 -0.655 0.512997
## educ 0.0803918 0.0217740 3.692 0.000251 ***
## exper 0.0430973 0.0132649 3.249 0.001250 **
## expersq -0.0008628 0.0003962 -2.178 0.029976 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6693 on 424 degrees of freedom
## Multiple R-Squared: 0.1495, Adjusted R-squared: 0.1435
## Wald test: 11.52 on 3 and 424 DF, p-value: 2.817e-07
# Regress the 2SLS residuals on all exogenous variables; the R-squared of
# this auxiliary regression feeds the overidentification LM statistic.
uhat <- residuals(IV1)
uhat_reg <- lm(
  uhat ~ exper + expersq + motheduc + fatheduc + huseduc,
  data = subset(mroz, !is.na(wage))
)
stargazer(uhat_reg, no.space = TRUE, type = "text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## uhat
## -----------------------------------------------
## exper 0.0001
## (0.013)
## expersq -0.00001
## (0.0004)
## motheduc -0.010
## (0.012)
## fatheduc 0.001
## (0.011)
## huseduc 0.007
## (0.011)
## Constant 0.009
## (0.177)
## -----------------------------------------------
## Observations 428
## R2 0.003
## Adjusted R2 -0.009
## Residual Std. Error 0.670 (df = 422)
## F Statistic 0.220 (df = 5; 422)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# LM statistic = number of observations times the R-squared of uhat_reg.
LM <- nobs(uhat_reg) * summary(uhat_reg)$r.squared
LM
## [1] 1.115043
# Upper-tail chi-squared p-value with 2 degrees of freedom
# (three instruments for one instrumented regressor).
pchisq(LM, df = 2, lower.tail = FALSE)
## [1] 0.5726266
Heteroskedasticity-robust standard errors
# Coefficient table for IV1 using the HC1 covariance matrix from vcovHC().
coeftest(IV1, vcov. = vcovHC(IV1, type = "HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.18685722 0.30126251 -0.6202 0.5354283
## educ 0.08039176 0.02170330 3.7041 0.0002402 ***
## exper 0.04309732 0.01530642 2.8156 0.0050951 **
## expersq -0.00086280 0.00042166 -2.0462 0.0413549 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Heteroskedasticity-robust overidentification test using the LM statistic (p. 137)
# Auxiliary regressions for the heteroskedasticity-robust overidentification
# test. Every regression is fit on the women with an observed lwage, which is
# the sample ivreg() actually uses for the 2SLS residuals. Fitting the first
# stage and the partialling-out regressions on all rows of mroz would mix two
# different samples.
# NOTE(review): the regression table and the LM statistic printed after this
# chunk were generated before this sample restriction and need re-rendering.
mroz_work <- subset(mroz, !is.na(lwage))

# 2SLS residuals of the wage equation.
uhat <- resid(ivreg(
  lwage ~ educ + exper + expersq |
    exper + expersq + motheduc + fatheduc + huseduc,
  data = mroz_work
))

# First-stage fitted values of educ.
edreg <- lm(
  educ ~ exper + expersq + motheduc + fatheduc + huseduc,
  data = mroz_work
)
euhat <- predict(edreg)

# Partial exper, expersq and the fitted educ out of two of the instruments.
rmreg <- lm(motheduc ~ exper + expersq + euhat, data = mroz_work)
rm <- resid(rmreg)
rfreg <- lm(fatheduc ~ exper + expersq + euhat, data = mroz_work)
rf <- resid(rfreg)

stargazer(edreg, rmreg, rfreg, no.space = TRUE, type = "text")
##
## ==============================================================================================
## Dependent variable:
## --------------------------------------------------------------------------
## educ motheduc fatheduc
## (1) (2) (3)
## ----------------------------------------------------------------------------------------------
## exper 0.053** -0.105*** -0.107***
## (0.022) (0.034) (0.035)
## expersq -0.001 0.002 0.001
## (0.001) (0.001) (0.001)
## motheduc 0.130***
## (0.022)
## fatheduc 0.101***
## (0.021)
## huseduc 0.372***
## (0.022)
## euhat 1.425*** 1.534***
## (0.061) (0.064)
## Constant 5.116*** -7.413*** -9.170***
## (0.298) (0.742) (0.778)
## ----------------------------------------------------------------------------------------------
## Observations 753 753 753
## R2 0.466 0.430 0.442
## Adjusted R2 0.462 0.427 0.440
## Residual Std. Error 1.672 (df = 747) 2.549 (df = 749) 2.674 (df = 749)
## F Statistic 130.163*** (df = 5; 747) 187.974*** (df = 3; 749) 197.796*** (df = 3; 749)
## ==============================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Build the data for the robust LM regression on the wage sample only.
# uhat has one entry per woman with an observed lwage. Binding it to
# full-length vectors with cbind() would silently recycle it (with a warning)
# and make the alignment depend on row order, so every input is aligned
# explicitly instead.
worked <- !is.na(mroz$lwage)

# Vectors with one entry per row of mroz are cut down to the wage sample;
# vectors already restricted to that sample pass through unchanged.
to_wage_sample <- function(x) {
  if (length(x) == length(worked)) x[worked] else x
}

# data.frame() errors on incompatible lengths rather than recycling silently.
df <- data.frame(
  lwage = mroz$lwage[worked],
  uhat = to_wage_sample(uhat),
  euhat = to_wage_sample(euhat),
  rm = to_wage_sample(rm),
  rf = to_wage_sample(rf)
)
df$one <- 1

# Regress a column of ones on the products uhat*rm and uhat*rf, without an
# intercept.
summary(LMreg <- lm(one ~ uhat:rm + uhat:rf + 0, data = df))
##
## Call:
## lm(formula = one ~ uhat:rm + uhat:rf + 0, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## 0.6041 0.9860 1.0003 1.0138 1.2486
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## uhat:rm -0.0270098 0.0289590 -0.933 0.352
## uhat:rf -0.0004977 0.0307894 -0.016 0.987
##
## Residual standard error: 1.001 on 426 degrees of freedom
## Multiple R-squared: 0.00238, Adjusted R-squared: -0.002303
## F-statistic: 0.5082 on 2 and 426 DF, p-value: 0.6019
# LM statistic = number of observations times the R-squared of LMreg.
LM <- nobs(LMreg) * summary(LMreg)$r.squared
LM
## [1] 1.018745
# Upper-tail chi-squared p-value with 2 degrees of freedom.
pchisq(LM, df = 2, lower.tail = FALSE)
## [1] 0.6008726
Testing for neglected nonlinearities in a wage equation
# Read the NLS80 Stata file from the working directory and fit the baseline
# log-wage equation.
nls80 <- read_dta(file = "nls80.dta")
nls_reg <- lm(
  lwage ~ exper + tenure + married + south + urban + black + educ,
  data = nls80
)
summary(nls_reg)
##
## Call:
## lm(formula = lwage ~ exper + tenure + married + south + urban +
## black + educ, data = nls80)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98069 -0.21996 0.00707 0.24288 1.22822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.395497 0.113225 47.653 < 2e-16 ***
## exper 0.014043 0.003185 4.409 1.16e-05 ***
## tenure 0.011747 0.002453 4.789 1.95e-06 ***
## married 0.199417 0.039050 5.107 3.98e-07 ***
## south -0.090904 0.026249 -3.463 0.000558 ***
## urban 0.183912 0.026958 6.822 1.62e-11 ***
## black -0.188350 0.037667 -5.000 6.84e-07 ***
## educ 0.065431 0.006250 10.468 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared: 0.2526, Adjusted R-squared: 0.2469
## F-statistic: 44.75 on 7 and 927 DF, p-value: < 2.2e-16
# Auxiliary regression: the OLS residuals are regressed on the original
# regressors plus the squared and cubed fitted values of nls_reg.
uhat <- residuals(nls_reg)
wghat2 <- predict(nls_reg)^2
wghat3 <- predict(nls_reg)^3
u_reg <- lm(
  uhat ~ exper + tenure + married + south + urban + black + educ +
    wghat2 + wghat3,
  data = nls80
)
stargazer(u_reg, no.space = TRUE, type = "text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## uhat
## -----------------------------------------------
## exper -0.762
## (1.397)
## tenure -0.638
## (1.169)
## married -10.826
## (19.840)
## south 4.935
## (9.045)
## urban -9.985
## (18.300)
## black 10.226
## (18.739)
## educ -3.552
## (6.510)
## wghat2 8.083
## (14.746)
## wghat3 -0.401
## (0.728)
## Constant -171.482
## (313.246)
## -----------------------------------------------
## Observations 935
## R2 0.0004
## Adjusted R2 -0.009
## Residual Std. Error 0.366 (df = 925)
## F Statistic 0.036 (df = 9; 925)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# LM statistic = number of observations times the R-squared of u_reg.
LM <- nobs(u_reg) * summary(u_reg)$r.squared
LM
## [1] 0.3288689
# Upper-tail chi-squared p-value with 2 degrees of freedom
# (the two added terms wghat2 and wghat3).
pchisq(LM, df = 2, lower.tail = FALSE)
## [1] 0.8483734
Length of Time on Workers Compensation
# Keep the rows of injury with ky == 1, then regress ldurat on afchnge,
# highearn and their interaction.
df <- subset(injury, ky == 1)
summary(
  lm(ldurat ~ afchnge * highearn, data = df)
)
##
## Call:
## lm(formula = ldurat ~ afchnge * highearn, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9666 -0.8872 0.0042 0.8126 4.0784
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.125615 0.030737 36.621 < 2e-16 ***
## afchnge 0.007657 0.044717 0.171 0.86404
## highearn 0.256479 0.047446 5.406 6.72e-08 ***
## afchnge:highearn 0.190601 0.068509 2.782 0.00542 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.269 on 5622 degrees of freedom
## Multiple R-squared: 0.02066, Adjusted R-squared: 0.02014
## F-statistic: 39.54 on 3 and 5622 DF, p-value: < 2.2e-16