Verbeek5. Modern Econometrics Using R - Chap7

### Chapter 7. Models with Limited Dependent Variables

#### Table 7.2

library(haven)
library(AER)
library(stargazer)
library(MASS)
library(censReg)
library(sampleSelection)
library(dplyr)

Binary choice models for applying for unemployment benefits (blue-collar workers)

df <- read_dta("Data/benefits.dta")
df$agesq <- (df$age^2)/10
df$rrsq <- (df$rr^2)

OLS1 <- lm(y ~ rr + rrsq + age + agesq + tenure + slack + abol + seasonal + head + married + dkids + dykids + smsa + nwhite +  yrdispl + school12 + male + statemb + stateur, data = df)
Logit1 <- glm(y ~ rr + rrsq + age + agesq + tenure + slack + abol + seasonal + head + married + dkids + dykids + smsa + nwhite + yrdispl + school12 + male + statemb + stateur,data = df, family=binomial)
Probit1 <- glm(y ~ rr + rrsq + age + agesq + tenure + slack + abol + seasonal + head + married + dkids + dykids + smsa + nwhite + yrdispl + school12 + male + statemb + stateur,  data = df, family=binomial(link ="probit"))

stargazer(OLS1, Logit1, Probit1, no.space=TRUE, type="text",
title = "Table 7.2 Binary choice models for applying for unemployment benefits")
##
## Table 7.2 Binary choice models for applying for unemployment benefits
## ===================================================================
##                                   Dependent variable:
##                     -----------------------------------------------
##                                            y
##                                OLS             logistic    probit
##                                (1)               (2)        (3)
## -------------------------------------------------------------------
## rr                            0.629             3.068      1.863*
##                              (0.384)           (1.868)    (1.129)
## rrsq                        -1.019**           -4.891**   -2.980**
##                              (0.481)           (2.334)    (1.412)
## age                         0.016***           0.068***   0.042***
##                              (0.005)           (0.024)    (0.014)
## agesq                       -0.001**           -0.006**   -0.004**
##                              (0.001)           (0.003)    (0.002)
## tenure                      0.006***           0.031***   0.018***
##                              (0.001)           (0.007)    (0.004)
## slack                       0.128***           0.625***   0.375***
##                              (0.014)           (0.071)    (0.042)
## abol                         -0.007             -0.036     -0.022
##                              (0.025)           (0.118)    (0.072)
## seasonal                      0.058             0.271      0.161
##                              (0.036)           (0.171)    (0.104)
##                              (0.017)           (0.081)    (0.049)
## married                     0.049***           0.242***   0.145***
##                              (0.016)           (0.079)    (0.048)
## dkids                        -0.031*           -0.158*    -0.097*
##                              (0.017)           (0.086)    (0.052)
## dykids                       0.043**           0.206**    0.124**
##                              (0.020)           (0.097)    (0.059)
## smsa                        -0.035**           -0.170**   -0.100**
##                              (0.014)           (0.070)    (0.042)
## nwhite                        0.017             0.074      0.052
##                              (0.019)           (0.093)    (0.056)
## yrdispl                     -0.013***         -0.064***  -0.038***
##                              (0.003)           (0.015)    (0.009)
## school12                     -0.014             -0.065     -0.042
##                              (0.017)           (0.082)    (0.050)
## male                        -0.036**           -0.180**   -0.107**
##                              (0.018)           (0.088)    (0.053)
## statemb                     0.001***           0.006***   0.004***
##                             (0.0002)           (0.001)    (0.001)
## stateur                     0.018***           0.096***   0.057***
##                              (0.003)           (0.016)    (0.009)
## Constant                     -0.077           -2.800***  -1.700***
##                              (0.122)           (0.604)    (0.363)
## -------------------------------------------------------------------
## Observations                  4,877             4,877      4,877
## R2                            0.067
## Log Likelihood                                -2,873.197 -2,874.071
## Akaike Inf. Crit.                             5,786.393  5,788.142
## Residual Std. Error     0.450 (df = 4857)
## F Statistic         18.331*** (df = 19; 4857)
## ===================================================================
## Note:                                   *p<0.1; **p<0.05; ***p<0.01

#### Table 7.3

Cross-tabulation of actual and predicted outcomes (logit model)

y <- df$y yfit <- Logit1$fitted.values
yhat <- rep(0, length(y))
yhat[which(yfit>0.5)] = 1
table(y, yhat)
##    yhat
## y      0    1
##   0  242 1300
##   1  171 3164

R2p Criterion, loglikelihhod value and the pseudo and McFadden R2

ltab <-table(y, yhat)
ltabr <- rowSums(ltab)

round(1-(ltab[1, 2] +ltab[2, 1])/sum(ltab[1, ]), 3)
## [1] 0.046
round(ltabr[1]*log(ltabr[1]/sum(ltabr)) + ltabr[2]*log(ltabr[2]/sum(ltabr)), 3)
##         0
## -3043.028
round(ltab[1, 1]/sum(ltab[1,]) + ltab[2, 2]/sum(ltab[2,]), 3)
## [1] 1.106

#### Table 7.4

Summary statistics

df <- read_dta("Data/credit.dta")
sum_stat <- round(cbind(apply(df,2,mean), apply(df,2,median), apply(df,2,min), apply(df,2,max)), 3)
colnames(sum_stat) <- c("average","median", "minimum", "maximum")
sum_stat
##          average median minimum maximum
## booklev    0.293  0.264   0.000   0.999
## ebit       0.094  0.090  -0.384   0.652
## invgrade   0.472  0.000   0.000   1.000
## logsales   7.996  7.884   1.100  12.701
## marklev    0.255  0.211   0.000   0.965
## rating     3.499  3.000   1.000   7.000
## reta       0.157  0.180  -0.996   0.980
## wka        0.140  0.123  -0.412   0.748

#### Table 7.5

Estimation results binary and ordered logit, MLE

df$rating_bin <- ifelse(df$rating>3,1,0)
summary(logit <- glm(rating_bin ~ booklev + ebit + logsales + reta + wka, data = df, family=binomial))
##
## Call:
## glm(formula = rating_bin ~ booklev + ebit + logsales + reta +
##     wka, family = binomial, data = df)
##
## Deviance Residuals:
##     Min       1Q   Median       3Q      Max
## -2.5840  -0.5411  -0.0491   0.5286   3.4210
##
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.21432    0.86685  -9.476  < 2e-16 ***
## booklev     -4.42727    0.77142  -5.739 9.52e-09 ***
## ebit         4.35474    1.43992   3.024  0.00249 **
## logsales     1.08159    0.09568  11.304  < 2e-16 ***
## reta         4.11611    0.48851   8.426  < 2e-16 ***
## wka         -4.01249    0.74791  -5.365 8.10e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
##     Null deviance: 1273.95  on 920  degrees of freedom
## Residual deviance:  682.16  on 915  degrees of freedom
## AIC: 694.16
##
## Number of Fisher Scoring iterations: 6
summary(ologit <- polr(factor(rating) ~ booklev + ebit + logsales + reta + wka, data = df, method="logistic",  Hess = TRUE))
## Call:
## polr(formula = factor(rating) ~ booklev + ebit + logsales + reta +
##     wka, data = df, Hess = TRUE, method = "logistic")
##
## Coefficients:
##            Value Std. Error t value
## booklev  -2.7517    0.47730  -5.765
## ebit      4.7313    0.94476   5.008
## logsales  0.9406    0.05884  15.984
## reta      3.5600    0.30229  11.777
## wka      -2.5802    0.48263  -5.346
##
## Intercepts:
##     Value   Std. Error t value
## 1|2 -0.3696  0.6332    -0.5837
## 2|3  4.8805  0.5208     9.3709
## 3|4  7.6259  0.5512    13.8361
## 4|5  9.8849  0.5916    16.7078
## 5|6 12.8829  0.6734    19.1315
## 6|7 14.7825  0.7841    18.8527
##
## Residual Deviance: 1930.614
## AIC: 1952.614

#### Table 7.6

Ordered probit model for willingness to pay

df <- read_dta("Data/wtp.dta")
df$wtp <- as.factor(df$depvar)
df$nadlnx <- df$nadults*df$lnx Tobit1 <- censReg(share1 ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx, data=df) Tobit2 <- censReg(share2 ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx, data=df) stargazer(Tobit1, Tobit2, type="text", no.space = T, single.row = T, title="Table 7.9 Tobit models for budget shares alcohol and tobacco", column.labels = c("Alchol", "Tobacco"))  ## ## Table 7.9 Tobit models for budget shares alcohol and tobacco ## ======================================================= ## Dependent variable: ## ----------------------------------- ## share1 share2 ## Alchol Tobacco ## (1) (2) ## ------------------------------------------------------- ## age 0.013 (0.011) -0.126*** (0.024) ## nadults 0.029* (0.017) 0.015 (0.038) ## nkids -0.003*** (0.001) 0.004*** (0.001) ## nkids2 -0.004 (0.002) -0.010* (0.005) ## lnx 0.013*** (0.003) -0.044*** (0.007) ## agelnx -0.001 (0.001) 0.009*** (0.002) ## nadlnx -0.002* (0.001) -0.001 (0.003) ## logSigma -3.712*** (0.015) -3.037*** (0.025) ## Constant -0.159*** (0.044) 0.590*** (0.093) ## ------------------------------------------------------- ## Observations 2,724 2,724 ## Log Likelihood 4,755.371 758.701 ## Akaike Inf. Crit. -9,492.742 -1,499.401 ## Bayesian Inf. Crit. -9,439.553 -1,446.212 ## ======================================================= ## Note: *p<0.1; **p<0.05; ***p<0.01 #### Table 7.10 Models for budget shares alcohol and tobacco, estimated by OLS using positive observations only AlcohOLS <- lm(share1 ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx, data=df, subset=share1>0) TobaccOLS <- lm(share2 ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx, data=df, subset=share2>0) stargazer(AlcohOLS, TobaccOLS, type="text", no.space = T, title ="Table 7.10 Models for budget shares alcohol and tobacco", column.labels = c("Alchol", "Tobacco"), single.row = T) ## ## Table 7.10 Models for budget shares alcohol and tobacco ## ===================================================================== ## Dependent variable: ## ------------------------------------------------- ## share1 share2 ## Alchol Tobacco ## (1) (2) ## --------------------------------------------------------------------- ## age 0.008 (0.011) -0.031 (0.021) ## nadults -0.013 (0.016) -0.013 (0.032) ## nkids -0.002*** (0.001) 0.001 (0.001) ## nkids2 -0.002 (0.002) -0.003 (0.005) ## lnx -0.002 (0.003) -0.034*** (0.005) ## agelnx -0.0004 (0.001) 0.002 (0.002) ## nadlnx 0.001 (0.001) 0.001 (0.002) ## Constant 0.053 (0.044) 0.490*** (0.074) ## --------------------------------------------------------------------- ## Observations 2,258 1,036 ## R2 0.051 0.154 ## Adjusted R2 0.048 0.148 ## Residual Std. Error 0.022 (df = 2250) 0.029 (df = 1028) ## F Statistic 17.270*** (df = 7; 2250) 26.732*** (df = 7; 1028) ## ===================================================================== ## Note: *p<0.1; **p<0.05; ***p<0.01 #### Table 7.11 Probit models for abstention of alcohol and tobacco df$Alchol <- ifelse(df$share1>0, 1, 0) df$Tobacco <- ifelse(df\$share2>0, T, F)
Probit_Alchol <- glm(
Alchol ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx + bluecol + whitecol, data=df,
family=binomial("probit"))
Probit_Tobacco <- glm(
Tobacco ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx + bluecol + whitecol,
data=df,family=binomial("probit"))

stargazer(Probit_Alchol, Probit_Tobacco, type="text", no.space = T,
title="Table 7.11 Probit models for abstention of alcohol and tobacco",
column.labels = c("Alchol", "Tobacco"), single.row = T)
##
## Table 7.11 Probit models for abstention of alcohol and tobacco
## ======================================================
##                           Dependent variable:
##                   ------------------------------------
##                         Alchol            Tobacco
##                         Alchol            Tobacco
##                          (1)                (2)
## ------------------------------------------------------
## age                 0.668 (0.652)    -2.483*** (0.560)
## nadults            2.255** (1.020)     0.485 (0.872)
## nkids              -0.077** (0.037)  0.081*** (0.031)
## nkids2              -0.186 (0.140)    -0.212* (0.123)
## lnx                1.236*** (0.191)  -0.632*** (0.163)
## agelnx              -0.045 (0.049)   0.175*** (0.041)
## nadlnx             -0.169** (0.074)   -0.025 (0.063)
## bluecol             -0.061 (0.098)    0.206** (0.084)
## whitecol            0.051 (0.085)      0.022 (0.070)
## Constant          -15.882*** (2.570) 8.244*** (2.213)
## ------------------------------------------------------
## Observations            2,724              2,724
## Log Likelihood        -1,159.865        -1,754.886
## Akaike Inf. Crit.     2,339.729          3,529.772
## ======================================================
## Note:                      *p<0.1; **p<0.05; ***p<0.01

#### Table 7.12

Two-step estimation of Engel curves for alcohol and tobacco (tobit II model)

summary(Tobit_Alchol <- selection(Alchol ~ age + nadults + nkids + nkids2 + lnx +  agelnx + nadlnx + bluecol + whitecol, share1 ~ age + nadults + nkids + nkids2 + lnx +  agelnx + nadlnx, method="2step", data=df))
## --------------------------------------------
## Tobit 2 model (sample selection model)
## 2-step Heckman / heckit estimation
## 2724 observations (466 censored and 2258 observed)
## 21 free parameters (df = 2704)
## Probit selection equation:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.88236    2.57393  -6.170 7.83e-10 ***
## age           0.66785    0.65200   1.024   0.3058
## nadults       2.25540    1.02453   2.201   0.0278 *
## nkids        -0.07705    0.03725  -2.069   0.0387 *
## nkids2       -0.18572    0.14083  -1.319   0.1874
## lnx           1.23553    0.19130   6.459 1.25e-10 ***
## agelnx       -0.04480    0.04854  -0.923   0.3561
## nadlnx       -0.16879    0.07423  -2.274   0.0231 *
## bluecol      -0.06117    0.09777  -0.626   0.5316
## whitecol      0.05056    0.08471   0.597   0.5506
## Outcome equation:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.0542676  0.1329936   0.408  0.68327
## age          0.0077097  0.0130468   0.591  0.55462
## nadults     -0.0133445  0.0247046  -0.540  0.58913
## nkids       -0.0020244  0.0007637  -2.651  0.00808 **
## nkids2      -0.0024127  0.0025715  -0.938  0.34821
## lnx         -0.0024288  0.0093674  -0.259  0.79544
## agelnx      -0.0004044  0.0009420  -0.429  0.66772
## nadlnx       0.0008461  0.0018047   0.469  0.63922
##    Error terms:
##                 Estimate Std. Error t value Pr(>|t|)
## invMillsRatio -0.0002045  0.0165285  -0.012     0.99
## sigma          0.0214876         NA      NA       NA
## rho           -0.0095177         NA      NA       NA
## --------------------------------------------
summary(Tobit_Tobacco <- selection(Tobacco ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx + bluecol + whitecol, share2 ~ age + nadults + nkids + nkids2 +agelnx+nadlnx+ lnx, method="2step", data=df))
## --------------------------------------------
## Tobit 2 model (sample selection model)
## 2-step Heckman / heckit estimation
## 2724 observations (1688 censored and 1036 observed)
## 21 free parameters (df = 2704)
## Probit selection equation:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  8.24447    2.21108   3.729 0.000196 ***
## age         -2.48300    0.55960  -4.437 9.48e-06 ***
## nadults      0.48520    0.87174   0.557 0.577851
## nkids        0.08128    0.03083   2.637 0.008424 **
## nkids2      -0.21166    0.12305  -1.720 0.085522 .
## lnx         -0.63208    0.16320  -3.873 0.000110 ***
## agelnx       0.17474    0.04131   4.230 2.41e-05 ***
## nadlnx      -0.02534    0.06292  -0.403 0.687181
## bluecol      0.20642    0.08343   2.474 0.013417 *
## whitecol     0.02153    0.06943   0.310 0.756453
## Outcome equation:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.4515814  0.1086284   4.157 3.32e-05 ***
## age         -0.0172991  0.0358591  -0.482 0.629547
## nadults     -0.0174379  0.0339635  -0.513 0.607692
## nkids        0.0007643  0.0015130   0.505 0.613469
## nkids2      -0.0020755  0.0053883  -0.385 0.700125
## agelnx       0.0012243  0.0025454   0.481 0.630568
## nadlnx       0.0013650  0.0024238   0.563 0.573370
## lnx         -0.0301094  0.0090459  -3.329 0.000885 ***
##    Error terms:
##                Estimate Std. Error t value Pr(>|t|)
## invMillsRatio -0.009018   0.018589  -0.485    0.628
## sigma          0.029881         NA      NA       NA
## rho           -0.301792         NA      NA       NA
## --------------------------------------------
stargazer(Tobit_Alchol, Tobit_Tobacco, type="text", no.space = T,
title="Table 7.12 Two-step estimation of Engel curves for alcohol and tobacco",
column.labels = c("Alchol", "Tobacco"), single.row = T)
##
## Table 7.12 Two-step estimation of Engel curves for alcohol and tobacco
## =======================================================
##                             Dependent variable:
##                     -----------------------------------
##                          share1            share2
##                          Alchol            Tobacco
##                            (1)               (2)
## -------------------------------------------------------
## age                   0.008 (0.013)    -0.017 (0.036)
## nadults              -0.013 (0.025)    -0.017 (0.034)
## nkids               -0.002*** (0.001)   0.001 (0.002)
## nkids2               -0.002 (0.003)    -0.002 (0.005)
## lnx                  -0.002 (0.009)   -0.030*** (0.009)
## agelnx               -0.0004 (0.001)    0.001 (0.003)
## nadlnx                0.001 (0.002)     0.001 (0.002)
## Constant              0.054 (0.133)   0.452*** (0.109)
## -------------------------------------------------------
## Observations              2,724             2,724
## rho                      -0.010            -0.302
## Inverse Mills Ratio  -0.0002 (0.017)   -0.009 (0.019)
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01

#### Table 7.13

#Data not available