### Chapter 7. Models with Limited Dependent Variables

#### Table 7.2

library(haven)
library(AER)
library(stargazer)
library(MASS)
library(censReg)
library(sampleSelection)
library(dplyr)

Binary choice models for applying for unemployment benefits (blue-collar workers)

# Load the benefits data and add the two squared regressors used below.
df <- read_dta("Data/benefits.dta")
df$agesq <- (df$age^2) / 10  # squared age, scaled down by a factor of 10
df$rrsq <- df$rr^2           # squared rr

# The linear probability, logit and probit models share one specification.
# Defining it once guarantees the three fits use identical regressors.
benefits_formula <- y ~ rr + rrsq + age + agesq + tenure + slack + abol +
  seasonal + head + married + dkids + dykids + smsa + nwhite + yrdispl +
  school12 + male + statemb + stateur

OLS1 <- lm(formula = benefits_formula, data = df)
Logit1 <- glm(formula = benefits_formula, data = df, family = binomial)
Probit1 <- glm(
  formula = benefits_formula,
  data = df,
  family = binomial(link = "probit")
)

# Print the three sets of estimates side by side as a text table.
stargazer(
  OLS1, Logit1, Probit1,
  no.space = TRUE,
  type = "text",
  title = "Table 7.2 Binary choice models for applying for unemployment benefits"
)
#### Table 7.3

Cross-tabulation of actual and predicted outcomes (logit model)

# Cross-tabulate the observed outcome against the outcome predicted by the
# logit model, predicting 1 when the fitted probability exceeds 0.5.
y <- df$y
yfit <- Logit1$fitted.values

# Start from all zeros and flip the observations above the 0.5 cut-off.
# which() drops NA comparisons, so such observations keep the default 0.
yhat <- rep(0, length(y))
yhat[which(yfit > 0.5)] <- 1

table(y, yhat)
#### Table 7.4

Summary statistics

# Column-wise mean, median, minimum and maximum of the credit data,
# rounded to three decimals.
df <- read_dta("Data/credit.dta")

# apply() on a data frame coerces it with as.matrix() on every call;
# do that coercion once and reuse the matrix for all four statistics.
credit_values <- as.matrix(df)
sum_stat <- round(
  cbind(
    average = apply(credit_values, 2, mean),
    median = apply(credit_values, 2, median),
    minimum = apply(credit_values, 2, min),
    maximum = apply(credit_values, 2, max)
  ),
  3
)
sum_stat
##          average median minimum maximum
## booklev    0.293  0.264   0.000   0.999
## ebit       0.094  0.090  -0.384   0.652
## invgrade   0.472  0.000   0.000   1.000
## logsales   7.996  7.884   1.100  12.701
## marklev    0.255  0.211   0.000   0.965
## rating     3.499  3.000   1.000   7.000
## reta       0.157  0.180  -0.996   0.980
## wka        0.140  0.123  -0.412   0.748

#### Table 7.5

Estimation results binary and ordered logit, MLE

# Binary indicator: 1 when the rating is above 3, 0 otherwise.
df$rating_bin <- ifelse(df$rating > 3, 1, 0)

# Binary logit of the indicator on the five firm characteristics.
# NOTE(review): the heading also mentions an ordered logit, which is not
# estimated in this block.
logit <- glm(
  rating_bin ~ booklev + ebit + logsales + reta + wka,
  data = df,
  family = binomial
)
summary(logit)
#### Table 7.6

Ordered probit model for willingness to pay

# Load the willingness-to-pay data and keep a factor copy of `depvar`
# in a new column `wtp`.
df <- dplyr::mutate(
  read_dta("Data/wtp.dta"),
  wtp = as.factor(depvar)
)
# NOTE(review): the models below use share1, share2, nadults, lnx, ... but the
# last data set loaded in the visible text is Data/wtp.dta. The step that loads
# the budget-share data (and the heading for Table 7.9) appears to be missing
# here -- confirm against the original document and restore it before running.

# Interaction of the number of adults with lnx.
df$nadlnx <- df$nadults * df$lnx

# Tobit models for the two budget shares (share1: alcohol, share2: tobacco).
Tobit1 <- censReg(
  share1 ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx,
  data = df
)
Tobit2 <- censReg(
  share2 ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx,
  data = df
)

stargazer(
  Tobit1, Tobit2,
  type = "text",
  no.space = TRUE,
  single.row = TRUE,
  title = "Table 7.9 Tobit models for budget shares alcohol and tobacco",
  column.labels = c("Alchol", "Tobacco")
)
##
## Table 7.9 Tobit models for budget shares alcohol and tobacco
## =======================================================
##                             Dependent variable:
##                     -----------------------------------
##                          share1            share2
##                          Alchol            Tobacco
##                            (1)               (2)
## -------------------------------------------------------
## age                   0.013 (0.011)   -0.126*** (0.024)
## nadults              0.029* (0.017)     0.015 (0.038)
## nkids               -0.003*** (0.001) 0.004*** (0.001)
## nkids2               -0.004 (0.002)    -0.010* (0.005)
## lnx                 0.013*** (0.003)  -0.044*** (0.007)
## agelnx               -0.001 (0.001)   0.009*** (0.002)
## nadlnx               -0.002* (0.001)   -0.001 (0.003)
## logSigma            -3.712*** (0.015) -3.037*** (0.025)
## Constant            -0.159*** (0.044) 0.590*** (0.093)
## -------------------------------------------------------
## Observations              2,724             2,724
## Log Likelihood          4,755.371          758.701
## Akaike Inf. Crit.      -9,492.742        -1,499.401
## Bayesian Inf. Crit.    -9,439.553        -1,446.212
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01

#### Table 7.10

Models for budget shares alcohol and tobacco, estimated by OLS using positive observations only

# Same regressors as the tobit models, fitted by OLS on the households with a
# strictly positive budget share only.
AlcohOLS <- lm(
  share1 ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx,
  data = df,
  subset = share1 > 0
)
TobaccOLS <- lm(
  share2 ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx,
  data = df,
  subset = share2 > 0
)

stargazer(
  AlcohOLS, TobaccOLS,
  type = "text",
  no.space = TRUE,
  title = "Table 7.10 Models for budget shares alcohol and tobacco",
  column.labels = c("Alchol", "Tobacco"),
  single.row = TRUE
)
##
## Table 7.10 Models for budget shares alcohol and tobacco
## =====================================================================
##                                    Dependent variable:
##                     -------------------------------------------------
##                              share1                   share2
##                              Alchol                  Tobacco
##                               (1)                      (2)
## ---------------------------------------------------------------------
## age                      0.008 (0.011)            -0.031 (0.021)
## nadults                  -0.013 (0.016)           -0.013 (0.032)
## nkids                  -0.002*** (0.001)          0.001 (0.001)
## nkids2                   -0.002 (0.002)           -0.003 (0.005)
## lnx                      -0.002 (0.003)         -0.034*** (0.005)
## agelnx                  -0.0004 (0.001)           0.002 (0.002)
## nadlnx                   0.001 (0.001)            0.001 (0.002)
## Constant                 0.053 (0.044)           0.490*** (0.074)
## ---------------------------------------------------------------------
## Observations                 2,258                    1,036
## R2                           0.051                    0.154
## Adjusted R2                  0.048                    0.148
## Residual Std. Error    0.022 (df = 2250)        0.029 (df = 1028)
## F Statistic         17.270*** (df = 7; 2250) 26.732*** (df = 7; 1028)
## =====================================================================
## Note:                                     *p<0.1; **p<0.05; ***p<0.01

#### Table 7.11

Probit models for abstention of alcohol and tobacco

# Indicators for a strictly positive budget share; these are the dependent
# variables of the probit models that follow.
df$Alchol <- ifelse(df$share1 > 0, 1, 0)
df$Tobacco <- ifelse(df$share2 > 0, TRUE, FALSE)
# Probit models for the alcohol and tobacco indicators. Both use the same
# regressors, including the bluecol and whitecol dummies.
Probit_Alchol <- glm(
  formula = Alchol ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx +
    bluecol + whitecol,
  family = binomial(link = "probit"),
  data = df
)
Probit_Tobacco <- glm(
  formula = Tobacco ~ age + nadults + nkids + nkids2 + lnx + agelnx + nadlnx +
    bluecol + whitecol,
  family = binomial(link = "probit"),
  data = df
)

# Print both probit fits in one text table, one row per coefficient.
stargazer(
  Probit_Alchol, Probit_Tobacco,
  type = "text",
  no.space = TRUE,
  single.row = TRUE,
  title = "Table 7.11 Probit models for abstention of alcohol and tobacco",
  column.labels = c("Alchol", "Tobacco")
)
#### Table 7.12

Two-step estimation of Engel curves for alcohol and tobacco (tobit II model)

# Print the two-step estimates for both goods in one text table.
# NOTE(review): Tobit_Alchol and Tobit_Tobacco are not created anywhere in the
# visible part of this document -- confirm where they are estimated.
stargazer(
  Tobit_Alchol, Tobit_Tobacco,
  type = "text",
  no.space = TRUE,
  single.row = TRUE,
  title = "Table 7.12 Two-step estimation of Engel curves for alcohol and tobacco",
  column.labels = c("Alchol", "Tobacco")
)
##
## Table 7.12 Two-step estimation of Engel curves for alcohol and tobacco
## =======================================================
##                             Dependent variable:
##                     -----------------------------------
##                          share1            share2
##                          Alchol            Tobacco
##                            (1)               (2)
## -------------------------------------------------------
## age                   0.008 (0.013)    -0.017 (0.036)
## nadults              -0.013 (0.025)    -0.017 (0.034)
## nkids               -0.002*** (0.001)   0.001 (0.002)
## nkids2               -0.002 (0.003)    -0.002 (0.005)
## lnx                  -0.002 (0.009)   -0.030*** (0.009)
## agelnx               -0.0004 (0.001)    0.001 (0.003)
## nadlnx                0.001 (0.002)     0.001 (0.002)
## Constant              0.054 (0.133)   0.452*** (0.109)
## -------------------------------------------------------
## Observations              2,724             2,724
## rho                      -0.010            -0.302
## Inverse Mills Ratio  -0.0002 (0.017)   -0.009 (0.019)
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01

#### Table 7.13

