# Chapter 4. Single-Equation and OLS Estimation#

## Examples#

### Example 4.1. Wage equation for married working women#

# Example 4.1 (Python): log-wage equation for married working women (mroz),
# estimated by OLS with conventional and heteroskedasticity-robust (HC1) SEs.
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from wooldridge import *

df = dataWoo("mroz")
# Same specification twice: default (homoskedastic) vs. HC1 robust covariance.
lwage_hetr = smf.ols('lwage ~ exper + expersq + educ + age + kidslt6 + kidsge6', data=df).fit()
lwage_robust = smf.ols('lwage ~ exper + expersq + educ + age + kidslt6 + kidsge6', data=df).fit(cov_type='HC1')

# FIX: the info_dict literal and the summary_col/print calls were left unclosed
# (missing '}' and '))'), which is a SyntaxError.
print(summary_col([lwage_hetr, lwage_robust], stars=True, float_format='%0.3f',
                  info_dict={'N': lambda x: "{0:d}".format(int(x.nobs)),
                             'R2': lambda x: "{:.3f}".format(x.rsquared)}))

# Example 4.1 (R): same wage equation; robust SEs via sandwich/AER vcovHC,
# side-by-side table with stargazer.
library(wooldridge)
library(AER)
library(lmtest)
library(stargazer)
lwage_hetr <- lm(lwage ~ exper + expersq + educ + age + kidslt6 + kidsge6, data=mroz)
lwage_robust <- coeftest(lwage_hetr, vcovHC(lwage_hetr, type = "HC1") )
# FIX: corrected misspelled column label "Hetrosced." -> "Heterosced."
stargazer(lwage_hetr, lwage_robust, column.labels=c("Heterosced.", "Robust SE"), no.space=TRUE, type="text")

* Example 4.1 (Stata): same two models; estout table with stars and SEs.
bcuse mroz, clear  nodesc
eststo ols: reg lwage exper expersq educ age kidslt6 kidsge6
eststo ols_r: reg lwage exper expersq educ age kidslt6 kidsge6, r
* FIX: the stats() option was unbalanced (missing closing parenthesis).
estout ols ols_r, cells(b(star fmt(4)) se(par fmt(4))) stats(N, fmt(%9.0g))

================================
lwage I  lwage II
--------------------------------
Intercept      -0.421   -0.421
(0.317)  (0.318)
exper          0.040*** 0.040***
(0.013)  (0.015)
expersq        -0.001*  -0.001*
(0.000)  (0.000)
educ           0.108*** 0.108***
(0.014)  (0.014)
age            -0.001   -0.001
(0.005)  (0.006)
kidslt6        -0.061   -0.061
(0.089)  (0.106)
kidsge6        -0.015   -0.015
(0.028)  (0.029)
R-squared      0.158    0.158
N              428      428
R2             0.158    0.158
================================
Standard errors in parentheses.
* p<.1, ** p<.05, *** p<.01

print(lwage_hetr.f_test('(kidslt6=kidsge6=age=0)'))


linearHypothesis(lwage_hetr, c("kidslt6=0", "kidsge6=0", "age=0"))

test kidsge6 kidslt6 age

<F test: F=0.23702385952852514, p=0.8705394717167727, df_denom=421, df_num=3>

# LM-style check: regress the restricted model's residuals on the full set of
# regressors (R^2 from this auxiliary regression feeds the LM statistic below).
uhat_rst = smf.ols('lwage ~ exper + expersq + educ', data=df).fit().resid
# FIX: attach the residuals as an index-aligned column instead of relying on
# patsy picking `uhat_rst` up from the enclosing scope against a separately
# filtered df.dropna() — row alignment is then guaranteed by the index, and
# statsmodels drops the rows where lwage (hence uhat_rst) is missing.
df['uhat_rst'] = uhat_rst
uhat_reg = smf.ols('uhat_rst ~ exper + expersq + educ + age + kidslt6 + kidsge6', data=df).fit()
print(uhat_reg.summary())


# R version: residuals from the restricted model, regressed on the full set;
# restrict to the working-women sample (wage observed) to match the 428 obs.
uhat_rst <- resid(lm(lwage ~ exper + expersq + educ, data=mroz))
summary(uhat_reg <- lm(uhat_rst ~ exper + expersq + educ + age + kidslt6 + kidsge6,
data=subset(mroz, !is.na(wage))))

* Stata version: save residuals from the restricted model, regress on full set.
reg lwage exper expersq educ
predict u_r, residual
reg u_r exper expersq educ age kidslt6 kidsge6

                            OLS Regression Results
==============================================================================
Dep. Variable:               uhat_rst   R-squared:                       0.002
Method:                 Least Squares   F-statistic:                    0.1185
Date:                Mon, 11 Dec 2023   Prob (F-statistic):              0.994
Time:                        22:32:29   Log-Likelihood:                -431.24
No. Observations:                 428   AIC:                             876.5
Df Residuals:                     421   BIC:                             904.9
Df Model:                           6
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.1011      0.317      0.319      0.750      -0.522       0.724
exper         -0.0017      0.013     -0.130      0.896      -0.028       0.025
expersq     2.996e-05      0.000      0.074      0.941      -0.001       0.001
educ           0.0003      0.014      0.024      0.981      -0.028       0.029
age           -0.0015      0.005     -0.277      0.782      -0.012       0.009
kidslt6       -0.0607      0.089     -0.684      0.494      -0.235       0.114
kidsge6       -0.0146      0.028     -0.523      0.601      -0.069       0.040
==============================================================================
Omnibus:                       76.463   Durbin-Watson:                   1.963
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              301.786
Skew:                          -0.732   Prob(JB):                     2.94e-66
Kurtosis:                       6.844   Cond. No.                     3.54e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.54e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

# LM statistic = n * R^2 from the auxiliary residual regression.
LM1 = uhat_reg.nobs * uhat_reg.rsquared
# FIX: a bare expression only echoes in a notebook; print explicitly so the
# value also appears when run as a script.
print(LM1)


LM <- (summary(uhat_reg)$r.squared) * (nobs(uhat_reg))
LM

display "LM =" e(r2)*e(N)

0.7216757067709283

import scipy.stats as ss
ss.chi2.sf(LM1, 3)

pchisq(LM, df=3, lower.tail = FALSE)

di chi2tail(3, e(r2)*e(N))

0.8680941471323707

### Example 4.1. … LM2 Continued on pp.65#

uhat_rst = smf.ols('lwage ~ exper + expersq + educ', data=df).fit().resid
df['u_age'] = smf.ols('age~educ+exper+expersq', data=df.dropna()).fit().resid*uhat_rst
df['u_kidslt6'] = smf.ols('kidslt6~educ+exper+expersq', data=df.dropna()).fit().resid*uhat_rst
df['u_kidsge6'] = smf.ols('kidsge6~educ+exper+expersq', data=df.dropna()).fit().resid*uhat_rst
df['one'] = 1 #Generates a vector of 1s.
one_r = smf.ols('one ~ u_age + u_kidslt6 + u_kidsge6 + 0', data=df.dropna()).fit()
print(one_r.summary())

uhat_rst <- resid(lm(lwage ~ exper + expersq + educ, data=mroz)) #u from the restricted model
u_age <- resid(lm(age~educ+exper+expersq, data=subset(mroz, !is.na(wage))))*uhat_rst
u_kidslt6 <- resid(lm(kidslt6~educ+exper+expersq, data=subset(mroz, !is.na(wage))))*uhat_rst
u_kidsge6 <- resid(lm(kidsge6~educ+exper+expersq, data=subset(mroz, !is.na(wage))))*uhat_rst
udata <- data.frame(cbind(u_age, u_kidslt6, u_kidsge6))
udata['one'] = 1 #Generates a vector of 1s.
summary(one_r <- lm(one ~ u_age + u_kidslt6 + u_kidsge6 + 0, data=udata))

foreach x of var age kidslt kidsg{
reg `x' exper* edu
predict r_`x', residual
gen ures`x'= u_r*r_`x'
}
gen one=1
reg one ures*, noc

                            OLS Regression Results
=======================================================================================
Dep. Variable:                    one   R-squared (uncentered):                   0.001
Model:                            OLS   Adj. R-squared (uncentered):             -0.006
Method:                 Least Squares   F-statistic:                             0.1696
Date:                Mon, 11 Dec 2023   Prob (F-statistic):                       0.917
Time:                        22:32:29   Log-Likelihood:                         -607.05
No. Observations:                 428   AIC:                                      1220.
Df Residuals:                     425   BIC:                                      1232.
Df Model:                           3
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
u_age         -0.0025      0.011     -0.234      0.815      -0.024       0.019
u_kidslt6     -0.0905      0.169     -0.535      0.593      -0.423       0.242
u_kidsge6     -0.0267      0.060     -0.443      0.658      -0.145       0.092
==============================================================================
Omnibus:                      177.849   Durbin-Watson:                    0.002
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              7632.953
Skew:                           1.016   Prob(JB):                          0.00
Kurtosis:                      23.588   Cond. No.                          17.3
==============================================================================

Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.

LM2 = one_r.nobs - one_r.ssr
LM2

LM2 <- (nobs(one_r))-sum(resid(one_r)^2)
LM2

display "LM2 = N-SSRo =" e(N)-e(rss)

0.511880062357136

ss.chi2.sf(LM2, 3)

pchisq(LM2, df=3, lower.tail = FALSE)

di chi2tail(3, e(N)-e(rss))

0.9162737684759361

### Example 4.3 Using IQ as a Proxy for Ability#

df=pd.read_csv("nls80.csv")
print(smf.ols('lwage ~ exper + tenure + married + south + urban + black + educ', data=df).fit().summary())

nls80 <- read_dta("nls80.dta")
summary(lm(lwage ~ exper + tenure + married + south + urban + black + educ, data=nls80))

bcuse nls80, clear nodesc
reg lwage exper tenure married south urban black educ

                            OLS Regression Results
==============================================================================
Dep. Variable:                  lwage   R-squared:                       0.253
Model:                            OLS   Adj. R-squared:                  0.247
Method:                 Least Squares   F-statistic:                     44.75
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           1.16e-54
Time:                        22:32:29   Log-Likelihood:                -381.55
No. Observations:                 935   AIC:                             779.1
Df Residuals:                     927   BIC:                             817.8
Df Model:                           7
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      5.3955      0.113     47.653      0.000       5.173       5.618
exper          0.0140      0.003      4.409      0.000       0.008       0.020
tenure         0.0117      0.002      4.789      0.000       0.007       0.017
married        0.1994      0.039      5.107      0.000       0.123       0.276
south         -0.0909      0.026     -3.463      0.001      -0.142      -0.039
urban          0.1839      0.027      6.822      0.000       0.131       0.237
black         -0.1883      0.038     -5.000      0.000      -0.262      -0.114
educ           0.0654      0.006     10.468      0.000       0.053       0.078
==============================================================================
Omnibus:                       38.227   Durbin-Watson:                   1.823
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               83.390
Skew:                          -0.224   Prob(JB):                     7.80e-19
Kurtosis:                       4.393   Cond. No.                         187.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

print(smf.ols('lwage ~ exper + tenure + married + south + urban + black + educ + iq', data=df).fit().summary())

summary(lm(lwage ~ exper + tenure + married + south + urban + black + educ + iq, data=nls80))

reg lwage exper tenure married south urban black educ iq

                            OLS Regression Results
==============================================================================
Dep. Variable:                  lwage   R-squared:                       0.263
Model:                            OLS   Adj. R-squared:                  0.256
Method:                 Least Squares   F-statistic:                     41.27
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           1.52e-56
Time:                        22:32:29   Log-Likelihood:                -375.09
No. Observations:                 935   AIC:                             768.2
Df Residuals:                     926   BIC:                             811.7
Df Model:                           8
Covariance Type:            nonrobust
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      5.1764      0.128     40.441      0.000       4.925       5.428
exper          0.0141      0.003      4.469      0.000       0.008       0.020
tenure         0.0114      0.002      4.671      0.000       0.007       0.016
married        0.1998      0.039      5.148      0.000       0.124       0.276
south         -0.0802      0.026     -3.054      0.002      -0.132      -0.029
urban          0.1819      0.027      6.791      0.000       0.129       0.235
black         -0.1431      0.039     -3.624      0.000      -0.221      -0.066
educ           0.0544      0.007      7.853      0.000       0.041       0.068
iq             0.0036      0.001      3.589      0.000       0.002       0.006
==============================================================================
Omnibus:                       43.456   Durbin-Watson:                   1.820
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               99.739
Skew:                          -0.248   Prob(JB):                     2.20e-22
Kurtosis:                       4.521   Cond. No.                     1.13e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.13e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

### Example 4.4 Effects of Job Training Grants on Worker Productivity#

df = dataWoo("jtrain")
df = df[(df['year']==1988)]
print(smf.ols('lscrap ~ grant', data=df).fit().summary())

summary(lm(lscrap ~ grant, data=subset(jtrain, jtrain$year==1988)))

* Example 4.4 (Stata): effect of the training grant on log scrap rate, 1988 only.
use jtrain1, clear
regress lscrap grant if year==1988

                            OLS Regression Results
==============================================================================
Dep. Variable:                 lscrap   R-squared:                       0.000
Method:                 Least Squares   F-statistic:                   0.01948
Date:                Mon, 11 Dec 2023   Prob (F-statistic):              0.890
Time:                        22:32:29   Log-Likelihood:                -94.660
No. Observations:                  54   AIC:                             193.3
Df Residuals:                      52   BIC:                             197.3
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.4085      0.241      1.698      0.095      -0.074       0.891
grant          0.0566      0.406      0.140      0.890      -0.757       0.870
==============================================================================
Omnibus:                        0.074   Durbin-Watson:                   1.931
Prob(Omnibus):                  0.963   Jarque-Bera (JB):                0.086
Skew:                          -0.067   Prob(JB):                        0.958
Kurtosis:                       2.859   Cond. No.                         2.42
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

print(smf.ols('lscrap ~ grant+lscrap_1', data=df).fit().summary())

summary(lm(lscrap ~ grant + lscrap_1, data=subset(jtrain, jtrain\$year==1988)))

regress lscrap grant lscrap_1 if year==1988

                            OLS Regression Results
==============================================================================
Dep. Variable:                 lscrap   R-squared:                       0.873
Method:                 Least Squares   F-statistic:                     174.9
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           1.47e-23
Time:                        22:32:29   Log-Likelihood:                -39.000
No. Observations:                  54   AIC:                             84.00
Df Residuals:                      51   BIC:                             89.97
Df Model:                           2
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0212      0.089      0.238      0.813      -0.158       0.200
grant         -0.2540      0.147     -1.727      0.090      -0.549       0.041
lscrap_1       0.8312      0.044     18.701      0.000       0.742       0.920
==============================================================================
Omnibus:                       13.769   Durbin-Watson:                   1.541
Prob(Omnibus):                  0.001   Jarque-Bera (JB):               35.075
Skew:                          -0.526   Prob(JB):                     2.42e-08
Kurtosis:                       6.805   Cond. No.                         3.95
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


# Interaction model: does the return to education vary with IQ?
# (educ*iq expands to educ + iq + educ:iq in the patsy formula.)
df = pd.read_csv("nls80.csv")
interaction_formula = 'lwage ~ exper + tenure + married + south + urban + black + educ*iq'
nls_reg = smf.ols(interaction_formula, data=df).fit()
print(nls_reg.summary())

summary(lwage_rg <- lm(lwage ~ exper + tenure + married + south + urban + black + educ*iq, data=nls80)  )

* Stata version; c.educ#c.iq is the continuous-by-continuous interaction term.
bcuse nls80, clear  nodesc
reg lwage exper tenure married south urban black educ iq c.educ#c.iq

                            OLS Regression Results
==============================================================================
Dep. Variable:                  lwage   R-squared:                       0.263
Method:                 Least Squares   F-statistic:                     36.76
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           6.85e-56
Time:                        22:32:29   Log-Likelihood:                -374.69
No. Observations:                 935   AIC:                             769.4
Df Residuals:                     925   BIC:                             817.8
Df Model:                           9
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      5.6482      0.546     10.339      0.000       4.576       6.720
exper          0.0139      0.003      4.378      0.000       0.008       0.020
tenure         0.0114      0.002      4.670      0.000       0.007       0.016
married        0.2009      0.039      5.173      0.000       0.125       0.277
south         -0.0802      0.026     -3.056      0.002      -0.132      -0.029
urban          0.1836      0.027      6.835      0.000       0.131       0.236
black         -0.1467      0.040     -3.695      0.000      -0.225      -0.069
educ           0.0185      0.041      0.449      0.653      -0.062       0.099
iq            -0.0009      0.005     -0.182      0.855      -0.011       0.009
educ:iq        0.0003      0.000      0.888      0.375      -0.000       0.001
==============================================================================
Omnibus:                       43.073   Durbin-Watson:                   1.822
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               98.942
Skew:                          -0.245   Prob(JB):                     3.27e-22
Kurtosis:                       4.517   Cond. No.                     6.62e+04
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 6.62e+04. This might indicate that there are
strong multicollinearity or other numerical problems.

print(nls_reg.f_test('(educ:iq=iq=0)'))

linearHypothesis(lwage_rg, c("educ:iq =0", "iq=0"))

test iq c.educ#c.iq

<F test: F=6.83181678824927, p=0.0011341745453863027, df_denom=925, df_num=2>