# Chapter 7. Systems of Equations

## Examples

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

from wooldridge import *


### Example 7.3 - SUR System for Wages and Fringe Benefits

# Example 7.3: estimate the two equations of the wage/fringe-benefit
# system separately by OLS.  Both equations share the same regressors.
df = dataWoo("fringe")

# Formulas kept byte-identical; only bound to names for readability.
wage_formula = 'hrearn ~ educ + exper + expersq + tenure +  tenuresq + union + south + nrtheast + nrthcen + married + white  + male'
bens_formula = 'hrbens ~ educ + exper + expersq + tenure +  tenuresq + union + south + nrtheast + nrthcen + married + white  + male'

reg1 = smf.ols(wage_formula, data=df).fit()  # hourly earnings equation
reg2 = smf.ols(bens_formula, data=df).fit()  # hourly benefits equation

for fitted in (reg1, reg2):
    print(fitted.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                 hrearn   R-squared:                       0.205
Model:                            OLS   Adj. R-squared:                  0.189
Method:                 Least Squares   F-statistic:                     12.96
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           7.45e-24
Time:                        22:32:52   Log-Likelihood:                -1773.8
No. Observations:                 616   AIC:                             3574.
Df Residuals:                     603   BIC:                             3631.
Df Model:                          12
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.6321      1.228     -2.143      0.033      -5.044      -0.220
educ           0.4588      0.069      6.637      0.000       0.323       0.595
exper         -0.0758      0.057     -1.323      0.186      -0.188       0.037
expersq        0.0040      0.001      3.391      0.001       0.002       0.006
tenure         0.1101      0.084      1.314      0.190      -0.055       0.275
tenuresq      -0.0051      0.003     -1.547      0.122      -0.012       0.001
union          0.8080      0.408      1.981      0.048       0.007       1.609
south         -0.4566      0.552     -0.828      0.408      -1.540       0.627
nrtheast      -1.1508      0.606     -1.900      0.058      -2.340       0.039
nrthcen       -0.6363      0.556     -1.144      0.253      -1.728       0.456
married        0.6424      0.418      1.538      0.125      -0.178       1.463
white          1.1409      0.612      1.864      0.063      -0.061       2.343
male           1.7847      0.398      4.484      0.000       1.003       2.566
==============================================================================
Omnibus:                      930.155   Durbin-Watson:                   1.919
Prob(Omnibus):                  0.000   Jarque-Bera (JB):           356331.138
Skew:                           8.292   Prob(JB):                         0.00
Kurtosis:                     119.653   Cond. No.                     5.89e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 5.89e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
OLS Regression Results
==============================================================================
Dep. Variable:                 hrbens   R-squared:                       0.399
Model:                            OLS   Adj. R-squared:                  0.387
Method:                 Least Squares   F-statistic:                     33.32
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           5.68e-59
Time:                        22:32:52   Log-Likelihood:                -465.61
No. Observations:                 616   AIC:                             957.2
Df Residuals:                     603   BIC:                             1015.
Df Model:                          12
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.8897      0.147     -6.058      0.000      -1.178      -0.601
educ           0.0768      0.008      9.290      0.000       0.061       0.093
exper          0.0226      0.007      3.291      0.001       0.009       0.036
expersq       -0.0005      0.000     -3.360      0.001      -0.001      -0.000
tenure         0.0536      0.010      5.344      0.000       0.034       0.073
tenuresq      -0.0012      0.000     -2.970      0.003      -0.002      -0.000
union          0.3659      0.049      7.503      0.000       0.270       0.462
south         -0.0227      0.066     -0.344      0.731      -0.152       0.107
nrtheast      -0.0567      0.072     -0.783      0.434      -0.199       0.086
nrthcen       -0.0380      0.066     -0.571      0.568      -0.169       0.093
married        0.0579      0.050      1.158      0.247      -0.040       0.156
white          0.0902      0.073      1.232      0.218      -0.054       0.234
male           0.2683      0.048      5.638      0.000       0.175       0.362
==============================================================================
Omnibus:                       16.718   Durbin-Watson:                   1.227
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               17.380
Skew:                           0.387   Prob(JB):                     0.000168
Kurtosis:                       3.282   Cond. No.                     5.89e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 5.89e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

# Contemporaneous correlation between the residuals of the two equations;
# a nonzero value is what motivates SUR over equation-by-equation OLS.
u1 = reg1.resid
u2 = reg2.resid
residuals_corr = np.corrcoef(u1, u2)[0, 1]
print("Correlation of residuals:", round(residuals_corr, 2))

Correlation of residuals: 0.32

# Joint F test that 'white' and 'married' both have zero coefficients in
# the fringe-benefits equation.
hypothesis = '(white = 0), (married = 0)'
# print() so the result is shown when run as a plain script, not only as
# the implicit display of a notebook's last expression.
print(reg2.f_test(hypothesis))

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=1.5430432890309778, p=0.21457238287953384, df_denom=603, df_num=2>


### 7.2 Example 7.7 Effects of Job Training Grants on Firm Scrap Rates

# Example 7.7: pooled OLS of log scrap rate on year dummies, the grant
# indicator, and its one-year lag.
df = dataWoo("jtrain")

scrap_fit = smf.ols('lscrap ~ d88 + d89 + grant + grant_1', data=df).fit()
print(scrap_fit.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                 lscrap   R-squared:                       0.017
Model:                            OLS   Adj. R-squared:                 -0.008
Method:                 Least Squares   F-statistic:                    0.6914
Date:                Mon, 11 Dec 2023   Prob (F-statistic):              0.599
Time:                        22:32:52   Log-Likelihood:                -292.17
No. Observations:                 162   AIC:                             594.3
Df Residuals:                     157   BIC:                             609.8
Df Model:                           4
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.5974      0.203      2.942      0.004       0.196       0.999
d88           -0.2394      0.311     -0.770      0.442      -0.853       0.375
d89           -0.4965      0.338     -1.469      0.144      -1.164       0.171
grant          0.2000      0.338      0.591      0.555      -0.468       0.868
grant_1        0.0489      0.436      0.112      0.911      -0.812       0.910
==============================================================================
Omnibus:                        4.667   Durbin-Watson:                   0.805
Prob(Omnibus):                  0.097   Jarque-Bera (JB):                4.300
Skew:                          -0.313   Prob(JB):                        0.116
Kurtosis:                       3.496   Cond. No.                         5.06
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


### Example 7.8 - Effect of Being in Season on Grade Point Average

# Example 7.8: pooled OLS of term GPA on the in-season dummy plus
# semester-varying and time-constant controls.
df = dataWoo("gpa3")

gpa_formula = 'trmgpa ~ spring + cumgpa + crsgpa + frstsem + season + sat + verbmath + hsperc + hssize + black + female'
gpa_fit = smf.ols(gpa_formula, data=df).fit()
print(gpa_fit.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                 trmgpa   R-squared:                       0.519
Model:                            OLS   Adj. R-squared:                  0.512
Method:                 Least Squares   F-statistic:                     70.64
Date:                Mon, 11 Dec 2023   Prob (F-statistic):          1.20e-106
Time:                        22:32:52   Log-Likelihood:                -567.69
No. Observations:                 732   AIC:                             1159.
Df Residuals:                     720   BIC:                             1215.
Df Model:                          11
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.0676      0.338     -6.115      0.000      -2.731      -1.404
spring        -0.0122      0.046     -0.262      0.794      -0.103       0.079
cumgpa         0.3146      0.040      7.770      0.000       0.235       0.394
crsgpa         0.9840      0.096     10.247      0.000       0.795       1.173
frstsem        0.7691      0.120      6.387      0.000       0.533       1.006
season        -0.0463      0.047     -0.982      0.326      -0.139       0.046
sat            0.0014      0.000      9.628      0.000       0.001       0.002
verbmath      -0.1126      0.131     -0.862      0.389      -0.369       0.144
hsperc        -0.0066      0.001     -6.475      0.000      -0.009      -0.005
hssize     -5.761e-05   9.94e-05     -0.580      0.562      -0.000       0.000
black         -0.2313      0.054     -4.257      0.000      -0.338      -0.125
female         0.2856      0.051      5.603      0.000       0.185       0.386
==============================================================================
Omnibus:                       19.611   Durbin-Watson:                   1.751
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               21.685
Skew:                          -0.348   Prob(JB):                     1.96e-05
Kurtosis:                       3.475   Cond. No.                     1.73e+04
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.73e+04. This might indicate that there are
strong multicollinearity or other numerical problems.

# linearmodels panel estimators require an (entity, time) MultiIndex.
# set_index already returns a DataFrame, so the redundant pd.DataFrame(...)
# wrapper (an extra copy) is dropped.
dfp = df.set_index(['id', 'term'])

from linearmodels import PooledOLS

# The explicit '1 +' adds the intercept: unlike statsmodels formulas,
# linearmodels formulas do not include a constant automatically.
pool1 = PooledOLS.from_formula(
'trmgpa ~ 1+ spring + cumgpa + crsgpa + frstsem + season + sat + verbmath + hsperc + hssize + black + female',
data=dfp).fit()
print(pool1.summary)

                          PooledOLS Estimation Summary
================================================================================
Dep. Variable:                 trmgpa   R-squared:                        0.5191
Estimator:                  PooledOLS   R-squared (Between):              0.6365
No. Observations:                 732   R-squared (Within):              -0.0105
Date:                Mon, Dec 11 2023   R-squared (Overall):              0.5191
Time:                        22:32:53   Log-likelihood                   -567.69
F-statistic:                      70.641
Entities:                         366   P-value                           0.0000
Avg Obs:                       2.0000   Distribution:                  F(11,720)
Min Obs:                       2.0000
Max Obs:                       2.0000   F-statistic (robust):             70.641
P-value                           0.0000
Time periods:                       2   Distribution:                  F(11,720)
Avg Obs:                       366.00
Min Obs:                       366.00
Max Obs:                       366.00

Parameter Estimates
==============================================================================
Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept     -2.0676     0.3381    -6.1153     0.0000     -2.7314     -1.4038
spring        -0.0122     0.0465    -0.2615     0.7938     -0.1034      0.0791
cumgpa         0.3146     0.0405     7.7699     0.0000      0.2351      0.3941
crsgpa         0.9840     0.0960     10.247     0.0000      0.7955      1.1726
frstsem        0.7691     0.1204     6.3872     0.0000      0.5327      1.0055
season        -0.0463     0.0471    -0.9823     0.3263     -0.1387      0.0462
sat            0.0014     0.0001     9.6282     0.0000      0.0011      0.0017
verbmath      -0.1126     0.1306    -0.8622     0.3889     -0.3690      0.1438
hsperc        -0.0066     0.0010    -6.4750     0.0000     -0.0086     -0.0046
hssize     -5.761e-05  9.937e-05    -0.5798     0.5623     -0.0003      0.0001
black         -0.2313     0.0543    -4.2567     0.0000     -0.3380     -0.1246
female         0.2856     0.0510     5.6030     0.0000      0.1855      0.3856
==============================================================================


### Example 7.9 Athletes’ Grade Point Averages, continued

# Example 7.9: test for serial correlation by adding the lagged residual
# from the pooled regression of Example 7.8 as a regressor.
df = dataWoo("gpa3")

# Residuals from the full pooled regression.
pooled_fit = smf.ols(
'trmgpa ~ spring + cumgpa + crsgpa + frstsem + season + sat + verbmath + hsperc + hssize + black + female',
data=df).fit()
df['uhat'] = pooled_fit.resid

# Lag the residual within each student (id) across terms.
df['uhat_1'] = df.groupby('id')['uhat'].shift(1)

# Re-estimate (second term only, since the lag is missing in term 1)
# with the lagged residual included.
second_term_fit = smf.ols(
'trmgpa ~ cumgpa + crsgpa + season + sat + verbmath + hsperc + hssize + black + female + uhat_1',
data=df).fit()
print(second_term_fit.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                 trmgpa   R-squared:                       0.616
Model:                            OLS   Adj. R-squared:                  0.605
Method:                 Least Squares   F-statistic:                     56.83
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           1.31e-67
Time:                        22:32:53   Log-Likelihood:                -249.22
No. Observations:                 366   AIC:                             520.4
Df Residuals:                     355   BIC:                             563.4
Df Model:                          10
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -2.2663      0.425     -5.337      0.000      -3.101      -1.431
cumgpa         0.3489      0.072      4.843      0.000       0.207       0.491
crsgpa         1.0006      0.118      8.503      0.000       0.769       1.232
season        -0.0271      0.058     -0.468      0.640      -0.141       0.087
sat            0.0014      0.000      7.094      0.000       0.001       0.002
verbmath      -0.1137      0.170     -0.668      0.505      -0.449       0.221
hsperc        -0.0050      0.001     -3.495      0.001      -0.008      -0.002
hssize     -8.435e-05      0.000     -0.655      0.513      -0.000       0.000
black         -0.2407      0.071     -3.406      0.001      -0.380      -0.102
female         0.2919      0.073      3.985      0.000       0.148       0.436
uhat_1         0.1942      0.061      3.173      0.002       0.074       0.315
==============================================================================
Omnibus:                       31.529   Durbin-Watson:                   1.955
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               47.785
Skew:                          -0.585   Prob(JB):                     4.20e-11
Kurtosis:                       4.329   Cond. No.                     1.67e+04
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.67e+04. This might indicate that there are
strong multicollinearity or other numerical problems.

# Regress the residual on its own lag; a significant slope points to
# serial correlation in the pooled-OLS errors.
ar_fit = smf.ols('uhat ~  uhat_1', data=df).fit()
print(ar_fit.summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                   uhat   R-squared:                       0.057
Model:                            OLS   Adj. R-squared:                  0.055
Method:                 Least Squares   F-statistic:                     22.04
Date:                Mon, 11 Dec 2023   Prob (F-statistic):           3.79e-06
Time:                        22:32:53   Log-Likelihood:                -250.10
No. Observations:                 366   AIC:                             504.2
Df Residuals:                     364   BIC:                             512.0
Df Model:                           1
Covariance Type:            nonrobust
==============================================================================
coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   1.034e-14      0.025   4.12e-13      1.000      -0.049       0.049
uhat_1         0.2122      0.045      4.695      0.000       0.123       0.301
==============================================================================
Omnibus:                       29.996   Durbin-Watson:                   1.937
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               43.958
Skew:                          -0.574   Prob(JB):                     2.85e-10
Kurtosis:                       4.251   Cond. No.                         1.80
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.