# Chapter 10. Basic Linear Unobserved Effects Panel Data Models

import pandas as pd
import statsmodels.formula.api as smf

from wooldridge import *


## Example 10.4

from linearmodels import RandomEffects

# Load the jtrain data and index it by firm code and year.
# set_index(..., inplace=True) returns None, so wrapping that call in
# pd.DataFrame() only ever produced an empty frame; index by assignment.
df = dataWoo("jtrain").set_index(['fcode', 'year'])
dfp = df  # same panel under the script's original second name

# Random-effects regression of lscrap on the year dummies, union, grant and
# grant_1, estimated on the rows where lscrap is not missing.
re1 = RandomEffects.from_formula(
    'lscrap ~1 + d88 + d89 + union + grant + grant_1',
    data=df.dropna(subset=['lscrap']),
).fit()
print(re1)

                        RandomEffects Estimation Summary
================================================================================
Dep. Variable:                 lscrap   R-squared:                        0.1486
Estimator:              RandomEffects   R-squared (Between):              0.0184
No. Observations:                 162   R-squared (Within):               0.2005
Date:                Mon, Dec 11 2023   R-squared (Overall):              0.0349
Time:                        21:15:00   Log-likelihood                   -113.26
F-statistic:                      5.4474
Entities:                          54   P-value                           0.0001
Avg Obs:                       3.0000   Distribution:                   F(5,156)
Min Obs:                       3.0000
Max Obs:                       3.0000   F-statistic (robust):             5.4474
P-value                           0.0001
Time periods:                       3   Distribution:                   F(5,156)
Avg Obs:                       54.000
Min Obs:                       54.000
Max Obs:                       54.000

Parameter Estimates
==============================================================================
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.4148     0.2458     1.6878     0.0935     -0.0707      0.9003
d88           -0.0931     0.1086    -0.8569     0.3928     -0.3076      0.1215
d89           -0.2692     0.1310    -2.0544     0.0416     -0.5280     -0.0104
union          0.5478     0.4149     1.3204     0.1886     -0.2717      1.3674
grant         -0.2158     0.1471    -1.4669     0.1444     -0.5064      0.0748
grant_1       -0.3784     0.2044    -1.8506     0.0661     -0.7822      0.0255
==============================================================================

# Joint Wald test of H0: grant = grant_1 = 0 on the random-effects fit.
re_grant_wald = re1.wald_test(formula='grant = grant_1 = 0')
print(re_grant_wald)

Linear Equality Hypothesis Test
H0: Linear equality constraint is valid
Statistic: 3.7246
P-value: 0.1553
Distributed: chi2(2)


## Example 10.5

from linearmodels import PanelOLS

# Fixed-effects fit: same regressors as before minus union, with
# EntityEffects in the formula; rows with missing lscrap are dropped first.
fe_sample = df.dropna(subset=['lscrap'])
fe_model = PanelOLS.from_formula(
    'lscrap ~1 + d88 + d89 + grant + grant_1 + EntityEffects',
    data=fe_sample,
)
fe1 = fe_model.fit()
print(fe1)

                          PanelOLS Estimation Summary
================================================================================
Dep. Variable:                 lscrap   R-squared:                        0.2010
Estimator:                   PanelOLS   R-squared (Between):             -0.0177
No. Observations:                 162   R-squared (Within):               0.2010
Date:                Mon, Dec 11 2023   R-squared (Overall):              0.0021
Time:                        21:15:00   Log-likelihood                   -80.946
F-statistic:                      6.5426
Entities:                          54   P-value                           0.0001
Avg Obs:                       3.0000   Distribution:                   F(4,104)
Min Obs:                       3.0000
Max Obs:                       3.0000   F-statistic (robust):             6.5426
P-value                           0.0001
Time periods:                       3   Distribution:                   F(4,104)
Avg Obs:                       54.000
Min Obs:                       54.000
Max Obs:                       54.000

Parameter Estimates
==============================================================================
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.5974     0.0677     8.8202     0.0000      0.4631      0.7318
d88           -0.0802     0.1095    -0.7327     0.4654     -0.2973      0.1369
d89           -0.2472     0.1332    -1.8556     0.0663     -0.5114      0.0170
grant         -0.2523     0.1506    -1.6751     0.0969     -0.5510      0.0464
grant_1       -0.4216     0.2102    -2.0057     0.0475     -0.8384     -0.0048
==============================================================================

F-test for Poolability: 24.661
P-value: 0.0000
Distribution: F(53,104)

Included effects: Entity

# Joint Wald test of H0: grant = grant_1 = 0 on the fixed-effects fit.
fe_grant_wald = fe1.wald_test(formula='grant = grant_1 = 0')
print(fe_grant_wald)

Linear Equality Hypothesis Test
H0: Linear equality constraint is valid
Statistic: 4.4588
P-value: 0.1076
Distributed: chi2(2)


## Example 10.5 (continued)

# Entity-effects regression again, this time with the covariance clustered
# by entity (cluster_entity=True). The formula has no explicit constant term.
fe_r_sample = df.dropna(subset=['lscrap'])
fe1_r = PanelOLS.from_formula(
    'lscrap ~ + d88 + d89 + grant + grant_1 + EntityEffects',
    data=fe_r_sample,
).fit(cov_type='clustered', cluster_entity=True)
print(fe1_r)

                          PanelOLS Estimation Summary
================================================================================
Dep. Variable:                 lscrap   R-squared:                        0.2010
Estimator:                   PanelOLS   R-squared (Between):             -0.1103
No. Observations:                 162   R-squared (Within):               0.2010
Date:                Mon, Dec 11 2023   R-squared (Overall):             -0.0839
Time:                        21:15:00   Log-likelihood                   -80.946
Cov. Estimator:             Clustered
F-statistic:                      6.5426
Entities:                          54   P-value                           0.0001
Avg Obs:                       3.0000   Distribution:                   F(4,104)
Min Obs:                       3.0000
Max Obs:                       3.0000   F-statistic (robust):             7.2045
P-value                           0.0000
Time periods:                       3   Distribution:                   F(4,104)
Avg Obs:                       54.000
Min Obs:                       54.000
Max Obs:                       54.000

Parameter Estimates
==============================================================================
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
d88           -0.0802     0.0969    -0.8276     0.4098     -0.2724      0.1120
d89           -0.2472     0.1949    -1.2681     0.2076     -0.6338      0.1394
grant         -0.2523     0.1421    -1.7757     0.0787     -0.5341      0.0295
grant_1       -0.4216     0.2798    -1.5067     0.1349     -0.9765      0.1333
==============================================================================

F-test for Poolability: 24.661
P-value: 0.0000
Distribution: F(53,104)

Included effects: Entity


## Example 10.6

from linearmodels import PooledOLS

# First-difference equation estimated by pooled OLS, using the differenced
# variables already present in the dataset (clscrap, cgrant, cgrant_1).
# Note: df is rebound here to the rows where clscrap is not missing.
df = df.dropna(subset=['clscrap'])
fd_formula = 'clscrap ~ 1+ d89 + cgrant + cgrant_1'
df1 = PooledOLS.from_formula(fd_formula, data=df).fit()
print(df1)

                          PooledOLS Estimation Summary
================================================================================
Dep. Variable:                clscrap   R-squared:                        0.0365
Estimator:                  PooledOLS   R-squared (Between):              0.0492
No. Observations:                 108   R-squared (Within):               0.0153
Date:                Mon, Dec 11 2023   R-squared (Overall):              0.0365
Time:                        21:15:00   Log-likelihood                   -91.764
F-statistic:                      1.3139
Entities:                          54   P-value                           0.2739
Avg Obs:                       2.0000   Distribution:                   F(3,104)
Min Obs:                       2.0000
Max Obs:                       2.0000   F-statistic (robust):             1.3139
P-value                           0.2739
Time periods:                       2   Distribution:                   F(3,104)
Avg Obs:                       54.000
Min Obs:                       54.000
Max Obs:                       54.000

Parameter Estimates
==============================================================================
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept     -0.0906     0.0910    -0.9960     0.3216     -0.2710      0.0898
d89           -0.0962     0.1254    -0.7669     0.4449     -0.3450      0.1526
cgrant        -0.2228     0.1307    -1.7040     0.0914     -0.4820      0.0365
cgrant_1      -0.3512     0.2351    -1.4941     0.1382     -0.8174      0.1149
==============================================================================

from linearmodels import PooledOLS

# Alternatively, generate the first-differenced variables by hand.
# Reload the full data set and index it by firm code and year.
# set_index(..., inplace=True) returns None, so wrapping that call in
# pd.DataFrame() only ever produced an empty frame; index by assignment.
df = dataWoo("jtrain").set_index(['fcode', 'year'])
dfp = df  # same panel under the script's original second name

# Difference within each firm. An ungrouped shift(1) would subtract the
# previous firm's last row from a firm's first row; grouping by fcode makes
# that first row NaN instead. Assumes rows are ordered by year within firm.
for col in ('grant', 'grant_1', 'lscrap'):
    df['d' + col] = df.groupby(level='fcode')[col].diff()

# Pooled OLS on the hand-made differences, restricted to rows where the
# dataset's own differenced scrap variable (clscrap) is not missing.
fd2 = PooledOLS.from_formula(
    'dlscrap ~ 1+ d89 + dgrant + dgrant_1',
    data=df.dropna(subset=['clscrap']),
).fit()
print(fd2)

                          PooledOLS Estimation Summary
================================================================================
Dep. Variable:                dlscrap   R-squared:                        0.0365
Estimator:                  PooledOLS   R-squared (Between):              0.0492
No. Observations:                 108   R-squared (Within):               0.0153
Date:                Mon, Dec 11 2023   R-squared (Overall):              0.0365
Time:                        21:15:00   Log-likelihood                   -91.764
F-statistic:                      1.3139
Entities:                          54   P-value                           0.2739
Avg Obs:                       2.0000   Distribution:                   F(3,104)
Min Obs:                       2.0000
Max Obs:                       2.0000   F-statistic (robust):             1.3139
P-value                           0.2739
Time periods:                       2   Distribution:                   F(3,104)
Avg Obs:                       54.000
Min Obs:                       54.000
Max Obs:                       54.000

Parameter Estimates
==============================================================================
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept     -0.0906     0.0910    -0.9960     0.3216     -0.2710      0.0898
d89           -0.0962     0.1254    -0.7669     0.4449     -0.3450      0.1526
dgrant        -0.2228     0.1307    -1.7040     0.0914     -0.4820      0.0365
dgrant_1      -0.3512     0.2351    -1.4941     0.1382     -0.8174      0.1149
==============================================================================

# Joint Wald test of H0: dgrant = dgrant_1 = 0 on the first-difference fit.
fd_grant_wald = fd2.wald_test(formula='dgrant = dgrant_1 = 0')
print(fd_grant_wald)

Linear Equality Hypothesis Test
H0: Linear equality constraint is valid
Statistic: 3.0591
P-value: 0.2166
Distributed: chi2(2)

# Re-estimate the first-difference equation with cov_type='clustered';
# fd2 is rebound to this new fit.
# NOTE(review): no cluster_entity / cluster_time / clusters argument is
# passed -- confirm which clustering is intended here.
fd_sample = df.dropna(subset=['clscrap'])
fd2 = PooledOLS.from_formula(
    'dlscrap ~ 1+ d89 + dgrant + dgrant_1',
    data=fd_sample,
).fit(cov_type='clustered')
print(fd2)

                          PooledOLS Estimation Summary
================================================================================
Dep. Variable:                dlscrap   R-squared:                        0.0365
Estimator:                  PooledOLS   R-squared (Between):              0.0492
No. Observations:                 108   R-squared (Within):               0.0153
Date:                Mon, Dec 11 2023   R-squared (Overall):              0.0365
Time:                        21:15:00   Log-likelihood                   -91.764
Cov. Estimator:             Clustered
F-statistic:                      1.3139
Entities:                          54   P-value                           0.2739
Avg Obs:                       2.0000   Distribution:                   F(3,104)
Min Obs:                       2.0000
Max Obs:                       2.0000   F-statistic (robust):             1.9549
P-value                           0.1254
Time periods:                       2   Distribution:                   F(3,104)
Avg Obs:                       54.000
Min Obs:                       54.000
Max Obs:                       54.000

Parameter Estimates
==============================================================================
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept     -0.0906     0.0847    -1.0698     0.2872     -0.2586      0.0773
d89           -0.0962     0.1285    -0.7489     0.4556     -0.3510      0.1585
dgrant        -0.2228     0.1239    -1.7986     0.0750     -0.4684      0.0228
dgrant_1      -0.3512     0.2477    -1.4179     0.1592     -0.8425      0.1400
==============================================================================


## Example 10.6 (continued)

# Serial-correlation check on the first-difference residuals: regress uhat
# on its own one-period lag by pooled OLS.
df['uhat'] = fd2.resids  # assigned by index label
# Lag within each firm so the lag can never be taken from another firm's
# rows, which an ungrouped shift(1) allows at firm boundaries.
# Assumes rows are ordered by year within each firm.
df['uhat_1'] = df.groupby(level='fcode')['uhat'].shift(1)

# Keep only rows where both the residual and its lag are available.
ar1_sample = df.dropna(subset=['uhat', 'uhat_1'])
print(PooledOLS.from_formula('uhat~1+uhat_1', data=ar1_sample).fit())

                          PooledOLS Estimation Summary
================================================================================
Dep. Variable:                   uhat   R-squared:                        0.0562
Estimator:                  PooledOLS   R-squared (Between):              0.0562
No. Observations:                  54   R-squared (Within):               0.0000
Date:                Mon, Dec 11 2023   R-squared (Overall):              0.0562
Time:                        21:15:00   Log-likelihood                   -44.302
F-statistic:                      3.0963
Entities:                          54   P-value                           0.0844
Avg Obs:                       1.0000   Distribution:                    F(1,52)
Min Obs:                       1.0000
Max Obs:                       1.0000   F-statistic (robust):             3.0963
P-value                           0.0844
Time periods:                       1   Distribution:                    F(1,52)
Avg Obs:                       54.000
Min Obs:                       54.000
Max Obs:                       54.000

Parameter Estimates
==============================================================================
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept   4.184e-17     0.0762  5.489e-16     1.0000     -0.1529      0.1529
uhat_1         0.2369     0.1346     1.7596     0.0844     -0.0333      0.5071
==============================================================================