# @title
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
from statsmodels.stats.diagnostic import het_breuschpagan, acorr_breusch_godfrey
from statsmodels.stats.stattools import jarque_bera
from statsmodels.stats.sandwich_covariance import cov_hac
import yfinance as yf

# @title
# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Set plotting style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)

# @title
# Define tickers
stock_ticker = "NVDA"
market_ticker = "^GSPC"  # S&P 500
gold_ticker = "GC=F"      # Gold Futures in USD

# Download 5 years of historical data
print("Downloading data...")
data = yf.download([stock_ticker, market_ticker, gold_ticker],
                   start="2019-01-09",
                   end="2024-01-09",
                   group_by='ticker')  # Group by ticker

print("✓ Data downloaded!")
print("\nData structure:")
print(data.head())

# Extract adjusted close prices for each ticker
prices = pd.DataFrame()
prices['NVDA'] = data[stock_ticker]['Close']
prices['SNP500'] = data[market_ticker]['Close']
prices['Gold'] = data[gold_ticker]['Close']

# Remove any rows with missing values
prices = prices.dropna()


print(f"Date range: {prices.index[0]} to {prices.index[-1]}")
print(f"Total observations: {len(prices)}")
print("\nFirst few rows of prices:")
print(prices.head())
print("\nLast few rows of prices:")
print(prices.tail())
print("\nMissing values:")
print(prices.isnull().sum())

Downloading data...

/tmp/ipython-input-457142551.py:8: FutureWarning: YF.download() has changed argument auto_adjust default to True
  data = yf.download([stock_ticker, market_ticker, gold_ticker],
[*********************100%***********************]  3 of 3 completed

✓ Data downloaded!

Data structure:
Ticker             GC=F                                                \
Price              Open         High          Low        Close Volume   
Date                                                                    
2019-01-09  1279.400024  1289.300049  1279.199951  1289.300049    102   
2019-01-10  1284.699951  1284.699951  1284.699951  1284.699951      0   
2019-01-11  1291.199951  1291.199951  1286.300049  1287.099976     11   
2019-01-14  1292.400024  1293.900024  1289.099976  1289.099976      7   
2019-01-15  1290.099976  1292.699951  1286.199951  1286.199951      3   

Ticker            ^GSPC                                                     \
Price              Open         High          Low        Close      Volume   
Date                                                                         
2019-01-09  2580.000000  2595.320068  2568.889893  2584.959961  4088740000   
2019-01-10  2573.510010  2597.820068  2562.020020  2596.639893  3721300000   
2019-01-11  2588.110107  2596.270020  2577.399902  2596.260010  3447460000   
2019-01-14  2580.310059  2589.320068  2570.409912  2582.610107  3689370000   
2019-01-15  2585.100098  2613.080078  2585.100098  2610.300049  3601180000   

Ticker          NVDA                                           
Price           Open      High       Low     Close     Volume  
Date                                                           
2019-01-09  3.518180  3.582395  3.467602  3.535040  617260000  
2019-01-10  3.515702  3.609420  3.455206  3.600743  523156000  
2019-01-11  3.578429  3.712809  3.550660  3.689999  874764000  
2019-01-14  3.637684  3.755205  3.614131  3.729916  730168000  
2019-01-15  3.762643  3.802065  3.697437  3.715784  617012000  
Date range: 2019-01-09 00:00:00 to 2024-01-08 00:00:00
Total observations: 1258

First few rows of prices:
                NVDA       SNP500         Gold
Date                                          
2019-01-09  3.535040  2584.959961  1289.300049
2019-01-10  3.600743  2596.639893  1284.699951
2019-01-11  3.689999  2596.260010  1287.099976
2019-01-14  3.729916  2582.610107  1289.099976
2019-01-15  3.715784  2610.300049  1286.199951

Last few rows of prices:
                 NVDA       SNP500         Gold
Date                                           
2024-01-02  48.143860  4742.830078  2064.399902
2024-01-03  47.545158  4704.810059  2034.199951
2024-01-04  47.973949  4688.680176  2042.300049
2024-01-05  49.072392  4697.240234  2042.400024
2024-01-08  52.226810  4763.540039  2026.599976

Missing values:
NVDA      0
SNP500    0
Gold      0
dtype: int64

# @title
# Calculate daily percentage returns
returns = prices.pct_change() * 100  # Convert to percentage
returns = returns.dropna()  # Remove first row (NaN)

# Rename columns for clarity
returns.columns = ['rGold', 'rNVDA', 'rSNP500']


print(f"Total return observations: {len(returns)}")
print("\nDescriptive statistics of returns:")
print(returns.describe())
print("\nFirst few rows of returns:")
print(returns.head(10))

Total return observations: 1257

Descriptive statistics of returns:
             rGold        rNVDA      rSNP500
count  1257.000000  1257.000000  1257.000000
mean      0.267000     0.057643     0.040877
std       3.253928     1.338484     0.989144
min     -18.452095   -11.984055    -4.978726
25%      -1.515212    -0.524238    -0.420516
50%       0.305382     0.088779     0.049862
75%       1.981003     0.714779     0.553423
max      24.369632     9.382774     5.947668

First few rows of returns:
               rGold     rNVDA   rSNP500
Date                                    
2019-01-10  1.858619  0.451842 -0.356790
2019-01-11  2.478836 -0.014630  0.186816
2019-01-14  1.081742 -0.525753  0.155388
2019-01-15 -0.378877  1.072169 -0.224965
2019-01-16 -0.687258  0.222199  0.419843
2019-01-17  1.934944  0.759140 -0.046452
2019-01-18  3.433977  1.318305 -0.751352
2019-01-22 -5.199768 -1.415731  0.093651
2019-01-23  0.349531  0.220291  0.046782
2019-01-24  5.727109  0.137573 -0.311745

# @title
# Check for any missing values
print("Missing values in returns:")
print(returns.isnull().sum())

# Check for any infinite values
print("\nInfinite values in returns:")
print(np.isinf(returns).sum())

# Basic statistics
print("\n" + "="*50)
print("SUMMARY STATISTICS")
print("="*50)
for col in returns.columns:
    print(f"\n{col}:")
    print(f"  Mean: {returns[col].mean():.4f}%")
    print(f"  Std Dev: {returns[col].std():.4f}%")
    print(f"  Min: {returns[col].min():.4f}%")
    print(f"  Max: {returns[col].max():.4f}%")
    print(f"  Observations: {len(returns[col])}")

Missing values in returns:
rGold      0
rNVDA      0
rSNP500    0
dtype: int64

Infinite values in returns:
rGold      0
rNVDA      0
rSNP500    0
dtype: int64

==================================================
SUMMARY STATISTICS
==================================================

rGold:
  Mean: 0.2670%
  Std Dev: 3.2539%
  Min: -18.4521%
  Max: 24.3696%
  Observations: 1257

rNVDA:
  Mean: 0.0576%
  Std Dev: 1.3385%
  Min: -11.9841%
  Max: 9.3828%
  Observations: 1257

rSNP500:
  Mean: 0.0409%
  Std Dev: 0.9891%
  Min: -4.9787%
  Max: 5.9477%
  Observations: 1257

# @title
# Question 1a: Regression with market and gold
# Model: ri = β0 + β1*rm + β2*rg + ui

# Prepare the data
y = returns['rNVDA']  # Dependent variable (stock returns)
X = returns[['rSNP500', 'rGold']]  # Independent variables (market and gold)
X = sm.add_constant(X)  # Add intercept

# Run OLS regression
model_1a = sm.OLS(y, X).fit()

# Display results
print("="*70)
print("QUESTION 1a: OLS REGRESSION WITH MARKET AND GOLD")
print("="*70)
print("Model: rNVDA = β0 + β1*rSNP500 + β2*rGold + u")
print("\n")
print(model_1a.summary())

# Store coefficients for later use
beta0_1a = model_1a.params['const']
beta1_1a = model_1a.params['rSNP500']
beta2_1a = model_1a.params['rGold']

print("\n" + "="*70)
print("KEY COEFFICIENTS:")
print("="*70)
print(f"β0 (Intercept): {beta0_1a:.6f}")
print(f"β1 (Market):    {beta1_1a:.6f}")
print(f"β2 (Gold):      {beta2_1a:.6f}")
print(f"R-squared:      {model_1a.rsquared:.6f}")
print(f"Adj R-squared:  {model_1a.rsquared_adj:.6f}")

======================================================================
QUESTION 1a: OLS REGRESSION WITH MARKET AND GOLD
======================================================================
Model: rNVDA = β0 + β1*rSNP500 + β2*rGold + u


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  rNVDA   R-squared:                       0.507
Model:                            OLS   Adj. R-squared:                  0.507
Method:                 Least Squares   F-statistic:                     645.6
Date:                Thu, 20 Nov 2025   Prob (F-statistic):          1.76e-193
Time:                        21:39:08   Log-Likelihood:                -1704.7
No. Observations:                1257   AIC:                             3415.
Df Residuals:                    1254   BIC:                             3431.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0221      0.027     -0.832      0.406      -0.074       0.030
rSNP500        0.0482      0.027      1.791      0.074      -0.005       0.101
rGold          0.2915      0.008     35.632      0.000       0.275       0.307
==============================================================================
Omnibus:                      252.639   Durbin-Watson:                   2.263
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             3755.271
Skew:                          -0.480   Prob(JB):                         0.00
Kurtosis:                      11.413   Cond. No.                         3.36
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

======================================================================
KEY COEFFICIENTS:
======================================================================
β0 (Intercept): -0.022145
β1 (Market):    0.048191
β2 (Gold):      0.291452
R-squared:      0.507306
Adj R-squared:  0.506520

# @title
# Question 1b: Regression without gold
# Model: ri = β0 + β1*rm + ui

print("\n" + "="*70)
print("QUESTION 1b: OLS REGRESSION WITHOUT GOLD")
print("="*70)
print("Model: rNVDA = β0 + β1*rSNP500 + u")
print("\n")

# Prepare the data
y = returns['rNVDA']  # Dependent variable
X_1b = returns[['rSNP500']]  # Only market returns
X_1b = sm.add_constant(X_1b)  # Add intercept

# Run OLS regression
model_1b = sm.OLS(y, X_1b).fit()

# Display results
print(model_1b.summary())

# Store coefficients
beta0_1b = model_1b.params['const']
beta1_1b = model_1b.params['rSNP500']

print("\n" + "="*70)
print("KEY COEFFICIENTS:")
print("="*70)
print(f"β0 (Intercept): {beta0_1b:.6f}")
print(f"β1 (Market):    {beta1_1b:.6f}")
print(f"R-squared:      {model_1b.rsquared:.6f}")
print(f"Adj R-squared:  {model_1b.rsquared_adj:.6f}")

# Comparison between models
print("\n" + "="*70)
print("MODEL COMPARISON (1a vs 1b)")
print("="*70)
print(f"{'Metric':<20} {'Model 1a (with Gold)':<25} {'Model 1b (without Gold)':<25}")
print("-"*70)
print(f"{'R-squared':<20} {model_1a.rsquared:<25.6f} {model_1b.rsquared:<25.6f}")
print(f"{'Adj R-squared':<20} {model_1a.rsquared_adj:<25.6f} {model_1b.rsquared_adj:<25.6f}")
print(f"{'AIC':<20} {model_1a.aic:<25.2f} {model_1b.aic:<25.2f}")
print(f"{'BIC':<20} {model_1a.bic:<25.2f} {model_1b.bic:<25.2f}")
print(f"{'Market Beta':<20} {beta1_1a:<25.6f} {beta1_1b:<25.6f}")
print(f"{'F-statistic':<20} {model_1a.fvalue:<25.2f} {model_1b.fvalue:<25.2f}")

======================================================================
QUESTION 1b: OLS REGRESSION WITHOUT GOLD
======================================================================
Model: rNVDA = β0 + β1*rSNP500 + u


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  rNVDA   R-squared:                       0.008
Model:                            OLS   Adj. R-squared:                  0.008
Method:                 Least Squares   F-statistic:                     10.70
Date:                Thu, 20 Nov 2025   Prob (F-statistic):            0.00110
Time:                        21:39:08   Log-Likelihood:                -2144.2
No. Observations:                1257   AIC:                             4292.
Df Residuals:                    1255   BIC:                             4303.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0526      0.038      1.396      0.163      -0.021       0.126
rSNP500        0.1244      0.038      3.272      0.001       0.050       0.199
==============================================================================
Omnibus:                      309.452   Durbin-Watson:                   2.387
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             8363.627
Skew:                          -0.503   Prob(JB):                         0.00
Kurtosis:                      15.597   Cond. No.                         1.04
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

======================================================================
KEY COEFFICIENTS:
======================================================================
β0 (Intercept): 0.052556
β1 (Market):    0.124442
R-squared:      0.008457
Adj R-squared:  0.007667

======================================================================
MODEL COMPARISON (1a vs 1b)
======================================================================
Metric               Model 1a (with Gold)      Model 1b (without Gold)  
----------------------------------------------------------------------
R-squared            0.507306                  0.008457                 
Adj R-squared        0.506520                  0.007667                 
AIC                  3415.35                   4292.46                  
BIC                  3430.76                   4302.73                  
Market Beta          0.048191                  0.124442                 
F-statistic          645.60                    10.70

# @title
# Question 2: Test if β1 = 1 (market beta equals 1)
# H0: β1 = 1
# H1: β1 ≠ 1

print("="*70)
print("QUESTION 2: HYPOTHESIS TEST FOR β₁ = 1")
print("="*70)
print("H₀: β₁ = 1")
print("H₁: β₁ ≠ 1")
print("\n")

# Extract coefficient and standard error for β1 (rSNP500)
beta1_estimate = model_1a.params['rSNP500']
beta1_se = model_1a.bse['rSNP500']

# Calculate t-statistic for H0: β1 = 1
t_statistic = (beta1_estimate - 1) / beta1_se

# Degrees of freedom
df = model_1a.df_resid

# Calculate p-value (two-tailed test)
p_value = 2 * (1 - stats.t.cdf(abs(t_statistic), df))

# Critical value at 5% significance level (two-tailed)
alpha = 0.05
t_critical = stats.t.ppf(1 - alpha/2, df)

print(f"Estimated β₁:        {beta1_estimate:.6f}")
print(f"Standard Error:      {beta1_se:.6f}")
print(f"Hypothesized Value:  1.0000")
print(f"\nTest Statistic (t):  {t_statistic:.6f}")
print(f"Degrees of Freedom:  {df}")
print(f"P-value:             {p_value:.6f}")
print(f"Critical Value (5%): ±{t_critical:.6f}")

# Decision
print("\n" + "-"*70)
print("DECISION:")
print("-"*70)
if p_value < alpha:
    print(f"✓ REJECT H₀ (p-value = {p_value:.6f} < 0.05)")
    print(f"  There is sufficient evidence that β₁ ≠ 1")
else:
    print(f"✗ FAIL TO REJECT H₀ (p-value = {p_value:.6f} > 0.05)")
    print(f"  There is insufficient evidence that β₁ ≠ 1")

# Confidence interval
ci_lower = beta1_estimate - t_critical * beta1_se
ci_upper = beta1_estimate + t_critical * beta1_se
print(f"\n95% Confidence Interval for β₁: [{ci_lower:.6f}, {ci_upper:.6f}]")
print(f"Does the CI contain 1? {'YES' if ci_lower <= 1 <= ci_upper else 'NO'}")

======================================================================
QUESTION 2: HYPOTHESIS TEST FOR β₁ = 1
======================================================================
H₀: β₁ = 1
H₁: β₁ ≠ 1


Estimated β₁:        0.048191
Standard Error:      0.026907
Hypothesized Value:  1.0000

Test Statistic (t):  -35.373545
Degrees of Freedom:  1254.0
P-value:             0.000000
Critical Value (5%): ±1.961858

----------------------------------------------------------------------
DECISION:
----------------------------------------------------------------------
✓ REJECT H₀ (p-value = 0.000000 < 0.05)
  There is sufficient evidence that β₁ ≠ 1

95% Confidence Interval for β₁: [-0.004598, 0.100979]
Does the CI contain 1? NO

# @title
# Question 3: Test if β0 = β1 = β2 = 1
# This is an F-test for multiple restrictions

print("\n" + "="*70)
print("QUESTION 3: JOINT HYPOTHESIS TEST")
print("="*70)
print("H₀: β₀ = 1 AND β₁ = 1 AND β₂ = 1")
print("H₁: At least one coefficient ≠ 1")
print("\n")

# Define the restrictions: R*β = q
# Where β = [β0, β1, β2]' and we test if each equals 1

# Create restriction matrix
# We want to test: β0 = 1, β1 = 1, β2 = 1
restrictions = 'const = 1, rSNP500 = 1, rGold = 1'

# Perform F-test
f_test = model_1a.f_test(restrictions)

print("TEST RESULTS:")
print("-"*70)
print(f"F-statistic:     {f_test.fvalue:.6f}")
print(f"P-value:         {f_test.pvalue:.6f}")
print(f"Degrees of Freedom: ({f_test.df_num:.0f}, {f_test.df_denom:.0f})")

# Decision
alpha = 0.05
print("\n" + "-"*70)
print("DECISION:")
print("-"*70)
if f_test.pvalue < alpha:
    print(f"✓ REJECT H₀ (p-value = {f_test.pvalue:.6f} < 0.05)")
    print(f"  At least one coefficient is significantly different from 1")
else:
    print(f"✗ FAIL TO REJECT H₀ (p-value = {f_test.pvalue:.6f} > 0.05)")
    print(f"  Cannot conclude that any coefficient differs from 1")

# Show individual coefficients vs hypothesized value of 1
print("\n" + "-"*70)
print("INDIVIDUAL COEFFICIENT COMPARISON:")
print("-"*70)
print(f"{'Coefficient':<15} {'Estimate':<15} {'Hypothesized':<15} {'Difference':<15}")
print("-"*70)
print(f"{'β₀ (const)':<15} {beta0_1a:<15.6f} {1.0:<15.6f} {beta0_1a - 1:<15.6f}")
print(f"{'β₁ (SNP500)':<15} {beta1_1a:<15.6f} {1.0:<15.6f} {beta1_1a - 1:<15.6f}")
print(f"{'β₂ (Gold)':<15} {beta2_1a:<15.6f} {1.0:<15.6f} {beta2_1a - 1:<15.6f}")

# Also show t-statistics for each individual test
print("\n" + "-"*70)
print("INDIVIDUAL t-TESTS (each coefficient = 1):")
print("-"*70)
for param_name in ['const', 'rSNP500', 'rGold']:
    coef = model_1a.params[param_name]
    se = model_1a.bse[param_name]
    t_stat = (coef - 1) / se
    p_val = 2 * (1 - stats.t.cdf(abs(t_stat), model_1a.df_resid))
    print(f"{param_name:<15} t = {t_stat:8.4f},  p-value = {p_val:.6f}")

======================================================================
QUESTION 3: JOINT HYPOTHESIS TEST
======================================================================
H₀: β₀ = 1 AND β₁ = 1 AND β₂ = 1
H₁: At least one coefficient ≠ 1


TEST RESULTS:
----------------------------------------------------------------------
F-statistic:     3841.302138
P-value:         0.000000
Degrees of Freedom: (3, 1254)

----------------------------------------------------------------------
DECISION:
----------------------------------------------------------------------
✓ REJECT H₀ (p-value = 0.000000 < 0.05)
  At least one coefficient is significantly different from 1

----------------------------------------------------------------------
INDIVIDUAL COEFFICIENT COMPARISON:
----------------------------------------------------------------------
Coefficient     Estimate        Hypothesized    Difference     
----------------------------------------------------------------------
β₀ (const)      -0.022145       1.000000        -1.022145      
β₁ (SNP500)     0.048191        1.000000        -0.951809      
β₂ (Gold)       0.291452        1.000000        -0.708548      

----------------------------------------------------------------------
INDIVIDUAL t-TESTS (each coefficient = 1):
----------------------------------------------------------------------
const           t = -38.3894,  p-value = 0.000000
rSNP500         t = -35.3735,  p-value = 0.000000
rGold           t = -86.6256,  p-value = 0.000000

# Question 4: Plot residuals of Model 1a

print("\n" + "="*70)
print("QUESTION 4: RESIDUAL ANALYSIS")
print("="*70)

residuals = model_1a.resid
fitted = model_1a.fittedvalues

plt.figure(figsize=(14, 6))

# --- Panel 1: Residuals over time ---
plt.subplot(1, 2, 1)
plt.plot(residuals.index, residuals, alpha=0.6, linewidth=0.8)
plt.axhline(0, color='r', linestyle='--', linewidth=1.2)
plt.title("Residuals Over Time (Model 1a)")
plt.xlabel("Date")
plt.ylabel("Residuals")
plt.grid(True, alpha=0.3)

# --- Panel 2: Residuals vs Fitted ---
plt.subplot(1, 2, 2)
plt.scatter(fitted, residuals, alpha=0.5, s=12)
plt.axhline(0, color='r', linestyle='--', linewidth=1.2)
plt.title("Residuals vs Fitted Values")
plt.xlabel("Fitted Values")
plt.ylabel("Residuals")
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

======================================================================
QUESTION 4: RESIDUAL ANALYSIS
======================================================================

# @title
# Question 5: Test for normality in residuals

print("\n" + "="*70)
print("QUESTION 5: NORMALITY TEST")
print("="*70)

residuals = model_1a.resid

# Jarque-Bera Test
jb_statistic, jb_pvalue, jb_skew, jb_kurtosis = jarque_bera(residuals)

print("\n1. JARQUE-BERA TEST")
print("-"*70)
print("H0: Residuals are normally distributed")
print("H1: Residuals are not normally distributed\n")

print(f"Test Statistic:  {jb_statistic:.6f}")
print(f"P-value:         {jb_pvalue:.6f}")
print(f"Skewness:        {jb_skew:.6f}")
print(f"Kurtosis:        {jb_kurtosis:.6f}")

alpha = 0.05
if jb_pvalue < alpha:
    print(f"\n✓ REJECT H0 (p-value = {jb_pvalue:.6f} < 0.05)")
    print("  Strong evidence against normality")
else:
    print(f"\n✗ FAIL TO REJECT H0 (p-value = {jb_pvalue:.6f} > 0.05)")
    print("  Insufficient evidence against normality")

======================================================================
QUESTION 5: NORMALITY TEST
======================================================================

1. JARQUE-BERA TEST
----------------------------------------------------------------------
H0: Residuals are normally distributed
H1: Residuals are not normally distributed

Test Statistic:  3755.271132
P-value:         0.000000
Skewness:        -0.480044
Kurtosis:        11.412961

✓ REJECT H0 (p-value = 0.000000 < 0.05)
  Strong evidence against normality

# @title
# Question 6: Test for heteroscedasticity

print("\n" + "="*70)
print("QUESTION 6: HETEROSCEDASTICITY TESTS")
print("="*70)

residuals = model_1a.resid
X = returns[['rSNP500', 'rGold']]
X = sm.add_constant(X)

# 1. Breusch-Pagan Test
print("\n1. BREUSCH-PAGAN TEST")
print("-"*70)
print("H₀: Homoscedasticity (constant variance)")
print("H₁: Heteroscedasticity (non-constant variance)")
print()

bp_test_stat, bp_pvalue, bp_f_stat, bp_f_pvalue = het_breuschpagan(residuals, X)

print(f"LM Statistic:    {bp_test_stat:.6f}")
print(f"LM P-value:      {bp_pvalue:.6f}")
print(f"F-statistic:     {bp_f_stat:.6f}")
print(f"F P-value:       {bp_f_pvalue:.6f}")

alpha = 0.05
if bp_pvalue < alpha:
    print(f"\n✓ REJECT H₀ (p-value = {bp_pvalue:.6f} < 0.05)")
    print("  Evidence of heteroscedasticity")
else:
    print(f"\n✗ FAIL TO REJECT H₀ (p-value = {bp_pvalue:.6f} > 0.05)")
    print("  No evidence of heteroscedasticity")

======================================================================
QUESTION 6: HETEROSCEDASTICITY TESTS
======================================================================

1. BREUSCH-PAGAN TEST
----------------------------------------------------------------------
H₀: Homoscedasticity (constant variance)
H₁: Heteroscedasticity (non-constant variance)

LM Statistic:    8.852945
LM P-value:      0.011957
F-statistic:     4.447230
F P-value:       0.011896

✓ REJECT H₀ (p-value = 0.011957 < 0.05)
  Evidence of heteroscedasticity

# @title
# Question 7: Test for autocorrelation

print("\n" + "="*70)
print("QUESTION 7: AUTOCORRELATION TESTS")
print("="*70)

residuals = model_1a.resid

# 1. Durbin-Watson Test (already in model summary)
print("\n1. DURBIN-WATSON TEST")
print("-"*70)
print("Tests for first-order autocorrelation (AR(1))")
print("H₀: No autocorrelation (ρ = 0)")
print("H₁: Autocorrelation present (ρ ≠ 0)")
print()

from statsmodels.stats.stattools import durbin_watson
dw_statistic = durbin_watson(residuals)

print(f"Durbin-Watson Statistic: {dw_statistic:.6f}")
print()
print("Interpretation:")
print("  DW ≈ 2.0  → No autocorrelation")
print("  DW < 2.0  → Positive autocorrelation")
print("  DW > 2.0  → Negative autocorrelation")
print("  DW ≈ 0    → Strong positive autocorrelation")
print("  DW ≈ 4    → Strong negative autocorrelation")

if 1.5 < dw_statistic < 2.5:
    print(f"\n✓ No strong evidence of autocorrelation (DW = {dw_statistic:.3f} ≈ 2)")
else:
    print(f"\n✗ Evidence of autocorrelation (DW = {dw_statistic:.3f} deviates from 2)")

# 2. Breusch-Godfrey Test (Lagrange Multiplier test)
print("\n" + "="*70)
print("2. BREUSCH-GODFREY TEST (LM Test)")
print("-"*70)
print("Tests for autocorrelation up to specified lag")
print("H₀: No autocorrelation up to lag p")
print("H₁: Autocorrelation present")
print()

# Test for autocorrelation up to lag 5
max_lag = 5
bg_test = acorr_breusch_godfrey(model_1a, nlags=max_lag)

print(f"Testing up to lag {max_lag}:")
print(f"LM Statistic:    {bg_test[0]:.6f}")
print(f"P-value:         {bg_test[1]:.6f}")
print(f"F-statistic:     {bg_test[2]:.6f}")
print(f"F P-value:       {bg_test[3]:.6f}")

alpha = 0.05
if bg_test[1] < alpha:
    print(f"\n✓ REJECT H₀ (p-value = {bg_test[1]:.6f} < 0.05)")
    print("  Evidence of autocorrelation")
else:
    print(f"\n✗ FAIL TO REJECT H₀ (p-value = {bg_test[1]:.6f} > 0.05)")
    print("  No evidence of autocorrelation")

======================================================================
QUESTION 7: AUTOCORRELATION TESTS
======================================================================

1. DURBIN-WATSON TEST
----------------------------------------------------------------------
Tests for first-order autocorrelation (AR(1))
H₀: No autocorrelation (ρ = 0)
H₁: Autocorrelation present (ρ ≠ 0)

Durbin-Watson Statistic: 2.262998

Interpretation:
  DW ≈ 2.0  → No autocorrelation
  DW < 2.0  → Positive autocorrelation
  DW > 2.0  → Negative autocorrelation
  DW ≈ 0    → Strong positive autocorrelation
  DW ≈ 4    → Strong negative autocorrelation

✓ No strong evidence of autocorrelation (DW = 2.263 ≈ 2)

======================================================================
2. BREUSCH-GODFREY TEST (LM Test)
----------------------------------------------------------------------
Tests for autocorrelation up to specified lag
H₀: No autocorrelation up to lag p
H₁: Autocorrelation present

Testing up to lag 5:
LM Statistic:    39.206704
P-value:         0.000000
F-statistic:     8.042280
F P-value:       0.000000

✓ REJECT H₀ (p-value = 0.000000 < 0.05)
  Evidence of autocorrelation

# @title
# Question 8: Newey-West HAC Standard Errors

print("\n" + "="*70)
print("QUESTION 8: NEWEY-WEST HAC STANDARD ERRORS")
print("="*70)

residuals = model_1a.resid
n_obs = len(returns)
maxlags = int(np.floor(4 * (n_obs / 100) ** (2/9)))

print(f"Sample size: {n_obs}")
print(f"Newey-West lags: {maxlags}")
print()

y = returns['rNVDA']
X = returns[['rSNP500', 'rGold']]
X = sm.add_constant(X)

# Estimate with Newey-West HAC standard errors
model_nw = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': maxlags})

print(model_nw.summary())

======================================================================
QUESTION 8: NEWEY-WEST HAC STANDARD ERRORS
======================================================================
Sample size: 1257
Newey-West lags: 7

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  rNVDA   R-squared:                       0.507
Model:                            OLS   Adj. R-squared:                  0.507
Method:                 Least Squares   F-statistic:                     40.13
Date:                Thu, 20 Nov 2025   Prob (F-statistic):           1.28e-17
Time:                        21:52:57   Log-Likelihood:                -1704.7
No. Observations:                1257   AIC:                             3415.
Df Residuals:                    1254   BIC:                             3431.
Df Model:                           2                                         
Covariance Type:                  HAC                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0221      0.028     -0.783      0.433      -0.078       0.033
rSNP500        0.0482      0.039      1.235      0.217      -0.028       0.125
rGold          0.2915      0.033      8.939      0.000       0.228       0.355
==============================================================================
Omnibus:                      252.639   Durbin-Watson:                   2.263
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             3755.271
Skew:                          -0.480   Prob(JB):                         0.00
Kurtosis:                      11.413   Cond. No.                         3.36
==============================================================================

Notes:
[1] Standard Errors are heteroscedasticity and autocorrelation robust (HAC) using 7 lags and without small sample correction

1a. Interpretation of Model 1a¶

1b. Model Comparison¶

2. Test of H0: β1 = 1¶

3. Joint Test of H0: β0 = β1 = β2 = 1¶

4. Residual Plot Interpretation¶

5. Normality Test¶

6. Heteroscedasticity Test¶

7. Autocorrelation Test¶

8. Newey West Robust Standard Errors¶