giovedì 6 febbraio 2025

Bayesian linear regression

 






import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Load CSV data
def load_data(csv_path):
data = pd.read_csv(csv_path)
return data


# Define the likelihood function (Gaussian likelihood)
def likelihood(y, X, beta):
predictions = np.dot(X, beta)
return -0.5 * np.sum((y - predictions) ** 2)


# Define the prior function (Normal prior)
def prior(beta, sigma=10):
return -0.5 * np.sum(beta ** 2) / (sigma ** 2)


# Define the proposal distribution (random walk)
def proposal(beta, step_size=0.1):
return beta + np.random.normal(0, step_size, size=beta.shape)


# Metropolis-Hastings sampler
def metropolis_hastings(X, y, initial_beta, n_samples, step_size=0.1):
beta = initial_beta
samples = []

for _ in range(n_samples):
# Propose new beta
beta_new = proposal(beta, step_size)

# Compute likelihood and prior for current and proposed beta
current_likelihood = likelihood(y, X, beta) + prior(beta)
new_likelihood = likelihood(y, X, beta_new) + prior(beta_new)

# Metropolis-Hastings acceptance step
acceptance_ratio = min(1, np.exp(new_likelihood - current_likelihood))

# Accept or reject the proposed beta
if np.random.rand() < acceptance_ratio:
beta = beta_new

samples.append(beta)

return np.array(samples)


# Prediction function based on posterior samples
def predict(X, posterior_samples):
predictions = np.dot(X, posterior_samples.T)
return predictions.mean(axis=1), predictions.std(axis=1)


def predictLeastSquare(X, beta):
# Add intercept term to the new data
X_with_intercept = np.hstack([np.ones((X.shape[0], 1)), X])
return X_with_intercept @ beta

# Forecast function for new input data
def forecast(X_new, posterior_samples):
# X_new is the new data for which we want to forecast
return predict(X_new, posterior_samples)


def linear_least_squares(X, y):
# Add intercept term (column of ones)
X_with_intercept = np.hstack([np.ones((X.shape[0], 1)), X])

# Compute the coefficients using the normal equation: β = (X^T X)^(-1) X^T y
beta = np.linalg.inv(X_with_intercept.T @ X_with_intercept) @ X_with_intercept.T @ y
return beta

# Main function
def run_bayesian_linear_regression(csv_path, n_samples=1000000, step_size=0.1):
# Load data from CSV
data = load_data(csv_path)

# Assuming the last column is the target 'y' and the rest are features 'X'
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

beta = linear_least_squares(X, y)
y_pred = predictLeastSquare(X, beta)

# Add a column of ones for the intercept term (beta_0)
X = np.hstack([np.ones((X.shape[0], 1)), X])

# Initialize the parameters (beta_0, beta_1, ...)
initial_beta = np.zeros(X.shape[1])

# Perform Metropolis-Hastings sampling
posterior_samples = metropolis_hastings(X, y, initial_beta, n_samples, step_size)

# Get predictions
mean_predictions, std_predictions = predict(X, posterior_samples)


# Plot results
plt.figure(figsize=(10, 6))
plt.plot(y, label="True values")
plt.plot(mean_predictions, label="Bayesian Regr")
plt.xlabel("Time")
plt.ylabel("1/V")
plt.fill_between(range(len(y)), mean_predictions - std_predictions,
mean_predictions + std_predictions, alpha=0.3, label="Err")
plt.plot(y_pred, label="Least Square", color='red')

plt.legend()
plt.show()

return posterior_samples


# Usage example
csv_path = 'dati.csv' # Replace with the path to your CSV file
posterior_samples = run_bayesian_linear_regression(csv_path)


new_data = pd.DataFrame({
'feature1': [30, 35] # Replace with your features
})

X_new = np.hstack([np.ones((new_data.shape[0], 1)), new_data.values])

# Forecast values using the posterior samples
mean_forecast, std_forecast = forecast(X_new, posterior_samples)

# Output the forecasts
print("Forecasted values (mean):", mean_forecast)
print("Forecasted values (95% CI):", mean_forecast - 1.96 * std_forecast, mean_forecast + 1.96 * std_forecast)

Bayesian linear regression

  import sys import numpy as np import pandas as pd import matplotlib.pyplot as plt # Load CSV data def load_data (csv_path): data = pd....