import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


import warnings
# Suppress every warning category for the remainder of the run.
# NOTE: this is a blanket filter, so deprecation and library warnings are
# hidden as well.
warnings.simplefilter("ignore")

def expensive_function(x, noise_std=0.1, rng=None):
    """Evaluate a noisy sine: ``sin(x)`` plus zero-mean Gaussian noise.

    Args:
        x: Scalar or array-like of input locations. It is converted with
            ``np.asarray``, so lists, tuples and Python scalars are accepted
            in addition to NumPy arrays.
        noise_std: Standard deviation of the additive Gaussian noise.
            Defaults to 0.1; pass 0 for a noise-free evaluation.
        rng: Optional ``numpy.random.Generator`` used to draw the noise.
            When ``None`` (the default) the noise comes from NumPy's global
            legacy random state, so ``np.random.seed`` controls it.

    Returns:
        ``sin(x) + noise_std * noise`` with the same shape as ``x``.
    """
    x = np.asarray(x)
    if rng is None:
        # Keep drawing from the global state by default so existing callers
        # (and any np.random.seed they rely on) behave exactly as before.
        noise = np.random.randn(*x.shape)
    else:
        noise = rng.standard_normal(x.shape)
    return np.sin(x) + noise_std * noise
# Seed NumPy's global RNG so the noisy training targets are reproducible.
# The forest below is already seeded through random_state, but the noise
# drawn inside expensive_function was not, so the plot and the reported
# error changed on every run.
np.random.seed(42)

# Sample the function at 20 evenly spaced points on [0, 10]; the reshape
# yields the 2-D (n_samples, 1) layout the regressor is fitted on.
X_train = np.linspace(0, 10, 20).reshape(-1, 1)
y_train = expensive_function(X_train)

# Fit the surrogate; targets are flattened to 1-D for single-output
# regression.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train.ravel())

# Dense grid of 200 points on [0, 10] used to evaluate the fitted surrogate.
X_test = np.linspace(0, 10, 200).reshape(-1, 1)
# Noise-free reference values, flattened to 1-D so they share the shape of
# the model predictions (the model was fitted on 1-D targets). Leaving this
# as (200, 1) would make any elementwise comparison with y_pred broadcast to
# a (200, 200) array.
y_true = np.sin(X_test).ravel()
y_pred = rf_model.predict(X_test)

# Draw the noise-free curve, the training samples and the surrogate's
# predictions on a single set of axes.
ax = plt.figure(figsize=(10, 5)).gca()
ax.plot(X_test, y_true, color="green", label="True Function (sin(x))")
ax.scatter(X_train, y_train, color="black", label="Training Data")
ax.plot(X_test, y_pred, color="red", label="Surrogate Model(Random Forest)")
ax.set_title("Random Forest as a Surrogate Model")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
ax.grid(True)
plt.show()

# Report the surrogate's error against the noise-free function on the test
# grid.
mse = mean_squared_error(y_true, y_pred)
print(f"Mean Squared Error of Surrogate Model: {mse:.4f}")