import math

import numpy as np
import pandas as pd
import sklearn.metrics
import tensorflow as tf
import xgboost as xgb

from sklearn.ensemble import (
    RandomForestRegressor,
    ExtraTreesRegressor,
    HistGradientBoostingRegressor,
    GradientBoostingRegressor
)
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.metrics import (
    mean_squared_error,
    r2_score,
    mean_absolute_error,
    mean_absolute_percentage_error,
    explained_variance_score
)
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR, NuSVR
from sklearn.tree import DecisionTreeRegressor



# Learn the per-column min/max from the training features only, then apply
# that same mapping to both splits, so the test features never influence the
# fitted scaler (transform only, never fit, on the test split).
# The rebuilt frames reuse the original column names; no index is passed to
# the DataFrame constructor, so row labels of X_train0 / X_test0 are not
# carried over when the scaler returns plain arrays (its default).
scaler = MinMaxScaler()
scaler.fit(X_train0)

X_train = pd.DataFrame(scaler.transform(X_train0), columns=X_train0.columns)
X_test = pd.DataFrame(scaler.transform(X_test0), columns=X_test0.columns)




def evaluate_model(y_test, y_pred):
    """Print a fixed set of regression metrics for one model's predictions.

    Prints, one per line: mean squared error, R2, mean absolute error,
    mean absolute percentage error, root mean squared error and explained
    variance.

    Args:
        y_test: Ground-truth target values.
        y_pred: Predicted target values, in the same order as ``y_test``.

    Returns:
        None. The metrics are only written to stdout.
    """
    # Score the MSE once and derive the RMSE from it instead of recomputing.
    mse = mean_squared_error(y_test, y_pred)

    print("Mean squared error: %.4f" % mse)
    print("R2 score : %.4f" % r2_score(y_test, y_pred))
    print("MAE : %.3f" % mean_absolute_error(y_test, y_pred))
    # scikit-learn reports MAPE as a fraction (it is not multiplied by 100).
    print("MAPE : %.3f" % mean_absolute_percentage_error(y_test, y_pred))
    print("RMSE : %.3f" % math.sqrt(mse))
    print("Explained variance : %.3f" % explained_variance_score(y_test, y_pred))

###############
### Decision Tree Regressor
# Single regression tree with the squared-error split criterion and a fixed
# seed, fitted on the scaled training features.
DTR = DecisionTreeRegressor(random_state=0, criterion='squared_error').fit(
    X_train, y_train
)

# Print the test-set predictions as a one-column table, then score them.
y_pred = DTR.predict(X_test)
print(
    pd.DataFrame(data=y_pred, columns=["Predicted"]).to_string(index=False)
)
evaluate_model(y_test, y_pred)

###############
### SVR
# Support vector regression with an RBF kernel; C, gamma and epsilon are
# fixed by hand rather than searched.
svr = SVR(C=80, epsilon=0.01, gamma=0.1, kernel='rbf').fit(X_train, y_train)

# Print the test-set predictions as a one-column table, then score them.
y_pred = svr.predict(X_test)
print(
    pd.DataFrame(data=y_pred, columns=["Predicted"]).to_string(index=False)
)
evaluate_model(y_test, y_pred)

###############
### NuSVR
# Nu-parameterised support vector regression with an RBF kernel; all
# hyperparameters are fixed by hand.
nsvr = NuSVR(nu=0.4, C=200, gamma=0.1, kernel='rbf', tol=0.003).fit(
    X_train, y_train
)

# Print the test-set predictions as a one-column table, then score them.
y_pred = nsvr.predict(X_test)
print(
    pd.DataFrame(data=y_pred, columns=["Predicted"]).to_string(index=False)
)
evaluate_model(y_test, y_pred)

###############
### Gaussian Process Regression
# This section used to appear twice verbatim; with a fixed random_state and
# unchanged data the second fit reproduced the first, so it is run only once.
#
# RBF kernel whose length scale starts at 1 and is bounded to (0, 5).
# NOTE(review): a lower bound of exactly 0 turns into -inf once the bounds are
# moved to log space; a small positive value (e.g. 1e-5) is probably what was
# intended -- confirm before changing, as it may affect the fitted kernel.
kernel = RBF(length_scale=1, length_scale_bounds=(0, 5))

gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.2, random_state=42)
gpr.fit(X_train, y_train)

# Print the test-set predictions as a one-column table, then score them.
y_pred = gpr.predict(X_test)
print(pd.DataFrame(y_pred, columns=["Predicted"]).to_string(index=False))
evaluate_model(y_test, y_pred)


###############
### XGBoost (Grid Search)
# Exhaustive 5-fold grid search over column subsampling, number of trees and
# tree depth, ranked by negative mean squared error.
gbm = xgb.XGBRegressor(objective='reg:squarederror')

param_grid = dict(
    colsample_bytree=np.linspace(0.5, 0.9, 5),
    n_estimators=[100, 200],
    max_depth=[10, 15, 20, 25],
)

grid = GridSearchCV(
    estimator=gbm,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=1,
).fit(X_train, y_train)

# Report the winning combination, then the predictions made through the
# fitted search object on the test features.
print(grid.best_params_)

y_pred = grid.predict(X_test)
print(
    pd.DataFrame(data=y_pred, columns=["Predicted"]).to_string(index=False)
)
evaluate_model(y_test, y_pred)

###############
### Random Forest (Random Search)
# Seed the forest itself so repeated runs give the same scores, in line with
# the other tree-based models in this script (random_state on the search
# below only fixes which parameter combinations are sampled).
rf = RandomForestRegressor(random_state=0)

param_grid = {
    'n_estimators': [20, 50, 100, 150, 200],
    # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3. For
    # regressors it meant "use all features", which is spelled 1.0 (a float
    # fraction; the integer 1 would mean a single feature).
    'max_features': [1.0, 'sqrt'],
    'max_depth': list(range(10, 110, 10)) + [None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Sample 200 combinations from the grid above and score each with 5-fold CV,
# using all available cores.
rf_random = RandomizedSearchCV(
    rf,
    param_distributions=param_grid,
    n_iter=200,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1
)

rf_random.fit(X_train, y_train)

# Print the test-set predictions as a one-column table, then score them.
y_pred = rf_random.predict(X_test)
print(pd.DataFrame(y_pred, columns=["Predicted"]).to_string(index=False))
evaluate_model(y_test, y_pred)

###############
### Extra Trees Regressor
# Small extra-trees ensemble (7 trees, depth capped at 30, 7 candidate
# features per split) with a fixed seed.
etr = ExtraTreesRegressor(
    random_state=0,
    n_estimators=7,
    max_features=7,
    max_depth=30,
).fit(X_train, y_train)

# Print the test-set predictions as a one-column table, then score them.
y_pred = etr.predict(X_test)
print(
    pd.DataFrame(data=y_pred, columns=["Predicted"]).to_string(index=False)
)
evaluate_model(y_test, y_pred)

###############
### HistGradientBoosting
# Histogram-based gradient boosting with squared-error loss and a fixed seed;
# all hyperparameters are set by hand.
hgbr = HistGradientBoostingRegressor(
    random_state=0,
    loss='squared_error',
    max_iter=95,
    learning_rate=0.77,
    max_bins=25,
    max_leaf_nodes=20,
    min_samples_leaf=30,
    l2_regularization=1,
).fit(X_train, y_train)

# Print the test-set predictions as a one-column table, then score them.
y_pred = hgbr.predict(X_test)
print(
    pd.DataFrame(data=y_pred, columns=["Predicted"]).to_string(index=False)
)
evaluate_model(y_test, y_pred)

###############
### Gradient Boosting Regressor
# Gradient boosting with squared-error loss and a fixed seed. Early stopping
# is enabled: 20% of the training data is held out for validation and
# boosting stops after 5 iterations without improvement.
gbr = GradientBoostingRegressor(
    random_state=0,
    loss='squared_error',
    learning_rate=0.1,
    min_samples_leaf=15,
    max_leaf_nodes=20,
    n_iter_no_change=5,
    validation_fraction=0.2,
).fit(X_train, y_train)

# Print the test-set predictions as a one-column table, then score them.
y_pred = gbr.predict(X_test)
print(
    pd.DataFrame(data=y_pred, columns=["Predicted"]).to_string(index=False)
)
evaluate_model(y_test, y_pred)

###############
### ANN (Keras)
# Feed-forward network: three ReLU hidden layers of 32 units followed by a
# single linear output unit.
ann = tf.keras.models.Sequential(
    [tf.keras.layers.Dense(32, activation="relu") for _ in range(3)]
    + [tf.keras.layers.Dense(1)]
)

# Train with Adam on mean squared error for 100 epochs in mini-batches of 8.
ann.compile(loss="mse", optimizer="adam")
ann.fit(X_train, y_train, epochs=100, batch_size=8)

# Print the test-set predictions as a one-column table, then score them.
y_pred = ann.predict(X_test)
print(
    pd.DataFrame(data=y_pred, columns=["Predicted"]).to_string(index=False)
)
evaluate_model(y_test, y_pred)
###############

