{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "d01778f4",
   "metadata": {},
   "source": [
    "# Regression with GUIDE\n",
    "\n",
    "This notebook demonstrates how to use `GuideTreeRegressor`, `GuideGradientBoostingRegressor`, and `GuideRandomForestRegressor` on the **Diabetes** dataset.\n",
    "\n",
    "We will compare (in the order of the sections below):\n",
    "1. **Single GUIDE Tree:** Interpretable, unbiased variable selection.\n",
    "2. **GUIDE Gradient Boosting:** High-performance ensemble.\n",
    "3. **GUIDE Random Forest:** Robust ensemble.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3f462bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from sklearn.datasets import load_diabetes\n",
    "from sklearn.metrics import r2_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from pyguide import (\n",
    "    GuideGradientBoostingRegressor,\n",
    "    GuideRandomForestRegressor,\n",
    "    GuideTreeRegressor,\n",
    "    plot_tree,\n",
    ")\n",
    "\n",
    "# Load Data\n",
    "X, y = load_diabetes(return_X_y=True, as_frame=True)\n",
    "feature_names = X.columns\n",
    "\n",
    "# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
    "\n",
    "print(f\"Training samples: {len(X_train)}\")\n",
    "print(f\"Features: {len(feature_names)}\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a3e038ac",
   "metadata": {},
   "source": [
    "## 1. Single GUIDE Tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e011813",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize GUIDE Regressor\n",
    "# interaction_depth=1 enables pairwise interaction detection\n",
    "reg = GuideTreeRegressor(max_depth=3, interaction_depth=1)\n",
    "reg.fit(X_train, y_train)\n",
    "\n",
    "# Evaluate on the held-out test split\n",
    "y_pred = reg.predict(X_test)\n",
    "r2 = r2_score(y_test, y_pred)\n",
    "print(f\"Single Tree R2: {r2:.4f}\")\n",
    "\n",
    "# Visualize the fitted tree\n",
    "plt.figure(figsize=(12, 8))\n",
    "plot_tree(reg, feature_names=feature_names, fontsize=10)\n",
    "plt.title(\"GUIDE Regression Tree (Diabetes)\")\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "391dfea1",
   "metadata": {},
   "source": [
    "## 2. GUIDE Gradient Boosting\n",
    "\n",
    "Gradient Boosting builds trees sequentially to correct the errors of previous trees.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5634b857",
   "metadata": {},
   "outputs": [],
   "source": [
    "gbm = GuideGradientBoostingRegressor(\n",
    "    n_estimators=100,\n",
    "    learning_rate=0.1,\n",
    "    max_depth=3,\n",
    "    subsample=0.8,\n",
    "    random_state=42\n",
    ")\n",
    "gbm.fit(X_train, y_train)\n",
    "\n",
    "# Evaluate on the same held-out test split as the single tree\n",
    "y_pred_gbm = gbm.predict(X_test)\n",
    "r2_gbm = r2_score(y_test, y_pred_gbm)\n",
    "print(f\"Gradient Boosting R2: {r2_gbm:.4f}\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6fc4f4ba",
   "metadata": {},
   "source": [
    "## 3. Comparison with Random Forest\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa5267c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "rf = GuideRandomForestRegressor(n_estimators=100, max_depth=5, random_state=42)\n",
    "rf.fit(X_train, y_train)\n",
    "\n",
    "y_pred_rf = rf.predict(X_test)\n",
    "r2_rf = r2_score(y_test, y_pred_rf)\n",
    "print(f\"Random Forest R2: {r2_rf:.4f}\")\n",
    "\n",
    "# Side-by-side summary; r2 and r2_gbm were computed in sections 1 and 2\n",
    "print(\"\\nTest-set R2 summary:\")\n",
    "for label, score in [\n",
    "    (\"Single Tree\", r2),\n",
    "    (\"Gradient Boosting\", r2_gbm),\n",
    "    (\"Random Forest\", r2_rf),\n",
    "]:\n",
    "    print(f\"  {label:<18} {score:.4f}\")\n"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
}