Abdelrahma12 committed on
Commit
ea29aa5
·
verified ·
1 Parent(s): 2d20584

Upload Heart.ipynb

Browse files
Files changed (1) hide show
  1. Heart.ipynb +920 -0
Heart.ipynb ADDED
@@ -0,0 +1,920 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "source": [
6
+ "from google.colab import drive\n",
7
+ "drive.mount('/content/drive')"
8
+ ],
9
+ "metadata": {
10
+ "colab": {
11
+ "base_uri": "https://localhost:8080/"
12
+ },
13
+ "id": "aifgSPecKkfY",
14
+ "outputId": "9db7f3b7-2a36-42b6-8eb3-6ca07425437d"
15
+ },
16
+ "id": "aifgSPecKkfY",
17
+ "execution_count": null,
18
+ "outputs": [
19
+ {
20
+ "output_type": "stream",
21
+ "name": "stdout",
22
+ "text": [
23
+ "Mounted at /content/drive\n"
24
+ ]
25
+ }
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "markdown",
30
+ "id": "aacf5211",
31
+ "metadata": {
32
+ "id": "aacf5211"
33
+ },
34
+ "source": [
35
+ "### Importing Libraries"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": null,
41
+ "id": "24577b88",
42
+ "metadata": {
43
+ "id": "24577b88"
44
+ },
45
+ "outputs": [],
46
+ "source": [
47
+ "import numpy as np\n",
48
+ "import pandas as pd\n",
49
+ "import matplotlib.pyplot as plt\n",
50
+ "import seaborn as sns\n",
51
+ "from sklearn.model_selection import GridSearchCV\n",
52
+ "from sklearn.linear_model import LogisticRegression\n",
53
+ "from sklearn.ensemble import RandomForestClassifier\n",
54
+ "from sklearn.neural_network import MLPClassifier\n",
55
+ "from sklearn.neighbors import KNeighborsClassifier\n",
56
+ "from xgboost import XGBClassifier\n",
57
+ "from sklearn.svm import SVC\n",
58
+ "from sklearn.metrics import accuracy_score, classification_report\n",
59
+ "import warnings\n",
60
+ "warnings.filterwarnings('ignore')"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "markdown",
65
+ "id": "d70990dc",
66
+ "metadata": {
67
+ "id": "d70990dc"
68
+ },
69
+ "source": [
70
+ "### Data Load"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": null,
76
+ "id": "3de86ddb",
77
+ "metadata": {
78
+ "id": "3de86ddb",
79
+ "colab": {
80
+ "base_uri": "https://localhost:8080/",
81
+ "height": 321
82
+ },
83
+ "outputId": "05c87a38-574b-4a6d-bb07-1edd7a9afd42"
84
+ },
85
+ "outputs": [
86
+ {
87
+ "output_type": "error",
88
+ "ename": "FileNotFoundError",
89
+ "evalue": "[Errno 2] No such file or directory: '/content/drive/MyDrive/heart_failure_clinical_records_dataset.csv'",
90
+ "traceback": [
91
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
92
+ "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
93
+ "\u001b[0;32m/tmp/ipython-input-4048807198.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mr'/content/drive/MyDrive/heart_failure_clinical_records_dataset.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
94
+ "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m 1024\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1025\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1026\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1027\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1028\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
95
+ "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 618\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 619\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 620\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 621\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 622\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
96
+ "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 1618\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1619\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandles\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mIOHandles\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1620\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1621\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1622\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
97
+ "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1879\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"b\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1880\u001b[0;31m self.handles = get_handle(\n\u001b[0m\u001b[1;32m 1881\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1882\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
98
+ "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 871\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 872\u001b[0m \u001b[0;31m# Encoding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 873\u001b[0;31m handle = open(\n\u001b[0m\u001b[1;32m 874\u001b[0m \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 875\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
99
+ "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/content/drive/MyDrive/heart_failure_clinical_records_dataset.csv'"
100
+ ]
101
+ }
102
+ ],
103
+ "source": [
104
+ "data = pd.read_csv(r'/content/drive/MyDrive/heart_failure_clinical_records_dataset.csv')"
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "markdown",
109
+ "source": [
110
+ "### Exploratory Data Analysis"
111
+ ],
112
+ "metadata": {
113
+ "id": "P20f_aZ0nanU"
114
+ },
115
+ "id": "P20f_aZ0nanU"
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "source": [
120
+ "data"
121
+ ],
122
+ "metadata": {
123
+ "id": "R0JxTMpInaUs"
124
+ },
125
+ "id": "R0JxTMpInaUs",
126
+ "execution_count": null,
127
+ "outputs": []
128
+ },
129
+ {
130
+ "cell_type": "code",
131
+ "execution_count": null,
132
+ "id": "c7f83776",
133
+ "metadata": {
134
+ "id": "c7f83776"
135
+ },
136
+ "outputs": [],
137
+ "source": [
138
+ "data.head()"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": null,
144
+ "id": "ac3d6a1e",
145
+ "metadata": {
146
+ "id": "ac3d6a1e"
147
+ },
148
+ "outputs": [],
149
+ "source": [
150
+ "data.info()"
151
+ ]
152
+ },
153
+ {
154
+ "cell_type": "code",
155
+ "execution_count": null,
156
+ "id": "e754b5e8",
157
+ "metadata": {
158
+ "id": "e754b5e8"
159
+ },
160
+ "outputs": [],
161
+ "source": [
162
+ "data.isnull().sum()"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": null,
168
+ "id": "e95bcd68",
169
+ "metadata": {
170
+ "id": "e95bcd68"
171
+ },
172
+ "outputs": [],
173
+ "source": [
174
+ "data.duplicated().sum()"
175
+ ]
176
+ },
177
+ {
178
+ "cell_type": "code",
179
+ "execution_count": null,
180
+ "id": "2ce23598",
181
+ "metadata": {
182
+ "id": "2ce23598"
183
+ },
184
+ "outputs": [],
185
+ "source": [
186
+ "labels = [\"40-45\", \"46-50\", \"51-55\", \"56-60\", \"61-65\", \"66-70\", \"71-75\", \"76-80\", \"81-95\"]\n",
187
+ "data['age_group'] = pd.cut(data['age'], bins=[40, 45, 50, 55, 60, 65, 70, 75, 80, 95], labels=labels)"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "markdown",
192
+ "id": "852a3203",
193
+ "metadata": {
194
+ "id": "852a3203"
195
+ },
196
+ "source": [
197
+ "### Data Visualization"
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "code",
202
+ "execution_count": null,
203
+ "id": "fc5f6131",
204
+ "metadata": {
205
+ "id": "fc5f6131"
206
+ },
207
+ "outputs": [],
208
+ "source": [
209
+ "plt.figure(figsize=(10,6))\n",
210
+ "sns.countplot(data=data, x='age_group', hue='DEATH_EVENT', palette=[\"lightblue\", \"red\"])\n",
211
+ "plt.title(\"Death Count by Age Group\")\n",
212
+ "plt.xlabel(\"Age Group\")\n",
213
+ "plt.ylabel(\"Patient Count\")\n",
214
+ "plt.legend([\"Survived\", \"Died\"])\n",
215
+ "plt.show()"
216
+ ]
217
+ },
218
+ {
219
+ "cell_type": "code",
220
+ "source": [
221
+ "corr_matrix = data.drop(columns=['age_group']).corr()\n",
222
+ "plt.figure(figsize=(12, 10))\n",
223
+ "sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=\".2f\")\n",
224
+ "plt.title('Correlation Matrix of Heart Failure Clinical Records')\n",
225
+ "plt.show()"
226
+ ],
227
+ "metadata": {
228
+ "id": "687Lx-xInvLN"
229
+ },
230
+ "id": "687Lx-xInvLN",
231
+ "execution_count": null,
232
+ "outputs": []
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "source": [
237
+ "death_counts = data['DEATH_EVENT'].value_counts()\n",
238
+ "plt.figure(figsize=(6, 6))\n",
239
+ "plt.pie(death_counts, labels=['Not Died', 'Died'], autopct='%1.1f%%', startangle=90, colors=['skyblue', 'lightcoral'])\n",
240
+ "plt.title('Distribution of DEATH_EVENT')\n",
241
+ "plt.show()"
242
+ ],
243
+ "metadata": {
244
+ "id": "CFGNvM9un7CB"
245
+ },
246
+ "id": "CFGNvM9un7CB",
247
+ "execution_count": null,
248
+ "outputs": []
249
+ },
250
+ {
251
+ "cell_type": "code",
252
+ "source": [
253
+ "# Select a subset of numerical features that showed some correlation with DEATH_EVENT\n",
254
+ "selected_features = ['time', 'serum_creatinine', 'ejection_fraction', 'age', 'serum_sodium', 'DEATH_EVENT']\n",
255
+ "\n",
256
+ "sns.pairplot(data[selected_features], hue='DEATH_EVENT', diag_kind='kde')\n",
257
+ "plt.suptitle('Pairplot of Selected Numerical Features by DEATH_EVENT', y=1.02)\n",
258
+ "plt.show()"
259
+ ],
260
+ "metadata": {
261
+ "id": "akxmasIGn_Ps"
262
+ },
263
+ "id": "akxmasIGn_Ps",
264
+ "execution_count": null,
265
+ "outputs": []
266
+ },
267
+ {
268
+ "cell_type": "markdown",
269
+ "source": [
270
+ "# Data Preprocessing"
271
+ ],
272
+ "metadata": {
273
+ "id": "lAmTgq0AoJbP"
274
+ },
275
+ "id": "lAmTgq0AoJbP"
276
+ },
277
+ {
278
+ "cell_type": "markdown",
279
+ "id": "6318b50d",
280
+ "metadata": {
281
+ "id": "6318b50d"
282
+ },
283
+ "source": [
284
+ "### Data Split\n"
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": null,
290
+ "id": "f9bbf4a6",
291
+ "metadata": {
292
+ "id": "f9bbf4a6"
293
+ },
294
+ "outputs": [],
295
+ "source": [
296
+ "data.drop(columns=['age_group'], inplace=True)"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": null,
302
+ "id": "67245c6b",
303
+ "metadata": {
304
+ "id": "67245c6b"
305
+ },
306
+ "outputs": [],
307
+ "source": [
308
+ "X = data.drop('DEATH_EVENT', axis=1)\n",
309
+ "y = data['DEATH_EVENT']\n",
310
+ "from sklearn.model_selection import train_test_split\n",
311
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
312
+ ]
313
+ },
314
+ {
315
+ "cell_type": "markdown",
316
+ "source": [
317
+ "### Feature Scaling"
318
+ ],
319
+ "metadata": {
320
+ "id": "9RC0CaRQoPSL"
321
+ },
322
+ "id": "9RC0CaRQoPSL"
323
+ },
324
+ {
325
+ "cell_type": "code",
326
+ "execution_count": null,
327
+ "id": "eff46e4d",
328
+ "metadata": {
329
+ "id": "eff46e4d"
330
+ },
331
+ "outputs": [],
332
+ "source": [
333
+ "from sklearn.preprocessing import StandardScaler\n",
334
+ "scaler = StandardScaler()\n",
335
+ "continuous_features = ['age', 'creatinine_phosphokinase', 'ejection_fraction', 'platelets', 'serum_creatinine', 'serum_sodium', 'time']\n",
336
+ "X_train[continuous_features] = scaler.fit_transform(X_train[continuous_features])\n",
337
+ "X_test[continuous_features] = scaler.transform(X_test[continuous_features])"
338
+ ]
339
+ },
340
+ {
341
+ "cell_type": "markdown",
342
+ "source": [
343
+ "# Modeling"
344
+ ],
345
+ "metadata": {
346
+ "id": "RgfpGCrFoYYo"
347
+ },
348
+ "id": "RgfpGCrFoYYo"
349
+ },
350
+ {
351
+ "cell_type": "markdown",
352
+ "id": "c6c49e64",
353
+ "metadata": {
354
+ "id": "c6c49e64"
355
+ },
356
+ "source": [
357
+ "### Logistic Regression"
358
+ ]
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "execution_count": null,
363
+ "id": "c65331a8",
364
+ "metadata": {
365
+ "id": "c65331a8"
366
+ },
367
+ "outputs": [],
368
+ "source": [
369
+ "log_params = {\n",
370
+ " 'penalty': ['l1', 'l2', 'elasticnet', 'none'],\n",
371
+ " 'C': [0.01, 0.1, 1, 10, 100],\n",
372
+ " 'solver': ['lbfgs', 'saga'],\n",
373
+ " 'max_iter': [1000]\n",
374
+ "}\n",
375
+ "\n",
376
+ "log_grid = GridSearchCV(LogisticRegression(random_state=42), log_params, cv=5)\n",
377
+ "log_grid.fit(X_train, y_train)\n",
378
+ "\n",
379
+ "print(\" Logistic Regression Best Params:\", log_grid.best_params_)"
380
+ ]
381
+ },
382
+ {
383
+ "cell_type": "markdown",
384
+ "source": [
385
+ "#### Evaluation"
386
+ ],
387
+ "metadata": {
388
+ "id": "A7F1ne-9okC3"
389
+ },
390
+ "id": "A7F1ne-9okC3"
391
+ },
392
+ {
393
+ "cell_type": "code",
394
+ "execution_count": null,
395
+ "id": "bb425d64",
396
+ "metadata": {
397
+ "id": "bb425d64"
398
+ },
399
+ "outputs": [],
400
+ "source": [
401
+ "log_model = LogisticRegression(\n",
402
+ " penalty='l2',\n",
403
+ " C=0.1,\n",
404
+ " solver='lbfgs',\n",
405
+ " max_iter=1000,\n",
406
+ " random_state=42\n",
407
+ ")\n",
408
+ "log_model.fit(X_train, y_train)\n",
409
+ "y_pred_log = log_model.predict(X_test)\n",
410
+ "print(\" Logistic Regression\")\n",
411
+ "print(f\"Accuracy: {accuracy_score(y_test, y_pred_log):.4f}\")\n",
412
+ "print(classification_report(y_test, y_pred_log))"
413
+ ]
414
+ },
415
+ {
416
+ "cell_type": "markdown",
417
+ "id": "9ec5c7bd",
418
+ "metadata": {
419
+ "id": "9ec5c7bd"
420
+ },
421
+ "source": [
422
+ "### Random Forest"
423
+ ]
424
+ },
425
+ {
426
+ "cell_type": "code",
427
+ "execution_count": null,
428
+ "id": "355a5349",
429
+ "metadata": {
430
+ "id": "355a5349"
431
+ },
432
+ "outputs": [],
433
+ "source": [
434
+ "rf_params = {\n",
435
+ " 'n_estimators': [50, 100, 200],\n",
436
+ " 'max_depth': [None, 5, 10],\n",
437
+ " 'min_samples_split': [2, 5],\n",
438
+ " 'min_samples_leaf': [1, 2]\n",
439
+ "}\n",
440
+ "\n",
441
+ "rf_grid = GridSearchCV(RandomForestClassifier(random_state=42), rf_params, cv=5)\n",
442
+ "rf_grid.fit(X_train, y_train)\n",
443
+ "\n",
444
+ "print(\" Random Forest Best Params:\", rf_grid.best_params_)\n"
445
+ ]
446
+ },
447
+ {
448
+ "cell_type": "markdown",
449
+ "source": [
450
+ "#### Evaluation"
451
+ ],
452
+ "metadata": {
453
+ "id": "ZgnqGv2_onMp"
454
+ },
455
+ "id": "ZgnqGv2_onMp"
456
+ },
457
+ {
458
+ "cell_type": "code",
459
+ "execution_count": null,
460
+ "id": "7a814143",
461
+ "metadata": {
462
+ "id": "7a814143"
463
+ },
464
+ "outputs": [],
465
+ "source": [
466
+ "rf_model = RandomForestClassifier(\n",
467
+ " n_estimators=50, max_depth=5,\n",
468
+ " min_samples_leaf=2, min_samples_split=5,\n",
469
+ " random_state=42\n",
470
+ ")\n",
471
+ "rf_model.fit(X_train, y_train)\n",
472
+ "y_pred_rf = rf_model.predict(X_test)\n",
473
+ "print(\" Random Forest\")\n",
474
+ "print(f\"Accuracy: {accuracy_score(y_test, y_pred_rf):.4f}\")\n",
475
+ "print(classification_report(y_test, y_pred_rf))\n"
476
+ ]
477
+ },
478
+ {
479
+ "cell_type": "markdown",
480
+ "id": "8ae23a4c",
481
+ "metadata": {
482
+ "id": "8ae23a4c"
483
+ },
484
+ "source": [
485
+ "### SVM"
486
+ ]
487
+ },
488
+ {
489
+ "cell_type": "code",
490
+ "execution_count": null,
491
+ "id": "98d79b19",
492
+ "metadata": {
493
+ "id": "98d79b19"
494
+ },
495
+ "outputs": [],
496
+ "source": [
497
+ "svm_params = {\n",
498
+ " 'kernel': ['linear', 'rbf'],\n",
499
+ " 'C': [0.1, 1, 10],\n",
500
+ " 'gamma': ['scale', 'auto']\n",
501
+ "}\n",
502
+ "\n",
503
+ "svm_grid = GridSearchCV(SVC(probability=True, random_state=42), svm_params, cv=5)\n",
504
+ "svm_grid.fit(X_train, y_train)\n",
505
+ "\n",
506
+ "print(\" SVM Best Params:\", svm_grid.best_params_)"
507
+ ]
508
+ },
509
+ {
510
+ "cell_type": "markdown",
511
+ "source": [
512
+ "#### Evaluation"
513
+ ],
514
+ "metadata": {
515
+ "id": "lGcRpN66oqox"
516
+ },
517
+ "id": "lGcRpN66oqox"
518
+ },
519
+ {
520
+ "cell_type": "code",
521
+ "execution_count": null,
522
+ "id": "5b3f845f",
523
+ "metadata": {
524
+ "id": "5b3f845f"
525
+ },
526
+ "outputs": [],
527
+ "source": [
528
+ "svm_model = SVC(\n",
529
+ " C=0.1, gamma='scale', kernel='linear',\n",
530
+ " probability=True, random_state=42\n",
531
+ ")\n",
532
+ "svm_model.fit(X_train, y_train)\n",
533
+ "y_pred_svm = svm_model.predict(X_test)\n",
534
+ "print(\"\\n SVM\")\n",
535
+ "print(f\"Accuracy: {accuracy_score(y_test, y_pred_svm):.4f}\")\n",
536
+ "print(classification_report(y_test, y_pred_svm))"
537
+ ]
538
+ },
539
+ {
540
+ "cell_type": "markdown",
541
+ "id": "397c4db9",
542
+ "metadata": {
543
+ "id": "397c4db9"
544
+ },
545
+ "source": [
546
+ "### MLP"
547
+ ]
548
+ },
549
+ {
550
+ "cell_type": "code",
551
+ "execution_count": null,
552
+ "id": "161c3769",
553
+ "metadata": {
554
+ "id": "161c3769"
555
+ },
556
+ "outputs": [],
557
+ "source": [
558
+ "mlp_params = {\n",
559
+ " 'hidden_layer_sizes': [(64,), (64, 32), (128, 64)],\n",
560
+ " 'activation': ['relu', 'tanh'],\n",
561
+ " 'alpha': [0.0001, 0.001],\n",
562
+ " 'learning_rate': ['constant', 'adaptive']\n",
563
+ "}\n",
564
+ "\n",
565
+ "mlp_grid = GridSearchCV(MLPClassifier(max_iter=1000, random_state=42), mlp_params, cv=5)\n",
566
+ "mlp_grid.fit(X_train, y_train)\n",
567
+ "\n",
568
+ "print(\" MLP Best Params:\", mlp_grid.best_params_)\n"
569
+ ]
570
+ },
571
+ {
572
+ "cell_type": "markdown",
573
+ "source": [
574
+ "#### Evaluation"
575
+ ],
576
+ "metadata": {
577
+ "id": "xP9abpojovRZ"
578
+ },
579
+ "id": "xP9abpojovRZ"
580
+ },
581
+ {
582
+ "cell_type": "code",
583
+ "execution_count": null,
584
+ "id": "c3f80cb8",
585
+ "metadata": {
586
+ "id": "c3f80cb8"
587
+ },
588
+ "outputs": [],
589
+ "source": [
590
+ "mlp_model = MLPClassifier(\n",
591
+ " hidden_layer_sizes=(64, 32),\n",
592
+ " activation='tanh',\n",
593
+ " alpha=0.0001,\n",
594
+ " learning_rate='constant',\n",
595
+ " max_iter=1000,\n",
596
+ " random_state=42\n",
597
+ ")\n",
598
+ "mlp_model.fit(X_train, y_train)\n",
599
+ "y_pred_mlp = mlp_model.predict(X_test)\n",
600
+ "print(\"\\n MLP Neural Network\")\n",
601
+ "print(f\"Accuracy: {accuracy_score(y_test, y_pred_mlp):.4f}\")\n",
602
+ "print(classification_report(y_test, y_pred_mlp))"
603
+ ]
604
+ },
605
+ {
606
+ "cell_type": "markdown",
607
+ "id": "26b1f47b",
608
+ "metadata": {
609
+ "id": "26b1f47b"
610
+ },
611
+ "source": [
612
+ "### XGBoost"
613
+ ]
614
+ },
615
+ {
616
+ "cell_type": "code",
617
+ "execution_count": null,
618
+ "id": "c2cccaf0",
619
+ "metadata": {
620
+ "id": "c2cccaf0"
621
+ },
622
+ "outputs": [],
623
+ "source": [
624
+ "xgb_params = {\n",
625
+ " 'n_estimators': [50, 100, 200],\n",
626
+ " 'max_depth': [3, 4, 5],\n",
627
+ " 'learning_rate': [0.01, 0.1, 0.2]\n",
628
+ "}\n",
629
+ "\n",
630
+ "xgb_grid = GridSearchCV(\n",
631
+ " XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42),\n",
632
+ " xgb_params, cv=5\n",
633
+ ")\n",
634
+ "xgb_grid.fit(X_train, y_train)\n",
635
+ "\n",
636
+ "print(\" XGBoost Best Params:\", xgb_grid.best_params_)\n"
637
+ ]
638
+ },
639
+ {
640
+ "cell_type": "markdown",
641
+ "source": [
642
+ "#### Evaluation"
643
+ ],
644
+ "metadata": {
645
+ "id": "gzj365Wkoyni"
646
+ },
647
+ "id": "gzj365Wkoyni"
648
+ },
649
+ {
650
+ "cell_type": "code",
651
+ "execution_count": null,
652
+ "id": "01cefcfa",
653
+ "metadata": {
654
+ "id": "01cefcfa"
655
+ },
656
+ "outputs": [],
657
+ "source": [
658
+ "xgb_model = XGBClassifier(\n",
659
+ " n_estimators=50,\n",
660
+ " max_depth=4,\n",
661
+ " learning_rate=0.2,\n",
662
+ " use_label_encoder=False,\n",
663
+ " eval_metric='logloss',\n",
664
+ " random_state=42\n",
665
+ ")\n",
666
+ "xgb_model.fit(X_train, y_train)\n",
667
+ "y_pred_xgb = xgb_model.predict(X_test)\n",
668
+ "print(\"\\n XGBoost\")\n",
669
+ "print(f\"Accuracy: {accuracy_score(y_test, y_pred_xgb):.4f}\")\n",
670
+ "print(classification_report(y_test, y_pred_xgb))"
671
+ ]
672
+ },
673
+ {
674
+ "cell_type": "markdown",
675
+ "id": "eecde701",
676
+ "metadata": {
677
+ "id": "eecde701"
678
+ },
679
+ "source": [
680
+ "### KNN"
681
+ ]
682
+ },
683
+ {
684
+ "cell_type": "code",
685
+ "execution_count": null,
686
+ "id": "985c647f",
687
+ "metadata": {
688
+ "id": "985c647f"
689
+ },
690
+ "outputs": [],
691
+ "source": [
692
+ "knn_params = {\n",
693
+ " 'n_neighbors': [3, 5, 7, 9],\n",
694
+ " 'weights': ['uniform', 'distance'],\n",
695
+ " 'metric': ['euclidean', 'manhattan']\n",
696
+ "}\n",
697
+ "\n",
698
+ "knn_grid = GridSearchCV(KNeighborsClassifier(), knn_params, cv=5)\n",
699
+ "knn_grid.fit(X_train, y_train)\n",
700
+ "\n",
701
+ "print(\" KNN Best Params:\", knn_grid.best_params_)"
702
+ ]
703
+ },
704
+ {
705
+ "cell_type": "markdown",
706
+ "source": [
707
+ "#### Evaluation"
708
+ ],
709
+ "metadata": {
710
+ "id": "20E5x9Rmo3Le"
711
+ },
712
+ "id": "20E5x9Rmo3Le"
713
+ },
714
+ {
715
+ "cell_type": "code",
716
+ "execution_count": null,
717
+ "id": "a5f50c88",
718
+ "metadata": {
719
+ "id": "a5f50c88"
720
+ },
721
+ "outputs": [],
722
+ "source": [
723
+ "knn_model = KNeighborsClassifier(\n",
724
+ " n_neighbors=5,\n",
725
+ " weights='uniform',\n",
726
+ " metric='euclidean'\n",
727
+ ")\n",
728
+ "knn_model.fit(X_train, y_train)\n",
729
+ "y_pred_knn = knn_model.predict(X_test)\n",
730
+ "print(\"\\n KNN\")\n",
731
+ "print(f\"Accuracy: {accuracy_score(y_test, y_pred_knn):.4f}\")\n",
732
+ "print(classification_report(y_test, y_pred_knn))"
733
+ ]
734
+ },
735
+ {
736
+ "cell_type": "markdown",
737
+ "id": "658b2f4c",
738
+ "metadata": {
739
+ "id": "658b2f4c"
740
+ },
741
+ "source": [
742
+ "### Models Accuracies"
743
+ ]
744
+ },
745
+ {
746
+ "cell_type": "code",
747
+ "execution_count": null,
748
+ "id": "8eb234da",
749
+ "metadata": {
750
+ "id": "8eb234da"
751
+ },
752
+ "outputs": [],
753
+ "source": [
754
+ "models = [\n",
755
+ " 'Random Forest', 'SVM', 'MLP',\n",
756
+ " 'XGBoost', 'KNN', 'Logistic Regression'\n",
757
+ "]\n",
758
+ "accuracies = [\n",
759
+ " 0.85, 0.8333, 0.6833,\n",
760
+ " 0.8333, 0.7167, 0.8333\n",
761
+ "]\n",
762
+ "\n",
763
+ "plt.figure(figsize=(10, 6))\n",
764
+ "plt.bar(models, accuracies, color=['blue', 'green', 'purple', 'orange', 'red', 'cyan'])\n",
765
+ "plt.ylim(0, 1)\n",
766
+ "plt.ylabel('Accuracy')\n",
767
+ "plt.title('Model Accuracy Comparison')\n",
768
+ "\n",
769
+ "plt.xticks(rotation=30)\n",
770
+ "plt.show()"
771
+ ]
772
+ },
773
+ {
774
+ "cell_type": "code",
775
+ "execution_count": null,
776
+ "id": "0ef22c6c",
777
+ "metadata": {
778
+ "id": "0ef22c6c"
779
+ },
780
+ "outputs": [],
781
+ "source": [
782
+ "import gradio as gr\n",
783
+ "from sklearn.preprocessing import StandardScaler\n",
784
+ "import joblib"
785
+ ]
786
+ },
787
+ {
788
+ "cell_type": "code",
789
+ "execution_count": null,
790
+ "id": "28aa35d9",
791
+ "metadata": {
792
+ "id": "28aa35d9"
793
+ },
794
+ "outputs": [],
795
+ "source": [
796
+ "joblib.dump(rf_model, \"heart_model.pkl\")\n",
797
+ "joblib.dump(scaler, \"scaler.pkl\")\n",
798
+ "print(\"Model and scaler saved successfully\")"
799
+ ]
800
+ },
801
+ {
802
+ "cell_type": "code",
803
+ "execution_count": null,
804
+ "id": "165b4cab",
805
+ "metadata": {
806
+ "id": "165b4cab"
807
+ },
808
+ "outputs": [],
809
+ "source": [
810
+ "model = joblib.load(\"heart_model.pkl\")\n",
811
+ "scaler = joblib.load(\"scaler.pkl\")"
812
+ ]
813
+ },
814
+ {
815
+ "cell_type": "code",
816
+ "execution_count": null,
817
+ "id": "c41a4646",
818
+ "metadata": {
819
+ "id": "c41a4646"
820
+ },
821
+ "outputs": [],
822
+ "source": [
823
+ "def predict_heart_risk(age, cpk, ef, platelets, sc, ss, time, anaemia, diabetes, high_bp, sex, smoking):\n",
824
+ " data = pd.DataFrame([[\n",
825
+ " age, anaemia, cpk, diabetes, ef, high_bp,\n",
826
+ " platelets, sc, ss, sex, smoking, time\n",
827
+ " ]], columns=[\n",
828
+ " 'age', 'anaemia', 'creatinine_phosphokinase', 'diabetes',\n",
829
+ " 'ejection_fraction', 'high_blood_pressure', 'platelets',\n",
830
+ " 'serum_creatinine', 'serum_sodium', 'sex', 'smoking', 'time'\n",
831
+ " ])\n",
832
+ "\n",
833
+ "\n",
834
+ " continuous_features = ['age', 'creatinine_phosphokinase', 'ejection_fraction','platelets', 'serum_creatinine', 'serum_sodium', 'time']\n",
835
+ " data[continuous_features] = scaler.transform(data[continuous_features])\n",
836
+ "\n",
837
+ " prediction = model.predict(data)[0]\n",
838
+ " return \" At Risk\" if prediction == 1 else \" Not At Risk\""
839
+ ]
840
+ },
841
+ {
842
+ "cell_type": "code",
843
+ "execution_count": null,
844
+ "id": "5ca7be47",
845
+ "metadata": {
846
+ "id": "5ca7be47"
847
+ },
848
+ "outputs": [],
849
+ "source": [
850
+ "inputs = [\n",
851
+ " gr.Number(label=\"Age\"),\n",
852
+ " gr.Number(label=\"Creatinine Phosphokinase, Range [0,100000]\"),\n",
853
+ " gr.Number(label=\"Ejection Fraction, Range [5,85] \"),\n",
854
+ " gr.Number(label=\"Platelets, Range [5000,2000000]\"),\n",
855
+ " gr.Number(label=\"Serum Creatinine, Range [0.1,60]\"),\n",
856
+ " gr.Number(label=\"Serum Sodium, Range [95,255]\"),\n",
857
+ " gr.Number(label=\"Follow-up Time (days)\"),\n",
858
+ " gr.Radio([0, 1], label=\"Anaemia (0=No, 1=Yes)\"),\n",
859
+ " gr.Radio([0, 1], label=\"Diabetes (0=No, 1=Yes)\"),\n",
860
+ " gr.Radio([0, 1], label=\"High Blood Pressure (0=No, 1=Yes)\"),\n",
861
+ " gr.Radio([0, 1], label=\"Sex (0=Female, 1=Male)\"),\n",
862
+ " gr.Radio([0, 1], label=\"Smoking (0=No, 1=Yes)\")\n",
863
+ "]"
864
+ ]
865
+ },
866
+ {
867
+ "cell_type": "code",
868
+ "execution_count": null,
869
+ "id": "563bc8b2",
870
+ "metadata": {
871
+ "id": "563bc8b2"
872
+ },
873
+ "outputs": [],
874
+ "source": [
875
+ "gr.Interface(\n",
876
+ " fn=predict_heart_risk,\n",
877
+ " inputs=inputs,\n",
878
+ " outputs=\"text\",\n",
879
+ " title=\" Heart Failure Risk Predictor\",\n",
880
+ " description=\"Enter patient data to predict if they are at risk of heart failure.\",\n",
881
+ " allow_flagging=\"never\"\n",
882
+ ").launch()"
883
+ ]
884
+ },
885
+ {
886
+ "cell_type": "code",
887
+ "source": [],
888
+ "metadata": {
889
+ "id": "OlW7PfhJLXlE"
890
+ },
891
+ "id": "OlW7PfhJLXlE",
892
+ "execution_count": null,
893
+ "outputs": []
894
+ }
895
+ ],
896
+ "metadata": {
897
+ "kernelspec": {
898
+ "display_name": "Python 3",
899
+ "language": "python",
900
+ "name": "python3"
901
+ },
902
+ "language_info": {
903
+ "codemirror_mode": {
904
+ "name": "ipython",
905
+ "version": 3
906
+ },
907
+ "file_extension": ".py",
908
+ "mimetype": "text/x-python",
909
+ "name": "python",
910
+ "nbconvert_exporter": "python",
911
+ "pygments_lexer": "ipython3",
912
+ "version": "3.12.4"
913
+ },
914
+ "colab": {
915
+ "provenance": []
916
+ }
917
+ },
918
+ "nbformat": 4,
919
+ "nbformat_minor": 5
920
+ }