# Diabetes: KNN classification

import pandas as pd

import numpy as np

import seaborn as sns

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier

from sklearn.preprocessing import StandardScaler

from sklearn.metrics import (

    confusion_matrix,

    accuracy_score,

    precision_score,

    recall_score,

    classification_report

)

# NOTE(review): `df` is not defined anywhere in the visible part of this file.
# It must be a pandas DataFrame that contains an 'Outcome' column and has to be
# loaded before this point (e.g. with pd.read_csv) -- confirm the data source.

# Heatmap of the pairwise correlations between the DataFrame's columns.
plt.figure(figsize=(8, 6))
sns.heatmap(df.corr(), annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Feature Correlations')
plt.show()

# Split into features (every column except 'Outcome') and the target column.
x = df.drop(columns=['Outcome'])
y = df['Outcome']

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Scale only the independent variables (features), never the target.
# The scaler is fitted on the training split alone and then re-used on the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
print('Train/test shapes:', x_train.shape, x_test.shape)

# Fit a k-nearest-neighbours classifier (k=5) and predict on the test split.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)

# Evaluation metrics on the held-out test split.
# NOTE(review): precision_score/recall_score are called with their defaults,
# which presumably means 'Outcome' is a binary label -- confirm.
CM = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)
err = 1 - acc  # error rate is the complement of accuracy
prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)

print('Confusion Matrix:\n', CM)
print(f'Accuracy: {acc:.4f}')
print(f'Error rate: {err:.4f}')
print(f'Precision: {prec:.4f}')
print(f'Recall: {rec:.4f}')
print('\nClassification report:\n', classification_report(y_test, y_pred))