from sklearn.preprocessing import LabelEncoder
def convert_categories(col):
    """Label-encode one column of the module-level ``df`` in place.

    A new ``LabelEncoder`` is created and fitted on every call, so the
    integer codes assigned to one column are independent of any other
    column. The fitted encoder is local to this function and is discarded
    on return.

    Args:
        col: Name of the column in ``df`` to overwrite with integer codes.

    Returns:
        None. ``df[col]`` is replaced as a side effect.
    """
    le = LabelEncoder()
    # Each column is independent, so fitting and transforming in one step
    # is sufficient.
    df[col] = le.fit_transform(df[col].values)
# Columns of df that are replaced with integer label codes before scaling.
categories = ['PRODUCTLINE', 'PRODUCTCODE', 'COUNTRY', 'DEALSIZE']
for col in categories:
    convert_categories(col)
from sklearn.preprocessing import StandardScaler
# Standardise the features so that no single column dominates the distance
# computations used by the clustering below.
sc = StandardScaler()
# NOTE(review): the whole frame is passed to the scaler, so every column of
# df is assumed to be numeric at this point - confirm that no un-encoded
# text/date columns remain.
data = sc.fit_transform(df)
from sklearn.cluster import KMeans
# Elbow method: fit KMeans once per candidate cluster count (1 through 14)
# and record each model's inertia (within-cluster sum of squares).
wcss = []
for k in range(1, 15):
    # Fixed random_state keeps the centroid initialisation reproducible.
    kmeans = KMeans(n_clusters=k, init='k-means++', random_state=15)
    kmeans.fit(data)
    wcss.append(kmeans.inertia_)
# Plot inertia against the number of clusters; the "elbow" of the curve
# suggests a reasonable cluster count.
# NOTE(review): plt is presumably matplotlib.pyplot imported earlier in the
# file - it is not imported in this section; confirm.
k = list(range(1, 15))  # must match the range used when filling wcss
plt.plot(k, wcss, marker='o')
plt.xlabel('Clusters')
plt.ylabel('scores')
plt.title('Finding right number of clusters')
plt.grid()
plt.show()
