# NOTE(review): bare expression -- when run as a script it only evaluates the
# name and discards the result. Presumably left over from a notebook cell that
# displayed a `sales` DataFrame; the code below reads `df` instead, so confirm
# which name actually holds the data.
sales

from sklearn.preprocessing import LabelEncoder

def convert_categories(col):
    """Replace column ``col`` of the module-level ``df`` with integer codes.

    A fresh ``LabelEncoder`` is fitted on the column's current values and the
    column is overwritten in place with the resulting codes.

    Args:
        col: Label of the column in ``df`` to encode.

    Returns:
        The fitted ``LabelEncoder``, so the codes can later be mapped back to
        the original labels (``inverse_transform`` / ``classes_``). Callers
        that ignore the return value behave exactly as before.
    """
    # Each column is encoded independently of the others, so fitting and
    # transforming in a single step is sufficient.
    # NOTE: scikit-learn documents LabelEncoder as meant for target labels;
    # OrdinalEncoder is the feature-side counterpart.
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].values)
    return le

# Categorical columns that get replaced by integer codes, one at a time.
categories = ['PRODUCTLINE', 'PRODUCTCODE', 'COUNTRY', 'DEALSIZE']

for col in categories:
    convert_categories(col)

from sklearn.preprocessing import StandardScaler

# Fit the scaler on df, then apply it to the same frame. The fitted scaler
# stays available as `sc`; the scaled output is stored in `data`.
sc = StandardScaler().fit(df)
data = sc.transform(df)

from sklearn.cluster import KMeans

# Fit one KMeans model per cluster count (1 through 14) on the scaled data
# and record each model's `inertia_`; the values feed the elbow plot below.
wcss = []

for k in range(1, 15):
    # Fixed random_state so repeated runs give the same initialisation.
    kmeans = KMeans(n_clusters=k, init='k-means++', random_state=15)
    kmeans.fit(data)
    wcss.append(kmeans.inertia_)

# Elbow plot: recorded inertia values against the number of clusters.
# NOTE(review): `plt` is not imported in the visible code -- presumably
# `matplotlib.pyplot` imported elsewhere; confirm.

# x-values mirror the range(1, 15) used when filling wcss.
k = list(range(1, 15))

plt.plot(k, wcss, marker='o')
plt.xlabel('Clusters')
plt.ylabel('scores')
plt.title('Finding right number of clusters')
plt.grid()
plt.show()