import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


# Load the work-retention dataset from the local data directory.
work = pd.read_csv('./data/work_retention.csv')


# Peek at the last few rows of the raw table.
work.tail()


# Feature matrix: every column except 'retain', which is pulled out
# separately further down as the value used to colour the scatter plot.
X_df = work.drop(columns=['retain'])


X_df


# Strip plot of the raw features drawn on a shared axis, which makes
# differences in scale between the columns easy to see.
sns.stripplot(data=X_df)
plt.show()


# Summary statistics for each feature column.
X_df.describe()


from sklearn.preprocessing import StandardScaler as SS


# StandardScaler standardizes each feature column as z = (x - u) / s,
# where u is the column mean and s is the column standard deviation.
# Reference:
# https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html

ss = SS()

# Two-step route: fit() computes the mean and std used for later scaling,
# then transform() centres and scales the data with those statistics.
a = ss.fit(X_df)
b = ss.transform(X_df)


# One-step route: fit_transform() fits to the data and transforms it in a
# single call, equivalent to the fit() + transform() pair above.
X_scaled = ss.fit_transform(X_df)


# First five rows of the scaled data. By default this is a NumPy ndarray
# rather than a DataFrame, so the column names are not carried along.
X_scaled[:5]


# Wrap the scaled array back into a DataFrame. Both the column names and
# the row index are taken from X_df so the scaled frame stays label-aligned
# with the unscaled features (and with anything indexed like them, such as
# work['retain']) even if the source table does not use a default index.
X_scaled_df = pd.DataFrame(X_scaled, columns=X_df.columns, index=X_df.index)


X_scaled_df.head()


# Same strip plot as for the raw features, now on the standardized data.
sns.stripplot(data=X_scaled_df)
plt.show()


# Correlation heatmap of the scaled features.


corr_matrix = X_scaled_df.corr()
# Mask the upper triangle (diagonal included) so each pair is shown once.
# The mask is built from the matrix *shape*, not its values: masking with
# np.triu(corr_matrix) directly would leave any upper-triangle cell whose
# correlation is exactly 0 unmasked, because 0.0 is falsy.
mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
sns.heatmap(corr_matrix, cmap='BuPu', vmin=-1, vmax=1,
            annot=True, fmt='.2f',
            mask=mask, linewidths=.2)
# Hide the tick marks themselves and keep only the (rotated) labels.
plt.tick_params(size=0, labelsize=10)
plt.xticks(rotation=45)
plt.yticks(rotation=45)
plt.show()


# Correlation heatmap of the unscaled features, for comparison with the
# scaled version.


corr_matrix = X_df.corr()
# Mask the upper triangle (diagonal included) so each pair is shown once.
# The mask is built from the matrix *shape*, not its values: masking with
# np.triu(corr_matrix) directly would leave any upper-triangle cell whose
# correlation is exactly 0 unmasked, because 0.0 is falsy.
mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
sns.heatmap(corr_matrix, cmap='BuPu', vmin=-1, vmax=1,
            annot=True, fmt='.2f',
            mask=mask)
# Hide the tick marks themselves and keep only the (rotated) labels.
plt.tick_params(size=0, labelsize=12)
plt.xticks(rotation=45)
plt.yticks(rotation=45)
plt.show()


# Scatter plot of the two unscaled features 'years worked' and 'commute'.
plt.scatter(x=X_df['years worked'], y=X_df['commute'])
plt.xlabel('Years worked', fontsize=12)
plt.ylabel('Commute', fontsize=12)
plt.show()


X_df


# Same scatter plot, coloured by the 'retain' column. The legend label and
# colour for each group are looked up by the retain value itself
# (position 0 -> 'quit', position 1 -> 'stay').
y = work['retain']
labels = ['quit', 'stay']
colors = ['cornflowerblue', 'slategrey']
for yi, (label, color) in enumerate(zip(labels, colors)):
    # Boolean row selector for the observations in this retain group.
    idx = y == yi
    group = X_df.loc[idx, ['years worked', 'commute']]
    plt.scatter(group['years worked'], group['commute'],
                color=color, label=label)
plt.xlabel('Years worked')
plt.ylabel('Commute')
plt.legend()
plt.show()

	age	children	commute	salary	years worked
195	61	0	0.883333	21661	0.82
196	37	0	0.883333	17181	1.29
197	56	1	1.116667	63675	0.91
198	36	4	1.166667	45895	0.97
199	55	1	0.916667	21965	0.74

	age	children	commute	salary	years worked
0	63	0	0.095167	54233	2.18
1	64	0	0.044500	64739	4.53
2	40	1	0.159167	95953	4.57
3	58	5	0.091167	59038	2.99
4	63	0	0.016000	7958	4.36
...	...	...	...	...	...
195	61	0	0.883333	21661	0.82
196	37	0	0.883333	17181	1.29
197	56	1	1.116667	63675	0.91
198	36	4	1.166667	45895	0.97
199	55	1	0.916667	21965	0.74

	age	children	commute	salary	years worked
0	1.540846	-0.974532	-0.792575	0.250870	-0.501449
1	1.624202	-0.974532	-0.934813	0.662036	1.216092
2	-0.376355	-0.014402	-0.612905	1.883636	1.245327
3	1.124063	3.826119	-0.803804	0.438920	0.090555
4	1.540846	-0.974532	-1.014822	-1.560161	1.091845