#-----------------------------------------------------------------------------------------------------------------------------
#------------------------------------------------[ Libraries Import ]---------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
from sklearn.preprocessing import LabelEncoder
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from adjustText import adjust_text
import seaborn as sns
#-----------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------[ Import DataBase ]-------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
# Load the telecom dataset from the Excel workbook into a DataFrame.
telecom_workbook_path = 'S:\\TunisieTelecom\\TelecomData.xlsx'
dataTelecom = pd.read_excel(telecom_workbook_path)
# Preview up to the first 100 rows (rendered when this is the last expression of a notebook cell).
dataTelecom.head(100)
STATUT | OFFRE | ANC_M | HANDSET | revenu_voix | revenu_inter | NB_JOUR_ACTIVITE_TAXE | NB_JOUR_APPEL_TAXE | DUREE_APPEL_TOT | DUREE_APPEL_TAXEE | ... | FREQ_USSD_VOIX | FREQ_USSD_SMS | VOLUME_SESSION | VOLUME_SESSION_WEEKEND | REVENU_VAS | ARPU | P_revenu_data | P_revenu_voix_c | P_revenu_vas_c | id_client | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Active | Offre30 | 123 | 2G | 12.709375 | 1.464447 | 10.366042 | 10.658842 | 27.261836 | 2.125971 | ... | 28.679570 | 146.930634 | 200000.000000 | 54.025577 | 1.059822 | 4.325398 | 0.136685 | 0.943095 | 0.279541 | 1.0 |
1 | Active | Offre8 | 98 | 2G | 3.000000 | 0.756078 | 1.330736 | 7.079320 | 19.883099 | 7.735475 | ... | 28.490597 | 169.219363 | 3337.992419 | 24.789260 | 1.411803 | 2.677563 | 0.533695 | 0.540374 | 0.712010 | 2.0 |
2 | Active | Offre24 | 90 | 4G | 32.514156 | 0.681197 | 0.158160 | 4.367702 | 133.476368 | 7.681088 | ... | 190.000000 | 44.403308 | 107082.775926 | 160.031496 | 18.195224 | 1.954007 | 0.799606 | 0.071368 | 0.376754 | 3.0 |
3 | Active | Offre10 | 226 | 2G | 3.821551 | 5.265345 | 4.003452 | 3.086766 | 35.053364 | 8.540951 | ... | 19.412661 | 190.000000 | 128700.752169 | 102.668979 | 17.746873 | 2.496790 | 0.426526 | 0.314082 | 0.676112 | 4.0 |
4 | Active | Offre1 | 139 | 2G | 60.009385 | 1.957144 | 5.689241 | 0.403300 | 0.844956 | 12.355853 | ... | 81.567146 | 6.365788 | 75654.384291 | 0.341570 | 7.280861 | 2.291381 | 0.319736 | 0.600057 | 0.719450 | 5.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
95 | Active | Offre8 | 159 | 2G | 3.000000 | 2.250293 | 21.961465 | 2.872333 | 1.826910 | 27.907662 | ... | 12.954468 | 23.226754 | 32664.259485 | 21.276030 | 6.134130 | 0.689345 | 0.857627 | 0.392768 | 0.657034 | 96.0 |
96 | Suspended | Offre2 | 145 | 2G | 3.000000 | 3.473084 | 3.575555 | 0.298674 | 80.858548 | 5.349497 | ... | 153.474792 | 190.000000 | 82909.748831 | 26.213769 | 1.504337 | 0.164892 | 0.980014 | 0.467268 | 0.150409 | 97.0 |
97 | Active | Offre10 | 185 | 2G | 4.531545 | 2.142349 | 3.470616 | 6.400014 | 107.330103 | 5.199869 | ... | 157.487581 | 36.176280 | 141996.977868 | 42.321968 | 1.637815 | 1.848815 | 0.772398 | 0.407095 | 0.545033 | 98.0 |
98 | Active | Offre15 | 197 | 3G | 11.762926 | 3.474145 | 2.842419 | 10.629819 | 86.011908 | 0.124836 | ... | 159.332900 | 77.858228 | 12052.670191 | 46.685004 | 1.422138 | 1.944146 | 0.152620 | 0.243845 | 0.387906 | 99.0 |
99 | Suspended | Offre30 | 178 | 2G | 17.606096 | 0.019640 | 1.850818 | 0.184009 | 89.864928 | 5.858613 | ... | 18.339092 | 47.120168 | 134081.459601 | 10.649038 | 0.640634 | 2.204618 | 0.893336 | 0.293854 | 0.121368 | 100.0 |
100 rows × 64 columns
# Columns retained for the analysis. OFFRE carries the offer label; it is the
# only one of these columns that shows non-numeric values in the preview.
selected_variables = [
    'DUREE_APPEL_TOT',
    'nb_sms_tot',
    'revenu_cdr_c',
    'MNT_RECH',
    'FREQ_ACT_OUT',
    'NB_JOUR_APPEL_TAXE',
    'NB_SMS_TAXE',
    'NB_RECH_SUP5',
    'MNT_TRANSFERT_OUT',
    'revenu_voix',
    'MNT_FORFAIT_DATA',
    'NB_FORFAIT_VOIX',
    'OFFRE',
    'VOLUME_SESSION',
    'P_revenu_data',
    'MNT_FORFAIT',
    'FREQ_USSD',
    'P_FF_Data',
    'Duree_onnet_tot',
    'Duree_offnet_tot',
]
# Take an independent copy of just those columns so later edits never touch dataTelecom.
dataTelecomSelected = dataTelecom.loc[:, selected_variables].copy()
# Preview the first 10 rows of the reduced frame.
dataTelecomSelected.head(10)
DUREE_APPEL_TOT | nb_sms_tot | revenu_cdr_c | MNT_RECH | FREQ_ACT_OUT | NB_JOUR_APPEL_TAXE | NB_SMS_TAXE | NB_RECH_SUP5 | MNT_TRANSFERT_OUT | revenu_voix | MNT_FORFAIT_DATA | NB_FORFAIT_VOIX | OFFRE | VOLUME_SESSION | P_revenu_data | MNT_FORFAIT | FREQ_USSD | P_FF_Data | Duree_onnet_tot | Duree_offnet_tot | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 27.261836 | 3.685763 | 19.941537 | 12.306756 | 10.667904 | 10.658842 | 1.624059 | 2.00 | 8.877510 | 12.709375 | 2.059265 | 8.589441 | Offre30 | 200000.000000 | 0.136685 | 10.882398 | 190.000000 | 0.498091 | 30.583118 | 27.219712 |
1 | 19.883099 | 1.476121 | 3.908449 | 10.348607 | 13.650949 | 7.079320 | 7.059615 | 0.67 | 1.633333 | 3.000000 | 20.025208 | 5.685491 | Offre8 | 3337.992419 | 0.533695 | 1.049276 | 18.623378 | 0.096339 | 13.426765 | 32.571522 |
2 | 133.476368 | 6.162948 | 32.541849 | 3.939266 | 7.079317 | 4.367702 | 1.638128 | 0.00 | 0.011593 | 32.514156 | 5.397060 | 4.417241 | Offre24 | 107082.775926 | 0.799606 | 4.041068 | 66.449461 | 0.667540 | 6.217262 | 24.861880 |
3 | 35.053364 | 1.448782 | 4.145009 | 2.408488 | 17.423588 | 3.086766 | 0.016726 | 0.33 | 2.003034 | 3.821551 | 0.534564 | 0.107662 | Offre10 | 128700.752169 | 0.426526 | 6.441283 | 19.259807 | 0.407815 | 5.837191 | 37.779013 |
4 | 0.844956 | 1.131308 | 60.496821 | 10.870592 | 0.067246 | 0.403300 | 3.934964 | 0.33 | 2.755649 | 60.009385 | 0.031219 | 2.276588 | Offre1 | 75654.384291 | 0.319736 | 0.805758 | 38.034894 | 0.828712 | 10.395156 | 2.831625 |
5 | 26.779409 | 1.477695 | 3.625786 | 40.974329 | 1.374602 | 3.346885 | 1.038573 | 1.67 | 3.748974 | 3.000000 | 2.137682 | 1.144439 | Offre16 | 65253.482895 | 0.274582 | 1.253000 | 23.161888 | 0.335963 | 42.091494 | 90.670997 |
6 | 2.113332 | 4.376941 | 33.872104 | 13.929977 | 0.730859 | 5.357250 | 0.488122 | 0.00 | 1.547834 | 31.953274 | 0.830859 | 2.403743 | Offre10 | 45157.810902 | 0.566405 | 2.536099 | 190.000000 | 0.211775 | 19.369493 | 32.897501 |
7 | 5.922602 | 0.773370 | 15.836382 | 10.793736 | 1.060993 | 0.100707 | 4.178468 | 1.00 | 3.345468 | 10.331387 | 1.547844 | 15.417804 | Offre6 | 37165.463625 | 0.620034 | 0.385122 | 21.880660 | 0.205227 | 0.059797 | 4.935726 |
8 | 68.691901 | 1.314796 | 21.616162 | 0.516740 | 4.607737 | 2.281113 | 3.947526 | 6.67 | 11.943487 | 14.262607 | 19.898847 | 0.068199 | Offre23 | 200000.000000 | 0.087045 | 6.721631 | 82.203454 | 0.203176 | 21.697246 | 0.627422 |
9 | 37.872839 | 0.901089 | 9.153639 | 8.129576 | 15.158324 | 11.425205 | 0.109140 | 1.00 | 1.932114 | 5.870684 | 17.095836 | 1.245533 | Offre8 | 65233.389597 | 0.708388 | 1.570219 | 49.499533 | 0.247614 | 26.167306 | 59.049667 |
# Work on a random subset of at most SAMPLE_SIZE rows.
# A fixed seed makes the draw, and every result computed from it afterwards,
# repeatable from one run to the next.
SAMPLE_SIZE = 100
SAMPLE_SEED = 42
dataTelecomSelectedSample = dataTelecomSelected.sample(
    # Cap the request at the frame length: sample() raises when asked for more
    # rows than exist (sampling is without replacement by default).
    n=min(SAMPLE_SIZE, len(dataTelecomSelected)),
    random_state=SAMPLE_SEED,
)
# Preview the sample (it never holds more than SAMPLE_SIZE rows).
dataTelecomSelectedSample.head(SAMPLE_SIZE)
DUREE_APPEL_TOT | nb_sms_tot | revenu_cdr_c | MNT_RECH | FREQ_ACT_OUT | NB_JOUR_APPEL_TAXE | NB_SMS_TAXE | NB_RECH_SUP5 | MNT_TRANSFERT_OUT | revenu_voix | MNT_FORFAIT_DATA | NB_FORFAIT_VOIX | OFFRE | VOLUME_SESSION | P_revenu_data | MNT_FORFAIT | FREQ_USSD | P_FF_Data | Duree_onnet_tot | Duree_offnet_tot | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
19257 | 30.090072 | 8.036033 | 6.628774 | 21.796702 | 4.390850 | 3.572585 | 2.230938 | 0.33 | 6.576157 | 5.973346 | 8.614453 | 2.382774 | Offre12 | 96933.695160 | 0.596794 | 10.756903 | 143.642413 | 0.014595 | 44.241075 | 34.738155 |
38166 | 577.035853 | 8.792087 | 8.468283 | 6.180827 | 1.926081 | 7.041788 | 0.025432 | 5.00 | 0.692279 | 3.000000 | 0.417457 | 12.181873 | Offre7 | 77383.665104 | 0.158712 | 3.796006 | 57.790428 | 0.516101 | 39.859481 | 8.263161 |
79907 | 126.227433 | 2.056120 | 9.733714 | 4.687255 | 4.980074 | 8.250577 | 0.144793 | 0.00 | 5.995859 | 3.000000 | 3.374781 | 8.150515 | Offre13 | 186211.083505 | 0.935933 | 1.266953 | 46.308896 | 0.645644 | 12.738402 | 15.707098 |
9564 | 0.042172 | 3.331580 | 10.712672 | 12.834082 | 13.015438 | 10.310605 | 6.283435 | 2.67 | 6.088506 | 10.611291 | 13.975665 | 7.204972 | Offre5 | 120468.526004 | 0.154198 | 1.421667 | 37.609093 | 0.708598 | 41.733081 | 3.447804 |
21204 | 39.975826 | 4.348320 | 9.195774 | 3.873290 | 7.742715 | 4.286317 | 3.506429 | 0.00 | 0.665510 | 7.944436 | 10.043437 | 1.146157 | Offre24 | 53403.671898 | 0.089654 | 4.067660 | 190.000000 | 0.841939 | 74.725692 | 22.666880 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
87907 | 10.833286 | 22.632752 | 7.434685 | 16.818580 | 1.330702 | 30.000000 | 2.258939 | 0.00 | 8.323257 | 6.718734 | 4.107238 | 1.454253 | Offre29 | 14172.653991 | 0.825286 | 1.269155 | 47.655680 | 0.445803 | 5.349727 | 9.497032 |
69701 | 439.174973 | 6.930242 | 10.579883 | 3.869375 | 3.062471 | 4.274253 | 0.701469 | 3.67 | 3.708045 | 8.752228 | 8.264411 | 5.081549 | Offre18 | 96018.498183 | 0.689052 | 3.429793 | 94.129045 | 0.630098 | 71.752719 | 6.520351 |
1612 | 34.254143 | 6.868878 | 14.567883 | 1.299701 | 0.257181 | 3.908668 | 2.735732 | 2.33 | 1.125163 | 3.000000 | 4.784903 | 18.786315 | Offre5 | 74526.004536 | 0.463658 | 3.038143 | 14.416576 | 0.491777 | 7.667571 | 16.154592 |
43159 | 81.030834 | 1.408071 | 3.985414 | 1.817113 | 5.775719 | 7.573036 | 4.426431 | 0.00 | 4.559158 | 3.000000 | 11.190860 | 0.234017 | Offre3 | 70646.213006 | 0.939057 | 10.895926 | 174.699935 | 0.606876 | 50.623577 | 58.188738 |
34600 | 2.058911 | 8.345012 | 3.530926 | 2.438283 | 7.900239 | 2.923418 | 2.510042 | 0.00 | 1.223616 | 3.000000 | 10.885480 | 0.874752 | Offre17 | 85687.964862 | 0.818186 | 4.716647 | 71.896790 | 0.599819 | 73.497484 | 6.421968 |
100 rows × 20 columns
#-----------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------[ Label Encoder ]---------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
# Replace the OFFRE labels with integer codes. The encoder is fitted on the
# sampled rows only, so the codes depend on which offers occur in the sample.
label_encoder = LabelEncoder()
offre_labels = dataTelecomSelectedSample['OFFRE']
label_encoder.fit(offre_labels)
encoded_offre = label_encoder.transform(offre_labels)
# Overwrite the column in place with its encoded version.
dataTelecomSelectedSample['OFFRE'] = encoded_offre
dataTelecomSelectedSample.head(100)
DUREE_APPEL_TOT | nb_sms_tot | revenu_cdr_c | MNT_RECH | FREQ_ACT_OUT | NB_JOUR_APPEL_TAXE | NB_SMS_TAXE | NB_RECH_SUP5 | MNT_TRANSFERT_OUT | revenu_voix | MNT_FORFAIT_DATA | NB_FORFAIT_VOIX | OFFRE | VOLUME_SESSION | P_revenu_data | MNT_FORFAIT | FREQ_USSD | P_FF_Data | Duree_onnet_tot | Duree_offnet_tot | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
19257 | 30.090072 | 8.036033 | 6.628774 | 21.796702 | 4.390850 | 3.572585 | 2.230938 | 0.33 | 6.576157 | 5.973346 | 8.614453 | 2.382774 | 3 | 96933.695160 | 0.596794 | 10.756903 | 143.642413 | 0.014595 | 44.241075 | 34.738155 |
38166 | 577.035853 | 8.792087 | 8.468283 | 6.180827 | 1.926081 | 7.041788 | 0.025432 | 5.00 | 0.692279 | 3.000000 | 0.417457 | 12.181873 | 26 | 77383.665104 | 0.158712 | 3.796006 | 57.790428 | 0.516101 | 39.859481 | 8.263161 |
79907 | 126.227433 | 2.056120 | 9.733714 | 4.687255 | 4.980074 | 8.250577 | 0.144793 | 0.00 | 5.995859 | 3.000000 | 3.374781 | 8.150515 | 4 | 186211.083505 | 0.935933 | 1.266953 | 46.308896 | 0.645644 | 12.738402 | 15.707098 |
9564 | 0.042172 | 3.331580 | 10.712672 | 12.834082 | 13.015438 | 10.310605 | 6.283435 | 2.67 | 6.088506 | 10.611291 | 13.975665 | 7.204972 | 24 | 120468.526004 | 0.154198 | 1.421667 | 37.609093 | 0.708598 | 41.733081 | 3.447804 |
21204 | 39.975826 | 4.348320 | 9.195774 | 3.873290 | 7.742715 | 4.286317 | 3.506429 | 0.00 | 0.665510 | 7.944436 | 10.043437 | 1.146157 | 16 | 53403.671898 | 0.089654 | 4.067660 | 190.000000 | 0.841939 | 74.725692 | 22.666880 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
87907 | 10.833286 | 22.632752 | 7.434685 | 16.818580 | 1.330702 | 30.000000 | 2.258939 | 0.00 | 8.323257 | 6.718734 | 4.107238 | 1.454253 | 20 | 14172.653991 | 0.825286 | 1.269155 | 47.655680 | 0.445803 | 5.349727 | 9.497032 |
69701 | 439.174973 | 6.930242 | 10.579883 | 3.869375 | 3.062471 | 4.274253 | 0.701469 | 3.67 | 3.708045 | 8.752228 | 8.264411 | 5.081549 | 9 | 96018.498183 | 0.689052 | 3.429793 | 94.129045 | 0.630098 | 71.752719 | 6.520351 |
1612 | 34.254143 | 6.868878 | 14.567883 | 1.299701 | 0.257181 | 3.908668 | 2.735732 | 2.33 | 1.125163 | 3.000000 | 4.784903 | 18.786315 | 24 | 74526.004536 | 0.463658 | 3.038143 | 14.416576 | 0.491777 | 7.667571 | 16.154592 |
43159 | 81.030834 | 1.408071 | 3.985414 | 1.817113 | 5.775719 | 7.573036 | 4.426431 | 0.00 | 4.559158 | 3.000000 | 11.190860 | 0.234017 | 21 | 70646.213006 | 0.939057 | 10.895926 | 174.699935 | 0.606876 | 50.623577 | 58.188738 |
34600 | 2.058911 | 8.345012 | 3.530926 | 2.438283 | 7.900239 | 2.923418 | 2.510042 | 0.00 | 1.223616 | 3.000000 | 10.885480 | 0.874752 | 8 | 85687.964862 | 0.818186 | 4.716647 | 71.896790 | 0.599819 | 73.497484 | 6.421968 |
100 rows × 20 columns
#-----------------------------------------------------------------------------------------------------------------------------
#----------------------------------------------[ PCA In Different Colors ]----------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
# Feature matrix: the first 20 columns of the sample, picked by position.
X = dataTelecomSelectedSample.iloc[:, :20]
# Display the matrix (notebook cell output).
X
DUREE_APPEL_TOT | nb_sms_tot | revenu_cdr_c | MNT_RECH | FREQ_ACT_OUT | NB_JOUR_APPEL_TAXE | NB_SMS_TAXE | NB_RECH_SUP5 | MNT_TRANSFERT_OUT | revenu_voix | MNT_FORFAIT_DATA | NB_FORFAIT_VOIX | OFFRE | VOLUME_SESSION | P_revenu_data | MNT_FORFAIT | FREQ_USSD | P_FF_Data | Duree_onnet_tot | Duree_offnet_tot | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
19257 | 30.090072 | 8.036033 | 6.628774 | 21.796702 | 4.390850 | 3.572585 | 2.230938 | 0.33 | 6.576157 | 5.973346 | 8.614453 | 2.382774 | 3 | 96933.695160 | 0.596794 | 10.756903 | 143.642413 | 0.014595 | 44.241075 | 34.738155 |
38166 | 577.035853 | 8.792087 | 8.468283 | 6.180827 | 1.926081 | 7.041788 | 0.025432 | 5.00 | 0.692279 | 3.000000 | 0.417457 | 12.181873 | 26 | 77383.665104 | 0.158712 | 3.796006 | 57.790428 | 0.516101 | 39.859481 | 8.263161 |
79907 | 126.227433 | 2.056120 | 9.733714 | 4.687255 | 4.980074 | 8.250577 | 0.144793 | 0.00 | 5.995859 | 3.000000 | 3.374781 | 8.150515 | 4 | 186211.083505 | 0.935933 | 1.266953 | 46.308896 | 0.645644 | 12.738402 | 15.707098 |
9564 | 0.042172 | 3.331580 | 10.712672 | 12.834082 | 13.015438 | 10.310605 | 6.283435 | 2.67 | 6.088506 | 10.611291 | 13.975665 | 7.204972 | 24 | 120468.526004 | 0.154198 | 1.421667 | 37.609093 | 0.708598 | 41.733081 | 3.447804 |
21204 | 39.975826 | 4.348320 | 9.195774 | 3.873290 | 7.742715 | 4.286317 | 3.506429 | 0.00 | 0.665510 | 7.944436 | 10.043437 | 1.146157 | 16 | 53403.671898 | 0.089654 | 4.067660 | 190.000000 | 0.841939 | 74.725692 | 22.666880 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
87907 | 10.833286 | 22.632752 | 7.434685 | 16.818580 | 1.330702 | 30.000000 | 2.258939 | 0.00 | 8.323257 | 6.718734 | 4.107238 | 1.454253 | 20 | 14172.653991 | 0.825286 | 1.269155 | 47.655680 | 0.445803 | 5.349727 | 9.497032 |
69701 | 439.174973 | 6.930242 | 10.579883 | 3.869375 | 3.062471 | 4.274253 | 0.701469 | 3.67 | 3.708045 | 8.752228 | 8.264411 | 5.081549 | 9 | 96018.498183 | 0.689052 | 3.429793 | 94.129045 | 0.630098 | 71.752719 | 6.520351 |
1612 | 34.254143 | 6.868878 | 14.567883 | 1.299701 | 0.257181 | 3.908668 | 2.735732 | 2.33 | 1.125163 | 3.000000 | 4.784903 | 18.786315 | 24 | 74526.004536 | 0.463658 | 3.038143 | 14.416576 | 0.491777 | 7.667571 | 16.154592 |
43159 | 81.030834 | 1.408071 | 3.985414 | 1.817113 | 5.775719 | 7.573036 | 4.426431 | 0.00 | 4.559158 | 3.000000 | 11.190860 | 0.234017 | 21 | 70646.213006 | 0.939057 | 10.895926 | 174.699935 | 0.606876 | 50.623577 | 58.188738 |
34600 | 2.058911 | 8.345012 | 3.530926 | 2.438283 | 7.900239 | 2.923418 | 2.510042 | 0.00 | 1.223616 | 3.000000 | 10.885480 | 0.874752 | 8 | 85687.964862 | 0.818186 | 4.716647 | 71.896790 | 0.599819 | 73.497484 | 6.421968 |
100 rows × 20 columns
# The 20 measurement columns that describe the rows of dataTelecomSelectedSample
X = dataTelecomSelectedSample.iloc[:, :20]
# Colors derived from the OFFRE value of each row: one category code per distinct offer
colors = dataTelecomSelectedSample['OFFRE'].astype('category')
y = colors.cat.codes
print(X)
print(y)
# Pairwise scatter plots of every column of X, with points colored by offer code
scatter_matrix(X, c=y)
plt.show()
DUREE_APPEL_TOT nb_sms_tot revenu_cdr_c MNT_RECH FREQ_ACT_OUT \ 19257 30.090072 8.036033 6.628774 21.796702 4.390850 38166 577.035853 8.792087 8.468283 6.180827 1.926081 79907 126.227433 2.056120 9.733714 4.687255 4.980074 9564 0.042172 3.331580 10.712672 12.834082 13.015438 21204 39.975826 4.348320 9.195774 3.873290 7.742715 ... ... ... ... ... ... 87907 10.833286 22.632752 7.434685 16.818580 1.330702 69701 439.174973 6.930242 10.579883 3.869375 3.062471 1612 34.254143 6.868878 14.567883 1.299701 0.257181 43159 81.030834 1.408071 3.985414 1.817113 5.775719 34600 2.058911 8.345012 3.530926 2.438283 7.900239 NB_JOUR_APPEL_TAXE NB_SMS_TAXE NB_RECH_SUP5 MNT_TRANSFERT_OUT \ 19257 3.572585 2.230938 0.33 6.576157 38166 7.041788 0.025432 5.00 0.692279 79907 8.250577 0.144793 0.00 5.995859 9564 10.310605 6.283435 2.67 6.088506 21204 4.286317 3.506429 0.00 0.665510 ... ... ... ... ... 87907 30.000000 2.258939 0.00 8.323257 69701 4.274253 0.701469 3.67 3.708045 1612 3.908668 2.735732 2.33 1.125163 43159 7.573036 4.426431 0.00 4.559158 34600 2.923418 2.510042 0.00 1.223616 revenu_voix MNT_FORFAIT_DATA NB_FORFAIT_VOIX OFFRE VOLUME_SESSION \ 19257 5.973346 8.614453 2.382774 3 96933.695160 38166 3.000000 0.417457 12.181873 26 77383.665104 79907 3.000000 3.374781 8.150515 4 186211.083505 9564 10.611291 13.975665 7.204972 24 120468.526004 21204 7.944436 10.043437 1.146157 16 53403.671898 ... ... ... ... ... ... 87907 6.718734 4.107238 1.454253 20 14172.653991 69701 8.752228 8.264411 5.081549 9 96018.498183 1612 3.000000 4.784903 18.786315 24 74526.004536 43159 3.000000 11.190860 0.234017 21 70646.213006 34600 3.000000 10.885480 0.874752 8 85687.964862 P_revenu_data MNT_FORFAIT FREQ_USSD P_FF_Data Duree_onnet_tot \ 19257 0.596794 10.756903 143.642413 0.014595 44.241075 38166 0.158712 3.796006 57.790428 0.516101 39.859481 79907 0.935933 1.266953 46.308896 0.645644 12.738402 9564 0.154198 1.421667 37.609093 0.708598 41.733081 21204 0.089654 4.067660 190.000000 0.841939 74.725692 ... 
... ... ... ... ... 87907 0.825286 1.269155 47.655680 0.445803 5.349727 69701 0.689052 3.429793 94.129045 0.630098 71.752719 1612 0.463658 3.038143 14.416576 0.491777 7.667571 43159 0.939057 10.895926 174.699935 0.606876 50.623577 34600 0.818186 4.716647 71.896790 0.599819 73.497484 Duree_offnet_tot 19257 34.738155 38166 8.263161 79907 15.707098 9564 3.447804 21204 22.666880 ... ... 87907 9.497032 69701 6.520351 1612 16.154592 43159 58.188738 34600 6.421968 [100 rows x 20 columns] 19257 3 38166 26 79907 4 9564 24 21204 16 .. 87907 20 69701 9 1612 24 43159 21 34600 8 Length: 100, dtype: int8
# PCA model keeping the three leading principal components.
mypca = PCA(n_components=3)
# Fit the model and project X onto the kept components in one pass.
# fit_transform() both fits and transforms, so a separate fit() call beforehand
# would only train the same model a second time.
# NOTE(review): X is used unscaled here, so columns with a large numeric range
# (e.g. VOLUME_SESSION) can dominate the components; standardize X first if
# that is not intended.
data_output = mypca.fit_transform(X)
# Singular values associated with each kept component (these are not variances).
print(mypca.singular_values_)
# Fraction of the total variance explained by each kept component.
print(mypca.explained_variance_ratio_)
# Principal axes in feature space, i.e. the directions of maximum variance in
# the data, one row per component, sorted by explained variance.
print(mypca.components_)
# Estimated noise covariance of the probabilistic PCA model.
print(mypca.noise_variance_)
[564941.22927323 1149.42806145 599.59527821] [9.99993781e-01 4.13956159e-06 1.12643767e-06] [[ 2.88108368e-04 3.12680649e-06 5.26615536e-06 4.54496005e-06 5.67742455e-06 6.40142272e-06 -8.68600113e-07 -2.65358195e-06 -1.44663598e-06 3.38948997e-06 9.45387932e-06 -1.54292830e-05 -2.23363931e-05 9.99999946e-01 -7.45822317e-07 6.61593278e-06 -1.47328229e-04 3.32983634e-07 -2.25310724e-05 2.94022681e-05] [ 9.97311085e-01 7.22821679e-03 1.92414007e-02 -7.17861907e-03 2.10169687e-03 -1.08034265e-02 -2.76084222e-03 3.43901500e-03 1.11028400e-03 1.89063725e-02 -4.02245801e-03 2.16942301e-03 8.66008066e-03 -2.91240162e-04 -4.64452049e-05 -6.84484088e-03 -3.14723181e-02 -2.98461637e-04 -3.56400744e-02 -4.46507837e-02] [ 4.08648966e-02 -2.19314886e-04 -5.59803698e-03 2.65465205e-02 5.30864593e-04 -1.05352589e-02 -1.05896720e-03 -5.80377396e-03 -8.62801219e-03 5.52831256e-03 -9.71655462e-03 6.03946452e-03 -7.12011394e-03 1.34764393e-04 4.04697045e-04 1.59891469e-02 9.75491868e-01 4.55087713e-04 2.05332097e-01 5.62102713e-02]] 180.69185527562942
# Correlation circle of the sampled variables on the first two principal
# components of the standardized data.
pca = PCA(n_components=2)
# Standardize every column to zero mean and unit population variance (ddof=0)
# so that no variable dominates the components through its scale alone.
data_mean = dataTelecomSelectedSample.mean(axis=0)
data_std = dataTelecomSelectedSample.std(axis=0, ddof=0)
data_standardized = (dataTelecomSelectedSample - data_mean) / data_std
# Fit once, on the standardized data only, and keep the projected observations.
pca_result = pca.fit_transform(data_standardized)
# Coordinates of the variables = correlation of each variable with each component.
# Raw eigenvectors (pca.components_) are unit-length directions, not correlations;
# since every standardized column has unit population variance, the correlation is
# eigenvector * singular_value / sqrt(n_samples), which always lies inside the unit circle.
n_samples = data_standardized.shape[0]
variable_coordinates = pca.components_.T * pca.singular_values_ / np.sqrt(n_samples)
# Share of the total variance carried by each of the two components (axis labels).
explained_variance_ratio = pca.explained_variance_ratio_
# Create the figure and axis
fig, ax = plt.subplots(figsize=(12, 12))
# Draw the unit circle that bounds every possible correlation vector
circle = plt.Circle((0, 0), radius=1, edgecolor='black', facecolor='None')
ax.add_patch(circle)
ax.set_xlim(-1.1, 1.1)
ax.set_ylim(-1.1, 1.1)
# Overlay the observations' scores; points outside the [-1.1, 1.1] window are
# hidden by the axis limits set above.
sns.scatterplot(x=pca_result[:, 0], y=pca_result[:, 1], color='yellow', alpha=0.5, legend=False, ax=ax)
ax.axhline(0, color='black', linewidth=0.5)
ax.axvline(0, color='black', linewidth=0.5)
# One arrow and one label per variable; the palette is cycled when there are
# more variables than colors.
texts = []
colors = ['red', 'blue', 'green', 'orange', 'purple', 'brown', 'gray', 'cyan', 'magenta', 'yellow']
for i, variable in enumerate(dataTelecomSelectedSample.columns):
    color = colors[i % len(colors)]
    x_coord, y_coord = variable_coordinates[i]
    # length_includes_head keeps the arrow tip exactly on the variable's coordinates
    # instead of overshooting them by the head length.
    ax.arrow(0, 0, x_coord, y_coord, head_width=0.05, head_length=0.1,
             fc=color, ec=color, length_includes_head=True)
    texts.append(ax.text(x_coord + 0.05, y_coord + 0.05, variable,
                         fontsize=12, rotation=45, color=color))
# Adjust the position of variable labels to avoid overlap
adjust_text(texts, arrowprops=dict(arrowstyle="-", color='black'))
# Axis labels show the percentage of variance explained by each component
ax.set_xlabel(f'PC1 ({round(explained_variance_ratio[0] * 100, 2)}%)', fontsize=14)
ax.set_ylabel(f'PC2 ({round(explained_variance_ratio[1] * 100, 2)}%)', fontsize=14)
# Set plot title
ax.set_title('PCA Correlation Circle', fontsize=16)
# Equal aspect ratio so the circle is not drawn as an ellipse
ax.set_aspect('equal', adjustable='box')
# Show the plot
plt.show()