#-----------------------------------------------------------------------------------------------------------------------------
#------------------------------------------------[ Libraries Import ]---------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
from sklearn.preprocessing import LabelEncoder
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from adjustText import adjust_text
import seaborn as sns
#-----------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------[ Import DataBase ]-------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
# Load the full telecom customer table from the Excel workbook on the S: drive.
telecom_workbook_path = 'S:\\TunisieTelecom\\TelecomData.xlsx'
dataTelecom = pd.read_excel(telecom_workbook_path)
# Notebook preview: first 100 rows of the loaded frame.
dataTelecom.head(n=100)
| STATUT | OFFRE | ANC_M | HANDSET | revenu_voix | revenu_inter | NB_JOUR_ACTIVITE_TAXE | NB_JOUR_APPEL_TAXE | DUREE_APPEL_TOT | DUREE_APPEL_TAXEE | ... | FREQ_USSD_VOIX | FREQ_USSD_SMS | VOLUME_SESSION | VOLUME_SESSION_WEEKEND | REVENU_VAS | ARPU | P_revenu_data | P_revenu_voix_c | P_revenu_vas_c | id_client | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Active | Offre30 | 123 | 2G | 12.709375 | 1.464447 | 10.366042 | 10.658842 | 27.261836 | 2.125971 | ... | 28.679570 | 146.930634 | 200000.000000 | 54.025577 | 1.059822 | 4.325398 | 0.136685 | 0.943095 | 0.279541 | 1.0 |
| 1 | Active | Offre8 | 98 | 2G | 3.000000 | 0.756078 | 1.330736 | 7.079320 | 19.883099 | 7.735475 | ... | 28.490597 | 169.219363 | 3337.992419 | 24.789260 | 1.411803 | 2.677563 | 0.533695 | 0.540374 | 0.712010 | 2.0 |
| 2 | Active | Offre24 | 90 | 4G | 32.514156 | 0.681197 | 0.158160 | 4.367702 | 133.476368 | 7.681088 | ... | 190.000000 | 44.403308 | 107082.775926 | 160.031496 | 18.195224 | 1.954007 | 0.799606 | 0.071368 | 0.376754 | 3.0 |
| 3 | Active | Offre10 | 226 | 2G | 3.821551 | 5.265345 | 4.003452 | 3.086766 | 35.053364 | 8.540951 | ... | 19.412661 | 190.000000 | 128700.752169 | 102.668979 | 17.746873 | 2.496790 | 0.426526 | 0.314082 | 0.676112 | 4.0 |
| 4 | Active | Offre1 | 139 | 2G | 60.009385 | 1.957144 | 5.689241 | 0.403300 | 0.844956 | 12.355853 | ... | 81.567146 | 6.365788 | 75654.384291 | 0.341570 | 7.280861 | 2.291381 | 0.319736 | 0.600057 | 0.719450 | 5.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 95 | Active | Offre8 | 159 | 2G | 3.000000 | 2.250293 | 21.961465 | 2.872333 | 1.826910 | 27.907662 | ... | 12.954468 | 23.226754 | 32664.259485 | 21.276030 | 6.134130 | 0.689345 | 0.857627 | 0.392768 | 0.657034 | 96.0 |
| 96 | Suspended | Offre2 | 145 | 2G | 3.000000 | 3.473084 | 3.575555 | 0.298674 | 80.858548 | 5.349497 | ... | 153.474792 | 190.000000 | 82909.748831 | 26.213769 | 1.504337 | 0.164892 | 0.980014 | 0.467268 | 0.150409 | 97.0 |
| 97 | Active | Offre10 | 185 | 2G | 4.531545 | 2.142349 | 3.470616 | 6.400014 | 107.330103 | 5.199869 | ... | 157.487581 | 36.176280 | 141996.977868 | 42.321968 | 1.637815 | 1.848815 | 0.772398 | 0.407095 | 0.545033 | 98.0 |
| 98 | Active | Offre15 | 197 | 3G | 11.762926 | 3.474145 | 2.842419 | 10.629819 | 86.011908 | 0.124836 | ... | 159.332900 | 77.858228 | 12052.670191 | 46.685004 | 1.422138 | 1.944146 | 0.152620 | 0.243845 | 0.387906 | 99.0 |
| 99 | Suspended | Offre30 | 178 | 2G | 17.606096 | 0.019640 | 1.850818 | 0.184009 | 89.864928 | 5.858613 | ... | 18.339092 | 47.120168 | 134081.459601 | 10.649038 | 0.640634 | 2.204618 | 0.893336 | 0.293854 | 0.121368 | 100.0 |
100 rows × 64 columns
# Columns kept for the analysis, in the order they appear in the working frame.
selected_variables = [
    'DUREE_APPEL_TOT', 'nb_sms_tot', 'revenu_cdr_c', 'MNT_RECH',
    'FREQ_ACT_OUT', 'NB_JOUR_APPEL_TAXE', 'NB_SMS_TAXE', 'NB_RECH_SUP5',
    'MNT_TRANSFERT_OUT', 'revenu_voix', 'MNT_FORFAIT_DATA', 'NB_FORFAIT_VOIX',
    'OFFRE', 'VOLUME_SESSION', 'P_revenu_data', 'MNT_FORFAIT',
    'FREQ_USSD', 'P_FF_Data', 'Duree_onnet_tot', 'Duree_offnet_tot',
]
# Work on an independent copy so later column assignments never touch dataTelecom.
dataTelecomSelected = dataTelecom.loc[:, selected_variables].copy()
# Notebook preview: first 10 rows of the reduced frame.
dataTelecomSelected.head(n=10)
| DUREE_APPEL_TOT | nb_sms_tot | revenu_cdr_c | MNT_RECH | FREQ_ACT_OUT | NB_JOUR_APPEL_TAXE | NB_SMS_TAXE | NB_RECH_SUP5 | MNT_TRANSFERT_OUT | revenu_voix | MNT_FORFAIT_DATA | NB_FORFAIT_VOIX | OFFRE | VOLUME_SESSION | P_revenu_data | MNT_FORFAIT | FREQ_USSD | P_FF_Data | Duree_onnet_tot | Duree_offnet_tot | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 27.261836 | 3.685763 | 19.941537 | 12.306756 | 10.667904 | 10.658842 | 1.624059 | 2.00 | 8.877510 | 12.709375 | 2.059265 | 8.589441 | Offre30 | 200000.000000 | 0.136685 | 10.882398 | 190.000000 | 0.498091 | 30.583118 | 27.219712 |
| 1 | 19.883099 | 1.476121 | 3.908449 | 10.348607 | 13.650949 | 7.079320 | 7.059615 | 0.67 | 1.633333 | 3.000000 | 20.025208 | 5.685491 | Offre8 | 3337.992419 | 0.533695 | 1.049276 | 18.623378 | 0.096339 | 13.426765 | 32.571522 |
| 2 | 133.476368 | 6.162948 | 32.541849 | 3.939266 | 7.079317 | 4.367702 | 1.638128 | 0.00 | 0.011593 | 32.514156 | 5.397060 | 4.417241 | Offre24 | 107082.775926 | 0.799606 | 4.041068 | 66.449461 | 0.667540 | 6.217262 | 24.861880 |
| 3 | 35.053364 | 1.448782 | 4.145009 | 2.408488 | 17.423588 | 3.086766 | 0.016726 | 0.33 | 2.003034 | 3.821551 | 0.534564 | 0.107662 | Offre10 | 128700.752169 | 0.426526 | 6.441283 | 19.259807 | 0.407815 | 5.837191 | 37.779013 |
| 4 | 0.844956 | 1.131308 | 60.496821 | 10.870592 | 0.067246 | 0.403300 | 3.934964 | 0.33 | 2.755649 | 60.009385 | 0.031219 | 2.276588 | Offre1 | 75654.384291 | 0.319736 | 0.805758 | 38.034894 | 0.828712 | 10.395156 | 2.831625 |
| 5 | 26.779409 | 1.477695 | 3.625786 | 40.974329 | 1.374602 | 3.346885 | 1.038573 | 1.67 | 3.748974 | 3.000000 | 2.137682 | 1.144439 | Offre16 | 65253.482895 | 0.274582 | 1.253000 | 23.161888 | 0.335963 | 42.091494 | 90.670997 |
| 6 | 2.113332 | 4.376941 | 33.872104 | 13.929977 | 0.730859 | 5.357250 | 0.488122 | 0.00 | 1.547834 | 31.953274 | 0.830859 | 2.403743 | Offre10 | 45157.810902 | 0.566405 | 2.536099 | 190.000000 | 0.211775 | 19.369493 | 32.897501 |
| 7 | 5.922602 | 0.773370 | 15.836382 | 10.793736 | 1.060993 | 0.100707 | 4.178468 | 1.00 | 3.345468 | 10.331387 | 1.547844 | 15.417804 | Offre6 | 37165.463625 | 0.620034 | 0.385122 | 21.880660 | 0.205227 | 0.059797 | 4.935726 |
| 8 | 68.691901 | 1.314796 | 21.616162 | 0.516740 | 4.607737 | 2.281113 | 3.947526 | 6.67 | 11.943487 | 14.262607 | 19.898847 | 0.068199 | Offre23 | 200000.000000 | 0.087045 | 6.721631 | 82.203454 | 0.203176 | 21.697246 | 0.627422 |
| 9 | 37.872839 | 0.901089 | 9.153639 | 8.129576 | 15.158324 | 11.425205 | 0.109140 | 1.00 | 1.932114 | 5.870684 | 17.095836 | 1.245533 | Offre8 | 65233.389597 | 0.708388 | 1.570219 | 49.499533 | 0.247614 | 26.167306 | 59.049667 |
# Draw a random subset of at most 100 rows for the exploratory plots below.
# A fixed random_state makes the sample (and every table/plot derived from it)
# reproducible between runs; capping n at the frame length avoids the
# ValueError that sample() raises when asked for more rows than exist.
dataTelecomSelectedSample = dataTelecomSelected.sample(
    n=min(100, len(dataTelecomSelected)),
    random_state=42,
)
# Notebook preview: the sample holds at most 100 rows, so this shows all of it.
dataTelecomSelectedSample.head(100)
| DUREE_APPEL_TOT | nb_sms_tot | revenu_cdr_c | MNT_RECH | FREQ_ACT_OUT | NB_JOUR_APPEL_TAXE | NB_SMS_TAXE | NB_RECH_SUP5 | MNT_TRANSFERT_OUT | revenu_voix | MNT_FORFAIT_DATA | NB_FORFAIT_VOIX | OFFRE | VOLUME_SESSION | P_revenu_data | MNT_FORFAIT | FREQ_USSD | P_FF_Data | Duree_onnet_tot | Duree_offnet_tot | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 19257 | 30.090072 | 8.036033 | 6.628774 | 21.796702 | 4.390850 | 3.572585 | 2.230938 | 0.33 | 6.576157 | 5.973346 | 8.614453 | 2.382774 | Offre12 | 96933.695160 | 0.596794 | 10.756903 | 143.642413 | 0.014595 | 44.241075 | 34.738155 |
| 38166 | 577.035853 | 8.792087 | 8.468283 | 6.180827 | 1.926081 | 7.041788 | 0.025432 | 5.00 | 0.692279 | 3.000000 | 0.417457 | 12.181873 | Offre7 | 77383.665104 | 0.158712 | 3.796006 | 57.790428 | 0.516101 | 39.859481 | 8.263161 |
| 79907 | 126.227433 | 2.056120 | 9.733714 | 4.687255 | 4.980074 | 8.250577 | 0.144793 | 0.00 | 5.995859 | 3.000000 | 3.374781 | 8.150515 | Offre13 | 186211.083505 | 0.935933 | 1.266953 | 46.308896 | 0.645644 | 12.738402 | 15.707098 |
| 9564 | 0.042172 | 3.331580 | 10.712672 | 12.834082 | 13.015438 | 10.310605 | 6.283435 | 2.67 | 6.088506 | 10.611291 | 13.975665 | 7.204972 | Offre5 | 120468.526004 | 0.154198 | 1.421667 | 37.609093 | 0.708598 | 41.733081 | 3.447804 |
| 21204 | 39.975826 | 4.348320 | 9.195774 | 3.873290 | 7.742715 | 4.286317 | 3.506429 | 0.00 | 0.665510 | 7.944436 | 10.043437 | 1.146157 | Offre24 | 53403.671898 | 0.089654 | 4.067660 | 190.000000 | 0.841939 | 74.725692 | 22.666880 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 87907 | 10.833286 | 22.632752 | 7.434685 | 16.818580 | 1.330702 | 30.000000 | 2.258939 | 0.00 | 8.323257 | 6.718734 | 4.107238 | 1.454253 | Offre29 | 14172.653991 | 0.825286 | 1.269155 | 47.655680 | 0.445803 | 5.349727 | 9.497032 |
| 69701 | 439.174973 | 6.930242 | 10.579883 | 3.869375 | 3.062471 | 4.274253 | 0.701469 | 3.67 | 3.708045 | 8.752228 | 8.264411 | 5.081549 | Offre18 | 96018.498183 | 0.689052 | 3.429793 | 94.129045 | 0.630098 | 71.752719 | 6.520351 |
| 1612 | 34.254143 | 6.868878 | 14.567883 | 1.299701 | 0.257181 | 3.908668 | 2.735732 | 2.33 | 1.125163 | 3.000000 | 4.784903 | 18.786315 | Offre5 | 74526.004536 | 0.463658 | 3.038143 | 14.416576 | 0.491777 | 7.667571 | 16.154592 |
| 43159 | 81.030834 | 1.408071 | 3.985414 | 1.817113 | 5.775719 | 7.573036 | 4.426431 | 0.00 | 4.559158 | 3.000000 | 11.190860 | 0.234017 | Offre3 | 70646.213006 | 0.939057 | 10.895926 | 174.699935 | 0.606876 | 50.623577 | 58.188738 |
| 34600 | 2.058911 | 8.345012 | 3.530926 | 2.438283 | 7.900239 | 2.923418 | 2.510042 | 0.00 | 1.223616 | 3.000000 | 10.885480 | 0.874752 | Offre17 | 85687.964862 | 0.818186 | 4.716647 | 71.896790 | 0.599819 | 73.497484 | 6.421968 |
100 rows × 20 columns
#-----------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------[ Label Encoder ]---------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
# Replace the textual OFFRE labels with the integer codes produced by LabelEncoder.
# NOTE(review): the codes are arbitrary class indices, not an ordered quantity —
# confirm that treating OFFRE as numeric downstream is intended.
label_encoder = LabelEncoder()
offre_labels = dataTelecomSelectedSample['OFFRE']
encoded_offre = label_encoder.fit_transform(offre_labels)
dataTelecomSelectedSample['OFFRE'] = encoded_offre
# Notebook preview: first 100 rows with OFFRE now encoded.
dataTelecomSelectedSample.head(n=100)
| DUREE_APPEL_TOT | nb_sms_tot | revenu_cdr_c | MNT_RECH | FREQ_ACT_OUT | NB_JOUR_APPEL_TAXE | NB_SMS_TAXE | NB_RECH_SUP5 | MNT_TRANSFERT_OUT | revenu_voix | MNT_FORFAIT_DATA | NB_FORFAIT_VOIX | OFFRE | VOLUME_SESSION | P_revenu_data | MNT_FORFAIT | FREQ_USSD | P_FF_Data | Duree_onnet_tot | Duree_offnet_tot | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 19257 | 30.090072 | 8.036033 | 6.628774 | 21.796702 | 4.390850 | 3.572585 | 2.230938 | 0.33 | 6.576157 | 5.973346 | 8.614453 | 2.382774 | 3 | 96933.695160 | 0.596794 | 10.756903 | 143.642413 | 0.014595 | 44.241075 | 34.738155 |
| 38166 | 577.035853 | 8.792087 | 8.468283 | 6.180827 | 1.926081 | 7.041788 | 0.025432 | 5.00 | 0.692279 | 3.000000 | 0.417457 | 12.181873 | 26 | 77383.665104 | 0.158712 | 3.796006 | 57.790428 | 0.516101 | 39.859481 | 8.263161 |
| 79907 | 126.227433 | 2.056120 | 9.733714 | 4.687255 | 4.980074 | 8.250577 | 0.144793 | 0.00 | 5.995859 | 3.000000 | 3.374781 | 8.150515 | 4 | 186211.083505 | 0.935933 | 1.266953 | 46.308896 | 0.645644 | 12.738402 | 15.707098 |
| 9564 | 0.042172 | 3.331580 | 10.712672 | 12.834082 | 13.015438 | 10.310605 | 6.283435 | 2.67 | 6.088506 | 10.611291 | 13.975665 | 7.204972 | 24 | 120468.526004 | 0.154198 | 1.421667 | 37.609093 | 0.708598 | 41.733081 | 3.447804 |
| 21204 | 39.975826 | 4.348320 | 9.195774 | 3.873290 | 7.742715 | 4.286317 | 3.506429 | 0.00 | 0.665510 | 7.944436 | 10.043437 | 1.146157 | 16 | 53403.671898 | 0.089654 | 4.067660 | 190.000000 | 0.841939 | 74.725692 | 22.666880 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 87907 | 10.833286 | 22.632752 | 7.434685 | 16.818580 | 1.330702 | 30.000000 | 2.258939 | 0.00 | 8.323257 | 6.718734 | 4.107238 | 1.454253 | 20 | 14172.653991 | 0.825286 | 1.269155 | 47.655680 | 0.445803 | 5.349727 | 9.497032 |
| 69701 | 439.174973 | 6.930242 | 10.579883 | 3.869375 | 3.062471 | 4.274253 | 0.701469 | 3.67 | 3.708045 | 8.752228 | 8.264411 | 5.081549 | 9 | 96018.498183 | 0.689052 | 3.429793 | 94.129045 | 0.630098 | 71.752719 | 6.520351 |
| 1612 | 34.254143 | 6.868878 | 14.567883 | 1.299701 | 0.257181 | 3.908668 | 2.735732 | 2.33 | 1.125163 | 3.000000 | 4.784903 | 18.786315 | 24 | 74526.004536 | 0.463658 | 3.038143 | 14.416576 | 0.491777 | 7.667571 | 16.154592 |
| 43159 | 81.030834 | 1.408071 | 3.985414 | 1.817113 | 5.775719 | 7.573036 | 4.426431 | 0.00 | 4.559158 | 3.000000 | 11.190860 | 0.234017 | 21 | 70646.213006 | 0.939057 | 10.895926 | 174.699935 | 0.606876 | 50.623577 | 58.188738 |
| 34600 | 2.058911 | 8.345012 | 3.530926 | 2.438283 | 7.900239 | 2.923418 | 2.510042 | 0.00 | 1.223616 | 3.000000 | 10.885480 | 0.874752 | 8 | 85687.964862 | 0.818186 | 4.716647 | 71.896790 | 0.599819 | 73.497484 | 6.421968 |
100 rows × 20 columns
#-----------------------------------------------------------------------------------------------------------------------------
#----------------------------------------------[ PCA In Different Colors ]----------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
# Feature matrix: positional slice of the first 20 columns of the sampled frame.
X = dataTelecomSelectedSample.iloc[:, :20]
# Notebook display of the feature matrix.
X
| DUREE_APPEL_TOT | nb_sms_tot | revenu_cdr_c | MNT_RECH | FREQ_ACT_OUT | NB_JOUR_APPEL_TAXE | NB_SMS_TAXE | NB_RECH_SUP5 | MNT_TRANSFERT_OUT | revenu_voix | MNT_FORFAIT_DATA | NB_FORFAIT_VOIX | OFFRE | VOLUME_SESSION | P_revenu_data | MNT_FORFAIT | FREQ_USSD | P_FF_Data | Duree_onnet_tot | Duree_offnet_tot | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 19257 | 30.090072 | 8.036033 | 6.628774 | 21.796702 | 4.390850 | 3.572585 | 2.230938 | 0.33 | 6.576157 | 5.973346 | 8.614453 | 2.382774 | 3 | 96933.695160 | 0.596794 | 10.756903 | 143.642413 | 0.014595 | 44.241075 | 34.738155 |
| 38166 | 577.035853 | 8.792087 | 8.468283 | 6.180827 | 1.926081 | 7.041788 | 0.025432 | 5.00 | 0.692279 | 3.000000 | 0.417457 | 12.181873 | 26 | 77383.665104 | 0.158712 | 3.796006 | 57.790428 | 0.516101 | 39.859481 | 8.263161 |
| 79907 | 126.227433 | 2.056120 | 9.733714 | 4.687255 | 4.980074 | 8.250577 | 0.144793 | 0.00 | 5.995859 | 3.000000 | 3.374781 | 8.150515 | 4 | 186211.083505 | 0.935933 | 1.266953 | 46.308896 | 0.645644 | 12.738402 | 15.707098 |
| 9564 | 0.042172 | 3.331580 | 10.712672 | 12.834082 | 13.015438 | 10.310605 | 6.283435 | 2.67 | 6.088506 | 10.611291 | 13.975665 | 7.204972 | 24 | 120468.526004 | 0.154198 | 1.421667 | 37.609093 | 0.708598 | 41.733081 | 3.447804 |
| 21204 | 39.975826 | 4.348320 | 9.195774 | 3.873290 | 7.742715 | 4.286317 | 3.506429 | 0.00 | 0.665510 | 7.944436 | 10.043437 | 1.146157 | 16 | 53403.671898 | 0.089654 | 4.067660 | 190.000000 | 0.841939 | 74.725692 | 22.666880 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 87907 | 10.833286 | 22.632752 | 7.434685 | 16.818580 | 1.330702 | 30.000000 | 2.258939 | 0.00 | 8.323257 | 6.718734 | 4.107238 | 1.454253 | 20 | 14172.653991 | 0.825286 | 1.269155 | 47.655680 | 0.445803 | 5.349727 | 9.497032 |
| 69701 | 439.174973 | 6.930242 | 10.579883 | 3.869375 | 3.062471 | 4.274253 | 0.701469 | 3.67 | 3.708045 | 8.752228 | 8.264411 | 5.081549 | 9 | 96018.498183 | 0.689052 | 3.429793 | 94.129045 | 0.630098 | 71.752719 | 6.520351 |
| 1612 | 34.254143 | 6.868878 | 14.567883 | 1.299701 | 0.257181 | 3.908668 | 2.735732 | 2.33 | 1.125163 | 3.000000 | 4.784903 | 18.786315 | 24 | 74526.004536 | 0.463658 | 3.038143 | 14.416576 | 0.491777 | 7.667571 | 16.154592 |
| 43159 | 81.030834 | 1.408071 | 3.985414 | 1.817113 | 5.775719 | 7.573036 | 4.426431 | 0.00 | 4.559158 | 3.000000 | 11.190860 | 0.234017 | 21 | 70646.213006 | 0.939057 | 10.895926 | 174.699935 | 0.606876 | 50.623577 | 58.188738 |
| 34600 | 2.058911 | 8.345012 | 3.530926 | 2.438283 | 7.900239 | 2.923418 | 2.510042 | 0.00 | 1.223616 | 3.000000 | 10.885480 | 0.874752 | 8 | 85687.964862 | 0.818186 | 4.716647 | 71.896790 | 0.599819 | 73.497484 | 6.421968 |
100 rows × 20 columns
# Feature matrix: positional slice of the first 20 columns of the sampled frame.
# NOTE(review): this slice also contains the encoded OFFRE column that is used
# for colouring below — confirm that it should be part of X.
X = dataTelecomSelectedSample.iloc[:, :20]
# Cast the encoded OFFRE values to a categorical; its integer codes give one
# colour index per row.
colors = dataTelecomSelectedSample['OFFRE'].astype('category')
y = colors.cat.codes
print(X)
print(y)
# Pairwise scatter plots of every column of X, coloured by the offer code.
scatter_matrix(X, c=y)
plt.show()
DUREE_APPEL_TOT nb_sms_tot revenu_cdr_c MNT_RECH FREQ_ACT_OUT \
19257 30.090072 8.036033 6.628774 21.796702 4.390850
38166 577.035853 8.792087 8.468283 6.180827 1.926081
79907 126.227433 2.056120 9.733714 4.687255 4.980074
9564 0.042172 3.331580 10.712672 12.834082 13.015438
21204 39.975826 4.348320 9.195774 3.873290 7.742715
... ... ... ... ... ...
87907 10.833286 22.632752 7.434685 16.818580 1.330702
69701 439.174973 6.930242 10.579883 3.869375 3.062471
1612 34.254143 6.868878 14.567883 1.299701 0.257181
43159 81.030834 1.408071 3.985414 1.817113 5.775719
34600 2.058911 8.345012 3.530926 2.438283 7.900239
NB_JOUR_APPEL_TAXE NB_SMS_TAXE NB_RECH_SUP5 MNT_TRANSFERT_OUT \
19257 3.572585 2.230938 0.33 6.576157
38166 7.041788 0.025432 5.00 0.692279
79907 8.250577 0.144793 0.00 5.995859
9564 10.310605 6.283435 2.67 6.088506
21204 4.286317 3.506429 0.00 0.665510
... ... ... ... ...
87907 30.000000 2.258939 0.00 8.323257
69701 4.274253 0.701469 3.67 3.708045
1612 3.908668 2.735732 2.33 1.125163
43159 7.573036 4.426431 0.00 4.559158
34600 2.923418 2.510042 0.00 1.223616
revenu_voix MNT_FORFAIT_DATA NB_FORFAIT_VOIX OFFRE VOLUME_SESSION \
19257 5.973346 8.614453 2.382774 3 96933.695160
38166 3.000000 0.417457 12.181873 26 77383.665104
79907 3.000000 3.374781 8.150515 4 186211.083505
9564 10.611291 13.975665 7.204972 24 120468.526004
21204 7.944436 10.043437 1.146157 16 53403.671898
... ... ... ... ... ...
87907 6.718734 4.107238 1.454253 20 14172.653991
69701 8.752228 8.264411 5.081549 9 96018.498183
1612 3.000000 4.784903 18.786315 24 74526.004536
43159 3.000000 11.190860 0.234017 21 70646.213006
34600 3.000000 10.885480 0.874752 8 85687.964862
P_revenu_data MNT_FORFAIT FREQ_USSD P_FF_Data Duree_onnet_tot \
19257 0.596794 10.756903 143.642413 0.014595 44.241075
38166 0.158712 3.796006 57.790428 0.516101 39.859481
79907 0.935933 1.266953 46.308896 0.645644 12.738402
9564 0.154198 1.421667 37.609093 0.708598 41.733081
21204 0.089654 4.067660 190.000000 0.841939 74.725692
... ... ... ... ... ...
87907 0.825286 1.269155 47.655680 0.445803 5.349727
69701 0.689052 3.429793 94.129045 0.630098 71.752719
1612 0.463658 3.038143 14.416576 0.491777 7.667571
43159 0.939057 10.895926 174.699935 0.606876 50.623577
34600 0.818186 4.716647 71.896790 0.599819 73.497484
Duree_offnet_tot
19257 34.738155
38166 8.263161
79907 15.707098
9564 3.447804
21204 22.666880
... ...
87907 9.497032
69701 6.520351
1612 16.154592
43159 58.188738
34600 6.421968
[100 rows x 20 columns]
19257 3
38166 26
79907 4
9564 24
21204 16
..
87907 20
69701 9
1612 24
43159 21
34600 8
Length: 100, dtype: int8
# Three-component PCA of X, followed by its fitted diagnostics.
# NOTE(review): X is passed in unscaled. PCA is variance-based, so columns with
# a much larger magnitude than the others (e.g. VOLUME_SESSION) will dominate
# the components — consider standardizing X first; confirm the intent.
mypca = PCA(n_components=3)  # keep the first 3 principal components
# Fit the model and project X onto the kept components in a single pass.
data_output = mypca.fit_transform(X)
# Singular values associated with each kept component (these are not variances).
print(mypca.singular_values_)
# Fraction of the total variance explained by each kept component.
print(mypca.explained_variance_ratio_)
# Principal axes in feature space (one row per component), sorted by explained variance.
print(mypca.components_)
# Estimated noise covariance of the probabilistic PCA model.
print(mypca.noise_variance_)
[564941.22927323 1149.42806145 599.59527821] [9.99993781e-01 4.13956159e-06 1.12643767e-06] [[ 2.88108368e-04 3.12680649e-06 5.26615536e-06 4.54496005e-06 5.67742455e-06 6.40142272e-06 -8.68600113e-07 -2.65358195e-06 -1.44663598e-06 3.38948997e-06 9.45387932e-06 -1.54292830e-05 -2.23363931e-05 9.99999946e-01 -7.45822317e-07 6.61593278e-06 -1.47328229e-04 3.32983634e-07 -2.25310724e-05 2.94022681e-05] [ 9.97311085e-01 7.22821679e-03 1.92414007e-02 -7.17861907e-03 2.10169687e-03 -1.08034265e-02 -2.76084222e-03 3.43901500e-03 1.11028400e-03 1.89063725e-02 -4.02245801e-03 2.16942301e-03 8.66008066e-03 -2.91240162e-04 -4.64452049e-05 -6.84484088e-03 -3.14723181e-02 -2.98461637e-04 -3.56400744e-02 -4.46507837e-02] [ 4.08648966e-02 -2.19314886e-04 -5.59803698e-03 2.65465205e-02 5.30864593e-04 -1.05352589e-02 -1.05896720e-03 -5.80377396e-03 -8.62801219e-03 5.52831256e-03 -9.71655462e-03 6.03946452e-03 -7.12011394e-03 1.34764393e-04 4.04697045e-04 1.59891469e-02 9.75491868e-01 4.55087713e-04 2.05332097e-01 5.62102713e-02]] 180.69185527562942
# Correlation circle: position of every variable on the first two principal
# components of the standardized sample, with the projected rows drawn underneath.
# NOTE(review): the label-encoded OFFRE column is treated as a numeric variable
# here — confirm that this is intended.
pca = PCA(n_components=2)

# Standardize each column (population standard deviation, ddof=0) so that every
# variable enters the PCA with zero mean and unit variance.
data_mean = dataTelecomSelectedSample.mean(axis=0)
data_std = dataTelecomSelectedSample.std(axis=0, ddof=0)
data_standardized = (dataTelecomSelectedSample - data_mean) / data_std
pca_result = pca.fit_transform(data_standardized)

# Correlation between each variable and each component. For unit-variance
# inputs this is the eigenvector entry scaled by singular_value / sqrt(n_samples);
# the raw eigenvectors alone are not correlations.
variable_coordinates = (
    pca.components_.T * pca.singular_values_ / np.sqrt(len(data_standardized))
)
# Share of total variance carried by each of the two components (axis labels).
explained_variance_ratio = pca.explained_variance_ratio_

# Create the figure and axis
fig, ax = plt.subplots(figsize=(12, 12))

# Unit circle bounding the variable arrows
circle = plt.Circle((0, 0), radius=1, edgecolor='black', facecolor='None')
ax.add_patch(circle)
ax.set_xlim(-1.1, 1.1)
ax.set_ylim(-1.1, 1.1)
# Projected rows; points falling outside the axis limits above are not visible.
sns.scatterplot(x=pca_result[:, 0], y=pca_result[:, 1], color='yellow', alpha=0.5, legend=False, ax=ax)
ax.axhline(0, color='black', linewidth=0.5)
ax.axvline(0, color='black', linewidth=0.5)

# One arrow and one label per variable, cycling through a fixed palette
texts = []
colors = ['red', 'blue', 'green', 'orange', 'purple', 'brown', 'gray', 'cyan', 'magenta', 'yellow']
for i, variable in enumerate(dataTelecomSelectedSample.columns):
    color = colors[i % len(colors)]
    x_coord, y_coord = variable_coordinates[i]
    ax.arrow(0, 0, x_coord, y_coord, head_width=0.05, head_length=0.1, fc=color, ec=color)
    texts.append(ax.text(x_coord + 0.05, y_coord + 0.05, variable, fontsize=12, rotation=45, color=color))

# Adjust the position of variable labels to avoid overlap
adjust_text(texts, arrowprops=dict(arrowstyle="-", color='black'))

# Axis labels carry the percentage of variance explained by each component
ax.set_xlabel('PC1 ({}%)'.format(round(explained_variance_ratio[0] * 100, 2)), fontsize=14)
ax.set_ylabel('PC2 ({}%)'.format(round(explained_variance_ratio[1] * 100, 2)), fontsize=14)
ax.set_title('PCA Correlation Circle', fontsize=16)

# Equal aspect ratio so the unit circle is drawn as a circle
ax.set_aspect('equal', adjustable='box')

# Show the plot
plt.show()