In [1]:
#-----------------------------------------------------------------------------------------------------------------------------
#------------------------------------------------[ Libraries Import ]---------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
In [2]:
from sklearn.preprocessing import LabelEncoder
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from adjustText import adjust_text
import seaborn as sns
In [3]:
#-----------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------[ Import DataBase ]-------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
In [4]:
dataTelecom=pd.read_excel('S:\\TunisieTelecom\\TelecomData.xlsx')
dataTelecom.head(100)
Out[4]:
STATUT OFFRE ANC_M HANDSET revenu_voix revenu_inter NB_JOUR_ACTIVITE_TAXE NB_JOUR_APPEL_TAXE DUREE_APPEL_TOT DUREE_APPEL_TAXEE ... FREQ_USSD_VOIX FREQ_USSD_SMS VOLUME_SESSION VOLUME_SESSION_WEEKEND REVENU_VAS ARPU P_revenu_data P_revenu_voix_c P_revenu_vas_c id_client
0 Active Offre30 123 2G 12.709375 1.464447 10.366042 10.658842 27.261836 2.125971 ... 28.679570 146.930634 200000.000000 54.025577 1.059822 4.325398 0.136685 0.943095 0.279541 1.0
1 Active Offre8 98 2G 3.000000 0.756078 1.330736 7.079320 19.883099 7.735475 ... 28.490597 169.219363 3337.992419 24.789260 1.411803 2.677563 0.533695 0.540374 0.712010 2.0
2 Active Offre24 90 4G 32.514156 0.681197 0.158160 4.367702 133.476368 7.681088 ... 190.000000 44.403308 107082.775926 160.031496 18.195224 1.954007 0.799606 0.071368 0.376754 3.0
3 Active Offre10 226 2G 3.821551 5.265345 4.003452 3.086766 35.053364 8.540951 ... 19.412661 190.000000 128700.752169 102.668979 17.746873 2.496790 0.426526 0.314082 0.676112 4.0
4 Active Offre1 139 2G 60.009385 1.957144 5.689241 0.403300 0.844956 12.355853 ... 81.567146 6.365788 75654.384291 0.341570 7.280861 2.291381 0.319736 0.600057 0.719450 5.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
95 Active Offre8 159 2G 3.000000 2.250293 21.961465 2.872333 1.826910 27.907662 ... 12.954468 23.226754 32664.259485 21.276030 6.134130 0.689345 0.857627 0.392768 0.657034 96.0
96 Suspended Offre2 145 2G 3.000000 3.473084 3.575555 0.298674 80.858548 5.349497 ... 153.474792 190.000000 82909.748831 26.213769 1.504337 0.164892 0.980014 0.467268 0.150409 97.0
97 Active Offre10 185 2G 4.531545 2.142349 3.470616 6.400014 107.330103 5.199869 ... 157.487581 36.176280 141996.977868 42.321968 1.637815 1.848815 0.772398 0.407095 0.545033 98.0
98 Active Offre15 197 3G 11.762926 3.474145 2.842419 10.629819 86.011908 0.124836 ... 159.332900 77.858228 12052.670191 46.685004 1.422138 1.944146 0.152620 0.243845 0.387906 99.0
99 Suspended Offre30 178 2G 17.606096 0.019640 1.850818 0.184009 89.864928 5.858613 ... 18.339092 47.120168 134081.459601 10.649038 0.640634 2.204618 0.893336 0.293854 0.121368 100.0

100 rows × 64 columns

In [5]:
selected_variables = [
    'DUREE_APPEL_TOT',
    'nb_sms_tot' ,
    'revenu_cdr_c',
    'MNT_RECH',
    'FREQ_ACT_OUT',
    'NB_JOUR_APPEL_TAXE' ,
    'NB_SMS_TAXE' ,
    'NB_RECH_SUP5',
    'MNT_TRANSFERT_OUT' ,
    'revenu_voix' ,
    'MNT_FORFAIT_DATA',
    'NB_FORFAIT_VOIX' ,
    'OFFRE',
    'VOLUME_SESSION',
    'P_revenu_data',
    'MNT_FORFAIT',
    'FREQ_USSD',
    'P_FF_Data' ,
    'Duree_onnet_tot',
    'Duree_offnet_tot' ,
]
# Select the desired variables from the dataframe
dataTelecomSelected = dataTelecom[selected_variables].copy()
dataTelecomSelected.head(10)
Out[5]:
DUREE_APPEL_TOT nb_sms_tot revenu_cdr_c MNT_RECH FREQ_ACT_OUT NB_JOUR_APPEL_TAXE NB_SMS_TAXE NB_RECH_SUP5 MNT_TRANSFERT_OUT revenu_voix MNT_FORFAIT_DATA NB_FORFAIT_VOIX OFFRE VOLUME_SESSION P_revenu_data MNT_FORFAIT FREQ_USSD P_FF_Data Duree_onnet_tot Duree_offnet_tot
0 27.261836 3.685763 19.941537 12.306756 10.667904 10.658842 1.624059 2.00 8.877510 12.709375 2.059265 8.589441 Offre30 200000.000000 0.136685 10.882398 190.000000 0.498091 30.583118 27.219712
1 19.883099 1.476121 3.908449 10.348607 13.650949 7.079320 7.059615 0.67 1.633333 3.000000 20.025208 5.685491 Offre8 3337.992419 0.533695 1.049276 18.623378 0.096339 13.426765 32.571522
2 133.476368 6.162948 32.541849 3.939266 7.079317 4.367702 1.638128 0.00 0.011593 32.514156 5.397060 4.417241 Offre24 107082.775926 0.799606 4.041068 66.449461 0.667540 6.217262 24.861880
3 35.053364 1.448782 4.145009 2.408488 17.423588 3.086766 0.016726 0.33 2.003034 3.821551 0.534564 0.107662 Offre10 128700.752169 0.426526 6.441283 19.259807 0.407815 5.837191 37.779013
4 0.844956 1.131308 60.496821 10.870592 0.067246 0.403300 3.934964 0.33 2.755649 60.009385 0.031219 2.276588 Offre1 75654.384291 0.319736 0.805758 38.034894 0.828712 10.395156 2.831625
5 26.779409 1.477695 3.625786 40.974329 1.374602 3.346885 1.038573 1.67 3.748974 3.000000 2.137682 1.144439 Offre16 65253.482895 0.274582 1.253000 23.161888 0.335963 42.091494 90.670997
6 2.113332 4.376941 33.872104 13.929977 0.730859 5.357250 0.488122 0.00 1.547834 31.953274 0.830859 2.403743 Offre10 45157.810902 0.566405 2.536099 190.000000 0.211775 19.369493 32.897501
7 5.922602 0.773370 15.836382 10.793736 1.060993 0.100707 4.178468 1.00 3.345468 10.331387 1.547844 15.417804 Offre6 37165.463625 0.620034 0.385122 21.880660 0.205227 0.059797 4.935726
8 68.691901 1.314796 21.616162 0.516740 4.607737 2.281113 3.947526 6.67 11.943487 14.262607 19.898847 0.068199 Offre23 200000.000000 0.087045 6.721631 82.203454 0.203176 21.697246 0.627422
9 37.872839 0.901089 9.153639 8.129576 15.158324 11.425205 0.109140 1.00 1.932114 5.870684 17.095836 1.245533 Offre8 65233.389597 0.708388 1.570219 49.499533 0.247614 26.167306 59.049667
In [6]:
dataTelecomSelectedSample=dataTelecomSelected.sample(100)
dataTelecomSelectedSample.head(150)
Out[6]:
DUREE_APPEL_TOT nb_sms_tot revenu_cdr_c MNT_RECH FREQ_ACT_OUT NB_JOUR_APPEL_TAXE NB_SMS_TAXE NB_RECH_SUP5 MNT_TRANSFERT_OUT revenu_voix MNT_FORFAIT_DATA NB_FORFAIT_VOIX OFFRE VOLUME_SESSION P_revenu_data MNT_FORFAIT FREQ_USSD P_FF_Data Duree_onnet_tot Duree_offnet_tot
19257 30.090072 8.036033 6.628774 21.796702 4.390850 3.572585 2.230938 0.33 6.576157 5.973346 8.614453 2.382774 Offre12 96933.695160 0.596794 10.756903 143.642413 0.014595 44.241075 34.738155
38166 577.035853 8.792087 8.468283 6.180827 1.926081 7.041788 0.025432 5.00 0.692279 3.000000 0.417457 12.181873 Offre7 77383.665104 0.158712 3.796006 57.790428 0.516101 39.859481 8.263161
79907 126.227433 2.056120 9.733714 4.687255 4.980074 8.250577 0.144793 0.00 5.995859 3.000000 3.374781 8.150515 Offre13 186211.083505 0.935933 1.266953 46.308896 0.645644 12.738402 15.707098
9564 0.042172 3.331580 10.712672 12.834082 13.015438 10.310605 6.283435 2.67 6.088506 10.611291 13.975665 7.204972 Offre5 120468.526004 0.154198 1.421667 37.609093 0.708598 41.733081 3.447804
21204 39.975826 4.348320 9.195774 3.873290 7.742715 4.286317 3.506429 0.00 0.665510 7.944436 10.043437 1.146157 Offre24 53403.671898 0.089654 4.067660 190.000000 0.841939 74.725692 22.666880
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
87907 10.833286 22.632752 7.434685 16.818580 1.330702 30.000000 2.258939 0.00 8.323257 6.718734 4.107238 1.454253 Offre29 14172.653991 0.825286 1.269155 47.655680 0.445803 5.349727 9.497032
69701 439.174973 6.930242 10.579883 3.869375 3.062471 4.274253 0.701469 3.67 3.708045 8.752228 8.264411 5.081549 Offre18 96018.498183 0.689052 3.429793 94.129045 0.630098 71.752719 6.520351
1612 34.254143 6.868878 14.567883 1.299701 0.257181 3.908668 2.735732 2.33 1.125163 3.000000 4.784903 18.786315 Offre5 74526.004536 0.463658 3.038143 14.416576 0.491777 7.667571 16.154592
43159 81.030834 1.408071 3.985414 1.817113 5.775719 7.573036 4.426431 0.00 4.559158 3.000000 11.190860 0.234017 Offre3 70646.213006 0.939057 10.895926 174.699935 0.606876 50.623577 58.188738
34600 2.058911 8.345012 3.530926 2.438283 7.900239 2.923418 2.510042 0.00 1.223616 3.000000 10.885480 0.874752 Offre17 85687.964862 0.818186 4.716647 71.896790 0.599819 73.497484 6.421968

100 rows × 20 columns

In [7]:
#-----------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------[ Label Encoder ]---------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
In [8]:
label_encoder = LabelEncoder()
encoded_offre = label_encoder.fit_transform(dataTelecomSelectedSample['OFFRE'])
dataTelecomSelectedSample['OFFRE']=encoded_offre
dataTelecomSelectedSample.head(100)
Out[8]:
DUREE_APPEL_TOT nb_sms_tot revenu_cdr_c MNT_RECH FREQ_ACT_OUT NB_JOUR_APPEL_TAXE NB_SMS_TAXE NB_RECH_SUP5 MNT_TRANSFERT_OUT revenu_voix MNT_FORFAIT_DATA NB_FORFAIT_VOIX OFFRE VOLUME_SESSION P_revenu_data MNT_FORFAIT FREQ_USSD P_FF_Data Duree_onnet_tot Duree_offnet_tot
19257 30.090072 8.036033 6.628774 21.796702 4.390850 3.572585 2.230938 0.33 6.576157 5.973346 8.614453 2.382774 3 96933.695160 0.596794 10.756903 143.642413 0.014595 44.241075 34.738155
38166 577.035853 8.792087 8.468283 6.180827 1.926081 7.041788 0.025432 5.00 0.692279 3.000000 0.417457 12.181873 26 77383.665104 0.158712 3.796006 57.790428 0.516101 39.859481 8.263161
79907 126.227433 2.056120 9.733714 4.687255 4.980074 8.250577 0.144793 0.00 5.995859 3.000000 3.374781 8.150515 4 186211.083505 0.935933 1.266953 46.308896 0.645644 12.738402 15.707098
9564 0.042172 3.331580 10.712672 12.834082 13.015438 10.310605 6.283435 2.67 6.088506 10.611291 13.975665 7.204972 24 120468.526004 0.154198 1.421667 37.609093 0.708598 41.733081 3.447804
21204 39.975826 4.348320 9.195774 3.873290 7.742715 4.286317 3.506429 0.00 0.665510 7.944436 10.043437 1.146157 16 53403.671898 0.089654 4.067660 190.000000 0.841939 74.725692 22.666880
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
87907 10.833286 22.632752 7.434685 16.818580 1.330702 30.000000 2.258939 0.00 8.323257 6.718734 4.107238 1.454253 20 14172.653991 0.825286 1.269155 47.655680 0.445803 5.349727 9.497032
69701 439.174973 6.930242 10.579883 3.869375 3.062471 4.274253 0.701469 3.67 3.708045 8.752228 8.264411 5.081549 9 96018.498183 0.689052 3.429793 94.129045 0.630098 71.752719 6.520351
1612 34.254143 6.868878 14.567883 1.299701 0.257181 3.908668 2.735732 2.33 1.125163 3.000000 4.784903 18.786315 24 74526.004536 0.463658 3.038143 14.416576 0.491777 7.667571 16.154592
43159 81.030834 1.408071 3.985414 1.817113 5.775719 7.573036 4.426431 0.00 4.559158 3.000000 11.190860 0.234017 21 70646.213006 0.939057 10.895926 174.699935 0.606876 50.623577 58.188738
34600 2.058911 8.345012 3.530926 2.438283 7.900239 2.923418 2.510042 0.00 1.223616 3.000000 10.885480 0.874752 8 85687.964862 0.818186 4.716647 71.896790 0.599819 73.497484 6.421968

100 rows × 20 columns

In [9]:
#-----------------------------------------------------------------------------------------------------------------------------
#----------------------------------------------[ PCA In Different Colors ]----------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
In [10]:
X = dataTelecomSelectedSample.iloc[:,0:20]
X
Out[10]:
DUREE_APPEL_TOT nb_sms_tot revenu_cdr_c MNT_RECH FREQ_ACT_OUT NB_JOUR_APPEL_TAXE NB_SMS_TAXE NB_RECH_SUP5 MNT_TRANSFERT_OUT revenu_voix MNT_FORFAIT_DATA NB_FORFAIT_VOIX OFFRE VOLUME_SESSION P_revenu_data MNT_FORFAIT FREQ_USSD P_FF_Data Duree_onnet_tot Duree_offnet_tot
19257 30.090072 8.036033 6.628774 21.796702 4.390850 3.572585 2.230938 0.33 6.576157 5.973346 8.614453 2.382774 3 96933.695160 0.596794 10.756903 143.642413 0.014595 44.241075 34.738155
38166 577.035853 8.792087 8.468283 6.180827 1.926081 7.041788 0.025432 5.00 0.692279 3.000000 0.417457 12.181873 26 77383.665104 0.158712 3.796006 57.790428 0.516101 39.859481 8.263161
79907 126.227433 2.056120 9.733714 4.687255 4.980074 8.250577 0.144793 0.00 5.995859 3.000000 3.374781 8.150515 4 186211.083505 0.935933 1.266953 46.308896 0.645644 12.738402 15.707098
9564 0.042172 3.331580 10.712672 12.834082 13.015438 10.310605 6.283435 2.67 6.088506 10.611291 13.975665 7.204972 24 120468.526004 0.154198 1.421667 37.609093 0.708598 41.733081 3.447804
21204 39.975826 4.348320 9.195774 3.873290 7.742715 4.286317 3.506429 0.00 0.665510 7.944436 10.043437 1.146157 16 53403.671898 0.089654 4.067660 190.000000 0.841939 74.725692 22.666880
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
87907 10.833286 22.632752 7.434685 16.818580 1.330702 30.000000 2.258939 0.00 8.323257 6.718734 4.107238 1.454253 20 14172.653991 0.825286 1.269155 47.655680 0.445803 5.349727 9.497032
69701 439.174973 6.930242 10.579883 3.869375 3.062471 4.274253 0.701469 3.67 3.708045 8.752228 8.264411 5.081549 9 96018.498183 0.689052 3.429793 94.129045 0.630098 71.752719 6.520351
1612 34.254143 6.868878 14.567883 1.299701 0.257181 3.908668 2.735732 2.33 1.125163 3.000000 4.784903 18.786315 24 74526.004536 0.463658 3.038143 14.416576 0.491777 7.667571 16.154592
43159 81.030834 1.408071 3.985414 1.817113 5.775719 7.573036 4.426431 0.00 4.559158 3.000000 11.190860 0.234017 21 70646.213006 0.939057 10.895926 174.699935 0.606876 50.623577 58.188738
34600 2.058911 8.345012 3.530926 2.438283 7.900239 2.923418 2.510042 0.00 1.223616 3.000000 10.885480 0.874752 8 85687.964862 0.818186 4.716647 71.896790 0.599819 73.497484 6.421968

100 rows × 20 columns

In [11]:
# Les 20 colonnes de mesures qui décrivent différents dataTelecomSelectedSample

X = dataTelecomSelectedSample.iloc[:,0:20]

colors = dataTelecomSelectedSample.OFFRE.astype('category')

# Les couleurs déduites de chaque espèce de dataTelecomSelectedSample

y = colors.cat.codes

print(X) ; print(y)


scatter_matrix(X,c=y) ; plt.show()
       DUREE_APPEL_TOT  nb_sms_tot  revenu_cdr_c   MNT_RECH  FREQ_ACT_OUT  \
19257        30.090072    8.036033      6.628774  21.796702      4.390850   
38166       577.035853    8.792087      8.468283   6.180827      1.926081   
79907       126.227433    2.056120      9.733714   4.687255      4.980074   
9564          0.042172    3.331580     10.712672  12.834082     13.015438   
21204        39.975826    4.348320      9.195774   3.873290      7.742715   
...                ...         ...           ...        ...           ...   
87907        10.833286   22.632752      7.434685  16.818580      1.330702   
69701       439.174973    6.930242     10.579883   3.869375      3.062471   
1612         34.254143    6.868878     14.567883   1.299701      0.257181   
43159        81.030834    1.408071      3.985414   1.817113      5.775719   
34600         2.058911    8.345012      3.530926   2.438283      7.900239   

       NB_JOUR_APPEL_TAXE  NB_SMS_TAXE  NB_RECH_SUP5  MNT_TRANSFERT_OUT  \
19257            3.572585     2.230938          0.33           6.576157   
38166            7.041788     0.025432          5.00           0.692279   
79907            8.250577     0.144793          0.00           5.995859   
9564            10.310605     6.283435          2.67           6.088506   
21204            4.286317     3.506429          0.00           0.665510   
...                   ...          ...           ...                ...   
87907           30.000000     2.258939          0.00           8.323257   
69701            4.274253     0.701469          3.67           3.708045   
1612             3.908668     2.735732          2.33           1.125163   
43159            7.573036     4.426431          0.00           4.559158   
34600            2.923418     2.510042          0.00           1.223616   

       revenu_voix  MNT_FORFAIT_DATA  NB_FORFAIT_VOIX  OFFRE  VOLUME_SESSION  \
19257     5.973346          8.614453         2.382774      3    96933.695160   
38166     3.000000          0.417457        12.181873     26    77383.665104   
79907     3.000000          3.374781         8.150515      4   186211.083505   
9564     10.611291         13.975665         7.204972     24   120468.526004   
21204     7.944436         10.043437         1.146157     16    53403.671898   
...            ...               ...              ...    ...             ...   
87907     6.718734          4.107238         1.454253     20    14172.653991   
69701     8.752228          8.264411         5.081549      9    96018.498183   
1612      3.000000          4.784903        18.786315     24    74526.004536   
43159     3.000000         11.190860         0.234017     21    70646.213006   
34600     3.000000         10.885480         0.874752      8    85687.964862   

       P_revenu_data  MNT_FORFAIT   FREQ_USSD  P_FF_Data  Duree_onnet_tot  \
19257       0.596794    10.756903  143.642413   0.014595        44.241075   
38166       0.158712     3.796006   57.790428   0.516101        39.859481   
79907       0.935933     1.266953   46.308896   0.645644        12.738402   
9564        0.154198     1.421667   37.609093   0.708598        41.733081   
21204       0.089654     4.067660  190.000000   0.841939        74.725692   
...              ...          ...         ...        ...              ...   
87907       0.825286     1.269155   47.655680   0.445803         5.349727   
69701       0.689052     3.429793   94.129045   0.630098        71.752719   
1612        0.463658     3.038143   14.416576   0.491777         7.667571   
43159       0.939057    10.895926  174.699935   0.606876        50.623577   
34600       0.818186     4.716647   71.896790   0.599819        73.497484   

       Duree_offnet_tot  
19257         34.738155  
38166          8.263161  
79907         15.707098  
9564           3.447804  
21204         22.666880  
...                 ...  
87907          9.497032  
69701          6.520351  
1612          16.154592  
43159         58.188738  
34600          6.421968  

[100 rows x 20 columns]
19257     3
38166    26
79907     4
9564     24
21204    16
         ..
87907    20
69701     9
1612     24
43159    21
34600     8
Length: 100, dtype: int8
In [12]:
mypca = PCA(n_components=3)  # Here we set the number of components to keep as 3

# PCA Model

mypca.fit(X)

# Percentage of variance explained by each of the selected components.

print(mypca.singular_values_)  # Variance values

print(mypca.explained_variance_ratio_)  # Percentages

# Principal axes in the feature space, representing the directions of maximum variance in the data. Components are sorted by explained variance.

print(mypca.components_)

# PCA Results

data_output = mypca.fit_transform(X)

# Estimated noise related to covariance

print(mypca.noise_variance_)
[564941.22927323   1149.42806145    599.59527821]
[9.99993781e-01 4.13956159e-06 1.12643767e-06]
[[ 2.88108368e-04  3.12680649e-06  5.26615536e-06  4.54496005e-06
   5.67742455e-06  6.40142272e-06 -8.68600113e-07 -2.65358195e-06
  -1.44663598e-06  3.38948997e-06  9.45387932e-06 -1.54292830e-05
  -2.23363931e-05  9.99999946e-01 -7.45822317e-07  6.61593278e-06
  -1.47328229e-04  3.32983634e-07 -2.25310724e-05  2.94022681e-05]
 [ 9.97311085e-01  7.22821679e-03  1.92414007e-02 -7.17861907e-03
   2.10169687e-03 -1.08034265e-02 -2.76084222e-03  3.43901500e-03
   1.11028400e-03  1.89063725e-02 -4.02245801e-03  2.16942301e-03
   8.66008066e-03 -2.91240162e-04 -4.64452049e-05 -6.84484088e-03
  -3.14723181e-02 -2.98461637e-04 -3.56400744e-02 -4.46507837e-02]
 [ 4.08648966e-02 -2.19314886e-04 -5.59803698e-03  2.65465205e-02
   5.30864593e-04 -1.05352589e-02 -1.05896720e-03 -5.80377396e-03
  -8.62801219e-03  5.52831256e-03 -9.71655462e-03  6.03946452e-03
  -7.12011394e-03  1.34764393e-04  4.04697045e-04  1.59891469e-02
   9.75491868e-01  4.55087713e-04  2.05332097e-01  5.62102713e-02]]
180.69185527562942
In [13]:
pca = PCA(n_components=2)
pca.fit(dataTelecomSelectedSample)

data_mean = np.mean(dataTelecomSelectedSample, axis=0)
data_std = np.std(dataTelecomSelectedSample, axis=0)
data_standardized = (dataTelecomSelectedSample - data_mean) / data_std

pca_result=pca.fit_transform(data_standardized)
# Get the coordinates of the variables on the first two principal components
variable_coordinates = pca.components_.T

# Calculate the explained variance ratio
explained_variance_ratio = pca.explained_variance_ratio_

# Create the figure and axis
fig, ax = plt.subplots(figsize=(12, 12))

# Plot the correlation circle
circle = plt.Circle((0, 0), radius=1, edgecolor='black', facecolor='None')
ax.add_patch(circle)
ax.set_xlim(-1.1, 1.1)
ax.set_ylim(-1.1, 1.1)

sns.scatterplot(x=pca_result[:, 0], y=pca_result[:, 1], color='yellow', alpha=0.5, legend=False)

ax.axhline(0, color='black', linewidth=0.5)
ax.axvline(0, color='black', linewidth=0.5)

# Add variable labels to the plot
texts = []
colors = ['red', 'blue', 'green', 'orange', 'purple', 'brown', 'gray', 'cyan', 'magenta', 'yellow']

for i, variable in enumerate(dataTelecomSelectedSample.columns[:]):
    color=colors[i % len(colors)]
    arrow = ax.arrow(0, 0, variable_coordinates[i, 0], variable_coordinates[i, 1], head_width=0.05, head_length=0.1, fc=color, ec=color)
    texts.append(plt.text(variable_coordinates[i, 0] + 0.05, variable_coordinates[i, 1] + 0.05, variable, fontsize=12, rotation=45,color=color))

# Adjust the position of variable labels to avoid overlap
adjust_text(texts, arrowprops=dict(arrowstyle="-", color='black'))

# Add axis labels
ax.set_xlabel('PC1 ({}%)'.format(round(explained_variance_ratio[0] * 100, 2)), fontsize=14)
ax.set_ylabel('PC2 ({}%)'.format(round(explained_variance_ratio[1] * 100, 2)), fontsize=14)

# Set plot title
plt.title('PCA Correlation Circle', fontsize=16)

# Equal aspect ratio
ax.set_aspect('equal', adjustable='box')

# Show the plot
plt.show()