#-----------------------------------------------------------------------------------------------------------------------------
#----------------------------------------------[ Libraries Imports ]----------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, confusion_matrix
#-----------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------[ Import DataBase ]-------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
# Load the raw telecom dataset from the local Excel workbook.
dataTelecom = pd.read_excel('S:\\TunisieTelecom\\TelecomData.xlsx')

# Columns kept for the analysis: the status label plus seven numeric
# usage/revenue indicators.
selected_variables = [
    'STATUT', 'FREQ_USSD', 'revenu_cdr_c', 'DUREE_APPEL_TOT', 'MNT_RECH',
    'VOLUME_SESSION', 'MNT_FORFAIT_DATA', 'FREQ_ACT_OUT',
]

# Work on an independent copy so later changes never touch dataTelecom.
dataTelecomSelected = dataTelecom[selected_variables].copy()

# DataFrame.dropna() returns a NEW frame; without assigning the result the
# call is a no-op and rows with missing values silently stay in the data.
dataTelecomSelected = dataTelecomSelected.dropna()

# Random preview sample. Cap n at the number of available rows, because
# sample() raises ValueError when n exceeds the frame length.
dataTelecomSelectedSample = dataTelecomSelected.sample(
    n=min(1000, len(dataTelecomSelected))
)
dataTelecomSelectedSample.head(100)
 | STATUT | FREQ_USSD | revenu_cdr_c | DUREE_APPEL_TOT | MNT_RECH | VOLUME_SESSION | MNT_FORFAIT_DATA | FREQ_ACT_OUT |
---|---|---|---|---|---|---|---|---|
29427 | Suspended | 110.624035 | 3.739770 | 238.831042 | 3.579538 | 75081.528201 | 4.256432 | 0.381994 |
55163 | Active | 24.685142 | 17.523111 | 109.456784 | 2.442152 | 63029.781037 | 11.406349 | 1.750035 |
70280 | Suspended | 7.035264 | 10.058542 | 72.785925 | 14.708630 | 59399.314440 | 17.656907 | 13.444205 |
12740 | Suspended | 32.597764 | 36.193214 | 288.289650 | 12.968408 | 151183.965547 | 1.358584 | 6.643898 |
39649 | Suspended | 81.990782 | 11.034485 | 76.323840 | 21.503203 | 44823.752872 | 5.163995 | 1.560619 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
24967 | Suspended | 69.579869 | 11.130183 | 20.005801 | 24.204721 | 123282.723577 | 11.640831 | 7.275574 |
62556 | Suspended | 30.501674 | 11.331631 | 17.609695 | 17.709013 | 77581.736682 | 5.169908 | 5.058838 |
39135 | Suspended | 190.000000 | 9.695310 | 25.369582 | 5.456302 | 200000.000000 | 8.712352 | 5.827595 |
39354 | Active | 6.544593 | 21.103313 | 19.962436 | 2.204177 | 150368.220575 | 10.247147 | 7.501596 |
2407 | Suspended | 190.000000 | 24.392931 | 23.412698 | 26.019909 | 200000.000000 | 4.762652 | 1.078338 |
100 rows × 8 columns
#-----------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------[ LogisticRegression ]------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
# Train and evaluate a logistic-regression classifier that predicts the
# STATUT label from the seven numeric usage/revenue indicators.

# Feature columns and the target column.
features = ['FREQ_USSD', 'revenu_cdr_c', 'DUREE_APPEL_TOT', 'MNT_RECH',
            'VOLUME_SESSION', 'MNT_FORFAIT_DATA', 'FREQ_ACT_OUT']
target = 'STATUT'  # class label to predict

# Split the data into features (X) and the target variable (y)
X = dataTelecomSelected[features]
y = dataTelecomSelected[target]

# Split FIRST, then impute: fitting the imputer on the full matrix would let
# test-set column means leak into the training data. The same test_size and
# random_state produce the same row split as splitting the imputed matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the per-column means from the training rows only, then apply them
# to both partitions.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train_raw)
X_test = imputer.transform(X_test_raw)

# Fully imputed feature matrix, kept for any later code that still reads
# X_imputed (now filled with training-set means).
X_imputed = imputer.transform(X)

# Create and fit the logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred = model.predict(X_test)

# Display classification results : A -> Active | S -> Suspended
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
# Score one hand-written customer record with the fitted imputer and model.
# Replace the values below with your own inputs; the keys must match the
# feature columns used for training.
single_record = {
    'FREQ_USSD': 184.00,
    'revenu_cdr_c': 0.3,
    'DUREE_APPEL_TOT': 7.82,
    'MNT_RECH': 0.33,
    'VOLUME_SESSION': 0,
    'MNT_FORFAIT_DATA': 0,
    'FREQ_ACT_OUT': 6.4,
}
# One-row frame; columns follow the insertion order of the record's keys.
new_data_to_test = pd.DataFrame([single_record], columns=list(single_record))

# Apply the already-fitted imputer (fills any missing value with the learned
# column mean), then classify the record.
new_data_imputed = imputer.transform(new_data_to_test)
predictions = model.predict(new_data_imputed)

# Display predictions : A -> Active | S -> Suspended
print("Predicted Status:", predictions)
Predicted Status: ['Active']