In [1]:
#-----------------------------------------------------------------------------------------------------------------------------
#----------------------------------------------[ Libraries Imports ]----------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, confusion_matrix
In [3]:
#-----------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------[ Import DataBase ]-------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
In [4]:
# Read the telecom workbook into a DataFrame.
# NOTE(review): this is an absolute path on the S: drive, so the notebook only
# runs on a machine where that drive and folder exist.
telecom_workbook_path = 'S:\\TunisieTelecom\\TelecomData.xlsx'
dataTelecom = pd.read_excel(telecom_workbook_path)
In [5]:
# Columns kept for the rest of the notebook: the STATUT column followed by the
# seven variables judged most important.
selected_variables = [
    'STATUT',
    'FREQ_USSD',
    'revenu_cdr_c',
    'DUREE_APPEL_TOT',
    'MNT_RECH',
    'VOLUME_SESSION',
    'MNT_FORFAIT_DATA',
    'FREQ_ACT_OUT',
]

# Take an independent copy of just those columns so later edits to the subset
# never touch the full dataTelecom frame.
dataTelecomSelected = dataTelecom.loc[:, selected_variables].copy()
In [6]:
# Preview a reproducible random sample of rows that have no missing values.
#
# DataFrame.dropna() returns a new frame instead of modifying the caller, so its
# result has to be captured for the filter to have any effect. It is stored under
# a separate name so dataTelecomSelected stays unchanged for the cells that
# follow (they impute missing values themselves).
complete_rows = dataTelecomSelected.dropna()

# sample(n=...) raises when n is larger than the number of rows, so cap the
# request at what is available; the fixed random_state makes the preview
# identical on every run.
dataTelecomSelectedSample = complete_rows.sample(
    n=min(1000, len(complete_rows)),
    random_state=42,
)
dataTelecomSelectedSample.head(100)
Out[6]:
STATUT FREQ_USSD revenu_cdr_c DUREE_APPEL_TOT MNT_RECH VOLUME_SESSION MNT_FORFAIT_DATA FREQ_ACT_OUT
29427 Suspended 110.624035 3.739770 238.831042 3.579538 75081.528201 4.256432 0.381994
55163 Active 24.685142 17.523111 109.456784 2.442152 63029.781037 11.406349 1.750035
70280 Suspended 7.035264 10.058542 72.785925 14.708630 59399.314440 17.656907 13.444205
12740 Suspended 32.597764 36.193214 288.289650 12.968408 151183.965547 1.358584 6.643898
39649 Suspended 81.990782 11.034485 76.323840 21.503203 44823.752872 5.163995 1.560619
... ... ... ... ... ... ... ... ...
24967 Suspended 69.579869 11.130183 20.005801 24.204721 123282.723577 11.640831 7.275574
62556 Suspended 30.501674 11.331631 17.609695 17.709013 77581.736682 5.169908 5.058838
39135 Suspended 190.000000 9.695310 25.369582 5.456302 200000.000000 8.712352 5.827595
39354 Active 6.544593 21.103313 19.962436 2.204177 150368.220575 10.247147 7.501596
2407 Suspended 190.000000 24.392931 23.412698 26.019909 200000.000000 4.762652 1.078338

100 rows × 8 columns

In [7]:
#-----------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------[ LogisticRegression ]------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------
In [8]:
# Train and evaluate a logistic regression classifier that predicts STATUT.

# Predictor columns and the label column.
features = ['FREQ_USSD', 'revenu_cdr_c', 'DUREE_APPEL_TOT', 'MNT_RECH',
            'VOLUME_SESSION', 'MNT_FORFAIT_DATA', 'FREQ_ACT_OUT']
target = 'STATUT'  # label column, spelled exactly as in selected_variables

# Split the data into features (X) and the target variable (y)
X = dataTelecomSelected[features]
y = dataTelecomSelected[target]

# Split BEFORE imputing: the column means used to fill missing values must be
# learned from the training rows only, otherwise statistics of the held-out rows
# leak into training and the reported metrics are optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the mean imputer on the training rows, then apply the same means to the
# test rows.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train_raw)
X_test = imputer.transform(X_test_raw)

# Whole feature matrix filled with the training means; kept under its original
# name in case another cell references X_imputed.
X_imputed = imputer.transform(X)

# Create and fit the logistic regression model
# NOTE(review): the features are not scaled and the solver runs with its default
# settings; check the cell output for a convergence warning.
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Display classification results; the classes are the STATUT values
# (e.g. Active / Suspended).
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
In [9]:
# Score one hand-written observation with the fitted imputer and model.

# A single record keyed by feature name; replace the values with your own input.
new_observation = {
    'FREQ_USSD': 184.00,
    'revenu_cdr_c': 0.3,
    'DUREE_APPEL_TOT': 7.82,
    'MNT_RECH': 0.33,
    'VOLUME_SESSION': 0,
    'MNT_FORFAIT_DATA': 0,
    'FREQ_ACT_OUT': 6.4,
}
new_data_to_test = pd.DataFrame([new_observation])

# Run the record through the already-fitted imputer so any missing value is
# filled the same way as during training.
new_data_imputed = imputer.transform(new_data_to_test)

# Predict the status for the imputed record.
predictions = model.predict(new_data_imputed)

# Show the predicted STATUT value(s), e.g. Active / Suspended.
print("Predicted Status:", predictions)
Predicted Status: ['Active']