#-----------------------------------------------------------------------------------------------------------------------------
#----------------------------------------------[ Libraries Imports ]----------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, confusion_matrix


#-----------------------------------------------------------------------------------------------------------------------------
#---------------------------------------------------[ Import DataBase ]-------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------


# Load the Excel workbook into a DataFrame (hard-coded Windows path).
dataTelecom = pd.read_excel('S:\\TunisieTelecom\\TelecomData.xlsx')


# Columns kept for the analysis: the 'STATUT' column plus seven numeric
# usage/revenue indicators.
selected_variables = [
    'STATUT', 'FREQ_USSD', 'revenu_cdr_c', 'DUREE_APPEL_TOT', 'MNT_RECH',
    'VOLUME_SESSION', 'MNT_FORFAIT_DATA', 'FREQ_ACT_OUT',
]

# Work on an independent copy so later changes never touch dataTelecom.
dataTelecomSelected = dataTelecom[selected_variables].copy()

# DataFrame.dropna() returns a NEW frame; the original call discarded that
# result, so no row was ever removed.  Only rows without a 'STATUT' value are
# dropped here: they cannot be used as labelled examples, whereas gaps in the
# numeric columns are filled later by the mean imputation step.
dataTelecomSelected = dataTelecomSelected.dropna(subset=['STATUT'])

# Random preview of the data.  sample() raises ValueError when n exceeds the
# number of rows (sampling without replacement), so cap n at the frame size.
preview_size = min(1000, len(dataTelecomSelected))
dataTelecomSelectedSample = dataTelecomSelected.sample(n=preview_size)

# A bare `.head(100)` expression only renders inside a notebook cell; print it
# so the preview is also visible when the file is run as a script.
print(dataTelecomSelectedSample.head(100))


#-----------------------------------------------------------------------------------------------------------------------------
#-------------------------------------------------[ LogisticRegression ]------------------------------------------------------
#-----------------------------------------------------------------------------------------------------------------------------


# Select features and the target variable
features = ['FREQ_USSD', 'revenu_cdr_c', 'DUREE_APPEL_TOT', 'MNT_RECH',
            'VOLUME_SESSION', 'MNT_FORFAIT_DATA', 'FREQ_ACT_OUT']
target = 'STATUT'  # label column the classifier learns to predict

# Split the data into features (X) and the target variable (y)
X = dataTelecomSelected[features]
y = dataTelecomSelected[target]

# Split BEFORE imputing.  Fitting the imputer on the full dataset (as the
# previous version did) lets the test rows influence the column means used to
# fill the training rows, i.e. test-set information leaks into training.
# The same random_state on the same number of rows yields the same row split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the per-column means from the training rows only, then apply those
# same means to the held-out rows.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train_raw)
X_test = imputer.transform(X_test_raw)

# Fully imputed feature matrix, kept under its original name for any code
# that still refers to it; it uses the training-set means as well.
X_imputed = imputer.transform(X)

# Create and fit the logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Display classification results : A -> Active | S -> Suspended
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


# Feature values of the single observation to score (replace with your own
# inputs).  Keys follow the same order as the training feature columns.
new_sample_values = {
    'FREQ_USSD': 184.00,
    'revenu_cdr_c': 0.3,
    'DUREE_APPEL_TOT': 7.82,
    'MNT_RECH': 0.33,
    'VOLUME_SESSION': 0,
    'MNT_FORFAIT_DATA': 0,
    'FREQ_ACT_OUT': 6.4,
}

# One-row DataFrame: each value is wrapped in a single-element list.
new_data_to_test = pd.DataFrame(
    {column: [value] for column, value in new_sample_values.items()}
)

# Run the new row through the already-fitted imputer so it receives the same
# missing-value treatment as the training data.
new_data_imputed = imputer.transform(new_data_to_test)

# Classify the imputed row with the trained model.
predictions = model.predict(new_data_imputed)

# Show the predicted label(s) : A -> Active | S -> Suspended
print("Predicted Status:", predictions)

# ---- Output captured from a previous run (kept for reference, not code) ----
# Predicted Status: ['Active']
#
# Data preview (presumably the dataTelecomSelectedSample display):
# 	STATUT	FREQ_USSD	revenu_cdr_c	DUREE_APPEL_TOT	MNT_RECH	VOLUME_SESSION	MNT_FORFAIT_DATA	FREQ_ACT_OUT
# 29427	Suspended	110.624035	3.739770	238.831042	3.579538	75081.528201	4.256432	0.381994
# 55163	Active	24.685142	17.523111	109.456784	2.442152	63029.781037	11.406349	1.750035
# 70280	Suspended	7.035264	10.058542	72.785925	14.708630	59399.314440	17.656907	13.444205
# 12740	Suspended	32.597764	36.193214	288.289650	12.968408	151183.965547	1.358584	6.643898
# 39649	Suspended	81.990782	11.034485	76.323840	21.503203	44823.752872	5.163995	1.560619
# ...	...	...	...	...	...	...	...	...
# 24967	Suspended	69.579869	11.130183	20.005801	24.204721	123282.723577	11.640831	7.275574
# 62556	Suspended	30.501674	11.331631	17.609695	17.709013	77581.736682	5.169908	5.058838
# 39135	Suspended	190.000000	9.695310	25.369582	5.456302	200000.000000	8.712352	5.827595
# 39354	Active	6.544593	21.103313	19.962436	2.204177	150368.220575	10.247147	7.501596
# 2407	Suspended	190.000000	24.392931	23.412698	26.019909	200000.000000	4.762652	1.078338