使用 SMOTE 过采样后训练分类预测模型

avatar
作者
筋斗云
阅读量:0
#!/usr/bin/env python
# coding: utf-8
"""Train a random-forest default classifier on SMOTE-balanced data.

Pipeline: load ``credit_default.csv``, mean-fill missing values, drop
duplicate rows, plot per-feature value distributions, split 70/30, oversample
the *training* fold with SMOTE, fit a ``RandomForestClassifier``, report
metrics on the untouched test fold, and pickle the fitted model to
``model.pkl``.
"""

import pickle

import matplotlib.pyplot as plt
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

df = pd.read_csv('credit_default.csv')
# Bare expressions only display inside a notebook; print so the script shows
# the same information.
print(df.head())
df.info()

# Fill missing values with the column mean. Assign the result back instead of
# calling fillna(inplace=True) on a column selection: the chained in-place
# form is deprecated in pandas 2.x and does nothing under copy-on-write.
# NOTE(review): assumes every column with missing values is numeric -- confirm.
# NOTE(review): the means are computed on the full dataset, before the split.
for column in df.columns[df.isnull().any()]:
    df[column] = df[column].fillna(df[column].mean())

# Drop duplicate rows.
df = df.drop_duplicates()

# Bar chart of the value distribution of every feature (all columns except the
# target). The previous version plotted the 'Default' counts on every
# iteration, so each chart was identical regardless of its title.
columns = list(df.columns)
columns.remove('Default')
for feature in columns:
    df[feature].value_counts().plot(kind='bar')
    plt.title(f'Distribution of {feature}')
    plt.xlabel(feature)
    plt.ylabel('Frequency')
    plt.show()

# Row counts per target class.
normal = df[df['Default'] == 0]
fraud = df[df['Default'] == 1]
print(normal.shape, fraud.shape)

X = df.drop('Default', axis=1)
y = df['Default']

# 70/30 split BEFORE oversampling. Resampling first lets synthetic points
# interpolated from rows that end up in the test set leak into training, and
# puts synthetic rows into the test set, which inflates every metric below.
# Stratify so both folds keep the original class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# The classes are imbalanced: oversample the training fold only.
X_res, y_res = SMOTE(random_state=42).fit_resample(X_train, y_train)

# Class counts after resampling.
print(y_res.value_counts())

# Train the model on the balanced training data (seeded for reproducibility).
clf = RandomForestClassifier(n_estimators=50, random_state=42)
clf.fit(X_res, y_res)

# Evaluate on the untouched test fold, which holds real rows at the original
# class ratio.
y_pred = clf.predict(X_test)
print(f"\n Accuaracy: {accuracy_score(y_test, y_pred)}")
print(f"\n Precision: {precision_score(y_test, y_pred)}")
print(f"\n Recall: {recall_score(y_test, y_pred)}")
print(f"\n F1 Score: {f1_score(y_test, y_pred)}")

# Confusion matrix.
print(confusion_matrix(y_test, y_pred))

print(classification_report(y_test, y_pred))

# Save the fitted model.
with open('model.pkl', 'wb') as f:
    pickle.dump(clf, f)

在这里插入图片描述

广告一刻

为您即时展示最新活动产品广告消息,让您随时掌握产品活动新动态!