# 代码实现

# Load the iris dataset; the returned object is indexed like a dict.
from sklearn.datasets import load_iris
import numpy as np
data = load_iris()

x = data['data']
y = data['target']

# The target has three classes (0, 1, 2). Start with a binary problem:
# drop class 0 and relabel class 1 -> 0, class 2 -> 1.
# Selecting by label value (rather than by row position) does not depend on
# the rows being sorted by class, and boolean-mask indexing yields new
# arrays, so the arrays stored inside `data` are left unmodified.
keep = y != 0
x = x[keep]
y = (y[keep] == 2).astype(y.dtype)

# Split into training and test sets, holding out 20% for testing.
# A fixed random_state seed keeps the split reproducible between runs.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

# Logistic regression with the estimator's default settings.
from sklearn.linear_model import LogisticRegression
# fit() returns the estimator itself, so construction and training can be
# written as a single expression.
lr = LogisticRegression().fit(x_train, y_train)

# Predict labels for the training and the test split.
y_train_pred = lr.predict(x_train)
y_test_pred = lr.predict(x_test)
# A bare expression statement only echoes its value when it is the last line
# of an interactive cell; print explicitly so the results are visible
# however this code is run.
print('train predictions:', y_train_pred)
print('test predictions:', y_test_pred)
# Accuracy on each split, kept in variables so it can be reused later.
from sklearn.metrics import accuracy_score
train_accuracy = accuracy_score(y_train, y_train_pred)  # training accuracy
test_accuracy = accuracy_score(y_test, y_test_pred)  # test accuracy
print('train accuracy:', train_accuracy)
print('test accuracy:', test_accuracy)
# Confusion matrix; for this binary problem it is a 2x2 array.
from sklearn.metrics import confusion_matrix


def _precision_recall_f1(cm):
    """Return ``(precision, recall, f1)`` for label 0 from a 2x2 matrix.

    ``cm`` is indexed as ``cm[true_label, predicted_label]``, so column 0
    holds everything predicted as 0 and row 0 everything that truly is 0.
    """
    precision = cm[0, 0] / (cm[0, 0] + cm[1, 0])
    recall = cm[0, 0] / (cm[0, 0] + cm[0, 1])
    # F1 is the harmonic mean of precision and recall.
    f1 = 2 / (1 / precision + 1 / recall)
    return precision, recall, f1


cm1 = confusion_matrix(y_train, y_train_pred)  # training set
P1, R1, F11 = _precision_recall_f1(cm1)
cm2 = confusion_matrix(y_test, y_test_pred)  # test set
# F12 must be derived from the test-set P2/R2, not the training-set values.
P2, R2, F12 = _precision_recall_f1(cm2)

# ROC curve and AUC, evaluated on the training set.
from sklearn.metrics import roc_auc_score,roc_curve,auc
# Use the predicted probabilities in column index 1 (the second column)
# as the ranking score.
y_p = lr.predict_proba(x_train)[:, 1]
# False-positive and true-positive rates across the score thresholds.
fpr, tpr, thresholds = roc_curve(y_train, y_p)
# Area under the (fpr, tpr) curve.
roc_auc = auc(fpr, tpr)
# Draw the ROC curve through the Axes object returned by subplots().
import matplotlib.pyplot as plt
roc_fig, roc_ax = plt.subplots(figsize=(8, 5))
roc_ax.plot(
    fpr,
    tpr,
    color='darkorange',
    lw=2,
    label='ROC curve(area = %0.4f)' % roc_auc,
)  # ROC curve
roc_ax.plot([0, 1], [0, 1], color='navy', linestyle='--')  # dashed diagonal
roc_ax.set_xlim([0.0, 1.0])  # x-axis range
roc_ax.set_ylim([0.0, 1.05])  # y-axis range
roc_ax.set_xlabel('FPR')  # x-axis label
roc_ax.set_ylabel('TPR')  # y-axis label
roc_ax.set_title('Train ROC Curve')  # title
roc_ax.legend(loc='lower right')  # legend and its position
plt.show()

# 正则化

# Re-create the model with an explicit L2 penalty. C controls model
# complexity: the larger C is, the more complex the fitted model may be.
lr = LogisticRegression(penalty='l2', C=0.1)
# Train on the same training split.
lr.fit(x_train, y_train)

# Predict labels for both splits with the regularised model.
y_train_pred = lr.predict(x_train)
y_test_pred = lr.predict(x_test)
# Print explicitly: a bare expression statement shows nothing unless it is
# the last line of an interactive cell.
print('train predictions (C=0.1):', y_train_pred)
print('test predictions (C=0.1):', y_test_pred)