# Lasso regression: code implementation

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Fit a single lasso regression on the Boston housing data and report its
# mean squared error on the training and test splits.
from sklearn.datasets import load_boston
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the data.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2; on newer versions this import fails and another dataset is needed.
data = load_boston()
x = data['data']
y = data['target']

# Split into training and test sets.
# train_test_split has no `random` keyword; the seed parameter is
# `random_state`. A fixed integer makes the split reproducible; use None to
# get a different split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Instantiate a lasso regression.
# alpha defaults to 1 and can be tuned to sit between under- and over-fitting.
clf1 = Lasso(alpha=0.1)
clf1.fit(X_train, y_train)

# Predict on both splits.
y_train_pred = clf1.predict(X_train)
y_test_pred = clf1.predict(X_test)

# MSE: print the values so they are visible when run as a script instead of
# being computed and discarded.
print('Lasso train MSE:', mean_squared_error(y_train, y_train_pred))  # training set
print('Lasso test MSE:', mean_squared_error(y_test, y_test_pred))  # test set

# Fit a lasso model for each of several alpha values and compare the
# resulting training and test MSE.
alphas = [0.001, 0.005, 0.1, 0.2, 0.3, 0.4, 0.5, 1, 1.5]
mse_trains = []
mse_tests = []
for alpha in alphas:
    clf = Lasso(alpha=alpha)
    clf.fit(X_train, y_train)
    # Predict
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)
    # Store
    mse_trains.append(mean_squared_error(y_train, y_train_pred))  # training set
    mse_tests.append(mean_squared_error(y_test, y_test_pred))  # test set

# Visualize: one curve per split, MSE against alpha.
import matplotlib.pyplot as plt
plt.plot(alphas, mse_trains, label='train')
plt.plot(alphas, mse_tests, label='test')
plt.legend()
plt.show()

# If the MSE is very large, lasso regression may not be a good fit; try ridge regression.

# Ridge regression: code implementation

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Fit a single ridge regression on the existing train/test split and report
# its mean squared error on both splits.
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# Instantiate a ridge regression.
# alpha defaults to 1 and can be tuned to sit between under- and over-fitting.
clf1 = Ridge(alpha=0.1)
clf1.fit(X_train, y_train)

# Predict on both splits.
y_train_pred = clf1.predict(X_train)
y_test_pred = clf1.predict(X_test)

# MSE: print the values so they are visible when run as a script instead of
# being computed and discarded.
print('Ridge train MSE:', mean_squared_error(y_train, y_train_pred))  # training set
print('Ridge test MSE:', mean_squared_error(y_test, y_test_pred))  # test set

# Fit a ridge model for each of several alpha values and compare the
# resulting training and test MSE.
alphas = [0.001, 0.005, 0.1, 0.2, 0.3, 0.4, 0.5, 1, 1.5]
mse_trains = []
mse_tests = []
for alpha in alphas:
    clf = Ridge(alpha=alpha)
    clf.fit(X_train, y_train)
    # Predict
    y_train_pred = clf.predict(X_train)
    y_test_pred = clf.predict(X_test)
    # Store
    mse_trains.append(mean_squared_error(y_train, y_train_pred))  # training set
    mse_tests.append(mean_squared_error(y_test, y_test_pred))  # test set

# Visualize: one curve per split, MSE against alpha.
import matplotlib.pyplot as plt
plt.plot(alphas, mse_trains, label='train')
plt.plot(alphas, mse_tests, label='test')
plt.legend()
plt.show()

# Ridge may work better here.
# How to choose alpha?
# Pick the alpha at which the training MSE curve flattens out and the test
# MSE is acceptable.