"""Preparation: imports, dataset loading, and the train/test split."""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

try:
    from sklearn.datasets import load_boston
except ImportError:
    # scikit-learn 1.2 removed load_boston; importing it raises ImportError.
    load_boston = None

# Fixed seed so the train/test split (and therefore every MSE reported
# further down) is reproducible from run to run.
RANDOM_STATE = 42

if load_boston is not None:
    data = load_boston()
else:
    # Fallback taken from the scikit-learn deprecation notice: read the raw
    # file from its original source. Each record spans two physical rows,
    # hence the even/odd row slicing.
    # NOTE(review): this needs network access and uses plain HTTP. The
    # scikit-learn docs also suggest fetch_california_housing as a
    # replacement dataset -- confirm which is preferred for this tutorial.
    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
    # Keep the same 'data' / 'target' keys the rest of the script reads.
    data = {
        "data": np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        "target": raw_df.values[1::2, 2],
    }

x = data['data']
y = data['target']

# Hold out 10% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=RANDOM_STATE
)
# PCA pre-analysis: fit a full PCA and inspect the cumulative explained
# variance ratio to decide how many components to keep.
# NOTE(review): no feature scaling is applied before PCA in this script, so
# the ratios reflect the raw feature scales -- confirm whether standardizing
# first (e.g. StandardScaler) is intended.
pca = PCA()
pca.fit(X_train)  # PCA is unsupervised; the target is not used by fit()

# Fraction of total variance explained by each component.
ratio = pca.explained_variance_ratio_
print("explained variance ratio:", ratio)

# Running total: variance explained by the first k components.
cum_ratio = ratio.cumsum()
print("cumulative explained variance ratio:", cum_ratio)

# Plot the cumulative ratio against the component count (1-based).
xs = list(range(1, len(cum_ratio) + 1))
plt.plot(xs, cum_ratio)
plt.xlabel('number of components')
plt.ylabel('cum_explained_variance_ratio')
plt.grid()
plt.show()
# PCA proper: reduce the features to 2 components, train a linear regression
# on the reduced data, and check how well the model performs.
pca = PCA(n_components=2)
pca.fit(X_train)  # fit on the training split only

# Project both splits with the components learned from the training data.
train_pca = pca.transform(X_train)
test_pca = pca.transform(X_test)

model = LinearRegression()
model.fit(train_pca, y_train)

train_predict = model.predict(train_pca)
test_predict = model.predict(test_pca)

# Report both errors explicitly; as bare expressions the results were
# computed and then discarded.
pca_train_mse = mean_squared_error(y_train, train_predict)
pca_test_mse = mean_squared_error(y_test, test_predict)
print("PCA (2 components) train MSE:", pca_train_mse)
print("PCA (2 components) test MSE:", pca_test_mse)
# Baseline for comparison: the same linear regression trained on the
# original features, without any dimensionality reduction.
model = LinearRegression()
model.fit(X_train, y_train)

train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Report both errors explicitly; as bare expressions the results were
# computed and then discarded.
baseline_train_mse = mean_squared_error(y_train, train_predict)
baseline_test_mse = mean_squared_error(y_test, test_predict)
print("Baseline (all features) train MSE:", baseline_train_mse)
print("Baseline (all features) test MSE:", baseline_test_mse)