| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 
 from pprint import pprint

 import matplotlib as mpl
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from sklearn.linear_model import LinearRegression
 from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import MinMaxScaler
 if __name__ == '__main__':
     # Script: fit a linear regression of Sales on TV and Radio spend from
     # ./Advertising.csv, print error metrics, and plot the data, the residual
     # histogram and the predicted-vs-actual curves.

     # The last figure uses Chinese labels. Prefer fonts that contain CJK
     # glyphs; which of these are installed depends on the machine, so the
     # stock DejaVu Sans is kept as the final fallback.
     plt.rcParams['font.sans-serif'] = [
         'SimHei', 'Microsoft YaHei', 'Arial Unicode MS', 'DejaVu Sans',
     ]
     # NOTE(review): CJK fonts often lack the Unicode minus glyph; use ASCII '-'.
     plt.rcParams['axes.unicode_minus'] = False

     path = './Advertising.csv'
     data = pd.read_csv(path)

     # Only TV and Radio are used as features; Newspaper is plotted below but
     # is not passed to the model.
     x = data[['TV', 'Radio']]
     y = data[['Sales']]

     # One scatter panel per advertising channel against Sales.
     panels = (('TV', 'ro'), ('Radio', 'g^'), ('Newspaper', 'b*'))
     plt.figure(facecolor='w', figsize=(9, 10))
     for row, (column, marker) in enumerate(panels, start=1):
         plt.subplot(len(panels), 1, row)
         plt.plot(data[column], y, marker, mec='k')
         plt.title(column)
         # Positional flag: the old ``b=`` keyword is not accepted by current
         # Matplotlib releases.
         plt.grid(True, ls=':')
     plt.tight_layout(pad=2)

     plt.show()

     x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)
     model = LinearRegression()
     model.fit(x_train, y_train)
     print(model.coef_, model.intercept_)

     # Sort the test samples by their true Sales value so the line plot at the
     # end is monotone in the ground truth. ``order`` is a 1-D array of row
     # positions; x_test stays a DataFrame so the column names seen during
     # fit() are still present at predict() time.
     order = np.argsort(y_test.values.ravel())
     y_test = y_test.values.ravel()[order]
     x_test = x_test.iloc[order]
     # The model was fitted on a single-column 2-D target, so flatten the
     # prediction to line up element-wise with the 1-D y_test.
     y_test_pred = model.predict(x_test).ravel()

     # Hand-computed MSE/RMSE next to the library versions as a cross-check.
     mse = np.mean((y_test_pred - y_test) ** 2)
     rmse = np.sqrt(mse)
     mse_sys = mean_squared_error(y_test, y_test_pred)
     print('MSE = ', mse, end=' ')
     print('MSE(System Function) = ', mse_sys, end=' ')
     print('MAE = ', mean_absolute_error(y_test, y_test_pred))
     print('RMSE = ', rmse)
     print('Training R2 = ', model.score(x_train, y_train))
     print('Training R2(System) = ', r2_score(y_train, model.predict(x_train)))
     print('Test R2 = ', model.score(x_test, y_test))

     # Residuals on the test set (true minus predicted).
     error = y_test - y_test_pred
     np.set_printoptions(suppress=True)
     print('error = ', error)
     plt.hist(error, bins=20, color='g', alpha=0.6, edgecolor='k')
     plt.show()

     # True vs. predicted Sales over the sorted test samples.
     plt.figure(facecolor='w')
     t = np.arange(len(x_test))
     plt.plot(t, y_test, 'r-', linewidth=2, label='真实数据')
     plt.plot(t, y_test_pred, 'g-', linewidth=2, label='预测数据')
     plt.legend(loc='upper left')
     plt.title('线性回归预测销量', fontsize=18)
     plt.grid(True, ls=':')
     plt.show()
 
 |