-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_function.py
98 lines (89 loc) · 2.75 KB
/
data_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import numpy as np
import matplotlib.pyplot as plt
# Scikit Imports
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.metrics import mean_squared_error as mse
####################################################################################
################################## data functions ##################################
####################################################################################
def shift(target):
    """Translate the values so that the smallest one becomes zero.

    Args:
        target: Sized sequence of numbers (list, tuple, NumPy array, ...).

    Returns:
        A new list holding each element of *target* minus the minimum of
        *target*. An empty input yields an empty list.
    """
    # np.min raises ValueError on a zero-size input, so short-circuit it.
    if len(target) == 0:
        return []
    _min = np.min(target)
    return [t - _min for t in target]
def _filter(target , filter) :
r = []
rid = []
rest = []
for idx , t in enumerate(target) :
if t > filter :
r.append(t)
rid.append(idx)
else :
rest.append(t)
return r ,rid , rest
def indexing(target, index):
    """Pick the elements of *target* found at the given positions.

    Args:
        target: Any object supporting ``target[key]``.
        index: Iterable of keys/positions to look up, in the desired order.

    Returns:
        A list with ``target[key]`` for every key in *index*.
    """
    picked = []
    for key in index:
        picked.append(target[key])
    return picked
def _plot(x, y, filter, color=None, name=""):
    """Scatter-plot the points whose ``abs(y)`` is below a threshold.

    Only pairs ``(x_i, y_i)`` with ``abs(y_i) < filter`` are drawn. A
    reference line from (0, 0) to (0.1, 0.1) is then drawn, the figure is
    saved when *name* is given, and finally the figure is shown.

    Args:
        x: Iterable of x coordinates.
        y: Iterable of y coordinates, paired with *x* via ``zip``.
        filter: Exclusive upper bound on ``abs(y)`` for a point to be drawn.
            (The name shadows the builtin but is part of the signature.)
        color: Value forwarded to ``plt.scatter`` as ``c``. ``None`` or an
            empty sequence means "use the default colour". A non-string
            sequence with one entry per input point is thinned with the
            same mask as the points so the lengths keep matching.
        name: File path passed to ``plt.savefig``; ``""`` skips saving.
    """
    px, py, kept = [], [], []
    for _x, _y in zip(x, y):
        inside = abs(_y) < filter
        kept.append(inside)
        if inside:
            px.append(_x)
            py.append(_y)

    # Per-point colours must lose the same entries as the points, otherwise
    # scatter receives `c` with a different length than x/y.
    per_point = hasattr(color, "__len__") and not isinstance(color, str)
    if per_point and len(color) == len(kept) and not all(kept):
        color = [c for c, keep in zip(color, kept) if keep]

    # Identity/length checks instead of `color == []`: comparing a NumPy
    # array with a list is element-wise and is not a usable truth value.
    if color is None or (hasattr(color, "__len__") and len(color) == 0):
        plt.scatter(px, py)
    else:
        plt.scatter(px, py, c=color)

    # Diagonal y = x reference segment on [0, 0.1].
    plt.plot(np.linspace(0, 0.1), np.linspace(0, 0.1))
    if name != "":
        plt.savefig(name)
    plt.show()
def _plot_p(x, y, filter, color=None, name=""):
    """Scatter-plot ``abs(y)`` for the points whose ``abs(y)`` is below a threshold.

    Only pairs ``(x_i, y_i)`` with ``abs(y_i) < filter`` are drawn, and the
    plotted ordinate is ``abs(y_i)``. A reference line from (0, 0) to
    (0.1, 0.1) is then drawn, the figure is saved when *name* is given, and
    finally the figure is shown.

    Args:
        x: Iterable of x coordinates.
        y: Iterable of y coordinates, paired with *x* via ``zip``.
        filter: Exclusive upper bound on ``abs(y)`` for a point to be drawn.
            (The name shadows the builtin but is part of the signature.)
        color: Value forwarded to ``plt.scatter`` as ``c``. ``None`` or an
            empty sequence means "use the default colour". A non-string
            sequence with one entry per input point is thinned with the
            same mask as the points so the lengths keep matching.
        name: File path passed to ``plt.savefig``; ``""`` skips saving.
    """
    px, py, kept = [], [], []
    for _x, _y in zip(x, y):
        magnitude = abs(_y)
        inside = magnitude < filter
        kept.append(inside)
        if inside:
            px.append(_x)
            py.append(magnitude)

    # Per-point colours must lose the same entries as the points, otherwise
    # scatter receives `c` with a different length than x/y.
    per_point = hasattr(color, "__len__") and not isinstance(color, str)
    if per_point and len(color) == len(kept) and not all(kept):
        color = [c for c, keep in zip(color, kept) if keep]

    # Identity/length checks instead of `color == []`: comparing a NumPy
    # array with a list is element-wise and is not a usable truth value.
    if color is None or (hasattr(color, "__len__") and len(color) == 0):
        plt.scatter(px, py)
    else:
        plt.scatter(px, py, c=color)

    # Diagonal y = x reference segment on [0, 0.1].
    plt.plot(np.linspace(0, 0.1), np.linspace(0, 0.1))
    if name != "":
        plt.savefig(name)
    plt.show()
def tune(target, x_train, y_train, kernel_method, n_jobs=8, random_state=None):
    """Run a randomized hyper-parameter search and return the best setting.

    Candidates are sampled from ``kernel`` in *kernel_method*, ``epsilon``
    in ``[0, 1)`` with step 0.001 and ``C`` in ``[0.1, 1)`` with step 0.1.
    The number of sampled candidates and the cross-validation scheme are
    left at the ``RandomizedSearchCV`` defaults.

    Args:
        target: Estimator handed to ``RandomizedSearchCV``; it must accept
            ``kernel``, ``epsilon`` and ``C`` parameters (presumably an
            ``SVR`` instance -- confirm against the caller).
        x_train: Training features passed to ``fit``.
        y_train: Training targets passed to ``fit``.
        kernel_method: List of kernel names to sample from.
        n_jobs: Number of parallel jobs for the search (default 8, the
            value that used to be hard-coded).
        random_state: Seed forwarded to ``RandomizedSearchCV`` so the
            sampled candidates can be reproduced; ``None`` keeps the
            previous non-deterministic behaviour.

    Returns:
        The ``best_params_`` dict of the fitted search.
    """
    param_grid = [{
        'kernel': kernel_method,
        'epsilon': np.arange(0, 1, 0.001),
        'C': np.arange(0.1, 1, 0.1),
    }]
    # Set up the search. Note this is a randomized search over the grid
    # above, not an exhaustive grid search.
    rd_search = RandomizedSearchCV(
        target,
        param_grid,
        n_jobs=n_jobs,
        random_state=random_state,
    )
    rd_search.fit(x_train, y_train)
    return rd_search.best_params_
def _tune(target, x_train, y_train, x_test, y_test, kernel_method, tg='ts'):
    """Exhaustively search ``epsilon`` and ``C`` for an SVR by lowest MSE.

    Every combination of ``epsilon`` in ``[0, 0.1)`` (step 0.001) and ``C``
    in ``[0.1, 1)`` (step 0.1) is fitted on the training data and scored
    with mean squared error on the split selected by *tg*.

    Args:
        target: Unused; kept so the signature stays compatible with callers.
        x_train: Training features used to fit every candidate.
        y_train: Training targets used to fit every candidate.
        x_test: Features scored when ``tg == 'ts'``.
        y_test: Targets scored when ``tg == 'ts'``.
        kernel_method: Sequence of kernel names. NOTE: only the first entry
            is used; the remaining ones are ignored.
        tg: ``'ts'`` scores candidates on the test split; any other value
            scores them on the training split.

    Returns:
        Dict with ``'kernel'`` plus the ``'epsilon'`` and ``'C'`` of the
        candidate with the lowest MSE (the first one found on ties).
    """
    kernel = kernel_method[0]
    epsilons = np.arange(0, 0.1, 0.001)
    costs = np.arange(0.1, 1, 0.1)

    # The evaluation split does not change during the search; pick it once.
    if tg == 'ts':
        x_eval, y_eval = x_test, y_test
    else:
        x_eval, y_eval = x_train, y_train

    # Start from +inf so the first finite score always wins; a fixed
    # sentinel such as 9999 would leave 'epsilon'/'C' unset for data whose
    # MSE is never below it.
    best = float('inf')
    best_dict = {'kernel': kernel}
    for ep in epsilons:
        for c in costs:
            _svr = SVR(kernel=kernel, epsilon=ep, C=c)
            _svr.fit(x_train, y_train)
            _sc = mse(y_eval, _svr.predict(x_eval))
            if _sc < best:
                best = _sc
                best_dict['epsilon'] = ep
                best_dict['C'] = c
    return best_dict