-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
High-Dimensional Feature Selection of Medical Data
Optimized LASSO feature selection
- Loading branch information
1 parent
8185347
commit c9597ec
Showing
16 changed files
with
6,446 additions
and
0 deletions.
There are no files selected for viewing
32 changes: 32 additions & 0 deletions
32
High-Dimensional Feature Selection of Medical Data/Code/FeatureSelect1.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
%% Read dataset and preprocessing
% Rows 0-699 of train_data1.csv (zero-based): columns 0-128 are read as the
% feature matrix x (700x129), columns 129-140 as the label matrix y (700x12).
x = csvread('train_data1.csv',0,0,[0 0 699 128]);
y = csvread('train_data1.csv',0,129,[0 129 699 140]);
y(y==-1) = 0;                       % recode -1 labels as 0 for the binomial fits below

numFeatures = size(x,2);
numLabels   = size(y,2);

%% T test
% For every label column, run a per-feature two-sample t-test (unequal
% variances) between the label==0 rows and the label==1 rows, and add the
% resulting p-values up across all label columns.
pSum = zeros(1,numFeatures);
for i = 1:numLabels
    nowY = y(:,i);
    X0 = x(nowY==0,:);
    X1 = x(nowY==1,:);
    [~,p] = ttest2(X0,X1,'Vartype','unequal');
    pSum = pSum + p;
end

% Keep the (at most) 50 features with the smallest summed p-value.
numKeep = min(50,numFeatures);
[~,featureIdxSortbyP] = sort(pSum);
x = x(:,featureIdxSortbyP(1:numKeep));

%% LASSO regression feature selection
% One cross-validated elastic-net logistic fit per label column. For each
% fit, count the non-zero coefficients at the minimum-deviance lambda and
% accumulate the coefficient magnitudes as a per-feature importance score.
opts = statset('UseParallel',true);
featureSum    = zeros(numLabels,1);
featureWeight = zeros(numKeep,1);
for i = 1:numLabels
    [B,S] = lassoglm(x,y(:,i),'binomial','DFmax',30,'CV',10,'Alpha',0.5,'Options',opts);
    coef = B(:,S.IndexMinDeviance);
    featureSum(i,1) = nnz(coef);
    % Use magnitudes: signed coefficients from different labels would cancel,
    % and a strongly negative coefficient is as informative as a positive one.
    featureWeight = featureWeight + abs(coef);
end

%% Filter
% Keep as many features as the average model size across the label columns.
% (Not stored in a variable called "sum": that would shadow the builtin and
% break the sum(...) calls above when the script is re-run in the same workspace.)
numSelected = floor(sum(featureSum)/numLabels);
% Largest accumulated magnitude first, so the strongest features are retained.
[~,featureIdxSortbyLasso] = sort(featureWeight,'descend');
x = x(:,featureIdxSortbyLasso(1:numSelected));
59 changes: 59 additions & 0 deletions
59
High-Dimensional Feature Selection of Medical Data/Code/Question1.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
%% Read dataset
% Training file: columns 0-128 (zero-based) are read as features, columns
% 129-140 as the 12 label columns, rows 0-699.
x = csvread('train_data1.csv',0,0,[0 0 699 128]);
y = csvread('train_data1.csv',0,129,[0 129 699 140]);
% Test file: read once; the first column is copied into the first result
% column, the remaining columns are the features.
testData = csvread('test_feature_data1.csv');
number = testData(:,1);
testX  = testData(:,2:end);

numTrain  = size(x,1);
numTest   = size(testX,1);
numLabels = size(y,2);
numRounds = 5;                      % networks trained (and averaged) per label

%% Preprocessing
y(y==-1) = 0;                       % recode -1 labels as 0
net = patternnet(15);

net.trainFcn = 'trainrp';
net.trainParam.max_fail = 20;
net.trainParam.epochs = 1000;

net.divideParam.trainRatio = 70/100;
net.divideParam.valRatio = 15/100;
net.divideParam.testRatio = 15/100;

results = zeros(numTest,numLabels+1);
results(:,1) = number;
% Accumulated absolute error per training sample over all labels and rounds.
errors = zeros(1,numTrain);

%% Divide into 12 sub problems
for i = 1:numLabels

    % T Test feature selection: keep the 70 features with the smallest p-value
    nowY = y(:,i);
    X0 = x(nowY==0,:);
    X1 = x(nowY==1,:);
    [~,p] = ttest2(X0,X1,'Vartype','unequal');
    [~,featureIdxSortbyP] = sort(p);
    ttestIdx = featureIdxSortbyP(1:70);
    nowX = x(:,ttestIdx);

    % LASSO regression feature selection: keep the features with a non-zero
    % coefficient at the minimum-deviance lambda
    [B,S] = lassoglm(nowX,nowY,'binomial','DFmax',30,'CV',10,'Alpha',0.5);
    model = B(:,S.IndexMinDeviance)~=0;
    nowX = nowX(:,model);

    % Apply the same two-stage column selection to the test features
    nowTestX = testX(:,ttestIdx);
    nowTestX = nowTestX(:,model);

    % Train and use NN array
    outputs = zeros(1,numTest);
    for j = 1:numRounds
        % "perm" rather than "rand", so the builtin rand is not shadowed.
        perm  = randperm(numTrain);
        shufX = nowX(perm,:);
        shufY = nowY(perm,:);
        nowNet = train(net,shufX',shufY');
        % Take the absolute value before accumulating (signed errors from
        % different rounds would cancel) and map each error back to its
        % original sample index through perm.
        roundErr = abs(gsubtract(shufY',nowNet(shufX')));
        errors(perm) = errors(perm) + roundErr;
        outputs = outputs + nowNet(nowTestX');
    end

    % Generate result: average of the network outputs on the test rows
    results(:,i+1) = outputs'/numRounds;
end

%% Output result
% Mean absolute error over all samples, labels and rounds.
disp(sum(errors)/(numTrain*numLabels*numRounds));
csvwrite('result1.csv',results);
86 changes: 86 additions & 0 deletions
86
High-Dimensional Feature Selection of Medical Data/Code/Question2.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
%% Read dataset and preprocessing
% Rows 0-3999 of train_data2.csv (zero-based): columns 0-409 are read as
% features, column 410 as the class label (values 0, 1, 2 are used below).
x = csvread('train_data2.csv',0,0,[0 0 3999 409]);
y = csvread('train_data2.csv',0,410,[0 410 3999 410]);
X1 = x(y==0,:);
X2 = x(y==1,:);
X3 = x(y==2,:);

%% T Test
% Pairwise per-feature t-tests (unequal variances) between the three classes;
% pAB compares groups XA and XB. Features are ranked by the summed p-value.
[~,p12] = ttest2(X1,X2,'Vartype','unequal');
[~,p13] = ttest2(X1,X3,'Vartype','unequal');
[~,p23] = ttest2(X2,X3,'Vartype','unequal');
p = p12+p13+p23;
[~,featureIdxSortbyP] = sort(p);

%% LASSO regression feature selection

% Keep the 100 best-ranked features, then the ones with a non-zero
% coefficient at the one-standard-error lambda of a cross-validated fit.
x = x(:,featureIdxSortbyP(1:100));
opts = statset('UseParallel',true);
% Linear Regression
[B,S] = lasso(x,y,'DFmax',50,'CV',10,'Alpha',0.5,'Options',opts);
% Poisson Regression
% [B,S] = lassoglm(x,y,'poisson','DFmax',50,'CV',10,'Alpha',0.5,'Options',opts);
model = B(:,S.Index1SE)~=0;
x = x(:,model);

clear X1 X2 X3 p p12 p13 p23 B S

%% Process class label
% One-hot encode the labels: column k is 1 where y == k-1.
numSamples = size(x,1);
newY = zeros(numSamples,3);
newY(y==0,1) = 1;
newY(y==1,2) = 1;
newY(y==2,3) = 1;
y = newY;
clear newY;

%% Build NN parameters
% Hidden layer size is half the number of selected features.
neurons = nnz(model);
net = patternnet(floor(neurons/2));
net.divideParam.trainRatio = 70/100;
net.divideParam.valRatio = 15/100;
net.divideParam.testRatio = 15/100;

net.trainFcn = 'trainrp';
net.trainParam.max_fail = 20;
net.trainParam.epochs = 1000;

%% Train NN array
numNets = 20;
errorsArray = zeros(1,numNets);
nets = cell(1,numNets);

for i = 1:numNets
    % Randomly disorganize sample order
    % ("perm" rather than "rand", so the builtin rand is not shadowed)
    perm = randperm(numSamples);
    x = x(perm,:);
    y = y(perm,:);

    nets{i} = train(net,x',y');
    neti = nets{i};
    outputs = neti(x');
    errors = gsubtract(y',outputs);
    % Mean absolute error over every output entry of every sample passed to train
    errorsArray(i) = sum(abs(errors(:)))/numel(errors);
end

%% Select 10 best NN by error information

[~,IdxSortbyerrors] = sort(errorsArray,'ascend');
nets = nets(IdxSortbyerrors(1:10));
disp(mean(errorsArray(IdxSortbyerrors(1:10))));
clear errors errorsArray i net neti neurons opts outputs perm IdxSortbyerrors numNets numSamples

%% Read test data
% Read the file once: the first column is copied into the first output
% column, the remaining columns are the features.
testData = csvread('test_feature_data2.csv');
number = testData(:,1);
testX  = testData(:,2:end);
clear testData
% Apply the same two-stage column selection as for the training features
testX = testX(:,featureIdxSortbyP(1:100));
testX = testX(:,model);

%% Classify by NN
% Average the outputs of the retained networks, then take the index of the
% largest averaged output per sample (minus 1 to get back to labels 0..2).
testY = zeros(3,size(testX,1));
for i = 1:numel(nets)
    neti = nets{i};
    testY = testY + neti(testX');
end
testY = testY/numel(nets);
testY = (vec2ind(testY)-1)';
testY = [number,testY];
csvwrite('result2.csv',testY);
12 changes: 12 additions & 0 deletions
12
High-Dimensional Feature Selection of Medical Data/Code/lasso.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
%% Read dataset
% Rows 0-3999 of train_data2.csv (zero-based): columns 0-409 are read as
% features, column 410 as the response.
x = csvread('train_data2.csv',0,0,[0 0 3999 409]);
y = csvread('train_data2.csv',0,410,[0 410 3999 410]);

%% LASSO regression feature selection

% NOTE(review): the commit lists this script as lasso.m. A file with that
% name on the path can take precedence over the toolbox function lasso, so
% any lasso(...) call run from this folder may resolve to this script instead
% - consider renaming the file.
opts = statset('UseParallel',true);
% Linear Regression
% Disabled: its B and S were overwritten by the Poisson fit below before
% being used, so the 10-fold cross-validated fit was wasted work.
% [B,S] = lasso(x,y,'DFmax',100,'CV',10,'Alpha',0.5,'Options',opts);
% Poisson Regression
[B,S] = lassoglm(x,y,'poisson','DFmax',100,'CV',10,'Alpha',0.5,'Options',opts);
% Logical mask of the features with a non-zero coefficient at the
% one-standard-error lambda.
model = B(:,S.Index1SE)~=0;
26 changes: 26 additions & 0 deletions
26
High-Dimensional Feature Selection of Medical Data/Code/lassoSelect.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
%% Read dataset and preprocessing
% Rows 0-3999 of train_data2.csv (zero-based): columns 0-409 are read as
% features, column 410 as the class label (values 0, 1, 2 are used below).
x = csvread('train_data2.csv',0,0,[0 0 3999 409]);
y = csvread('train_data2.csv',0,410,[0 410 3999 410]);
X1 = x(y==0,:);
X2 = x(y==1,:);
X3 = x(y==2,:);

%% T test
% Pairwise per-feature t-tests (unequal variances) between the three classes;
% pAB compares groups XA and XB. Features are ranked by the summed p-value.
[~,p12] = ttest2(X1,X2,'Vartype','unequal');
[~,p13] = ttest2(X1,X3,'Vartype','unequal');
[~,p23] = ttest2(X2,X3,'Vartype','unequal');
p = p12+p13+p23;
[~,featureIdxSortbyP] = sort(p);

%% LASSO regression feature selection

% Keep the 100 best-ranked features, then the ones with a non-zero
% coefficient at the one-standard-error lambda of a cross-validated fit.
x = x(:,featureIdxSortbyP(1:100));
opts = statset('UseParallel',true);
% Linear Regression
[B,S] = lasso(x,y,'DFmax',100,'CV',10,'Alpha',0.5,'Options',opts);
% Poisson Regression
% [B,S] = lassoglm(x,y,'poisson','DFmax',100,'CV',10,'Alpha',0.5,'Options',opts);
model = B(:,S.Index1SE)~=0;
x = x(:,model);

%% SVM
% (no SVM code follows in this script)
14 changes: 14 additions & 0 deletions
14
High-Dimensional Feature Selection of Medical Data/Code/useNN.m
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
%% Read dataset
% This script expects net, featureIdxSortbyP and model to already exist in
% the workspace; none of them is defined here.

% Read the file once: the first column is copied into the first output
% column, the remaining columns are the features.
testData = csvread('test_feature_data2.csv');
number = testData(:,1);
testX  = testData(:,2:end);
% Apply the two-stage column selection: 100 best-ranked features, then the model mask
testX = testX(:,featureIdxSortbyP(1:100));
testX = testX(:,model);

%% Neural Network
% Take the index of the largest network output per sample, minus 1.
testY = net(testX');
testY = (vec2ind(testY)-1)';
testY = [number,testY];

%% Output data
csvwrite('result2.csv',testY);
50 changes: 50 additions & 0 deletions
50
High-Dimensional Feature Selection of Medical Data/Data/example_task/example_task1.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
1,0.82,0.53,0.81,0.77,0.12,0.53,0.81,0.47,0.12,0.53,0.83,0.11 | ||
2,0.72,0.53,0.81,0.71,0.1,0.51,0.74,0.77,0.72,0.93,0.83,0.11 | ||
3,0.68,0.53,0.81,0.77,0.12,0.52,0.81,0.37,0.12,0.53,0.83,0.11 | ||
4,0.62,0.53,0.83,0.77,0.12,0.53,0.63,0.27,0.72,0.63,0.83,0.11 | ||
5,0.21,0.53,0.81,0.77,0.12,0.56,0.81,0.77,0.12,0.53,0.83,0.11 | ||
6,0.12,0.53,0.81,0.77,0.11,0.53,0.22,0.17,0.92,0.43,0.83,0.11 | ||
7,0.22,0.53,0.82,0.76,0.12,0.53,0.29,0.67,0.12,0.53,0.83,0.11 | ||
8,0.12,0.53,0.81,0.77,0.14,0.58,0.71,0.37,0.12,0.73,0.83,0.11 | ||
9,0.92,0.53,0.81,0.77,0.12,0.53,0.81,0.87,0.12,0.83,0.83,0.11 | ||
10,0.92,0.53,0.81,0.77,0.12,0.53,0.81,0.87,0.12,0.83,0.83,0.11 | ||
11,0.82,0.53,0.81,0.77,0.12,0.53,0.81,0.47,0.12,0.53,0.83,0.11 | ||
12,0.72,0.53,0.81,0.71,0.1,0.51,0.74,0.77,0.72,0.93,0.83,0.11 | ||
13,0.68,0.53,0.81,0.77,0.12,0.52,0.81,0.37,0.12,0.53,0.83,0.11 | ||
14,0.62,0.53,0.83,0.77,0.12,0.53,0.63,0.27,0.72,0.63,0.83,0.11 | ||
15,0.21,0.53,0.81,0.77,0.12,0.56,0.81,0.77,0.12,0.53,0.83,0.11 | ||
16,0.12,0.53,0.81,0.77,0.11,0.53,0.22,0.17,0.92,0.43,0.83,0.11 | ||
17,0.22,0.53,0.82,0.76,0.12,0.53,0.29,0.67,0.12,0.53,0.83,0.11 | ||
18,0.12,0.53,0.81,0.77,0.14,0.58,0.71,0.37,0.12,0.73,0.83,0.11 | ||
19,0.92,0.53,0.81,0.77,0.12,0.53,0.81,0.87,0.12,0.83,0.83,0.11 | ||
20,0.92,0.53,0.81,0.77,0.12,0.53,0.81,0.87,0.12,0.83,0.83,0.11 | ||
21,0.82,0.53,0.81,0.77,0.12,0.53,0.81,0.47,0.12,0.53,0.83,0.11 | ||
22,0.72,0.53,0.81,0.71,0.1,0.51,0.74,0.77,0.72,0.93,0.75,0.11 | ||
23,0.68,0.53,0.81,0.77,0.12,0.52,0.81,0.37,0.12,0.53,0.75,0.11 | ||
24,0.62,0.53,0.83,0.77,0.12,0.53,0.63,0.27,0.72,0.63,0.75,0.11 | ||
25,0.21,0.53,0.81,0.77,0.12,0.56,0.81,0.77,0.12,0.53,0.75,0.11 | ||
26,0.12,0.53,0.81,0.77,0.11,0.53,0.22,0.17,0.92,0.43,0.75,0.11 | ||
27,0.22,0.53,0.82,0.76,0.12,0.53,0.29,0.67,0.12,0.53,0.75,0.11 | ||
28,0.12,0.53,0.81,0.77,0.14,0.58,0.71,0.37,0.12,0.73,0.75,0.11 | ||
29,0.92,0.53,0.81,0.77,0.12,0.53,0.81,0.87,0.12,0.83,0.75,0.11 | ||
30,0.92,0.53,0.81,0.77,0.12,0.53,0.81,0.87,0.12,0.83,0.75,0.11 | ||
31,0.82,0.53,0.81,0.77,0.12,0.53,0.81,0.47,0.12,0.53,0.75,0.11 | ||
32,0.72,0.53,0.81,0.71,0.1,0.51,0.74,0.77,0.72,0.93,0.75,0.11 | ||
33,0.68,0.53,0.81,0.77,0.12,0.52,0.81,0.37,0.12,0.53,0.75,0.11 | ||
34,0.62,0.53,0.83,0.77,0.12,0.53,0.63,0.27,0.72,0.63,0.75,0.11 | ||
35,0.21,0.53,0.81,0.77,0.12,0.56,0.81,0.77,0.12,0.53,0.75,0.11 | ||
36,0.12,0.53,0.81,0.77,0.11,0.53,0.22,0.17,0.92,0.43,0.75,0.11 | ||
37,0.22,0.53,0.82,0.76,0.12,0.53,0.29,0.67,0.12,0.53,0.75,0.11 | ||
38,0.12,0.53,0.81,0.77,0.14,0.58,0.71,0.37,0.12,0.73,0.64,0.11 | ||
39,0.92,0.53,0.81,0.77,0.12,0.53,0.81,0.87,0.12,0.83,0.64,0.11 | ||
40,0.92,0.53,0.81,0.77,0.12,0.53,0.81,0.87,0.12,0.83,0.64,0.11 | ||
41,0.82,0.53,0.81,0.77,0.12,0.53,0.81,0.47,0.12,0.53,0.64,0.11 | ||
42,0.72,0.53,0.81,0.71,0.1,0.51,0.74,0.77,0.72,0.93,0.64,0.11 | ||
43,0.68,0.53,0.81,0.77,0.12,0.52,0.81,0.37,0.12,0.53,0.64,0.11 | ||
44,0.62,0.53,0.83,0.77,0.12,0.53,0.63,0.27,0.72,0.63,0.64,0.11 | ||
45,0.21,0.53,0.81,0.77,0.12,0.56,0.81,0.77,0.12,0.53,0.64,0.11 | ||
46,0.12,0.53,0.81,0.77,0.11,0.53,0.22,0.17,0.92,0.43,0.64,0.11 | ||
47,0.22,0.53,0.82,0.76,0.12,0.53,0.29,0.67,0.12,0.53,0.64,0.11 | ||
48,0.12,0.53,0.81,0.77,0.14,0.58,0.71,0.37,0.12,0.73,0.64,0.11 | ||
49,0.92,0.53,0.81,0.77,0.12,0.53,0.81,0.87,0.12,0.83,0.64,0.11 | ||
50,0.92,0.53,0.81,0.77,0.12,0.53,0.81,0.87,0.12,0.83,0.64,0.11 |
100 changes: 100 additions & 0 deletions
100
High-Dimensional Feature Selection of Medical Data/Data/example_task/example_task2.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
1,1 | ||
2,1 | ||
3,1 | ||
4,0 | ||
5,1 | ||
6,0 | ||
7,2 | ||
8,1 | ||
9,0 | ||
10,2 | ||
11,1 | ||
12,0 | ||
13,1 | ||
14,0 | ||
15,1 | ||
16,1 | ||
17,1 | ||
18,1 | ||
19,1 | ||
20,1 | ||
21,1 | ||
22,0 | ||
23,1 | ||
24,0 | ||
25,2 | ||
26,1 | ||
27,0 | ||
28,0 | ||
29,2 | ||
30,1 | ||
31,1 | ||
32,2 | ||
33,1 | ||
34,1 | ||
35,1 | ||
36,1 | ||
37,1 | ||
38,1 | ||
39,1 | ||
40,1 | ||
41,1 | ||
42,1 | ||
43,1 | ||
44,1 | ||
45,1 | ||
46,2 | ||
47,2 | ||
48,0 | ||
49,1 | ||
50,1 | ||
51,1 | ||
52,1 | ||
53,0 | ||
54,1 | ||
55,1 | ||
56,1 | ||
57,0 | ||
58,1 | ||
59,0 | ||
60,0 | ||
61,2 | ||
62,1 | ||
63,1 | ||
64,1 | ||
65,1 | ||
66,0 | ||
67,1 | ||
68,1 | ||
69,0 | ||
70,0 | ||
71,1 | ||
72,1 | ||
73,2 | ||
74,1 | ||
75,0 | ||
76,1 | ||
77,1 | ||
78,1 | ||
79,1 | ||
80,0 | ||
81,0 | ||
82,1 | ||
83,1 | ||
84,0 | ||
85,1 | ||
86,1 | ||
87,2 | ||
88,2 | ||
89,1 | ||
90,1 | ||
91,2 | ||
92,0 | ||
93,1 | ||
94,1 | ||
95,2 | ||
96,0 | ||
97,1 | ||
98,1 | ||
99,1 | ||
100,1 |
Oops, something went wrong.