-
Notifications
You must be signed in to change notification settings - Fork 1
/
extractTfctAndPcaSimple.m
90 lines (76 loc) · 3.36 KB
/
extractTfctAndPcaSimple.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
function extractTfctAndPcaSimple(outDir, featDir, groupedFeatDir, pcaDataFile, groupedFile, overwrite)
% Extract features necessary to run several experiments and visualizations
%
% extractTfctAndPcaSimple(outDir, featDir, groupedFeatDir, pcaDataFile, groupedFile, overwrite)
%
% For every file under groupedFeatDir whose name matches '\.mat', loads the
% grouped PCA data, loads the corresponding full features from featDir,
% computes summary statistics with computeTfctStats and corrSufficientStats,
% and saves the result to
%   <outDir>/target=<basename of grouped file>/tfctAndPca.mat
%
% Inputs
%   outDir          base directory for output directory tree
%   featDir         base directory containing full (non-PCA) features
%   groupedFeatDir  base directory containing full PCA features grouped by
%                   original word
%   pcaDataFile     .mat file with pca info, e.g. 'pcaData_100dims_1000files.mat'
%                   (currently only referenced by the commented-out
%                   crossUtWarp call below)
%   groupedFile     .mat file containing grouped results from listening test
%                   (currently only referenced by the commented-out
%                   crossUtWarp call below)
%   overwrite       if 1, overwrite existing output files (default: 0)
%
% NOTE: the output is written with save(outFile) and no variable list, so
% the names of the local variables in this function are part of the output
% file's contents. Renaming or adding locals changes what is saved.

% Default to not overwriting when the argument is omitted or empty
if ~exist('overwrite', 'var') || isempty(overwrite), overwrite = 0; end
pcaFiles = findFiles(groupedFeatDir, '\.mat');
for target = 1:length(pcaFiles)
% One output subdirectory per grouped file, named 'target=<name>'
% (presumably basename(...,0,0) strips directory and extension -- confirm
% against the basename helper)
outFile = fullfile(outDir, sprintf('target=%s',basename(pcaFiles{target},0,0)), ...
'tfctAndPca.mat');
% Leave existing results alone unless overwrite was requested
if exist(outFile, 'file') && ~overwrite
fprintf('Skipping %s\n', outFile)
continue
end
% Fields read from the grouped file: pcaFeat, isRight, origShape,
% cleanFeat (with .features and .origShape), files, fracRight
pcaFileInfo = load(fullfile(groupedFeatDir, pcaFiles{target}));
Xte = pcaFileInfo.pcaFeat;
yte = pcaFileInfo.isRight;
% Report the percentage of entries of yte that are positive
fprintf('%g%% correct\n', 100*mean(yte>0));
origShape = pcaFileInfo.origShape;
clean = reshape(pcaFileInfo.cleanFeat.features, pcaFileInfo.cleanFeat.origShape);
% Gather the full features, one row per listed file; each row holds
% prod(origShape) values
warped = zeros(length(yte), prod(origShape));
for f = 1:length(pcaFileInfo.files)
wTmp = load(fullfile(featDir, pcaFileInfo.files{f}));
warped(f,:) = wTmp.features;
end
% These are not right, just temporarily set to something...
ytem = pcaFileInfo.fracRight; % should be the listener's selections
mNames = {'1'}; % should be equivalence classes of selections
%[~,~,Xte,yte,ytem,mNames,warped,~,origShape,clean,warpDist,mfccDist,startDist] = ...
% crossUtWarp(fullfile(groupedFeatDir, pcaFiles{target}), ...
% fullfile(featDir, cleanFiles{target}), pcaDataFile, groupedFile, 0);
% Nothing to compute if the grouped file holds no PCA feature rows
if size(Xte,1) == 0
fprintf('Skipping %s\n', outFile);
continue
end
[s0 s1 sNot0 sNot1 n0 n1 sig] = computeTfctStats(yte, warped);
% Divide each row of ytem by its row sum; the 1e-9 is presumably there to
% avoid dividing by zero for all-zero rows
nytem = bsxfun(@rdivide, ytem, sum(ytem,2)+1e-9);
[ssn ssy1 ssy2 ssx1 ssx2 ssyx] = corrSufficientStats(nytem, warped);
% Drop the full feature matrix so it is not written by the save below
clear warped
ensureDirExists(outFile);
% Saves every variable currently in this function's workspace
save(outFile);
fprintf('Wrote %s\n', outFile);
end
function [s0 s1 sNot0 sNot1 n0 n1 sig] = computeTfctStats(yte,warped)
% Per-column statistics of warped, split by the sign of yte
%
% [s0 s1 sNot0 sNot1 n0 n1 sig] = computeTfctStats(yte, warped)
%
% Rows of warped with yte < 0 form group 0 and rows with yte > 0 form
% group 1; rows where yte == 0 belong to neither group.
%
% Outputs
%   s0, s1        single-precision column sums of each group
%   sNot0, sNot1  group row count minus the column sum (the number of
%                 zeros per column if the features are 0/1 -- TODO confirm
%                 that warped is binary)
%   n0, n1        number of rows in each group
%   sig           column-wise standard deviation over all rows of warped

negRows = yte < 0;
posRows = yte > 0;

% Group sizes, needed for the point-biserial correlation
n0 = size(warped(negRows,:), 1);
n1 = size(warped(posRows,:), 1);

% Sums and their complements, needed for TFCT
s0 = single(sum(warped(negRows,:), 1));
s1 = single(sum(warped(posRows,:), 1));
sNot0 = n0 - s0;
sNot1 = n1 - s1;

% Spread of every column across all rows, also for the point-biserial
% correlation
sig = std(warped, [], 1);
function matchedFiles = matchCleanToPcaFiles(pcaFiles, cleanFiles)
% Pair each PCA file with the one clean file that shares its name prefix
%
% matchedFiles = matchCleanToPcaFiles(pcaFiles, cleanFiles)
%
% Names are compared after removing everything from the first '_bps'
% onward.
%
% Inputs
%   pcaFiles    cell array of PCA file names
%   cleanFiles  cell array of clean file names
%
% Output
%   matchedFiles  cell array the same size as pcaFiles; element i is the
%                 entry of cleanFiles whose truncated name equals the
%                 truncated name of pcaFiles{i}
%
% Raises an error if a PCA file has no matching clean file or more than
% one.

pcaTrunc = regexprep(pcaFiles, '_bps.*', '');
clnTrunc = regexprep(cleanFiles, '_bps.*', '');

matchedFiles = cell(size(pcaFiles));
for i = 1:length(pcaTrunc)
    matches = find(strcmp(pcaTrunc{i}, clnTrunc));
    % Report the single offending name: passing the whole cell array to
    % error() is not a valid format argument and hides the real problem
    if isempty(matches)
        error('Could not find clean file for %s', pcaTrunc{i});
    elseif length(matches) > 1
        error('Found %d matches for %s', length(matches), pcaTrunc{i});
    end
    matchedFiles{i} = cleanFiles{matches};
end