-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysis.R
102 lines (84 loc) · 3.14 KB
/
analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
rm(list=ls())
d = read.csv('Lassiter-must-NALS-2016-data-anonymized.csv')
length(levels(factor(d$workerid))) # 570
head(d)
levels(d$language)
d = subset(d, !(language %in% c('Indonesian', 'Japanese', 'Chinese', 'mandarin', 'vietnamese', 'Korean', 'French', 'Russian', 'Spanish', 'VIETNAMESE','tagalog','Tamil','Arabic','AVWUCILTTODO2','spanish','Tagalog','urdu','Mandarin','Cantonese','Chinese')))
levels(factor(d$language))
length(d$workerid)
plot(density(log(d$rt))) # check for approximately log-normal RT distribution
table = xtabs(~ response + modal, data=d)
Ns = xtabs(~ modal, data=d)
props = table[1,]/Ns
props = sort(props)
#names(props)[2] = 'know not'
# sanity check
approx.upper = props + 1.96 * sqrt(props * (1-props) / Ns)
approx.lower = props - 1.96 * sqrt(props * (1-props) / Ns)
bootstrap = function(mod) {
num = 50000
dat = subset(d, modal==mod)
modal.N = length(dat$response)
sample.props = sapply(1:num, function(i) {
s = sample(dat$response, modal.N, replace=T)
return(length(which(s == 'Agree'))/modal.N)
})
return(quantile(sample.props, c(.025, .975)))
}
CI = sapply(names(props), FUN=bootstrap)
names(props)[2] = 'know not' # fix condition name mis-annotated in JavaScript
par(mfrow=c(1,1))
#png("res.png") ; par(mar=c(6, 4, 4, 2) + 0.1)
barplot(props,
main='',
ylim=c(0,1),
ylab='Proportion agreement', las=2)
arrows(x0=c(.7, 1.9, 3.1, 4.3, 5.5, 6.7, 7.9, 9.1, 10.3),
y0=CI[1,],
y1=CI[2,],
angle=90,
code=3, length=.15)
# dev.off()
# sanity check - compare computed bootstrap CIs to binomial CIs generated by normal approximation
barplot(props,
main='',
ylim=c(0,1),
ylab='Proportion agreement', las=2)
arrows(x0=c(.7, 1.9, 3.1, 4.3, 5.5, 6.7, 7.9, 9.1, 10.3),
y0=approx.lower,
y1=approx.upper,
angle=90,
code=3, length=.15)
#
# basic descriptive stats, using 2-tailed bootstrap t-tests
#
prop.agree = function(v) abs(length(which(v))/length(v))
bootstrap.test = function(m1,m2) {
d1 = subset(d, modal==m1)$response == 'Agree'
d2 = subset(d, modal==m2)$response == 'Agree'
diff = prop.agree(d1) - prop.agree(d2)
comb = c(d1,d2)
s = sapply(1:50000, function(i) {
sim.d1 = prop.agree(sample(comb, length(d1), replace=T))
sim.d2 = prop.agree(sample(comb, length(d2), replace=T))
return(abs(sim.d1 - sim.d2) >= abs(diff))
})
return(mean(s)) # p-value computed by resampling
}
bootstrap.test('might', 'possible')
# key questions
bootstrap.test('knows', 'certain not')
bootstrap.test('knows', 'must not')
bootstrap.test('must not', 'certain not')
bootstrap.test('might', 'possible')
bootstrap.test('possibly', 'possible')
bootstrap.test('certain not', 'certainly not')
bootstrap.test('must not', 'did not')
bootstrap.test('did not', 'did')
bootstrap.test('certain not', 'must not')
bootstrap.test('certain not', 'did not')
bootstrap.test('did not', 'must not')
m0 = with(d, glm(response ~ modal + rt, family=binomial))
m1 = with(d, glm(response ~ modal * rt, family=binomial))
anova(m0, m1)
# nested movel comparison yields no evidence of interaction between modal and rt