Skip to content

Commit

Permalink
output error std dev
Browse files Browse the repository at this point in the history
  • Loading branch information
piskvorky committed Jan 11, 2014
1 parent 0f98ce9 commit f956158
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 8 deletions.
4 changes: 2 additions & 2 deletions run_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ run_combinations () {
}

# then create indexes for the various libraries & take accuracy measurements
ks="1 10 50 100 1000"
ks="1 10 100 1000"
OPENBLAS_NUM_THREADS=1 run_combinations "gensim" "exact" $ks
run_combinations "annoy" "10 50 100 500" $ks
run_combinations "annoy" "1 10 50 100 500" $ks
run_combinations "flann" "7 95 99" $ks
12 changes: 6 additions & 6 deletions shootout.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
TOP_N = 10 # how many similars to ask for
ACC = 'exact' # what accuracy are we aiming for
NUM_QUERIES = 100 # query with this many different documents, as a single experiment
REPEATS = 2 # run all queries this many times, take the best timing
REPEATS = 3 # run all queries this many times, take the best timing


FLANN_995 = {
Expand Down Expand Up @@ -159,7 +159,7 @@

ACC_SETTINGS = {
'flann': {'7': FLANN_7, '9': FLANN_9, '95': FLANN_95, '99': FLANN_99, '995': FLANN_995},
'annoy': {'10': 10, '50': 50, '100': 100, '500': 500},
'annoy': {'1': 1, '10': 10, '50': 50, '100': 100, '500': 500},
'lsh': {'low': {'k': 10, 'l': 10, 'w': float('inf')}, 'high': {'k': 10, 'l': 10, 'w': float('inf')}},
}

Expand Down Expand Up @@ -255,13 +255,13 @@ def get_accuracy(predicted_ids, queries, gensim_index, expecteds=None):
# if we got less than TOP_N results, assume zero similarity for the missing ids (LSH)
predicted_sims.extend([0.0] * (TOP_N - len(predicted_sims)))
diffs.extend(-numpy.array(predicted_sims) + expected_sims)
return correct / (TOP_N * len(queries)), 1.0 * sum(diffs) / len(diffs), max(diffs)
return correct / (TOP_N * len(queries)), numpy.mean(diffs), numpy.std(diffs), max(diffs)


def log_precision(method, index, queries, gensim_index, expecteds=None):
logger.info("computing accuracy of %s over %s queries at k=%s, acc=%s" % (method.__name__, NUM_QUERIES, TOP_N, ACC))
acc, avg_diff, max_diff = get_accuracy(method(index, queries), queries, gensim_index, expecteds)
logger.info("%s precision=%.3f, avg diff=%.3f, max diff=%.3f" % (method.__name__, acc, avg_diff, max_diff))
acc, avg_diff, std_diff, max_diff = get_accuracy(method(index, queries), queries, gensim_index, expecteds)
logger.info("%s precision=%.3f, avg diff=%.3f, std diff=%.5f, max diff=%.3f" % (method.__name__, acc, avg_diff, std_diff, max_diff))


def print_similar(title, index_gensim, id2title, title2id):
Expand Down Expand Up @@ -397,7 +397,7 @@ def print_similar(title, index_gensim, id2title, title2id):
logger.info("building sklearn index")
index_sklearn = NearestNeighbors(n_neighbors=TOP_N, algorithm='auto').fit(clipped)
logger.info("built sklearn index %s" % index_sklearn._fit_method)
# gensim.utils.pickle(index_sklearn, sim_prefix + '_sklearn') # 32GB RAM not enough to store the scikit-learn model...
gensim.utils.pickle(index_sklearn, sim_prefix + '_sklearn') # 32GB RAM not enough to store the scikit-learn model...
logger.info("finished sklearn index")

log_precision(sklearn_predictions, index_sklearn, queries, index_gensim)
Expand Down

0 comments on commit f956158

Please sign in to comment.