Skip to content

Commit

Permalink
Merge pull request #2 from divyegala/python-ann-bench-use-gbench
Browse files Browse the repository at this point in the history
  • Loading branch information
cjnolet authored Aug 26, 2023
2 parents 617c60f + 902f9f4 commit 9b82f85
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 186 deletions.
80 changes: 0 additions & 80 deletions bench/ann/data_export.py

This file was deleted.

44 changes: 29 additions & 15 deletions bench/ann/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,25 +192,38 @@ def inv_fun(x):
plt.close()


def load_all_results(dataset_path):
    """Collect benchmark search results for every algorithm under a dataset.

    Scans ``<dataset_path>/result/search`` for google-benchmark CSV outputs
    (one file per algorithm).  Each file may begin with free-form preamble
    lines, followed by a header row containing ``"QPS"`` and then one data
    row per benchmark case.

    Parameters
    ----------
    dataset_path : str
        Root folder of the dataset (e.g. ``<data-dir>/glove-100-inner``).

    Returns
    -------
    dict
        Maps algorithm name to a list of ``[algo_name, col12, col10]``
        entries parsed from each data row.  Columns 12 and 10 appear to be
        the recall and QPS counters emitted by ANN_BENCH — TODO confirm
        against the benchmark's CSV column layout.
    """
    results = dict()
    results_path = os.path.join(dataset_path, "result", "search")
    for result_filename in os.listdir(results_path):
        with open(os.path.join(results_path, result_filename), 'r') as f:
            lines = f.readlines()
        # Skip everything up to and including the header row (identified by
        # the "QPS" column name); if no header is found, fall back to
        # skipping only the first line.
        idx = 0
        for pos, line in enumerate(lines):
            if "QPS" in line:
                idx = pos
                break

        for line in lines[idx + 1:]:
            split_lines = line.split(',')
            # Guard against blank or truncated trailing lines, which would
            # otherwise raise IndexError/ValueError on the float columns.
            if len(split_lines) <= 12:
                continue
            # Benchmark names look like "algo.param_variant" and may be
            # quoted in CSV output; keep only the algorithm prefix.
            algo_name = split_lines[0].split('.')[0].strip("\"")
            if algo_name not in results:
                results[algo_name] = []
            results[algo_name].append([algo_name, float(split_lines[12]),
                                       float(split_lines[10])])
    return results


def main():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--result-csv", help="Path to CSV Results", required=True)
parser.add_argument("--output", help="Path to the PNG output file",
default=f"{os.getcwd()}/out.png")
parser.add_argument("--dataset", help="dataset to download",
default="glove-100-inner")
parser.add_argument("--dataset-path", help="path to dataset folder",
default=os.path.join(os.getenv("RAFT_HOME"),
"bench", "ann", "data"))
parser.add_argument("--output-filename",
default="plot.png")
parser.add_argument(
"--x-scale",
help="Scale to use when drawing the X-axis. \
Expand All @@ -228,12 +241,13 @@ def main():
)
args = parser.parse_args()

print(f"writing output to {args.output}")
output_filepath = os.path.join(args.dataset_path, args.dataset, args.output_filename)
print(f"writing output to {output_filepath}")

results = load_all_results(args.result_csv)
results = load_all_results(os.path.join(args.dataset_path, args.dataset))
linestyles = create_linestyles(sorted(results.keys()))

create_plot(results, args.raw, args.x_scale, args.y_scale, args.output, linestyles)
create_plot(results, args.raw, args.x_scale, args.y_scale, output_filepath, linestyles)


if __name__ == "__main__":
Expand Down
97 changes: 53 additions & 44 deletions bench/ann/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,49 +41,62 @@ def find_executable(algos_conf, algo):
executable)
build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable)
if os.path.exists(conda_path):
return (executable, conda_path)
return (executable, conda_path, algo)
elif os.path.exists(build_path):
return (executable, build_path)
return (executable, build_path, algo)
else:
raise FileNotFoundError(executable)


def run_build_and_search(conf_file, conf_filename, conf_filedir,
                         executables_to_run, dataset_path, force,
                         build, search, k, batch_size):
    """Run the ANN_BENCH build and/or search phases for each executable.

    For every ``(executable, path, algo)`` key in ``executables_to_run``, a
    temporary JSON configuration containing only that executable's index
    definitions is written next to the original config, the benchmark binary
    is invoked as a subprocess, and the temporary file is removed afterwards.

    Parameters
    ----------
    conf_file : dict
        Parsed configuration (must contain "dataset" and
        "search_basic_param" sections).
    conf_filename : str
        Base name of the configuration file (used for the temp file name).
    conf_filedir : str
        Directory of the configuration file (temp file is written here).
    executables_to_run : dict
        Maps (executable, executable_path, algo) -> {"index": [...]}.
    dataset_path : str
        Root data directory passed to the benchmark via --data_prefix.
    force : bool
        When True, pass --overwrite to the benchmark binary.
    build, search : bool
        Which phases to run.
    k, batch_size : int
        Search-phase overrides (top-k and number of queries per batch).
    """
    for executable, ann_executable_path, algo in executables_to_run:
        # ANN_BENCH consumes one config file per invocation, so write a
        # temporary configuration holding only this executable's indices.
        temp_conf_filename = f"temporary_{conf_filename}"
        temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename)
        with open(temp_conf_filepath, "w") as f:
            temp_conf = {
                "dataset": conf_file["dataset"],
                "search_basic_param": conf_file["search_basic_param"],
                "index": executables_to_run[(executable,
                                             ann_executable_path,
                                             algo)]["index"],
            }
            json.dump(temp_conf, f)

        try:
            legacy_result_folder = os.path.join(
                dataset_path, conf_file["dataset"]["name"], "result")
            os.makedirs(legacy_result_folder, exist_ok=True)

            if build:
                build_folder = os.path.join(legacy_result_folder, "build")
                os.makedirs(build_folder, exist_ok=True)
                cmd = [ann_executable_path,
                       "--build",
                       "--data_prefix=" + dataset_path,
                       "--benchmark_out_format=csv",
                       f"--benchmark_out={os.path.join(build_folder, f'{algo}.csv')}"]
                if force:
                    cmd = cmd + ["--overwrite"]
                cmd = cmd + [temp_conf_filepath]
                print(cmd)
                p = subprocess.Popen(cmd)
                p.wait()

            if search:
                search_folder = os.path.join(legacy_result_folder, "search")
                os.makedirs(search_folder, exist_ok=True)
                cmd = [ann_executable_path,
                       "--search",
                       "--data_prefix=" + dataset_path,
                       "--benchmark_counters_tabular",
                       "--override_kv=k:%s" % k,
                       "--override_kv=n_queries:%s" % batch_size,
                       "--benchmark_out_format=csv",
                       f"--benchmark_out={os.path.join(search_folder, f'{algo}.csv')}"]
                if force:
                    cmd = cmd + ["--overwrite"]
                cmd = cmd + [temp_conf_filepath]
                print(cmd)
                p = subprocess.Popen(cmd)
                p.wait()
        finally:
            # Always clean up the temporary configuration, even when the
            # benchmark invocation or directory creation fails.
            os.remove(temp_conf_filepath)
Expand Down Expand Up @@ -157,28 +170,24 @@ def main():
# Read configuration file associated to dataset
if args.configuration:
conf_filepath = args.configuration
elif args.dataset:
conf_filepath = \
os.path.join(scripts_path, "conf", f"{args.dataset}.json")
else:
conf_filepath = os.path.join(scripts_path, "conf", f"{args.dataset}.json")
raise ValueError("One of parameters `configuration` or \
`dataset` need to be provided")
conf_filename = conf_filepath.split("/")[-1]
conf_filedir = "/".join(conf_filepath.split("/")[:-1])
dataset_name = conf_filename.replace(".json", "")
dataset_path = os.path.realpath(os.path.join(args.dataset_path, dataset_name))
dataset_path = args.dataset_path
if not os.path.exists(conf_filepath):
raise FileNotFoundError(conf_filename)
if not os.path.exists(os.path.join(args.dataset_path, dataset_name)):
raise FileNotFoundError(os.path.join(args.dataset_path, dataset_name))

with open(conf_filepath, "r") as f:
conf_file = json.load(f)

# Replace base, query to dataset-path
conf_file["dataset"]["base_file"] = os.path.join(dataset_path, "base.fbin")
conf_file["dataset"]["query_file"] = os.path.join(dataset_path, "query.fbin")
conf_file["dataset"]["groundtruth_neighbors_file"] = os.path.join(dataset_path, "groundtruth.neighbors.ibin")
# Ensure base and query files exist for dataset
if not os.path.exists(conf_file["dataset"]["base_file"]):
raise FileNotFoundError(conf_file["dataset"]["base_file"])
if not os.path.exists(conf_file["dataset"]["query_file"]):
raise FileNotFoundError(conf_file["dataset"]["query_file"])

executables_to_run = dict()
# At least one named index should exist in config file
if args.indices:
Expand Down Expand Up @@ -218,16 +227,16 @@ def main():
executables_to_run[executable_path] = {"index": []}
executables_to_run[executable_path]["index"].append(index)

# Replace build, search to dataset path
# Replace index to dataset path
for executable_path in executables_to_run:
for pos, index in enumerate(executables_to_run[executable_path]["index"]):
index["file"] = os.path.join(dataset_path, "index", index["name"])
index["search_result_file"] = \
os.path.join(dataset_path, "result", index["name"])
index["file"] = os.path.join(dataset_path, dataset_name, "index", index["name"])
executables_to_run[executable_path]["index"][pos] = index

run_build_and_search(conf_filename, conf_file, executables_to_run,
args.force, conf_filedir, build, search, k, batch_size)
run_build_and_search(conf_file, conf_filename, conf_filedir,
executables_to_run, dataset_path,
args.force, build, search,
k, batch_size)


if __name__ == "__main__":
Expand Down
13 changes: 11 additions & 2 deletions cpp/bench/ann/src/common/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,17 @@ auto load_lib(const std::string& algo) -> void*
auto found = libs.find(algo);

if (found != libs.end()) { return found->second.handle; }
auto lib_name = "lib" + algo + "_ann_bench.so";
return libs.emplace(algo, lib_name).first->second.handle;
auto lib_name = "lib" + algo + "_ann_bench.so";
std::string lib_path = "";
if (std::getenv("CONDA_PREFIX") != nullptr) {
auto conda_path = std::string(std::getenv("CONDA_PREFIX")) + "/bin" + "/ann/";
if (std::filesystem::exists(conda_path + "ANN_BENCH")) { lib_path = conda_path; }
}
if (std::getenv("RAFT_HOME") != nullptr) {
auto build_path = std::string(std::getenv("RAFT_HOME")) + "/cpp" + "/build/";
if (std::filesystem::exists(build_path + "ANN_BENCH")) { lib_path = build_path; }
}
return libs.emplace(algo, lib_path + lib_name).first->second.handle;
}

auto get_fun_name(void* addr) -> std::string
Expand Down
Loading

0 comments on commit 9b82f85

Please sign in to comment.