-
Notifications
You must be signed in to change notification settings - Fork 3
/
hllEkzhu.py
83 lines (64 loc) · 2.03 KB
/
hllEkzhu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# AxProf example for testing the HyperLogLog implementation in ekzhu/datasketch
# NOTICE:
# This example requires the datasketch library
# Please follow instructions for downloading the datasketch library in the
# 'Example' section of README.md in the root directory of this repository.
import sys
import time
import pickle
import subprocess
from numpy import sqrt
sys.path.append('../AxProf')
import AxProf
sys.path.append('./datasketch')
# Import the HyperLogLog implementation under test. When the datasketch
# checkout is missing, print setup instructions and stop with a non-zero status.
try:
    from datasketch import hyperloglog
except ModuleNotFoundError:
    print(
"""
Error: datasketch library not found!
Please follow instructions for downloading the datasketch library in the
'Example' section of README.md in the root directory of this repository.
""")
    # sys.exit instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist
    # (e.g. under `python -S`).
    sys.exit(-1)
# Configuration values handed to AxProf.checkProperties:
#   'k'        - read by runner() and passed to HyperLogLog as its `p` argument
#   'datasize' - read by input_params() as the first generator argument
configlist = dict(
    k=[8, 10, 12, 14],
    datasize=range(10000, 110000, 10000),
)
def input_params(config, inputNum):
    """Build the input-generator arguments for one configuration.

    Args:
        config: mapping holding at least the 'datasize' entry.
        inputNum: index of the input being generated; not used here.

    Returns:
        The tuple ``(config['datasize'], 0, 1000000)``.
        NOTE(review): the last two values are presumably the lower and upper
        bounds of the generated integers -- confirm against
        AxProf.distinctIntegerGenerator.
    """
    size = config['datasize']
    return (size, 0, 1000000)
# Specification text passed to AxProf.checkProperties via `spec=`.
# It declares a list-of-real input and a real output, the helper functions
# abs and sqrt, and three properties over the config values k and datasize:
#   TIME  - k*datasize
#   SPACE - 2^k
#   ACC   - with probability > 0.65 over inputs, the output is within
#           datasize*1.04/sqrt(2^k) of datasize
# NOTE(review): 1.04/sqrt(2^k) looks like HyperLogLog's usual relative
# standard error with 2^k registers -- confirm.
spec = '''
Input list of real;
Output real;
abs real;
sqrt real;
TIME k*datasize;
SPACE 2^k;
ACC Probability over inputs[ abs(datasize-Output) < (datasize*1.04)/sqrt(2^k) ] > 0.65
'''
def runner(ifname, config):
    """Run one HyperLogLog trial and report its time, space and estimate.

    Every line of the input file is fed to a fresh HyperLogLog sketch built
    with ``p=int(config['k'])``. The sketch is then pickled to
    ``_AXPROF_MEMDUMP`` and the size of that file is used as the space figure.

    Args:
        ifname: path of a text file holding one input item per line.
        config: mapping holding at least the 'k' entry.

    Returns:
        dict with the keys
        'time'  - seconds spent in the update loop only (file reading and
                  pickling are excluded),
        'space' - size in bytes of the pickled sketch,
        'acc'   - the sketch's cardinality estimate, ``count()``.
    """
    import os  # local import: only needed for the dump-size lookup below

    sketch = hyperloglog.HyperLogLog(p=int(config['k']))

    # Load all items up front so file I/O is not part of the timed region.
    # rstrip('\n') removes only the line terminator, so a final line without
    # a trailing newline keeps its last character.
    with open(ifname, "r") as infile:
        data = [line.rstrip('\n') for line in infile]

    # perf_counter is monotonic, so the interval is unaffected by clock
    # adjustments.
    started = time.perf_counter()
    for item in data:
        sketch.update(item.encode('utf8'))
    elapsed = time.perf_counter() - started

    # Close the dump file before measuring it; otherwise buffered bytes may
    # not have reached disk and the reported size would be too small.
    outfile = "_AXPROF_MEMDUMP"
    with open(outfile, "wb") as dump:
        pickle.dump(sketch, dump)

    return {
        'time': elapsed,
        'space': os.path.getsize(outfile),
        'acc': sketch.count(),
    }
if __name__ == "__main__":
    # Print the system date before and after the run as coarse timestamps.
    subprocess.run(['date'])
    try:
        AxProf.checkProperties(configlist, 1, None,
                               AxProf.distinctIntegerGenerator,
                               input_params, runner, spec=spec)
        subprocess.run(['date'])
    finally:
        # Remove the pickle dump written by runner() even when the check
        # raises, so a failed run does not leave the file behind.
        subprocess.run(args=['rm', '-f', '_AXPROF_MEMDUMP'])