Commit

Merge pull request #9 from svanoort/aws-launch
Enhanced benchmarking options with CSV out and different request sizes
svanoort committed Feb 27, 2016
2 parents ca43193 + 3d433df commit f8acfc2
Showing 6 changed files with 331 additions and 17 deletions.
7 changes: 7 additions & 0 deletions aws-results/clean-aws-to-aws-bench.csv
@@ -0,0 +1,7 @@
Response_size,Requests Time (cnxn reuse),pyCurl Time (no cnxn reuse),Requests Time (no cnxn reuse),pyCurl Time (cnxn reuse)
4,10.57290005683899,12.881441116333008,20.69159984588623,7.185319900512695
1024,11.267004013061523,12.047899007797241,21.88335609436035,6.80279803276062
4096,11.35699200630188,12.302227020263672,21.16353702545166,7.973640203475952
8192,11.554597854614258,12.434753179550171,21.347973108291626,8.629438877105713
32768,12.226283073425293,17.47734785079956,24.330202102661133,8.525309801101685
131072,19.669034957885742,37.54492807388306,44.73135995864868,19.619460105895996
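The committed CSV above can be sanity-checked in a few lines. A minimal Python 3 sketch (the rows are inlined verbatim from the file so it runs standalone) computing the requests-vs-pycurl ratio for the connection-reuse columns:

```python
import csv
import io

# Rows inlined verbatim from aws-results/clean-aws-to-aws-bench.csv above.
DATA = """\
Response_size,Requests Time (cnxn reuse),pyCurl Time (no cnxn reuse),Requests Time (no cnxn reuse),pyCurl Time (cnxn reuse)
4,10.57290005683899,12.881441116333008,20.69159984588623,7.185319900512695
1024,11.267004013061523,12.047899007797241,21.88335609436035,6.80279803276062
4096,11.35699200630188,12.302227020263672,21.16353702545166,7.973640203475952
8192,11.554597854614258,12.434753179550171,21.347973108291626,8.629438877105713
32768,12.226283073425293,17.47734785079956,24.330202102661133,8.525309801101685
131072,19.669034957885742,37.54492807388306,44.73135995864868,19.619460105895996
"""

# requests / pycurl time ratio for the connection-reuse columns, per response size
ratios = {}
for row in csv.DictReader(io.StringIO(DATA)):
    size = int(row['Response_size'])
    ratios[size] = (float(row['Requests Time (cnxn reuse)'])
                    / float(row['pyCurl Time (cnxn reuse)']))

for size, ratio in sorted(ratios.items()):
    print('{0:>7} bytes: requests takes {1:.2f}x the pycurl time'.format(size, ratio))
```

The ratio shrinks as responses grow (about 1.47x at 4 bytes, near 1.0x at 128 kB): per-request overhead matters less once transfer time dominates.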
97 changes: 97 additions & 0 deletions aws-results/clean-aws-to-aws-run.txt
@@ -0,0 +1,97 @@
TESTING AGAINST URL: http://54.209.242.190:5000 with delay None
START testing requests performance with 10000 cycles and connection reuse ACTUALLY False
Options: Default
END testing result: 10.5729000568

START testing pycurl performance with 10000 cycles and connection reuse False
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 12.8814411163

START testing requests performance with 10000 cycles and connection reuse ACTUALLY True
Options: Default
END testing result: 20.6915998459

START testing pycurl performance with 10000 cycles and connection reuse True
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 7.18531990051

START testing requests performance with 10000 cycles and connection reuse ACTUALLY False
Options: Default
END testing result: 11.2670040131

START testing pycurl performance with 10000 cycles and connection reuse False
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 12.0478990078

START testing requests performance with 10000 cycles and connection reuse ACTUALLY True
Options: Default
END testing result: 21.8833560944

START testing pycurl performance with 10000 cycles and connection reuse True
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 6.80279803276

START testing requests performance with 10000 cycles and connection reuse ACTUALLY False
Options: Default
END testing result: 11.3569920063

START testing pycurl performance with 10000 cycles and connection reuse False
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 12.3022270203

START testing requests performance with 10000 cycles and connection reuse ACTUALLY True
Options: Default
END testing result: 21.1635370255

START testing pycurl performance with 10000 cycles and connection reuse True
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 7.97364020348

START testing requests performance with 10000 cycles and connection reuse ACTUALLY False
Options: Default
END testing result: 11.5545978546

START testing pycurl performance with 10000 cycles and connection reuse False
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 12.4347531796

START testing requests performance with 10000 cycles and connection reuse ACTUALLY True
Options: Default
END testing result: 21.3479731083

START testing pycurl performance with 10000 cycles and connection reuse True
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 8.62943887711

START testing requests performance with 10000 cycles and connection reuse ACTUALLY False
Options: Default
END testing result: 12.2262830734

START testing pycurl performance with 10000 cycles and connection reuse False
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 17.4773478508

START testing requests performance with 10000 cycles and connection reuse ACTUALLY True
Options: Default
END testing result: 24.3302021027

START testing pycurl performance with 10000 cycles and connection reuse True
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 8.5253098011

START testing requests performance with 10000 cycles and connection reuse ACTUALLY False
Options: Default
END testing result: 19.6690349579

START testing pycurl performance with 10000 cycles and connection reuse False
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 37.5449280739

START testing requests performance with 10000 cycles and connection reuse ACTUALLY True
Options: Default
END testing result: 44.7313599586

START testing pycurl performance with 10000 cycles and connection reuse True
Options: Reuse handle, save response to new cStringIO buffer
END testing result: 19.6194601059

41 changes: 41 additions & 0 deletions aws-results/client-pip-freeze.txt
@@ -0,0 +1,41 @@
aws-cfn-bootstrap==1.4
awscli==1.10.1
Babel==0.9.4
backports.ssl-match-hostname==3.4.0.2
boto==2.39.0
botocore==1.3.23
chardet==2.0.1
cloud-init==0.7.6
colorama==0.2.5
configobj==4.7.2
docutils==0.11
ecdsa==0.11
iniparse==0.3.1
Jinja2==2.7.2
jmespath==0.7.1
jsonpatch==1.2
jsonpointer==1.0
kitchen==1.1.1
lockfile==0.8
MarkupSafe==0.11
paramiko==1.15.1
PIL==1.1.6
ply==3.4
pyasn1==0.1.7
pycrypto==2.6.1
pycurl==7.19.0
pygpgme==0.3
pyliblzma==0.5.3
pystache==0.5.3
python-daemon==1.5.2
python-dateutil==2.1
pyxattr==0.5.0
PyYAML==3.10
requests==1.2.3
rsa==3.3
simplejson==3.6.5
six==1.8.0
urlgrabber==3.9.1
urllib3==1.8.2
virtualenv==12.0.7
yum-metadata-parser==1.1.4
108 changes: 108 additions & 0 deletions aws-results/config.txt
@@ -0,0 +1,108 @@
Environment config:

# Host 1: running the Docker container for gunicorn, plus top to monitor load
- Each worker uses ~5% of a CPU; the main gunicorn process occasionally spikes to ~35%
- Load average around 25% initially, ramping up as the benchmark progresses

# Host 2: running benchmark
python benchmark.py --cycles 10000 --url http://54.209.242.190:5000 --output-file clean-aws-to-aws-bench.csv | tee clean-aws-to-aws-run.log

# Testing bandwidth available to hosts/CPU load
# on client
sudo yum install -y httpd-tools
ab -n 10000 -c 1 http://54.86.170.38:5000/length/131072

This is ApacheBench, Version 2.3 <$Revision: 655654 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/

Benchmarking 54.86.170.38 (be patient)
Completed 1000 requests
Completed 2000 requests
Completed 3000 requests
Completed 4000 requests
Completed 5000 requests
Completed 6000 requests
Completed 7000 requests
Completed 8000 requests
Completed 9000 requests
Completed 10000 requests
Finished 10000 requests


Server Software: meinheld/0.5.8
Server Hostname: 54.86.170.38
Server Port: 5000

Document Path: /length/131072
Document Length: 131072 bytes

Concurrency Level: 1
Time taken for tests: 40.226 seconds
Complete requests: 10000
Failed requests: 0
Write errors: 0
Total transferred: 1312350000 bytes
HTML transferred: 1310720000 bytes
Requests per second: 248.59 [#/sec] (mean)
Time per request: 4.023 [ms] (mean)
Time per request: 4.023 [ms] (mean, across all concurrent requests)
Transfer rate: 31859.52 [Kbytes/sec] received

Connection Times (ms)
min mean[+/-sd] median max
Connect: 0 0 10.0 0 997
Processing: 3 4 0.3 4 10
Waiting: 1 1 0.1 1 2
Total: 3 4 10.0 4 1001

Percentage of the requests served within a certain time (ms)
50% 4
66% 4
75% 4
80% 4
90% 4
95% 4
98% 5
99% 5
100% 1001 (longest request)

Setting ab concurrency to 32 reveals that the transfer rate peaks at 65038.82 [Kbytes/sec]; using private IPs it is similar (~68 MB/s in this case).

Smaller requests (32 kB) can do up to 100 MB/s; at 16 kB/request ab reports 54 MB/s, and at 8 kB, 28 MB/s.
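As a quick cross-check, ab's summary lines are internally consistent. A small sketch recomputing them from the totals copied out of the ab output above:

```python
# Cross-check ApacheBench's summary arithmetic from its own totals
# (all numbers copied from the ab output above).
total_bytes = 1312350000   # "Total transferred"
seconds = 40.226           # "Time taken for tests"
requests = 10000

rps = requests / seconds                    # ab reports 248.59 [#/sec]
ms_per_request = seconds / requests * 1000  # ab reports 4.023 [ms]
kbytes_per_sec = total_bytes / seconds / 1024  # ab reports 31859.52 [Kbytes/sec]

print('req/s={0:.2f}  ms/req={1:.3f}  KB/s={2:.2f}'.format(
    rps, ms_per_request, kbytes_per_sec))
```

The tiny differences from ab's printed values come from ab rounding the elapsed time to milliseconds before reporting.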

# Library versions
benchmark server (Docker on AWS) - Dockerfile uses a Debian base image
Flask==0.10.1
gevent==1.0.2
greenlet==0.4.9
gunicorn==19.4.5
itsdangerous==0.24
Jinja2==2.8
MarkupSafe==0.23
meinheld==0.5.8
pycurl==7.43.0
requests==2.9.1
urllib3==1.14
virtualenv==13.1.2
Werkzeug==0.11.4
wheel==0.29.0


benchmark client - see client-pip-freeze.txt

# BOTH hosts running this Linux:
NAME="Amazon Linux AMI"
VERSION="2015.09"
ID="amzn"
ID_LIKE="rhel fedora"
VERSION_ID="2015.09"
PRETTY_NAME="Amazon Linux AMI 2015.09"
ANSI_COLOR="0;33"
CPE_NAME="cpe:/o:amazon:linux:2015.09:ga"
HOME_URL="http://aws.amazon.com/amazon-linux-ami/"
Amazon Linux AMI release 2015.09

*ALL* packages in yum updated to latest.
13 changes: 0 additions & 13 deletions batch-benchmark.sh

This file was deleted.

82 changes: 78 additions & 4 deletions benchmark.py
@@ -35,9 +35,73 @@ def run_test(library, url, cycles, connection_reuse, options, setup_test, run_te

print("END testing result: {0}".format(mytime))
print(' ')
-    result = (library, connection_reuse, options, cycles, mytime)
+    result = [library, connection_reuse, options, cycles, mytime]
return result

def run_size_benchmarks(url='', cycles=10, delay=None, output_file=None, length_api_format='/length/$length', **kwargs):
""" Run variable-size benchmarks, where URL is the base url """
# This will generate approximately 10 GB of total traffic to host
sizes = [4, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072]

REQUESTS_NOREUSE = ('requests', False, 'Default',
'import requests',
"r = requests.get('$url')")
REQUESTS_REUSE = ('requests', True, 'Default',
"import requests; \
session = requests.Session(); \
r = requests.Request('GET', '$url').prepare()",
"v = session.send(r)")
PYCURL_REUSE = ('pycurl', True, "Reuse handle, save response to new cStringIO buffer",
"from pycurl import Curl; from cStringIO import StringIO; \
mycurl=Curl(); \
mycurl.setopt(mycurl.URL, '$url')",
"body = StringIO(); \
mycurl.setopt(mycurl.WRITEFUNCTION, body.write); \
mycurl.perform(); \
val = body.getvalue(); \
body.close()")
PYCURL_NOREUSE = ('pycurl', False, "Reuse handle, save response to new cStringIO buffer",
"from pycurl import Curl; from cStringIO import StringIO; \
mycurl=Curl(); \
mycurl.setopt(mycurl.URL, '$url'); \
body = StringIO(); \
mycurl.setopt(mycurl.FORBID_REUSE, 1)",
"body = StringIO(); \
mycurl.setopt(mycurl.WRITEFUNCTION, body.write); \
mycurl.perform(); \
val = body.getvalue(); \
body.close()")

TEST_TYPES = [REQUESTS_NOREUSE, PYCURL_NOREUSE, REQUESTS_REUSE, PYCURL_REUSE]

all_results = list()

# Run tests
for size in sizes:
temp_url = url + string.Template(length_api_format).substitute(length=size)
for test in TEST_TYPES:
result = run_test(test[0], temp_url, cycles, test[1], test[2], test[3], test[4], delay=delay)
del result[3] # Don't need cycles
result.insert(0, size)
all_results.append(result)

# Transform tuples to size, time graphs for each response size
final_output = [[x, 0, 0, 0, 0] for x in sizes]
for i in xrange(0, len(sizes)):
final_output[i][1] = all_results[i*4][4]
final_output[i][2] = all_results[i*4+1][4]
final_output[i][3] = all_results[i*4+2][4]
final_output[i][4] = all_results[i*4+3][4]

headers = ('Response_size', 'Requests Time (no cnxn reuse)', 'pyCurl Time (no cnxn reuse)',
'Requests Time (cnxn reuse)', 'pyCurl Time (cnxn reuse)')
if output_file:
with open(output_file, 'wb') as csvfile:
outwriter = csv.writer(csvfile, dialect=csv.excel)
outwriter.writerow(headers)
for result in final_output:
outwriter.writerow(result)

def run_all_benchmarks(url='', cycles=10, delay=None, output_file=None, **kwargs):
results = list()

@@ -133,10 +197,20 @@ def run_all_benchmarks(url='', cycles=10, delay=None, output_file=None, **kwargs
parser.add_argument('--url', metavar='u', type=str, default='http://localhost:5000/ping', help="URL to run requests against")
parser.add_argument('--cycles', metavar='c', type=int, default=10000, help="Number of cycles to run")
parser.add_argument('--delay', metavar='d', type=float, help="Delay in seconds between requests")
-parser.add_argument('--output-file', metavar='o', type=str, help="Output file to write CSV results to")
+parser.add_argument('--output-file', metavar='o', nargs='?', type=str, help="Output file to write CSV results to")
parser.add_argument('--benchmark-type', type=str, default="full", choices=('full','size'), help="Benchmark type to run: full=all libraries, 1 request, size=basic pycurl/requests tests with different request sizes")
parser.add_argument('--length-api-format', metavar='l', type=str, default="/length/$length", help="Template for API request that accepts response length parameter, for size benchmarks")
args = vars(parser.parse_args())
if args.get('url') is None:
print("No URL supplied, you must supply a URL!")
exit(1)
-print('TESTING AGAINST URL: {0} with delay {1}'.format(args['url'],args['delay']))
-run_all_benchmarks(**args)
+print('TESTING AGAINST BASE URL: {0} with delay {1}'.format(args['url'],args['delay']))
+
+if args['benchmark_type'] == 'full':
+    run_all_benchmarks(**args)
+elif args['benchmark_type'] == 'size':
+    run_size_benchmarks(**args)
+else:
+    raise Exception("Illegal benchmark type: {0}".format(args['benchmark_type']))
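The size benchmark added above builds its per-size URLs by substituting each response size into the `--length-api-format` template, the same way `run_size_benchmarks` does with `string.Template`. A small standalone sketch of that expansion (the localhost base URL is a placeholder, not from the benchmark run):

```python
import string

# How run_size_benchmarks expands --length-api-format into per-size URLs.
# base_url is a placeholder; '/length/$length' is the flag's default value.
base_url = 'http://localhost:5000'
template = '/length/$length'

urls = [base_url + string.Template(template).substitute(length=size)
        for size in (4, 1024, 131072)]
print(urls)
```

Because `$length` is an ordinary `string.Template` placeholder, any server endpoint that takes the size in its path can be targeted by changing the flag, without touching the benchmark code.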

