Skip to content

Commit

Permalink
Merge pull request kubernetes#303 from prameshj/nodecache-test
Browse files Browse the repository at this point in the history
Add support to test node-local-cache dns server
  • Loading branch information
k8s-ci-robot authored Mar 12, 2019
2 parents 477a113 + 16c877a commit a95787c
Show file tree
Hide file tree
Showing 10 changed files with 196 additions and 19 deletions.
11 changes: 11 additions & 0 deletions dns/cluster/testsvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
apiVersion: v1
kind: Service
metadata:
name: test-svc
labels:
app: test-svc
spec:
ports:
- name: test
port: 8080
protocol: TCP
2 changes: 1 addition & 1 deletion dns/params/coredns/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ coredns_cache: [0, 10000]
# until requests are dropped. null means no limit.
max_qps: [500, 1000, 2000, 3000, null]
# File to take queries from. This is in dnsperf format.
query_file: ["nx-domain.txt", "outside.txt", "pod-ip.txt", "service.txt"]
query_file: ["nx-domain.txt", "outside.txt", "pod-ip.txt", "service.txt", "multiple-svc.txt"]
2 changes: 1 addition & 1 deletion dns/params/kubedns/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ dnsmasq_cache: [0, 10000]
# until requests are dropped. null means no limit.
max_qps: [500, 1000, 2000, 3000, null]
# File to take queries from. This is in dnsperf format.
query_file: ["nx-domain.txt", "outside.txt", "pod-ip.txt", "service.txt"]
query_file: ["nx-domain.txt", "outside.txt", "pod-ip.txt", "service.txt", "multiple-svc.txt"]
9 changes: 9 additions & 0 deletions dns/params/nodelocaldns/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Default parameters that explore the performance space given limited CPU.

# Number of seconds to run with a particular setting.
run_length_seconds: [60]
# Maximum QPS for dnsperf. dnsperf is self-pacing and will ramp request rate
# until requests are dropped. null means no limit.
max_qps: [500, 1000, 2000, 3000, null]
# File to take queries from. This is in dnsperf format.
query_file: ["nx-domain.txt", "outside.txt", "pod-ip.txt", "service.txt", "multiple-svc.txt"]
9 changes: 9 additions & 0 deletions dns/params/nodelocaldns/small.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Default parameters that explore the performance space given limited CPU.

# Number of seconds to run with a particular setting.
run_length_seconds: [60]
# Maximum QPS for dnsperf. dnsperf is self-pacing and will ramp request rate
# until requests are dropped. null means no limit.
max_qps: [500, 2000, null]
# File to take queries from. This is in dnsperf format.
query_file: ["nx-domain.txt", "outside.txt", "pod-ip.txt", "service.txt", "multiple-svc.txt"]
4 changes: 4 additions & 0 deletions dns/py/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ def __init__(self, name, val_type, regex):
re.compile(r'\s*Average Latency \(s\):.*max ([0-9.]+).*')),
Result('stddev_latency', float,
re.compile(r'\s*Latency StdDev \(s\):\s*([0-9.]+)')),
Result('max_perfserver_cpu', int, None),
Result('max_perfserver_memory', int, None),
Result('max_kubedns_cpu', int, None),
Result('max_kubedns_memory', int, None),
# Derived results
Result('latency_50_percentile', float, None),
Result('latency_95_percentile', float, None),
Expand Down
13 changes: 12 additions & 1 deletion dns/py/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def __init__(self, name, data_type, container_name, yaml_path):
self.container_name = container_name

def is_relevant(self, attributes):
return 'cluster-dns' not in attributes
return 'cluster-dns' not in attributes and 'node-local-dns' not in attributes

def set(self, inputs, value):
spec = _item_by_predicate(
Expand Down Expand Up @@ -276,6 +276,15 @@ def iterate(remaining, pv):

return cases

def set_param(self, param_name, values):
    """Append an extra choice to the value list of a known parameter.

    Unknown parameter names are silently ignored so callers can add
    optional values without first probing the loaded params file.
    """
    if param_name in self.values:
        self.values[param_name].append(values)

def get_param(self, param_name):
    """Return the configured value list for *param_name*, or None if unknown."""
    return self.values.get(param_name)

def _item_by_predicate(list_obj, predicate):
"""
Expand Down Expand Up @@ -329,3 +338,5 @@ def _set_or_remove(root, path, value):
# Given as an attribute to TestCases.generate, specifies that the test
# case is run with cluster-dns.
ATTRIBUTE_CLUSTER_DNS = 'cluster-dns'
# specifies that the test uses node-cache
ATTRIBUTE_NODELOCAL_DNS = 'node-local-dns'
9 changes: 9 additions & 0 deletions dns/py/run_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ def parse_args():
parser.add_argument(
'--params', type=str, required=True,
help='perf test parameters')
parser.add_argument(
'--run-large-queries', action='store_true',
help='runs large example query file from dnsperf repo')
parser.add_argument(
'--testsvc-yaml', type=str, default='cluster/testsvc.yaml',
help='yaml for creating test services to be queried by dnsperf')
parser.add_argument(
'--out-dir', type=str, default='out',
help='output directory')
Expand All @@ -74,6 +80,9 @@ def parse_args():
'--dns-ip', type=str, default='10.0.0.20',
help='IP to use for the DNS service. Note: --use-cluster-dns '
'implicitly sets the service-ip of kube-dns service')
parser.add_argument(
'--nodecache-ip', type=str, default='',
help='IP of existing node-cache service to use for testing')
parser.add_argument(
'-v', '--verbose', action='store_true',
help='show verbose logging')
Expand Down
136 changes: 120 additions & 16 deletions dns/py/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,24 @@
import time
import traceback
import yaml

import threading
import re
import Queue
from subprocess import PIPE

from data import Parser, ResultDb
from params import ATTRIBUTE_CLUSTER_DNS, Inputs, TestCases
from params import ATTRIBUTE_CLUSTER_DNS, ATTRIBUTE_NODELOCAL_DNS, Inputs, TestCases, QueryFile, RunLengthSeconds

_log = logging.getLogger(__name__)
_app_label = 'app=dns-perf-server'
_client_podname = 'dns-perf-client'
_test_svc_label = 'app=test-svc'
_dnsperf_qfile_name='queryfile-example-current'
_dnsperf_qfile_path='ftp://ftp.nominum.com/pub/nominum/dnsperf/data/queryfile-example-current.gz'
# Remove dns queries to this host since it is associated with behavior pattern
# of some malware
_remove_query_pattern=["setting3[.]yeahost[.]com"]
MAX_TEST_SVC = 20

def add_prefix(prefix, text):
    """Return *text* with *prefix* prepended to every line."""
    return '\n'.join(prefix + line for line in text.split('\n'))
Expand All @@ -47,22 +56,33 @@ def __init__(self, args):
self.deployment_yaml = yaml.load(open(self.args.deployment_yaml, 'r'))
self.configmap_yaml = yaml.load(open(self.args.configmap_yaml, 'r')) if \
self.args.configmap_yaml else None
self.service_yaml = yaml.load(open(self.args.service_yaml, 'r'))
self.service_yaml = yaml.load(open(self.args.service_yaml, 'r')) if \
self.args.service_yaml else None
self.dnsperf_yaml = yaml.load(open(self.args.dnsperf_yaml, 'r'))
self.test_params = TestCases.load_from_file(args.params)
if self.args.run_large_queries:
self.test_params.set_param(QueryFile().name, _dnsperf_qfile_name)
self.args.testsvc_yaml = yaml.load(open(self.args.testsvc_yaml, 'r')) if \
self.args.testsvc_yaml else None


self.server_node = None
self.client_node = None

self.use_existing = False
self.db = ResultDb(self.args.db) if self.args.db else None

self.attributes = set()

if self.args.use_cluster_dns:
_log.info('Using cluster DNS for tests')
self.args.dns_ip = self._get_dns_ip()
self.args.dns_ip = self._get_dns_ip("kube-dns")
self.attributes.add(ATTRIBUTE_CLUSTER_DNS)
self.use_existing = True
elif self.args.nodecache_ip:
_log.info('Using existing node-local-dns for tests')
self.args.dns_ip = self.args.nodecache_ip
self.attributes.add(ATTRIBUTE_NODELOCAL_DNS)
self.use_existing = True

_log.info('DNS service IP is %s', args.dns_ip)

Expand All @@ -80,6 +100,7 @@ def go(self):
try:
self._ensure_out_dir(test_cases[0].run_id)
self._reset_client()
self._create_test_services()

last_deploy_yaml = None
last_config_yaml = None
Expand All @@ -92,7 +113,7 @@ def go(self):
inputs.deployment_yaml['spec']['template']['spec']['nodeName'] = \
self.server_node

if not self.args.use_cluster_dns and (
if not self.use_existing and (
yaml.dump(inputs.deployment_yaml) !=
yaml.dump(last_deploy_yaml) or
yaml.dump(inputs.configmap_yaml) !=
Expand All @@ -102,7 +123,7 @@ def go(self):
self._create(inputs.deployment_yaml)
self._create(self.service_yaml)
if self.configmap_yaml is not None:
self._create(inputs.configmap_yaml)
self._create(self.configmap_yaml)
self._wait_for_status(True)

self._run_perf(test_case, inputs)
Expand Down Expand Up @@ -152,13 +173,59 @@ def _create(self, yaml_obj):
raise Exception('create failed')
_log.info('Create %s/%s ok', yaml_obj['kind'], yaml_obj['metadata']['name'])

def _run_top(self, output_q):
    """Sample `kubectl top pod` for the duration of the perf run and push the
    peak resource usage observed onto *output_q*.

    Four integers are queued, in this fixed order (the consumer in
    _run_perf relies on it):
      max perf-server CPU (millicores), max perf-server memory (Mi),
      max kube-dns CPU (millicores), max kube-dns memory (Mi).
    When testing an existing node-local-dns cache, its pods are sampled in
    place of the perf server deployment.
    """
    kubedns_top_args = ['-l', 'k8s-app=kube-dns', '-n', 'kube-system']
    if self.args.nodecache_ip:
        perfserver_top_args = ['-l', 'k8s-app=node-local-dns', '-n', 'kube-system']
    else:
        perfserver_top_args = ['-l', _app_label]

    def max_field(out, pattern):
        # `kubectl top pod` output is of the form:
        # NAME                       CPU(cores)  MEMORY(bytes)
        # kube-dns-686548bc64-4q7wg  2m          31Mi
        # Pull every matching column value and return the largest (0 if none).
        vals = [int(re.findall(r'\d+', field)[0])
                for field in re.findall(pattern, out)]
        return max(vals) if vals else 0

    run_time = int(self.test_params.get_param(RunLengthSeconds().name)[0])
    t_end = time.time() + run_time
    # Accumulators must live outside the sampling loop; initializing them
    # per-iteration would report only the final sample, not the run's peak.
    max_perfserver_cpu = 0
    max_perfserver_mem = 0
    max_kubedns_cpu = 0
    max_kubedns_mem = 0
    while time.time() < t_end:
        _, perfout, _ = self._kubectl(*([None, 'top', 'pod'] + perfserver_top_args))
        _, kubeout, _ = self._kubectl(*([None, 'top', 'pod'] + kubedns_top_args))
        max_perfserver_cpu = max(max_perfserver_cpu, max_field(perfout, r' \d+m '))
        max_perfserver_mem = max(max_perfserver_mem, max_field(perfout, r' \d+Mi '))
        max_kubedns_cpu = max(max_kubedns_cpu, max_field(kubeout, r' \d+m '))
        max_kubedns_mem = max(max_kubedns_mem, max_field(kubeout, r' \d+Mi '))
        time.sleep(2)
    output_q.put(max_perfserver_cpu)
    output_q.put(max_perfserver_mem)
    output_q.put(max_kubedns_cpu)
    output_q.put(max_kubedns_mem)

def _run_perf(self, test_case, inputs):
_log.info('Running test case: %s', test_case)

output_file = '%s/run-%s/result-%s.out' % \
(self.args.out_dir, test_case.run_id, test_case.run_subid)
_log.info('Writing to output file %s', output_file)

res_usage = Queue.Queue()
dt = threading.Thread(target=self._run_top,args=[res_usage])
dt.start()
header = '''### run_id {run_id}:{run_subid}
### date {now}
### settings {test_case}
Expand All @@ -178,6 +245,8 @@ def _run_perf(self, test_case, inputs):
if code != 0:
raise Exception('error running dnsperf')

dt.join()

with open(output_file, 'w') as fh:
results = {}
results['params'] = test_case.to_yaml()
Expand All @@ -197,6 +266,10 @@ def _run_perf(self, test_case, inputs):

for key, value in parser.results.items():
results['data'][key] = value
results['data']['max_perfserver_cpu'] = res_usage.get()
results['data']['max_perfserver_memory'] = res_usage.get()
results['data']['max_kubedns_cpu'] = res_usage.get()
results['data']['max_kubedns_memory'] = res_usage.get()
results['data']['histogram'] = parser.histogram
except Exception, exc:
_log.error('Error parsing results: %s', exc)
Expand All @@ -208,6 +281,17 @@ def _run_perf(self, test_case, inputs):
if self.db is not None and results['data']['ok']:
self.db.put(results)

def _create_test_services(self):
    """Create MAX_TEST_SVC dummy services (test-svc1..test-svcN) from the
    testsvc yaml template so dnsperf has in-cluster names to resolve.

    Any services left over from a previous run (matched by the test label)
    are deleted first. If no testsvc yaml was supplied, this is a no-op.
    """
    if not self.args.testsvc_yaml:
        _log.info("Not creating test services since no yaml was provided")
        return
    # Start from a clean slate: remove leftovers from earlier runs.
    self._kubectl(None, 'delete', 'services', '-l', _test_svc_label)

    for svc_index in range(1, MAX_TEST_SVC + 1):
        self.args.testsvc_yaml['metadata']['name'] = "test-svc" + str(svc_index)
        self._create(self.args.testsvc_yaml)

def _select_nodes(self):
code, out, _ = self._kubectl(None, 'get', 'nodes', '-o', 'yaml')
if code != 0:
Expand All @@ -231,7 +315,7 @@ def _select_nodes(self):

_log.info('Client node is %s', self.client_node)

if self.args.use_cluster_dns:
if self.use_existing:
return

if self.args.server_node:
Expand All @@ -243,17 +327,17 @@ def _select_nodes(self):
self.server_node = nodes[0]

_log.info('Server node is %s', self.server_node)
def _get_dns_ip(self):

def _get_dns_ip(self, svcname):
code, out, _ = self._kubectl(None, 'get', 'svc', '-o', 'yaml',
'kube-dns', '-nkube-system')
svcname, '-nkube-system')
if code != 0:
raise Exception('error gettings cluster dns ip: %d', code)
raise Exception('error gettings dns ip for service %s: %d' %(svcname, code))

try:
return yaml.load(out)['spec']['clusterIP']
except:
raise Exception('error parsing kube dns service, could not get dns ip')
raise Exception('error parsing %s service, could not get dns ip' %(svcname))

def _teardown(self):
_log.info('Starting server teardown')
Expand All @@ -266,11 +350,17 @@ def _teardown(self):

_log.info('Server teardown ok')

self._kubectl(None, 'delete', 'services', '-l', _test_svc_label)
if self.args.run_large_queries:
try:
subprocess.check_call(['rm', self.args.query_dir +_dnsperf_qfile_name])
except subprocess.CalledProcessError:
_log.info("Failed to delete query file")

def _reset_client(self):
self._teardown_client()

self.dnsperf_yaml['spec']['nodeName'] = self.client_node

self._create(self.dnsperf_yaml)
while True:
code, _, _ = self._kubectl(None, 'get', 'pod', _client_podname)
Expand All @@ -290,6 +380,20 @@ def _reset_client(self):
self._copy_query_files()

def _copy_query_files(self):
if self.args.run_large_queries:
try:
_log.info('Downloading large query file')
subprocess.check_call(['wget', _dnsperf_qfile_path])
subprocess.check_call(['gunzip', _dnsperf_qfile_path.split('/')[-1]])
_log.info('Removing hostnames matching specified patterns')
for pattern in _remove_query_pattern:
subprocess.check_call(['sed', '-i', '-e', '/%s/d' %(pattern), _dnsperf_qfile_name])
subprocess.check_call(['mv', _dnsperf_qfile_name, self.args.query_dir])

except subprocess.CalledProcessError:
_log.info('Exception caught when downloading query files %s',
traceback.format_exc())

_log.info('Copying query files to client')
tarfile_contents = subprocess.check_output(
['tar', '-czf', '-', self.args.query_dir])
Expand Down
20 changes: 20 additions & 0 deletions dns/queries/multiple-svc.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
svc1.dnstest.svc.cluster.local A
svc2.dnstest.svc.cluster.local A
svc3.dnstest.svc.cluster.local A
svc4.dnstest.svc.cluster.local A
svc5.dnstest.svc.cluster.local A
svc6.dnstest.svc.cluster.local A
svc7.dnstest.svc.cluster.local A
svc8.dnstest.svc.cluster.local A
svc9.dnstest.svc.cluster.local A
svc10.dnstest.svc.cluster.local A
svc11.dnstest.svc.cluster.local A
svc12.dnstest.svc.cluster.local A
svc13.dnstest.svc.cluster.local A
svc14.dnstest.svc.cluster.local A
svc15.dnstest.svc.cluster.local A
svc16.dnstest.svc.cluster.local A
svc17.dnstest.svc.cluster.local A
svc18.dnstest.svc.cluster.local A
svc19.dnstest.svc.cluster.local A
svc20.dnstest.svc.cluster.local A

0 comments on commit a95787c

Please sign in to comment.