Skip to content

Commit

Permalink
Hardening FIO interaction
Browse files Browse the repository at this point in the history
Fixes a problem where FIO does not terminate: we now schedule a
second killall if we get a specific message back from FIO's
stderr.

Introduces a new flavor for StorPerf that has a little more
memory, as larger memory maps for duplicate blocks sometimes
caused the out-of-memory killer to be invoked.

Change-Id: I06856561ad73fef582a81d4136a36a1bea47654a
JIRA: STORPERF-99
Signed-off-by: mbeierl <mark.beierl@dell.com>
  • Loading branch information
mbeierl committed Jan 26, 2017
1 parent 29cab6c commit 3d41a65
Show file tree
Hide file tree
Showing 11 changed files with 136 additions and 72 deletions.
20 changes: 20 additions & 0 deletions ci/create_glance_image.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
##############################################################################
# Copyright (c) 2017 EMC and others.
#
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################

# Make sure the Ubuntu 16.04 cloud image is registered in Glance.
# If no image with that name is listed, download the image file and
# upload it; finally print the image details.
IMAGE_NAME="Ubuntu 16.04 x86_64"
IMAGE_FILE="ubuntu-16.04-server-cloudimg-amd64-disk1.img"
IMAGE_URL="https://cloud-images.ubuntu.com/releases/16.04/release/${IMAGE_FILE}"

echo "Checking for Ubuntu 16.04 image in Glance"
# -F: match the name literally so the dots in "16.04" are not treated
# as regular expression wildcards.
IMAGE=$(openstack image list | grep -F -- "$IMAGE_NAME")
if [ -z "$IMAGE" ]
then
    # -O pins the output file name, so a leftover file from an earlier
    # run is overwritten instead of wget saving to "<name>.1" and the
    # stale copy being uploaded.
    if ! wget -q -O "$IMAGE_FILE" "$IMAGE_URL"
    then
        # Do not try to upload a missing or partial download.
        echo "Failed to download $IMAGE_URL" >&2
        exit 1
    fi
    openstack image create "$IMAGE_NAME" --disk-format qcow2 --public \
        --container-format bare --file "$IMAGE_FILE"
fi

openstack image show "$IMAGE_NAME"
25 changes: 25 additions & 0 deletions ci/create_storperf_flavor.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
##############################################################################
# Copyright (c) 2017 EMC and others.
#
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################

# (Re)create the "storperf" flavor and print its details.
FLAVOR_NAME="storperf"

echo "Checking for StorPerf flavor"

# Remove any existing flavor first. The result is not checked: if the
# delete fails (for example because there is nothing to delete) the
# script simply continues.
openstack flavor delete "$FLAVOR_NAME"

# Compare against the Name column only, and require a whole-line match,
# so that a different flavor whose name merely contains "storperf"
# does not prevent this one from being created.
if ! openstack flavor list -f value -c Name | grep -qxF -- "$FLAVOR_NAME"
then
    openstack flavor create "$FLAVOR_NAME" \
        --id auto \
        --ram 8192 \
        --disk 4 \
        --vcpus 2
fi

openstack flavor show "$FLAVOR_NAME"
64 changes: 26 additions & 38 deletions ci/daily.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@ then
sudo rm -rf $WORKSPACE/ci/job
fi

git clone --depth 1 https://gerrit.opnfv.org/gerrit/releng ci/job/releng
git clone --depth 1 https://gerrit.opnfv.org/gerrit/releng $WORKSPACE/ci/job/releng

virtualenv $WORKSPACE/ci/job/storperf_daily_venv
source $WORKSPACE/ci/job/storperf_daily_venv/bin/activate

pip install --upgrade setuptools
pip install functools32
pip install pytz
pip install osc_lib
pip install python-openstackclient
pip install python-heatclient
pip install --upgrade setuptools==33.1.1
pip install functools32==3.2.3.post2
pip install pytz==2016.10
pip install osc_lib==1.3.0
pip install python-openstackclient==3.7.0
pip install python-heatclient==1.7.0

# This is set by Jenkins, but if we are running manually, just use the
# current hostname.
Expand All @@ -41,45 +41,28 @@ export POD_NAME=$NODE_NAME

sudo find $WORKSPACE/ -name '*.db' -exec rm -fv {} \;

export INSTALLER=`$WORKSPACE/ci/detect_installer.sh`

$WORKSPACE/ci/generate-admin-rc.sh
$WORKSPACE/ci/generate-environment.sh

. $WORKSPACE/ci/job/environment.rc
for env in `cat $WORKSPACE/ci/job/admin.rc`
do
export $env
done

echo "Checking for an existing stack"
STACK_ID=`openstack stack list | grep StorPerfAgentGroup | awk '{print $2}'`
if [ ! -z $STACK_ID ]
then
openstack stack delete --yes --wait StorPerfAgentGroup
fi

echo Checking for Ubuntu 16.04 image in Glance
IMAGE=`openstack image list | grep "Ubuntu 16.04 x86_64"`
if [ -z $IMAGE ]
then
wget https://cloud-images.ubuntu.com/releases/16.04/release/ubuntu-16.04-server-cloudimg-amd64-disk1.img
openstack image create "Ubuntu 16.04 x86_64" --disk-format qcow2 --public \
--container-format bare --file ubuntu-16.04-server-cloudimg-amd64-disk1.img
fi
while read -r env
do
export "$env"
done < $WORKSPACE/ci/job/admin.rc

echo "TEST_DB_URL=http://testresults.opnfv.org/test/api/v1" >> $WORKSPACE/ci/job/admin.rc
echo "INSTALLER_TYPE=${INSTALLER}" >> $WORKSPACE/ci/job/admin.rc

$WORKSPACE/ci/delete_stack.sh
$WORKSPACE/ci/create_glance_image.sh
$WORKSPACE/ci/create_storperf_flavor.sh
$WORKSPACE/ci/launch_docker_container.sh
$WORKSPACE/ci/create_stack.sh $CINDER_NODES 10 "Ubuntu 16.04 x86_64" $NETWORK

echo "Waiting for StorPerf to become active"
while [ $(curl -X GET 'http://127.0.0.1:5000/api/v1.0/configurations' > /dev/null 2>&1;echo $?) -ne 0 ]
do
sleep 1
done

echo Creating 1:1 stack
$WORKSPACE/ci/create_stack.sh $CINDER_NODES 10 "Ubuntu 16.04 x86_64" $NETWORK
echo ==========================================================================
echo Starting warmup
echo ==========================================================================

export QUEUE_DEPTH=8
export BLOCK_SIZE=16384
Expand All @@ -96,9 +79,14 @@ do
| awk '/Status/ {print $2}' | sed 's/"//g'`
done

export QUEUE_DEPTH=1,2,8
export BLOCK_SIZE=2048,8192,16384

echo ==========================================================================
echo Starting full matrix run
echo ==========================================================================

export WORKLOAD=ws,wr,rs,rr,rw
export BLOCK_SIZE=2048,8192,16384
export QUEUE_DEPTH=1,2,8
export SCENARIO_NAME="${CINDER_BACKEND}_${WORKLOAD}"

JOB=`$WORKSPACE/ci/start_job.sh \
Expand Down
16 changes: 16 additions & 0 deletions ci/delete_stack.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
##############################################################################
# Copyright (c) 2017 EMC and others.
#
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Apache License, Version 2.0
# which accompanies this distribution, and is available at
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################

# Delete the StorPerfAgentGroup Heat stack if one is listed, waiting
# for the deletion to finish.
echo "Checking for an existing stack"
# Second whitespace-separated field of each matching table row
# (the field right after the leading "|").
STACK_ID=$(openstack stack list | awk '/StorPerfAgentGroup/ {print $2}')
# Quoted so the test stays well-formed when the value is empty or when
# more than one row matched (multiple words).
if [ -n "$STACK_ID" ]
then
    openstack stack delete --yes --wait StorPerfAgentGroup
fi
3 changes: 2 additions & 1 deletion ci/generate-admin-rc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,5 @@ then
echo export OS_PROJECT_NAME=admin >> job/openstack.rc
fi

sed "s/export //" job/openstack.rc > job/admin.rc
sed "s/export //" job/openstack.rc > job/admin.rc
echo "INSTALLER_TYPE=${INSTALLER}" >> job/admin.rc
7 changes: 6 additions & 1 deletion ci/launch_docker_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,10 @@ docker run -d --env-file `pwd`/job/admin.rc \
-p 8000:8000 \
-v `pwd`/job/carbon:/opt/graphite/storage/whisper \
--name storperf opnfv/storperf
# -v `pwd`/../../storperf:/home/opnfv/repos/storperf \


echo "Waiting for StorPerf to become active"
while [ $(curl -X GET 'http://127.0.0.1:5000/api/v1.0/configurations' > /dev/null 2>&1;echo $?) -ne 0 ]
do
sleep 1
done
41 changes: 23 additions & 18 deletions storperf/fio/fio_invoker.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import json
import logging
import subprocess
from threading import Thread
import paramiko

Expand Down Expand Up @@ -65,7 +64,7 @@ def stdout_handler(self, stdout):
"Event listener callback complete")
except Exception, e:
self.logger.error("Error parsing JSON: %s", e)
except ValueError:
except IOError:
pass # We might have read from the closed socket, ignore it

stdout.close()
Expand All @@ -76,6 +75,14 @@ def stderr_handler(self, stderr):
for line in iter(stderr.readline, b''):
self.logger.error("FIO Error: %s", line.rstrip())

# Sometimes FIO gets stuck and will give us this message:
# fio: job 'sequential_read' hasn't exited in 60 seconds,
# it appears to be stuck. Doing forceful exit of this job.
# A second killall of fio will release the stuck process.

if 'it appears to be stuck' in line:
self.terminate()

stderr.close()
self.logger.debug("Finished")

Expand Down Expand Up @@ -121,24 +128,22 @@ def execute(self, args=[]):

def terminate(self):
self.logger.debug("Terminating fio on " + self.remote_host)
cmd = ['ssh', '-o', 'StrictHostKeyChecking=no',
'-o', 'UserKnownHostsFile=/dev/null',
'-o', 'LogLevel=error',
'-i', 'storperf/resources/ssh/storperf_rsa',
'storperf@' + self.remote_host,
'sudo', 'killall', '-9', 'fio']

kill_process = subprocess.Popen(cmd,
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(self.remote_host, username='storperf',
key_filename='storperf/resources/ssh/storperf_rsa',
timeout=2)

for line in iter(kill_process.stdout.readline, b''):
self.logger.debug("FIO Termination: " + line)
command = "sudo killall fio"

kill_process.stdout.close()
self.logger.debug("Executing on %s: %s" % (self.remote_host, command))
(_, stdout, stderr) = ssh.exec_command(command)

for line in iter(kill_process.stderr.readline, b''):
self.logger.debug("FIO Termination: " + line)
for line in stdout.readlines():
self.logger.debug(line.strip())
for line in stderr.readlines():
self.logger.error(line.strip())

kill_process.stderr.close()
stdout.close()
stderr.close()
4 changes: 2 additions & 2 deletions storperf/resources/hot/agent-group.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ parameters:
- custom_constraint: neutron.network
flavor:
type: string
default: "m1.small"
default: "storperf"
agent_image:
type: string
default: 'StorPerf Ubuntu 14.04'
Expand All @@ -38,7 +38,7 @@ parameters:
resources:
slaves:
type: OS::Heat::ResourceGroup
depends_on: [storperf_subnet, storperf_network_router_interface,
depends_on: [storperf_subnet, storperf_network_router_interface,
storperf_open_security_group, storperf_key_pair]
properties:
count: {get_param: agent_count}
Expand Down
4 changes: 2 additions & 2 deletions storperf/resources/hot/storperf-agent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ heat_template_version: 2013-05-23
parameters:
flavor:
type: string
default: m1.small
default: storperf
image:
type: string
default: 'Ubuntu 16.04'
Expand Down Expand Up @@ -96,4 +96,4 @@ resources:
outputs:
storperf_agent_ip:
description: The floating IP address of the agent on the public network
value: { get_attr: [ storperf_floating_ip, floating_ip_address ] }
value: { get_attr: [ storperf_floating_ip, floating_ip_address ] }
11 changes: 4 additions & 7 deletions storperf/utilities/data_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class DataHandler(object):

def __init__(self):
self.logger = logging.getLogger(__name__)
self.samples = 11
self.samples = 10

"""
"""
Expand Down Expand Up @@ -116,12 +116,9 @@ def _evaluate_prior_data(self, data_series):
self.logger.debug("Data series: %s" % data_series)
if len(data_series) == 0:
return False
earliest_timestamp = data_series[0][0]
latest_timestamp = data_series[-1][0]
duration = latest_timestamp - earliest_timestamp
if (duration < 60 * self.samples):
self.logger.debug("Only %s minutes of samples, ignoring" %
((duration / 60 + 1),))
number_of_samples = len(data_series)
if (number_of_samples < self.samples):
self.logger.debug("Only %s samples, ignoring" % number_of_samples)
return False

return SteadyState.steady_state(data_series)
Expand Down
13 changes: 10 additions & 3 deletions tests/utilities_tests/data_handler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def test_long_steady_sample(self):
series = [[4804559100, 205.345],
[4804559200, 201.59],
[4804559300, 205.76],
[4804559400, 205.76],
[4804559500, 205.76],
[4804559600, 205.76],
[4804559700, 205.76],
[4804560300, 219.37],
[4804560400, 219.28],
[4804560500, 217.75]]
Expand Down Expand Up @@ -199,15 +203,19 @@ def test_report_that_causes_termination(self,
series = [[4804559100, 205.345],
[4804559200, 201.59],
[4804559300, 205.76],
[4804559400, 205.76],
[4804559500, 205.76],
[4804559600, 205.76],
[4804559700, 205.76],
[4804560300, 219.37],
[4804560400, 219.28],
[4804560500, 217.75]]
mock_graphite_db.return_value = series
mock_time.return_value = 4804560500 + 10

expected_slope = 0.011830471529818998
expected_slope = 0.01266822319352225
expected_range = 17.78
expected_average = 211.51583333333335
expected_average = 209.2135

self.current_workload = ("%s.%s.queue-depth.%s.block-size.%s" %
("job_id",
Expand Down Expand Up @@ -240,4 +248,3 @@ def test_report_that_causes_termination(self,
self.assertEqual(True, self._terminated)

self.assertEqual(False, self.pushed)
self.assertEqual(True, self._terminated)

0 comments on commit 3d41a65

Please sign in to comment.