Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix suite info on daemonize #2044

Merged
merged 2 commits into from
Oct 31, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 70 additions & 14 deletions lib/cylc/daemonize.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@

import os
import sys
from cylc.suite_output import SuiteOutput
from time import sleep, time
from cylc.suite_logging import SuiteLog


SUITE_SCAN_INFO_TMPL = r"""
Expand All @@ -41,6 +42,23 @@
+ Port: %(port)s
+ Logs: %(logd)s/{log,out,err}""" + SUITE_SCAN_INFO_TMPL

# Maximum time, in seconds, that the original (pre-fork) process keeps polling
# the suite log for the start-up message before exiting with an error.
_TIMEOUT = 300.0 # 5 minutes


def redirect(logd):
    """Point the process-level standard file descriptors at the suite logs.

    Standard output is appended to the suite "out" file and standard error
    to the suite "err" file, both located under ``logd`` and both opened
    unbuffered; standard input is replaced by the null device.

    The descriptors themselves are duplicated (rather than just rebinding
    ``sys.stdout`` and friends) because rebinding the Python-level stream
    objects does not affect modules that use the standard streams from C
    code - evidently the subprocess module is in this category!
    """
    # A buffering argument of 0 means unbuffered.
    out_handle = file(os.path.join(logd, SuiteLog.OUT), 'a+', 0)
    err_handle = file(os.path.join(logd, SuiteLog.ERR), 'a+', 0)
    null_handle = file(os.devnull, 'r')
    # Overwrite each standard descriptor with its replacement, in the order
    # stdout, stderr, stdin.
    for replacement, stream in (
            (out_handle, sys.stdout),
            (err_handle, sys.stderr),
            (null_handle, sys.stdin)):
        os.dup2(replacement.fileno(), stream.fileno())


def daemonize(server):
"""Turn a cylc scheduler into a Unix daemon.
Expand All @@ -52,13 +70,57 @@ def daemonize(server):
http://code.activestate.com/recipes/66012-fork-a-daemon-process-on-unix/

"""

sout = SuiteOutput(server.suite)

logd = SuiteLog.get_dir_for_suite(server.suite)
log_fname = os.path.join(logd, SuiteLog.LOG)
try:
old_log_mtime = os.stat(log_fname).st_mtime
except OSError:
old_log_mtime = None
# fork 1
try:
pid = os.fork()
if pid > 0:
# exit first parent
# Poll for suite log to be populated
suite_pid = None
suite_port = None
timeout = time() + _TIMEOUT
while time() <= timeout and (
suite_pid is None or suite_port is None):
sleep(0.1)
try:
log_stat = os.stat(log_fname)
if (log_stat.st_mtime == old_log_mtime or
log_stat.st_size == 0):
continue
# Line 1 of suite log should contain start up message, host
# name and port number. Format is:
# LOG-PREFIX Suite starting: server=HOST:PORT pid=PID
# Otherwise, something has gone wrong, print the suite log
# and exit with an error.
log_line1 = open(log_fname).readline()
if server.START_MESSAGE_PREFIX in log_line1:
server_str, pid_str = log_line1.rsplit()[-2:]
suite_pid = pid_str.rsplit("=", 1)[-1]
suite_port = server_str.rsplit(":", 1)[-1]
else:
try:
sys.stderr.write(open(log_fname).read())
sys.exit(1)
except IOError:
sys.exit("Suite daemon exited")
except (IOError, OSError, ValueError):
pass
if suite_pid is None or suite_port is None:
sys.exit("Suite not started after %ds" % _TIMEOUT)
# Print suite information
sys.stdout.write(_INFO_TMPL % {
"suite": server.suite,
"host": server.host,
"port": suite_port,
"pid": suite_pid,
"logd": logd,
})
# exit parent 1
sys.exit(0)
except OSError, exc:
sys.stderr.write(
Expand All @@ -70,17 +132,11 @@ def daemonize(server):
os.setsid()
os.umask(0)

# do second fork
# fork 2
try:
pid = os.fork()
if pid > 0:
# exit from second parent, print eventual PID before
sys.stdout.write(_INFO_TMPL % {
"suite": server.suite,
"host": server.host,
"port": server.port,
"pid": pid,
"logd": os.path.dirname(sout.get_path())})
# exit parent 2
sys.exit(0)
except OSError, exc:
sys.stderr.write(
Expand All @@ -91,4 +147,4 @@ def daemonize(server):
os.umask(022)

# redirect output to the suite log files
sout.redirect()
redirect(logd)
11 changes: 7 additions & 4 deletions lib/cylc/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ class Scheduler(object):
SUITE_EVENT_HANDLER = 'suite-event-handler'
SUITE_EVENT_MAIL = 'suite-event-mail'

START_MESSAGE_PREFIX = 'Suite starting: '
START_MESSAGE_TMPL = (
START_MESSAGE_PREFIX + 'server=%(host)s:%(port)s pid=%(pid)s')

# Dependency negotiation etc. will run after these commands
PROC_CMDS = (
'release_suite',
Expand Down Expand Up @@ -258,8 +262,7 @@ def start(self):
os.unlink(os.path.join(run_dir, "state.tar.gz"))
except OSError:
pass
ERR.warning(
"ERROR: cannot tar-gzip + remove old state/ directory")
ERR.error("cannot tar-gzip + remove old state/ directory")
else:
pri_dao = CylcSuiteDAO(pri_db_path)

Expand Down Expand Up @@ -395,7 +398,8 @@ def configure(self):
if self.gen_reference_log or self.reference_test_mode:
self.configure_reftest()

self.log.info('Suite starting on %s:%s' % (self.host, self.port))
self.log.info(self.START_MESSAGE_TMPL % {
'host': self.host, 'port': self.port, 'pid': os.getpid()})
# Note that the following lines must be present at the top of
# the suite log file for use in reference test runs:
self.log.info('Run mode: ' + self.run_mode)
Expand Down Expand Up @@ -2060,7 +2064,6 @@ def will_pause_at(self):

def command_trigger_tasks(self, items):
"""Trigger tasks."""
print "Trigger", items
return self.pool.trigger_tasks(items)

def command_dry_run_tasks(self, items):
Expand Down
69 changes: 0 additions & 69 deletions lib/cylc/suite_output.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/cylc-cat-log/00-local.t
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ cylc stop --max-polls=10 --interval=2 $SUITE_NAME 2>'/dev/null'
#-------------------------------------------------------------------------------
TEST_NAME=${TEST_NAME_BASE}-suite-log-log
cylc cat-log $SUITE_NAME >$TEST_NAME.out
grep_ok 'Suite starting on' $TEST_NAME.out
grep_ok 'Suite starting' $TEST_NAME.out
#-------------------------------------------------------------------------------
TEST_NAME=${TEST_NAME_BASE}-suite-log-out
cylc cat-log -o $SUITE_NAME >$TEST_NAME.out
Expand Down