Add retry on submit for 502 HTTP error
author	Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Wed, 22 May 2019 15:06:45 +0000 (11:06 -0400)
committer	Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
Wed, 22 May 2019 15:06:45 +0000 (11:06 -0400)
The root cause of the 502 error on job submission is still not known. Retry 10
times with a 5-second sleep between attempts.

Signed-off-by: Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
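
For reference, a minimal sketch of the retry pattern this commit introduces,
assuming the same 'server' ServerProxy and rendered job definition built in
lava2-submit.py; the helper name and the final RuntimeError are illustrative
and not part of the patch itself:

    import time
    import xmlrpc.client

    def submit_with_retry(server, render, attempts=10, delay=5):
        """Submit a LAVA job, retrying on transient proxy errors (e.g. HTTP 502)."""
        for attempt in range(attempts):
            try:
                # scheduler.submit_job is the LAVA XML-RPC entry point used below
                return server.scheduler.submit_job(render)
            except xmlrpc.client.ProtocolError as error:
                # A 502 from the front-end proxy surfaces as a ProtocolError
                print('Protocol error on submit (attempt #{}): {}'.format(attempt, error))
                time.sleep(delay)
        # Not in the patch: guard against exhausting every attempt,
        # otherwise the job id would be left unset.
        raise RuntimeError('Job submission failed after {} attempts'.format(attempts))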
scripts/system-tests/lava2-submit.py

index 1001256386560cb5a33218e0f817c72197171cfb..75b5a379614893fe78fd92877b236fa6b56638d1 100644
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 # Copyright (C) 2016 - Francis Deslauriers <francis.deslauriers@efficios.com>
 #
 # This program is free software: you can redistribute it and/or modify
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import argparse
-import base64
 import json
 import os
 import random
 import sys
 import time
-import yaml
 import xmlrpc.client
-import pprint
 from urllib.parse import urljoin
 from urllib.request import urlretrieve
-
-from jinja2 import Environment, FileSystemLoader, meta
+import yaml
+from jinja2 import Environment, FileSystemLoader
 
 USERNAME = 'lava-jenkins'
 HOSTNAME = 'lava-master-02.internal.efficios.com'
 OBJSTORE_URL = "https://obj.internal.efficios.com/lava/results/"
 
 class TestType():
-    baremetal_benchmarks=1
-    baremetal_tests=2
-    kvm_tests=3
-    kvm_fuzzing_tests=4
+    """ Enum like for test type """
+    baremetal_benchmarks = 1
+    baremetal_tests = 2
+    kvm_tests = 3
+    kvm_fuzzing_tests = 4
     values = {
         'baremetal-benchmarks' : baremetal_benchmarks,
         'baremetal-tests' : baremetal_tests,
@@ -46,6 +44,7 @@ class TestType():
     }
 
 class DeviceType():
+    """ Enum like for device type """
     x86 = 'x86'
     kvm = 'qemu'
     values = {
@@ -57,21 +56,23 @@ def get_job_bundle_content(server, job):
     try:
         bundle_sha = server.scheduler.job_status(str(job))['bundle_sha1']
         bundle = server.dashboard.get(bundle_sha)
-    except xmlrpc.client.Fault as f:
-        print('Error while fetching results bundle', f.faultString)
-        raise f
+    except xmlrpc.client.Fault as error:
+        print('Error while fetching results bundle', error.faultString)
+        raise error
 
     return json.loads(bundle['content'])
 
-# Parse the results bundle to see the run-tests testcase
-# of the lttng-kernel-tests passed successfully
 def check_job_all_test_cases_state_count(server, job):
+    """
+    Parse the results bundle to see the run-tests testcase
+    of the lttng-kernel-tests passed successfully
+    """
     print("Testcase result:")
     content = server.results.get_testjob_results_yaml(str(job))
     testcases = yaml.load(content)
 
-    passed_tests=0
-    failed_tests=0
+    passed_tests = 0
+    failed_tests = 0
     for testcase in testcases:
         if testcase['result'] != 'pass':
             print("\tFAILED {}\n\t\t See http://{}{}".format(
@@ -79,27 +80,31 @@ def check_job_all_test_cases_state_count(server, job):
                 HOSTNAME,
                 testcase['url']
             ))
-            failed_tests+=1
+            failed_tests += 1
         else:
-            passed_tests+=1
+            passed_tests += 1
     return (passed_tests, failed_tests)
 
-# Get the benchmark results from the objstore
-# save them as CSV files localy
 def fetch_benchmark_results(build_id):
+    """
+    Get the benchmark results from the objstore
+    save them as CSV files locally
+    """
     testcases = ['processed_results_close.csv',
-            'processed_results_ioctl.csv',
-            'processed_results_open_efault.csv',
-            'processed_results_open_enoent.csv',
-            'processed_results_dup_close.csv',
-            'processed_results_raw_syscall_getpid.csv',
-            'processed_results_lttng_test_filter.csv']
+                 'processed_results_ioctl.csv',
+                 'processed_results_open_efault.csv',
+                 'processed_results_open_enoent.csv',
+                 'processed_results_dup_close.csv',
+                 'processed_results_raw_syscall_getpid.csv',
+                 'processed_results_lttng_test_filter.csv']
     for testcase in testcases:
         url = urljoin(OBJSTORE_URL, "{:s}/{:s}".format(build_id, testcase))
         urlretrieve(url, testcase)
 
-# Parse the attachment of the testcase to fetch the stdout of the test suite
 def print_test_output(server, job):
+    """
+    Parse the attachment of the testcase to fetch the stdout of the test suite
+    """
     job_finished, log = server.scheduler.jobs.logs(str(job))
     logs = yaml.load(log.data.decode('ascii'))
     print_line = False
@@ -116,7 +121,10 @@ def print_test_output(server, job):
         if print_line:
             print("{} {}".format(line['dt'], line['msg']))
 
-def get_vlttng_cmd(device, lttng_tools_commit, lttng_ust_commit=None):
+def get_vlttng_cmd(lttng_tools_commit, lttng_ust_commit=None):
+    """
+    Return vlttng cmd to be used in the job template for setup.
+    """
 
     vlttng_cmd = 'vlttng --jobs=$(nproc) --profile urcu-master' \
                     ' --override projects.babeltrace.build-env.PYTHON=python3' \
@@ -163,17 +171,15 @@ def main():
     if not args.debug:
         try:
             lava_api_key = os.environ['LAVA2_JENKINS_TOKEN']
-        except Exception as e:
-            print('LAVA2_JENKINS_TOKEN not found in the environment variable. Exiting...', e )
+        except Exception as error:
+            print('LAVA2_JENKINS_TOKEN not found in the environment variable. Exiting...',
+                  error)
             return -1
 
     jinja_loader = FileSystemLoader(os.path.dirname(os.path.realpath(__file__)))
     jinja_env = Environment(loader=jinja_loader, trim_blocks=True,
-            lstrip_blocks= True)
+                            lstrip_blocks=True)
     jinja_template = jinja_env.get_template('template_lava_job.jinja2')
-    template_source = jinja_env.loader.get_source(jinja_env, 'template_lava_job.jinja2')
-    parsed_content = jinja_env.parse(template_source)
-    undef = meta.find_undeclared_variables(parsed_content)
 
     test_type = TestType.values[args.type]
 
@@ -184,7 +190,7 @@ def main():
 
     vlttng_path = '/tmp/virtenv'
 
-    vlttng_cmd = get_vlttng_cmd(device_type, args.tools_commit, args.ust_commit)
+    vlttng_cmd = get_vlttng_cmd(args.tools_commit, args.ust_commit)
 
     context = dict()
     context['DeviceType'] = DeviceType
@@ -216,7 +222,16 @@ def main():
 
     server = xmlrpc.client.ServerProxy('http://%s:%s@%s/RPC2' % (USERNAME, lava_api_key, HOSTNAME))
 
-    jobid = server.scheduler.submit_job(render)
+    for attempt in range(10):
+        try:
+            jobid = server.scheduler.submit_job(render)
+        except xmlrpc.client.ProtocolError as error:
+            print('Protocol error on submit, sleeping and retrying. Attempt #{}'
+                  .format(attempt))
+            time.sleep(5)
+            continue
+        else:
+            break
 
     print('Lava jobid:{}'.format(jobid))
     print('Lava job URL: http://lava-master-02.internal.efficios.com/scheduler/job/{}'.format(jobid))
@@ -224,15 +239,16 @@ def main():
     #Check the status of the job every 30 seconds
     jobstatus = server.scheduler.job_state(jobid)['job_state']
     running = False
-    while jobstatus in ['Submitted','Scheduling','Scheduled','Running']:
+    while jobstatus in ['Submitted', 'Scheduling', 'Scheduled', 'Running']:
         if not running and jobstatus == 'Running':
             print('Job started running')
             running = True
         time.sleep(30)
         try:
             jobstatus = server.scheduler.job_state(jobid)['job_state']
-        except xmlrpc.client.ProtocolError as e:
-            print('Protocol error, retring')
+        except xmlrpc.client.ProtocolError as error:
+            print('Protocol error, retrying')
+            continue
     print('Job ended with {} status.'.format(jobstatus))
 
     if jobstatus != 'Finished':
@@ -243,13 +259,13 @@ def main():
     elif test_type is TestType.baremetal_benchmarks:
         fetch_benchmark_results(args.build_id)
 
-    passed, failed=check_job_all_test_cases_state_count(server, jobid)
+    passed, failed = check_job_all_test_cases_state_count(server, jobid)
     print('With {} passed and {} failed Lava test cases.'.format(passed, failed))
 
-    if failed == 0:
-        return 0
-    else:
+    if failed != 0:
         return -1
 
+    return 0
+
 if __name__ == "__main__":
     sys.exit(main())