ansible: Ensure jq is installed on SLES12SP5
[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py
CommitLineData
5c65bbc2
JR
1#!/usr/bin/python3
2# Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17import json
18import os
19import tempfile
20from statistics import mean
21import argparse
22import sys
23from operator import add
24
25import matplotlib.pyplot as plt
26from matplotlib.backends.backend_pdf import PdfPages
27from matplotlib.ticker import PercentFormatter
28
29import git
30import numpy
31import lava_submit
32
33from minio import Minio
34from minio.error import NoSuchKey
35from minio.error import ResponseError
36
37
# Benchmark flavours for which results are fetched and graphed.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket of the object storage used for every download and removal.
DEFAULT_BUCKET = "lava"
40
# Commits that must never be submitted for benchmarking. Each hash is listed
# exactly once; launch_jobs() filters the git log against this set.
invalid_commits = {
    "ec9a9794af488a9accce7708a8b0d8188b498789",  # Does not build
    "8c99128c640cbce71fb8a6caa15e4c672252b662",  # Block on configure
    "f3847c753f1b4f12353c38d97b0577d9993d19fb",  # Does not build
    "e0111295f17ddfcc33ec771a8deac505473a06ad",  # Does not build
    "d0d4e0ed487ea23aaf0d023513c0a4d86901b79b",  # Does not build
    "c24f7ab4dd9edeb5e50b0070fd9d9e8691057dde",  # Does not build
    "ce67f5614a4db3b2de4d887eca52135b439b4937",  # Does not build
    "80aff5efc66679fd934cef433c0e698694748385",  # Does not build
    "f4f11e84942d36fcc8a597d226928bce2ccac4b3",  # Does not build
    "ae466a6e1b856d96cf5112a371b4df2b732503ec",  # Does not build
    "ade5c95e2a4f90f839f222fc1a66175b3b199922",  # Configuration fails
    "30341532906d62808e9d66fb115f5edb4e6f5706",  # Configuration fails
    "006c5ffb42f32e802136e3c27a63accb59b4d6c4",  # Does not build
    "88488ff5bdcd7679ff1f04fe6cff0d24b4f8fc0c",  # Does not build
    # Other errors
    "7c7301d5827bd10ec7c34da7ffc5fe74e5047d38",
    "a0df3abf88616cb0799f87f4eb57c54268e63448",
    "b7045dd71bc0524ad6b5db96df365e98e237d395",
    "cf7b259eaa602abcef308d2b5dd8e6c9ee995d8b",
    "90a55a4ef47cac7b568f5f0a8a78bd760f82d23c",
    "baa5e3aa82a82c9d0fa59e3c586c0168bb5dc267",
    "af9f8da7ba4a9b16fc36d637b8c3a0c7a8774da2",
    "fe748379adbd385efdfc7acae9c2340fb8b7d717",
    "929627965e33e06dc77254d81e8ec1d66cc06590",
    "48a0e52c4632a60cd43423f2f34f10de350bf868",
    "b7fa35fce415b33207a9eba111069ed31ef122a0",
    "828c8a25785e0cedaeb6987256a4dfc3c43b982f",
    "213489680861e4d796173513effac7023312ec2d",
    "430a5ccbbd15782501ca56bb148f3850126277ad",
    "629d19044c43b195498d0a4e002906c54b6186d5",
    "c423217ed1640b4152739f7e5613775d46c25050",
    # Elfutils
    "776a2a252c9875caa1e8b4f41cb8cc12c79611c3",
    "435aa29aff0527d36aafa1b657ae70b9db5f9ea5",
    "95651695473495501fc6b2c4a1cf6a78cfb3cd6a",
    "e0748fb2ba8994c136bcc0b67d3044f09841cf8e",
    "9e632b22e1310fe773edc32ab08a60602f4b2861",
    "271fb6907a6f4705a1c799d925394243eae51d68",
    "328342cd737582216dc7b8b7d558b2a1bf8ea5e8",
    "ae5c1a4481be68fae027910b141354c1d86daa64",
    "e6938018975e45d35dab5fef795fe7344eef7d62",
    "e015bae2ef343b30c890eebb9182a8be13d12ed0",
    "5e8a0751ae0c418a615025d1da10bc84f91b3d97",
    "887d26fa0fd0ae0c5c15e4b885473c4cdc0bf078",
    "e97fe75eac59fc39a6e4f3c4f9f3301835a0315e",
    "8b130e7f1d6a41fb5c64a014c15246ba74b79470",
    "f4f8f79893b18199b38edc3330093a9403c4c737",
}
5c65bbc2 93
cf595cda
JR
def json_type(string):
    """
    Argparse type callable for JSON arguments.

    Decode `string` as JSON and return the decoded value. The top-level
    value must be a dictionary, otherwise argparse.ArgumentTypeError is
    raised.
    """
    decoded = json.loads(string)
    if isinstance(decoded, dict):
        return decoded
    raise argparse.ArgumentTypeError("%r is not a dict" % string)
104
5c65bbc2
JR
def graph_get_color(branch):
    """
    Return the plot color assigned to `branch`.

    Raises KeyError when the branch has no assigned color.
    """
    branch_colors = {
        "stable-1.5": "red",
        "stable-2.0": "green",
        "master": "blue",
    }
    return branch_colors[branch]
111
112
def graph_get_title(branch, benchmark_type):
    """
    Build the graph title from the branch name and the benchmark type.

    Raises KeyError when `benchmark_type` is neither "dummy" nor "text".
    """
    output_names = {"dummy": "Dummy output", "text": "Text output"}
    output_name = output_names[benchmark_type]
    return "{} - {}".format(branch, output_name)
119
120
def get_client(
    endpoint="obj.internal.efficios.com",
    access_key="jenkins",
    secret_key="echo123456",
):
    """
    Return a configured minio client.

    Every parameter defaults to the value this script has always used, so
    get_client() without argument behaves as before. Callers may pass other
    values to target another server or account.

    NOTE(review): the default credentials are hard-coded in the source;
    consider supplying them from the environment or a secret store.
    """
    return Minio(endpoint, access_key=access_key, secret_key=secret_key)
128
129
def get_file(client, prefix, file_name, workdir_name):
    """
    Download the object `prefix`/`file_name` from the default bucket into
    the `workdir_name` directory.

    Return the local path of the downloaded file, or None when the object
    does not exist on the remote (NoSuchKey).
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_name, local_path)
    except NoSuchKey:
        local_path = None
    return local_path
143
144
def delete_file(client, prefix, file_name):
    """
    Remove the object `prefix`/`file_name` from the default bucket.

    A ResponseError is printed and otherwise ignored; a missing object
    (NoSuchKey) is silently ignored.
    """
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_name)
    except ResponseError as error:
        print(error)
    except NoSuchKey:
        # Nothing to delete: the object is already absent.
        return
156
157
def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of the commit hashes in the
    range `cutoff`..origin/`bt_version`.

    An empty list is returned when the range contains no commit.
    WARNING: This runs `git fetch` in the repository located at repo_path.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    revision_range = "{}..origin/{}".format(cutoff, bt_version)
    log_output = repo.git.log(revision_range, "--pretty=format:%H", "--reverse")
    # splitlines() yields [] for an empty log whereas split("\n") yields
    # [""], which made callers process a bogus empty commit hash.
    return log_output.splitlines()
168
169
def parse_result(result_path):
    """
    Load the JSON result file located at `result_path`.

    Return a list holding, position by position, the sum of the
    "User time (seconds)" and "System time (seconds)" series.
    """
    with open(result_path) as result_file:
        content = json.load(result_file)
    user_times = content["User time (seconds)"]
    system_times = content["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]
183
184
def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark results of a commit across all benchmark types.

    Return a (results, valid) tuple:
      - results: dict mapping each benchmark type to its parsed dataset, or
        None as soon as the result file of one benchmark type cannot be
        fetched.
      - valid: False when at least one dataset parsed so far holds only
        zeros (an empty dataset counts as such), True otherwise.
    """
    results = {}
    benchmark_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, benchmark_valid
        results[b_type] = parse_result(result_file)
        if all(i == 0.0 for i in results[b_type]):
            benchmark_valid = False
            # The prefix already ends with the benchmark type, do not
            # repeat it in the message.
            print("Invalid benchmark for {}/{}".format(prefix, commit))
    return results, benchmark_valid
205
206
def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw values on the current pyplot figure.

    x_data holds the x position of each commit and y_data the matching
    dataset (a sequence of measurements) of each commit. Every dataset is
    split by sanitize_dataset(); valid points are drawn with the branch
    color and outliers as black crosses. labels are used as x tick labels.
    latest_values maps a branch name to its latest value (or a falsy value
    when it has none); the entries of the other branches are drawn as
    horizontal reference lines.
    """
    point_x_data = []
    point_y_data = []
    outlier_x_data = []
    outlier_y_data = []
    for x, dataset in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(dataset)
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # The y axis always starts at 0; leave 20% of headroom above the
    # largest raw value.
    ymax = 1
    if y_data:
        ymax = 1.2 * max(item for sublist in y_data for item in sublist)

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Keep the reference line inside the visible range.
        if l_result >= ymax:
            ymax = 1.2 * l_result

    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
259
09de7b53
JR
260
def plot_delta_between_point(
    branch, benchmark_type, x_data, y_data, labels, latest_values
):
    """
    Plot, on the current pyplot figure, the difference between each value
    of y_data and the value preceding it. The first point is always 0.

    latest_values is not used by this function.
    """
    # Delta of every value against its predecessor; the first value has no
    # predecessor and is pinned to 0.
    deltas = []
    if y_data:
        deltas = [0.0] + [
            current - previous for previous, current in zip(y_data, y_data[1:])
        ]

    plt.plot(x_data, deltas, "o", label=branch, color=graph_get_color(branch))

    # Symmetric limits so that zero sits in the middle of the y axis.
    y_limit = 100
    if deltas:
        y_limit = abs(max(deltas, key=abs)) * 1.3
    plt.ylim(ymin=-y_limit, ymax=y_limit)

    ax = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    title = graph_get_title(branch, benchmark_type) + " Delta to previous commit"
    plt.title(title, fontweight="bold")
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
5c65bbc2 302
09de7b53 303
5c65bbc2
JR
def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    Every value of y_data is converted to a percentage relative to
    y_data[0]. latest_values maps a branch name to its latest value (or a
    falsy value when it has none); the entries of the other branches are
    converted the same way and drawn as horizontal lines.
    """
    # Fallback reference used only when there is no data point.
    reference = 0.01
    y_abs_max = 100

    if y_data:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    # The axis spans at least +/-100% and grows when a point exceeds it.
    # The previous test compared y_abs_max with its own initial value and
    # could never be true, so larger ratios fell outside of the graph.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
363
09de7b53 364
5c65bbc2
JR
def _fetch_branch_results(client, branches, git_path):
    """
    Return a dict mapping each branch of `branches` to its list of
    (commit, results) tuples, ordered like the git log. Commits whose
    results are missing or invalid are left out.
    """
    branch_results = {}
    for branch, cutoff in branches.items():
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in get_git_log(branch, cutoff, git_path):
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results
    return branch_results


def generate_graph(branches, report_name, git_path):
    """
    Generate the PDF report `report_name`.

    branches maps a branch name to its cutoff commit and git_path is the
    location of the git repository. For each benchmark type and each
    branch, three pages are written: raw values, ratio to the first commit
    and delta to the previous commit.
    """
    # The PDF document. The context manager closes it even when fetching
    # or plotting fails.
    with PdfPages(report_name) as pdf_pages:
        branch_results = _fetch_branch_results(get_client(), branches, git_path)

        for b_type in BENCHMARK_TYPES:
            latest_values = {}
            max_len = 0

            # Find the maximum size for a series inside our series dataset.
            # This is used later to compute the size of the actual plot
            # (pdf). While there gather the comparison value used to draw
            # comparison line between branches.
            for branch, results in branch_results.items():
                max_len = max(max_len, len(results))
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            # Widen the page when there are many commits to display.
            width = 0.16 * max_len if max_len > 10 else 11.69

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                labels = [c[0][:8] for c in results]
                raw_y_data = [c[1][b_type] for c in results]
                # Use the mean of each sanitize dataset here, we do not
                # care for variance for ratio. At least not yet.
                mean_y_data = [
                    mean(sanitize_dataset(c[1][b_type])[0]) for c in results
                ]

                pages = (
                    (plot_raw_value, raw_y_data),
                    (plot_ratio, mean_y_data),
                    (plot_delta_between_point, mean_y_data),
                )
                for plot, y_data in pages:
                    fig = plt.figure(figsize=(width, 8.27), dpi=100)
                    plot(branch, b_type, x_data, y_data, labels, latest_values)
                    pdf_pages.savefig(fig)
                    # Release the figure once saved, otherwise every page
                    # stays in memory until the end of the run.
                    plt.close(fig)
431
432
def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Launch jobs for all missing results.

    branches maps a branch name to its cutoff commit and git_path is the
    location of the git repository. Commits listed in invalid_commits are
    never submitted. A commit that already has results is skipped unless
    force is true. wait_for_completion and debug are passed as is to
    lava_submit.submit().

    Each commit is submitted once, in the order it is first encountered
    (branch by branch, older to newer).
    """
    client = get_client()
    # A list keeps the submission order deterministic; the set is only
    # there for the membership test.
    commits_to_test = []
    seen = set()
    for branch, cutoff in branches.items():
        commits = [
            x for x in get_git_log(branch, cutoff, git_path) if x not in invalid_commits
        ]
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                if commit in seen:
                    continue
                # When forcing, existing results do not matter: do not
                # download them.
                if not force and get_benchmark_results(client, commit, workdir)[0]:
                    continue
                seen.add(commit)
                commits_to_test.append(commit)

    total = len(commits_to_test)
    for index, commit in enumerate(commits_to_test, start=1):
        print("Job {}/{}".format(index, total))
        lava_submit.submit(
            commit, wait_for_completion=wait_for_completion, debug=debug
        )
5c65bbc2
JR
452
453
def main():
    """
    Parse arguments and execute as needed.

    Return the process exit status: 1 when the repository path does not
    exist, 0 otherwise.
    """
    # Default branches to handle, each with its cutoff commit.
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        help="Do not wait for the completion of each job sent. Waiting is the "
        "default and is useful for the ci. Otherwise we could end up spamming "
        "the lava instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )
    parser.add_argument(
        "--overwrite-branches-cutoff",
        help="A dictionary of the form {"
        "'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and "
        "jobs generation.",
        required=False,
        type=json_type,
    )

    args = parser.parse_args()

    # A user supplied dictionary replaces the default branches entirely.
    if args.overwrite_branches_cutoff:
        bt_branches = args.overwrite_branches_cutoff

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")

        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))

        launch_jobs(
            bt_branches,
            args.repo_path,
            # The command line flag is the negation of what launch_jobs()
            # expects.
            not args.do_not_wait_on_completion,
            args.debug,
            args.force_jobs,
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0
530
531
def sanitize_dataset(dataset):
    """
    Split `dataset` using the 1.5 * IQR rule [1].

    Return a (kept, outliers) tuple of lists: `kept` holds the values
    lying within [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] and `outliers` the
    others. Both lists keep the order of `dataset`. This is useful to get
    a representative mean without outlier in it.
    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    first_quartile, third_quartile = numpy.percentile(sorted(dataset), [25, 75])
    margin = 1.5 * (third_quartile - first_quartile)
    low = first_quartile - margin
    high = third_quartile + margin

    def within_bounds(value):
        return low <= value <= high

    kept = [value for value in dataset if within_bounds(value)]
    rejected = [value for value in dataset if not within_bounds(value)]
    return kept, rejected
551
552
if __name__ == "__main__":
    # Propagate the status returned by main() as the process exit code.
    exit_status = main()
    sys.exit(exit_status)
This page took 0.046659 seconds and 4 git commands to generate.