jjb: babeltrace: use clang-format-16
[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py
1 #!/usr/bin/python3
2 # Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 import json
18 import os
19 import tempfile
20 from statistics import mean
21 import argparse
22 import sys
23 from operator import add
24
25 import matplotlib.pyplot as plt
26 from matplotlib.backends.backend_pdf import PdfPages
27 from matplotlib.ticker import PercentFormatter
28
29 import git
30 import numpy
31 import lava_submit
32
33 from minio import Minio
34 from minio.error import NoSuchKey
35 from minio.error import ResponseError
36
37
# Benchmark flavours; each one has its own result prefix in the object store.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket holding the benchmark results.
DEFAULT_BUCKET = "lava"

# Commits for which no benchmark job is ever launched.
invalid_commits = {
    # Build or configuration failures.
    "ec9a9794af488a9accce7708a8b0d8188b498789",  # Does not build
    "8c99128c640cbce71fb8a6caa15e4c672252b662",  # Block on configure
    "f3847c753f1b4f12353c38d97b0577d9993d19fb",  # Does not build
    "e0111295f17ddfcc33ec771a8deac505473a06ad",  # Does not build
    "d0d4e0ed487ea23aaf0d023513c0a4d86901b79b",  # Does not build
    "c24f7ab4dd9edeb5e50b0070fd9d9e8691057dde",  # Does not build
    "ce67f5614a4db3b2de4d887eca52135b439b4937",  # Does not build
    "80aff5efc66679fd934cef433c0e698694748385",  # Does not build
    "f4f11e84942d36fcc8a597d226928bce2ccac4b3",  # Does not build
    "ae466a6e1b856d96cf5112a371b4df2b732503ec",  # Does not build
    "ade5c95e2a4f90f839f222fc1a66175b3b199922",  # Configuration fails
    "30341532906d62808e9d66fb115f5edb4e6f5706",  # Configuration fails
    "006c5ffb42f32e802136e3c27a63accb59b4d6c4",  # Does not build
    "88488ff5bdcd7679ff1f04fe6cff0d24b4f8fc0c",  # Does not build
    # Other errors
    "7c7301d5827bd10ec7c34da7ffc5fe74e5047d38",
    "a0df3abf88616cb0799f87f4eb57c54268e63448",
    "b7045dd71bc0524ad6b5db96df365e98e237d395",
    "cf7b259eaa602abcef308d2b5dd8e6c9ee995d8b",
    "90a55a4ef47cac7b568f5f0a8a78bd760f82d23c",
    "baa5e3aa82a82c9d0fa59e3c586c0168bb5dc267",
    "af9f8da7ba4a9b16fc36d637b8c3a0c7a8774da2",
    "fe748379adbd385efdfc7acae9c2340fb8b7d717",
    "929627965e33e06dc77254d81e8ec1d66cc06590",
    "48a0e52c4632a60cd43423f2f34f10de350bf868",
    "b7fa35fce415b33207a9eba111069ed31ef122a0",
    "828c8a25785e0cedaeb6987256a4dfc3c43b982f",
    "213489680861e4d796173513effac7023312ec2d",
    "430a5ccbbd15782501ca56bb148f3850126277ad",
    "629d19044c43b195498d0a4e002906c54b6186d5",
    "c423217ed1640b4152739f7e5613775d46c25050",
    # Elfutils
    "776a2a252c9875caa1e8b4f41cb8cc12c79611c3",
    "435aa29aff0527d36aafa1b657ae70b9db5f9ea5",
    "95651695473495501fc6b2c4a1cf6a78cfb3cd6a",
    "e0748fb2ba8994c136bcc0b67d3044f09841cf8e",
    "9e632b22e1310fe773edc32ab08a60602f4b2861",
    "271fb6907a6f4705a1c799d925394243eae51d68",
    "328342cd737582216dc7b8b7d558b2a1bf8ea5e8",
    "ae5c1a4481be68fae027910b141354c1d86daa64",
    "e6938018975e45d35dab5fef795fe7344eef7d62",
    "e015bae2ef343b30c890eebb9182a8be13d12ed0",
    "5e8a0751ae0c418a615025d1da10bc84f91b3d97",
    "887d26fa0fd0ae0c5c15e4b885473c4cdc0bf078",
    "e97fe75eac59fc39a6e4f3c4f9f3301835a0315e",
    "8b130e7f1d6a41fb5c64a014c15246ba74b79470",
    "f4f8f79893b18199b38edc3330093a9403c4c737",
}
93
def json_type(string):
    """
    Argparse type for JSON arguments.

    Decode `string` as JSON and return the decoded value, which must be a
    dictionary; any other top-level JSON value raises
    argparse.ArgumentTypeError.
    """
    decoded = json.loads(string)
    if isinstance(decoded, dict):
        return decoded
    raise argparse.ArgumentTypeError("%r is not a dict" % string)
104
def graph_get_color(branch):
    """
    Return the plot color associated with `branch`.

    Raises KeyError for a branch that has no color assigned.
    """
    branch_colors = {
        "master": "blue",
        "stable-2.0": "green",
        "stable-1.5": "red",
    }
    return branch_colors[branch]
111
112
def graph_get_title(branch, benchmark_type):
    """
    Build the graph title from the branch name and the benchmark type.

    Raises KeyError for an unknown benchmark type.
    """
    output_names = {"text": "Text output", "dummy": "Dummy output"}
    output_name = output_names[benchmark_type]
    return "{} - {}".format(branch, output_name)
119
120
def get_client():
    """
    Return a Minio client configured for the object store.
    """
    # NOTE(review): the endpoint and credentials are hard-coded in the
    # source; consider whether they should come from the environment.
    endpoint = "obj.internal.efficios.com"
    return Minio(endpoint, access_key="jenkins", secret_key="echo123456")
128
129
def get_file(client, prefix, file_name, workdir_name):
    """
    Download `<prefix>/<file_name>` from the default bucket into
    `workdir_name`.

    Return the path of the downloaded file, or None when the object does not
    exist on the remote.
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_name, local_path)
    except NoSuchKey:
        local_path = None
    return local_path
143
144
def delete_file(client, prefix, file_name):
    """
    Remove `<prefix>/<file_name>` from the default bucket.

    A ResponseError is printed and otherwise ignored; a missing object is
    silently ignored.
    """
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_name)
    except ResponseError as error:
        print(error)
    except NoSuchKey:
        # Nothing to delete.
        return
156
157
def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of commit hashes found in the
    `<cutoff>..origin/<bt_version>` range of the repository at `repo_path`.

    An empty list is returned when the range contains no commit.
    WARNING: This runs `git fetch` in the repository.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    revision_range = "{}..origin/{}".format(cutoff, bt_version)
    log_output = repo.git.log(revision_range, "--pretty=format:%H", "--reverse")
    # Do not use split("\n") here: on an empty log it returns [""], which
    # callers would treat as one commit with an empty hash.
    return log_output.splitlines()
168
169
def parse_result(result_path):
    """
    Parse the JSON result file at `result_path`.

    Return a list holding, for each sample, the sum of its
    "User time (seconds)" and "System time (seconds)" entries.
    """
    with open(result_path) as handle:
        timings = json.load(handle)
    user_times = timings["User time (seconds)"]
    system_times = timings["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]
183
184
def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark results of `commit` across all benchmark types.

    Return a `(results, valid)` tuple. `results` maps each benchmark type to
    its parsed dataset, or is None as soon as one result file cannot be
    fetched. `valid` is False when at least one of the datasets parsed so
    far contains only zeros.
    """
    results = {}
    benchmark_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, benchmark_valid
        results[b_type] = parse_result(result_file)
        if all(i == 0.0 for i in results[b_type]):
            benchmark_valid = False
            # `prefix` already ends with the benchmark type, so the remote
            # object is <prefix>/<commit>.
            print("Invalid benchmark for {}/{}".format(prefix, commit))
    return results, benchmark_valid
205
206
def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    `x_data` holds the x position of each commit, `y_data` the matching
    dataset (a list of samples) and `labels` the x tick labels. Each dataset
    goes through sanitize_dataset(): regular samples are drawn with the
    branch color and outliers as black "+" markers. `latest_values` maps a
    branch name to its latest value; every other branch with a truthy value
    is drawn as a horizontal reference line.
    """
    point_x_data, point_y_data = [], []
    outlier_x_data, outlier_y_data = [], []
    for x, dataset in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(dataset)
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # Leave 20% of headroom above the highest sample.
    ymax = 1
    if y_data:
        ymax = 1.2 * max(item for sublist in y_data for item in sublist)

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Keep the reference line inside the visible range.
        if l_result >= ymax:
            ymax = 1.2 * l_result

    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
259
260
def plot_delta_between_point(
    branch, benchmark_type, x_data, y_data, labels, latest_values
):
    """
    Plot, for each commit, the difference between its value and the value of
    the commit right before it. The first commit has a delta of zero.

    `latest_values` is accepted for signature parity with the other plot
    functions and is not used.
    """
    # Delta of every point against its predecessor; the first point is its
    # own reference.
    deltas = [current - previous for previous, current in zip(y_data, y_data[1:])]
    if y_data:
        deltas.insert(0, 0.0)

    plt.plot(x_data, deltas, "o", label=branch, color=graph_get_color(branch))

    # Use the largest absolute delta (plus some margin) on both sides so that
    # zero sits in the middle of the y axis.
    if deltas:
        axis_limit = abs(max(deltas, key=abs)) * 1.3
    else:
        axis_limit = 100
    plt.ylim(ymin=-axis_limit, ymax=axis_limit)

    axes = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    title = graph_get_title(branch, benchmark_type) + " Delta to previous commit"
    plt.title(title, fontweight="bold")
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    axes.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
302
303
def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    `y_data` holds one value per commit. `latest_values` maps a branch name
    to its latest value; every other branch with a truthy value is drawn as
    a horizontal line, expressed as a ratio of the same reference.
    """
    reference = 0.01
    y_abs_max = 100

    if y_data:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    # The axis spans at least +/-100% and grows when a point lies further
    # away, so that no point is clipped.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
363
364
def generate_graph(branches, report_name, git_path):
    """
    Generate the PDF report `report_name`.

    `branches` maps a branch name to its cutoff commit and `git_path` is the
    repository used to list the commits of each branch. For every benchmark
    type and every branch, three pages are written: raw values, ratio to the
    first commit and delta to the previous commit. Commits without a complete
    and valid benchmark are skipped.
    """
    client = get_client()
    branch_results = {}

    # Fetch the results for each branch.
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results

    # The PDF document. The context manager closes it even when plotting
    # raises.
    with PdfPages(report_name) as pdf_pages:
        for b_type in BENCHMARK_TYPES:
            latest_values = {}
            max_len = 0

            # Find the maximum size for a series inside our series dataset.
            # This is used later to compute the size of the actual plot (pdf).
            # While there gather the comparison value used to draw comparison
            # line between branches.
            for branch, results in branch_results.items():
                max_len = max(max_len, len(results))
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            # Widen the page when there are many commits to show.
            width = 0.16 * max_len if max_len > 10 else 11.69

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                raw_y_data = [c[1][b_type] for c in results]
                # Use the mean of each sanitized dataset for the ratio and
                # delta graphs, we do not care for variance there. At least
                # not yet.
                mean_y_data = [
                    mean(sanitize_dataset(c[1][b_type])[0]) for c in results
                ]
                labels = [c[0][:8] for c in results]

                pages = (
                    (plot_raw_value, raw_y_data),
                    (plot_ratio, mean_y_data),
                    (plot_delta_between_point, mean_y_data),
                )
                for plot_function, y_data in pages:
                    fig = plt.figure(figsize=(width, 8.27), dpi=100)
                    try:
                        plot_function(
                            branch, b_type, x_data, y_data, labels, latest_values
                        )
                        pdf_pages.savefig(fig)
                    finally:
                        # Figures stay registered in pyplot until closed.
                        plt.close(fig)
431
432
def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Launch jobs for all missing results.

    `branches` maps a branch name to its cutoff commit and `git_path` is the
    repository used to list the commits. Commits listed in `invalid_commits`
    are never tested. A commit that already has results is skipped unless
    `force` is set. `wait_for_completion` and `debug` are passed through to
    lava_submit.submit(). Each commit is submitted once, in the order in
    which it is first encountered.
    """
    client = get_client()
    # A list keeps the submission order stable; the set only deduplicates
    # commits shared by several branches.
    commits_to_test = []
    queued = set()
    for branch, cutoff in branches.items():
        commits = [
            x for x in get_git_log(branch, cutoff, git_path) if x not in invalid_commits
        ]
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                if commit in queued:
                    continue
                # Existing results are irrelevant when forcing, so do not
                # download them in that case.
                if not force and get_benchmark_results(client, commit, workdir)[0]:
                    continue
                queued.add(commit)
                commits_to_test.append(commit)

    total = len(commits_to_test)
    for index, commit in enumerate(commits_to_test, start=1):
        print("Job {}/{}".format(index, total))
        lava_submit.submit(
            commit, wait_for_completion=wait_for_completion, debug=debug
        )
452
453
def main():
    """
    Parse arguments and execute as needed.

    Return 1 when the repository path does not exist, 0 otherwise.
    """
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        help="Do not wait for the completion of each job sent. Waiting is "
        "useful for the ci. Otherwise we could end up spamming the lava "
        "instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )
    parser.add_argument(
        "--overwrite-branches-cutoff",
        # The value is decoded with json.loads(), hence the double quotes in
        # the example.
        help="A JSON dictionary of the form "
        '{"branch_name": "commit_hash_cutoff", ...}. Allow custom graphing '
        "and jobs generation.",
        required=False,
        type=json_type,
    )

    args = parser.parse_args()

    if args.overwrite_branches_cutoff:
        bt_branches = args.overwrite_branches_cutoff

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")

        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))

        launch_jobs(
            bt_branches,
            args.repo_path,
            not args.do_not_wait_on_completion,
            args.debug,
            args.force_jobs,
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0
530
531
def sanitize_dataset(dataset):
    """
    Split `dataset` into regular values and outliers using the 1.5 x IQR
    rule [1]. This is useful to get a representative mean without outlier in
    it.

    Return a `(values, outliers)` tuple of lists, each keeping the order of
    `dataset`. A value is an outlier when it lies outside
    [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR].

    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    first_quartile, third_quartile = numpy.percentile(sorted(dataset), [25, 75])
    margin = 1.5 * (third_quartile - first_quartile)
    lower_bound = first_quartile - margin
    upper_bound = third_quartile + margin

    kept, rejected = [], []
    for value in dataset:
        bucket = kept if lower_bound <= value <= upper_bound else rejected
        bucket.append(value)
    return kept, rejected
551
552
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
This page took 0.03984 seconds and 4 git commands to generate.