Remove kernel module for babeltrace benchmark jobs
[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py
1 #!/usr/bin/python3
2 # Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 import json
18 import os
19 import tempfile
20 from statistics import mean
21 import argparse
22 import sys
23 from operator import add
24
25 import matplotlib.pyplot as plt
26 from matplotlib.backends.backend_pdf import PdfPages
27 from matplotlib.ticker import PercentFormatter
28
29 import git
30 import numpy
31 import lava_submit
32
33 from minio import Minio
34 from minio.error import NoSuchKey
35 from minio.error import ResponseError
36
37
# Benchmark flavours. Each name is used both as a path component of the
# remote result location and as the key of the per-type result datasets.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket holding the benchmark result objects on the object store.
DEFAULT_BUCKET = "lava"
40
41
def json_type(string):
    """
    Argparse type converter for JSON arguments.

    Decode ``string`` as JSON and return the result. The top-level value
    must be a dictionary; anything else raises argparse.ArgumentTypeError.
    """
    decoded = json.loads(string)
    if isinstance(decoded, dict):
        return decoded
    raise argparse.ArgumentTypeError("%r is not a dict" % string)
52
def graph_get_color(branch):
    """
    Get the color matching the branch.

    The three well-known branches keep their fixed colors. Any other branch
    name (possible when the branches are overridden on the command line)
    gets a color picked deterministically from a fallback palette instead
    of raising KeyError.
    """
    known_colors = {"stable-1.5": "red", "stable-2.0": "green", "master": "blue"}
    if branch in known_colors:
        return known_colors[branch]

    # The palette avoids the colors of the known branches as well as black,
    # which the graphs use for outliers and the reference line.
    fallback_palette = [
        "orange",
        "purple",
        "brown",
        "magenta",
        "cyan",
        "olive",
        "gray",
    ]
    # A byte sum is stable from one run to the next, unlike hash() on str.
    index = sum(str(branch).encode("utf-8")) % len(fallback_palette)
    return fallback_palette[index]
59
60
def graph_get_title(branch, benchmark_type):
    """
    Build a graph title: the branch name followed by the human readable
    name of the benchmark type.
    """
    output_names = {"dummy": "Dummy output", "text": "Text output"}
    readable_type = output_names[benchmark_type]
    return "{} - {}".format(branch, readable_type)
67
68
def get_client():
    """
    Build the Minio client used to reach the benchmark result storage.
    """
    # NOTE(review): the endpoint and the credentials are hard-coded in the
    # source; consider reading them from the environment or a config file.
    endpoint = "obj.internal.efficios.com"
    credentials = {"access_key": "jenkins", "secret_key": "echo123456"}
    return Minio(endpoint, **credentials)
76
77
def get_file(client, prefix, file_name, workdir_name):
    """
    Download the object "<prefix>/<file_name>" from the default bucket into
    the workdir_name directory.

    Return the path of the downloaded file, or None when the object does
    not exist on the remote.
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_name, local_path)
    except NoSuchKey:
        local_path = None
    return local_path
91
92
def delete_file(client, prefix, file_name):
    """
    Remove the object "<prefix>/<file_name>" from the default bucket.

    A response error is printed and a missing object is silently ignored;
    neither is propagated to the caller.
    """
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_name)
    except ResponseError as error:
        print(error)
    except NoSuchKey:
        # Nothing to delete on the remote.
        return
104
105
def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of commit hashes found in the
    range "<cutoff>..origin/<bt_version>".

    An empty list is returned when the range contains no commit.

    Side effect: a fetch is run on the repository located at repo_path
    before the log is read.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    revision_range = "{}..origin/{}".format(cutoff, bt_version)
    log_output = repo.git.log(revision_range, "--pretty=format:%H", "--reverse")
    # splitlines() gives [] for an empty log, whereas split("\n") would give
    # [""] and make the callers process a bogus empty commit hash.
    return log_output.splitlines()
116
117
def parse_result(result_path):
    """
    Load the JSON result file and return one value per run: the sum of its
    user time and of its system time.
    """
    with open(result_path) as result_file:
        content = json.load(result_file)
    user_times = content["User time (seconds)"]
    system_times = content["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]
131
132
def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark results of a commit across all benchmark types.

    Return a (results, valid) tuple. results maps each benchmark type to its
    parsed dataset, or is None as soon as one result file is missing. valid
    is False when at least one dataset fetched so far contains only zeros.
    """
    valid = True
    datasets = {}
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}/".format(b_type)
        local_file = get_file(client, prefix, commit, workdir)
        if not local_file:
            # Benchmark is either corrupted or not complete.
            return None, valid
        samples = parse_result(local_file)
        datasets[b_type] = samples
        if not any(i != 0.0 for i in samples):
            valid = False
            print("Invalid benchmark for {}/{}/{}".format(prefix, b_type, commit))
    return datasets, valid
153
154
def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    x_data holds the x position of each commit, y_data the matching list of
    samples and labels the x tick labels. Every sample is drawn: the ones
    kept by sanitize_dataset() as points of the branch color, the rejected
    ones as black "+" outliers. The latest value of every other branch found
    in latest_values is drawn as a horizontal reference line.
    """
    point_x_data = []
    point_y_data = []
    outlier_x_data = []
    outlier_y_data = []
    for x, samples in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(samples)
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # Leave 20% of headroom above the highest sample. The lower limit of the
    # axis is always zero, so no minimum needs to be computed.
    ymax = 1
    all_samples = [item for sublist in y_data for item in sublist]
    if all_samples:
        ymax = 1.2 * max(all_samples)

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        # Make sure the reference line stays inside the visible range.
        if l_result >= ymax:
            ymax = 1.2 * l_result

    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
207
208
def plot_delta_between_point(
    branch, benchmark_type, x_data, y_data, labels, latest_values
):
    """
    Plot, for each commit, the difference between its value and the value
    of the commit right before it. The first commit is plotted at zero.

    latest_values is accepted to mirror the other plot functions but is not
    used here.
    """
    # Each point is relative to its predecessor; the first one has none.
    if y_data:
        deltas = [0.0] + [
            current - previous for previous, current in zip(y_data, y_data[1:])
        ]
    else:
        deltas = []

    plt.plot(x_data, deltas, "o", label=branch, color=graph_get_color(branch))

    # Symmetric limits keep zero in the middle of the y axis.
    axis_limit = 100
    if deltas:
        axis_limit = abs(max(deltas, key=abs)) * 1.3
    plt.ylim(ymin=-axis_limit, ymax=axis_limit)

    axes = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    title = graph_get_title(branch, benchmark_type) + " Delta to previous commit"
    plt.title(title, fontweight="bold")
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Also show the tick labels on the right side.
    axes.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
250
251
def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    y_data holds one value per commit. Each value is drawn as its relative
    difference, in percent, to the first value. The latest value of every
    other branch found in latest_values is drawn as a horizontal line using
    the same reference.
    """
    reference = 0.01
    y_abs_max = 100

    if y_data:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    # The axis spans at least +/-100% and grows when a point would otherwise
    # fall outside of it. The comparison has to be made on the computed
    # maximum: testing y_abs_max itself could never succeed.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()
    plt.grid(True)

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
311
312
def _save_plot(pdf_pages, width, plot_function, *plot_args):
    """
    Draw one graph on a fresh figure, append it to the PDF and release it.
    """
    fig = plt.figure(figsize=(width, 8.27), dpi=100)
    try:
        plot_function(*plot_args)
        pdf_pages.savefig(fig)
    finally:
        # pyplot keeps a reference on every figure until it is closed.
        plt.close(fig)


def generate_graph(branches, report_name, git_path):
    """
    Generate the PDF report named report_name.

    branches maps each branch name to its cutoff commit and git_path is the
    location of the git repository used to list the commits. For every
    benchmark type and every branch three pages are produced: raw values,
    ratio to the first commit and delta to the previous commit.
    """
    client = get_client()
    branch_results = dict()

    # Fetch the results for each branch.
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results

    # The context manager closes the PDF document even if plotting fails.
    with PdfPages(report_name) as pdf_pages:
        for b_type in BENCHMARK_TYPES:
            latest_values = {}
            max_len = 0

            # Find the maximum size for a series inside our series dataset.
            # This is used later to compute the size of the actual plot (pdf).
            # While there gather the comparison value used to draw comparison
            # line between branches.
            for branch, results in branch_results.items():
                max_len = max(max_len, len(results))
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            # Widen the page with the number of commits; short series use a
            # fixed width.
            width = 0.16 * max_len if max_len > 10 else 11.69

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                labels = [c[0][:8] for c in results]

                raw_y_data = [c[1][b_type] for c in results]
                _save_plot(
                    pdf_pages,
                    width,
                    plot_raw_value,
                    branch,
                    b_type,
                    x_data,
                    raw_y_data,
                    labels,
                    latest_values,
                )

                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                mean_y_data = [
                    mean(sanitize_dataset(c[1][b_type])[0]) for c in results
                ]
                _save_plot(
                    pdf_pages,
                    width,
                    plot_ratio,
                    branch,
                    b_type,
                    x_data,
                    mean_y_data,
                    labels,
                    latest_values,
                )
                _save_plot(
                    pdf_pages,
                    width,
                    plot_delta_between_point,
                    branch,
                    b_type,
                    x_data,
                    mean_y_data,
                    labels,
                    latest_values,
                )
379
380
def launch_jobs(branches, git_path, wait_for_completion, debug, force):
    """
    Launch a job for every commit that has no complete set of results.

    When force is set a job is submitted for every commit, whether results
    already exist or not.
    """
    client = get_client()
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)

        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                existing, _valid = get_benchmark_results(client, commit, workdir)
                if force or not existing:
                    lava_submit.submit(
                        commit, wait_for_completion=wait_for_completion, debug=debug
                    )
397
398
def main():
    """
    Parse arguments and execute as needed.

    Return 0 on success and 1 when the repository path does not exist.
    """
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--force-jobs", action="store_true", help="Force the queueing of jobs to lava"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        help="Do not wait for the completion of each job sent. Waiting, the "
        "default, is useful for the ci. Otherwise we could end up spamming "
        "the lava instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )
    parser.add_argument(
        "--overwrite-branches-cutoff",
        help="A dictionary of the form "
        "{'branch_name': 'commit_hash_cutoff',...}. Allow custom graphing and "
        "jobs generation.",
        required=False,
        type=json_type,
    )

    args = parser.parse_args()

    if args.overwrite_branches_cutoff:
        bt_branches = args.overwrite_branches_cutoff

    if not os.path.exists(args.repo_path):
        print("Repository location does not exist.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")

        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))

        # The command line flag is a negation: jobs are waited on by default.
        launch_jobs(
            bt_branches,
            args.repo_path,
            not args.do_not_wait_on_completion,
            args.debug,
            args.force_jobs,
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0
475
476
def sanitize_dataset(dataset):
    """
    Split the dataset using the 1.5 * IQR rule [1].

    Return a (kept, outliers) tuple of lists, both in the original order of
    the dataset. A value is kept when it lies within 1.5 times the
    interquartile range below the first quartile or above the third one.
    This is useful to get a representative mean without outlier in it.
    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers
    """
    first_quartile, third_quartile = numpy.percentile(sorted(dataset), [25, 75])
    margin = 1.5 * (third_quartile - first_quartile)
    lowest_valid = first_quartile - margin
    highest_valid = third_quartile + margin

    kept = []
    outliers = []
    for value in dataset:
        target = kept if lowest_valid <= value <= highest_valid else outliers
        target.append(value)
    return kept, outliers
496
497
if __name__ == "__main__":
    # Use the value returned by main() as the exit status of the process.
    raise SystemExit(main())
This page took 0.045459 seconds and 4 git commands to generate.