Tune kernel parameter for babeltrace benchmarking
[lttng-ci.git] / scripts / babeltrace-benchmark / benchmark.py
1 #!/usr/bin/python3
2 # Copyright (C) 2019 - Jonathan Rajotte <jonathan.rajotte-julien@efficios.com>
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 import json
18 import os
19 import tempfile
20 from statistics import mean
21 import argparse
22 import sys
23 from operator import add
24
25 import matplotlib.pyplot as plt
26 from matplotlib.backends.backend_pdf import PdfPages
27 from matplotlib.ticker import PercentFormatter
28
29 import git
30 import numpy
31 import lava_submit
32
33 from minio import Minio
34 from minio.error import NoSuchKey
35 from minio.error import ResponseError
36
37
# Benchmark types for which results are fetched and plotted.
BENCHMARK_TYPES = ["dummy", "text"]
# Bucket used for every object download and removal done by this script.
DEFAULT_BUCKET = "lava"
40
41
def graph_get_color(branch):
    """
    Return the plot color associated with a branch name.

    Raise KeyError when the branch has no color assigned.
    """
    palette = dict(
        (
            ("stable-1.5", "red"),
            ("stable-2.0", "green"),
            ("master", "blue"),
        )
    )
    return palette[branch]
48
49
def graph_get_title(branch, benchmark_type):
    """
    Build the graph title from the branch name and the benchmark type.

    Raise KeyError when the benchmark type is not "dummy" or "text".
    """
    output_names = {"dummy": "Dummy output", "text": "Text output"}
    output_name = output_names[benchmark_type]
    return "{} - {}".format(branch, output_name)
56
57
def get_client():
    """
    Build and return the minio client used to reach the object store.
    """
    # NOTE(review): the endpoint and the credentials are hard-coded in the
    # source; consider moving them to the environment or a config file.
    endpoint = "obj.internal.efficios.com"
    credentials = {"access_key": "jenkins", "secret_key": "echo123456"}
    return Minio(endpoint, **credentials)
65
66
def get_file(client, prefix, file_name, workdir_name):
    """
    Download "<prefix>/<file_name>" from the default bucket into workdir_name.

    Return the local path of the downloaded file, or None when the object
    does not exist on the remote.
    """
    local_path = os.path.join(workdir_name, file_name)
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.fget_object(DEFAULT_BUCKET, remote_name, local_path)
    except NoSuchKey:
        # Missing object: report it to the caller as "no file".
        local_path = None

    return local_path
80
81
def delete_file(client, prefix, file_name):
    """
    Remove "<prefix>/<file_name>" from the default bucket.

    A response error is printed; a missing object is silently ignored.
    """
    remote_name = "{}/{}".format(prefix, file_name)
    try:
        client.remove_object(DEFAULT_BUCKET, remote_name)
    except ResponseError as response_error:
        print(response_error)
    except NoSuchKey:
        # Nothing to delete: the object is already absent.
        return
93
94
def get_git_log(bt_version, cutoff, repo_path):
    """
    Return an ordered (older to newer) list of commit hashes for the range
    "<cutoff>..origin/<bt_version>" of the repository located at repo_path.

    An empty list is returned when the range contains no commit.

    WARNING: This runs `git fetch` in the repository before reading the log.
    NOTE(review): the previous wording warned that the repo HEAD is changed;
    the only mutating command issued here is the fetch.
    """
    repo = git.Repo(repo_path)
    repo.git.fetch()
    log_output = repo.git.log(
        "{}..origin/{}".format(cutoff, bt_version), "--pretty=format:%H", "--reverse"
    )
    # splitlines() returns [] for an empty log, whereas split("\n") would
    # return [""] and make callers process a bogus empty commit id.
    return log_output.splitlines()
105
106
def parse_result(result_path):
    """
    Load a JSON result file and return, sample by sample, the sum of the
    "User time (seconds)" and "System time (seconds)" series.
    """
    with open(result_path) as result_file:
        timings = json.load(result_file)

    user_times = timings["User time (seconds)"]
    system_times = timings["System time (seconds)"]
    return [user + system for user, system in zip(user_times, system_times)]
120
121
def get_benchmark_results(client, commit, workdir):
    """
    Fetch the benchmark results of a commit across all benchmark types.

    Return a (results, benchmark_valid) tuple:
      - results maps each benchmark type to its parsed dataset. It is None
        as soon as the result file of one type cannot be fetched.
      - benchmark_valid is False when at least one fetched dataset contains
        only 0.0 values (an empty dataset counts as such).
    """
    results = {}
    benchmark_valid = True
    for b_type in BENCHMARK_TYPES:
        prefix = "/results/benchmarks/babeltrace/{}/".format(b_type)
        result_file = get_file(client, prefix, commit, workdir)
        if not result_file:
            # Benchmark is either corrupted or not complete.
            return None, benchmark_valid
        results[b_type] = parse_result(result_file)
        if all(i == 0.0 for i in results[b_type]):
            benchmark_valid = False
            # The prefix already embeds the benchmark type, do not repeat it.
            print("Invalid benchmark for {}/{}".format(prefix, commit))
    # All result files were found; validity is reported separately.
    return results, benchmark_valid
142
143
def plot_raw_value(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using the raw value.

    x_data holds one x position per commit and y_data the matching list of
    samples. Each list of samples is split with sanitize_dataset(): regular
    samples are drawn with the branch color, outliers as black "+".
    The latest value of every other branch found in latest_values is drawn
    as a horizontal line, and the top of the y axis is raised to keep it
    visible.
    """
    point_x_data, point_y_data = [], []
    outlier_x_data, outlier_y_data = [], []
    for x, samples in zip(x_data, y_data):
        valid_points, outliers = sanitize_dataset(samples)
        point_x_data.extend([x] * len(valid_points))
        point_y_data.extend(valid_points)
        outlier_x_data.extend([x] * len(outliers))
        outlier_y_data.extend(outliers)

    plt.plot(
        point_x_data, point_y_data, "o", label=branch, color=graph_get_color(branch)
    )
    plt.plot(outlier_x_data, outlier_y_data, "+", label="outlier", color="black")

    # The y axis always starts at 0; only its top depends on the data.
    ymax = 1
    all_samples = [item for sublist in y_data for item in sublist]
    if all_samples:
        ymax = 1.2 * max(all_samples)

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        plt.axhline(
            y=l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )
        if l_result >= ymax:
            ymax = 1.2 * l_result

    ax = plt.gca()
    plt.ylim(ymin=0, ymax=ymax)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("User + system time (s)")
    plt.xlabel("Latest commits")
    plt.legend()

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
195
def plot_delta_between_point(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot, for each commit, the difference with the value of the previous
    commit.

    The first commit has no predecessor and is plotted at 0.0.
    latest_values is accepted but not used by this graph.
    """
    # Pair every value with its predecessor to get the sequential deltas.
    deltas = [current - previous for previous, current in zip(y_data, y_data[1:])]
    if len(y_data) > 0:
        deltas.insert(0, 0.0)

    plt.plot(x_data, deltas, "o", label=branch, color=graph_get_color(branch))

    # Symmetric limits around the largest absolute delta keep zero in the
    # middle of the y axis.
    half_range = 100
    if deltas:
        half_range = abs(max(deltas, key=abs)) * 1.3
    plt.ylim(ymin=half_range * -1, ymax=half_range)

    axes = plt.gca()
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    title = graph_get_title(branch, benchmark_type) + " Delta to previous commit"
    plt.title(title, fontweight="bold")
    plt.ylabel("Seconds")
    plt.xlabel("Latest commits")
    plt.legend()

    # Also label the ticks on the right side.
    axes.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
231
def plot_ratio(branch, benchmark_type, x_data, y_data, labels, latest_values):
    """
    Plot the graph using a ratio using first point as reference (0%).

    Every value of y_data is expressed as a percentage of variation against
    y_data[0]. The latest value of every other branch found in latest_values
    is drawn as a horizontal line using the same reference.
    The y axis is symmetric around zero: at least +/-100%, more when a point
    of the series needs it.
    """
    reference = 0.01
    y_abs_max = 100

    if y_data:
        reference = y_data[0]

    # Transform y_data to a list of ratio for which the reference is the first
    # element.
    local_y_data = [((y / reference) - 1.0) * 100 for y in y_data]

    plt.plot(x_data, local_y_data, "o", label=branch, color=graph_get_color(branch))

    # Put latest of other branches for reference as horizontal line.
    for l_branch, l_result in latest_values.items():
        if not l_result or l_branch == branch:
            continue
        ratio_l_result = ((l_result / reference) - 1.0) * 100.0
        print(
            "branch {} branch {} value {} l_result {} reference {}".format(
                branch, l_branch, ratio_l_result, l_result, reference
            )
        )
        plt.axhline(
            y=ratio_l_result,
            label="Latest {}".format(l_branch),
            color=graph_get_color(l_branch),
        )

    # Draw the reference line.
    plt.axhline(y=0, label="Reference (leftmost point)", linestyle="-", color="Black")

    # Get max absolute value to align the y axis with zero in the middle.
    if local_y_data:
        local_abs_max = abs(max(local_y_data, key=abs)) * 1.3
        # Compare the computed value, not the default: the default never
        # exceeds itself, which left points beyond +/-100% out of view.
        if local_abs_max > y_abs_max:
            y_abs_max = local_abs_max

    plt.ylim(ymin=y_abs_max * -1, ymax=y_abs_max)

    ax = plt.gca()
    percent_formatter = PercentFormatter()
    ax.yaxis.set_major_formatter(percent_formatter)
    ax.yaxis.set_minor_formatter(percent_formatter)
    plt.xticks(x_data, labels, rotation=90, family="monospace")
    plt.title(graph_get_title(branch, benchmark_type), fontweight="bold")
    plt.ylabel("Ratio")
    plt.xlabel("Latest commits")
    plt.legend()

    # Put tick on the right side
    ax.tick_params(labeltop=False, labelright=True)

    plt.tight_layout()
290
def generate_graph(branches, report_name, git_path):
    """
    Generate the pdf report named report_name.

    branches maps a branch name to its cutoff commit. For every commit
    returned by get_git_log(), the benchmark results are fetched; commits
    with missing or invalid results are skipped.
    For each benchmark type and each branch, three pages are produced: the
    raw values, the ratio against the first commit and the delta between
    sequential commits.
    """
    client = get_client()
    branch_results = {}

    # Fetch the results for each branch.
    for branch, cutoff in branches.items():
        commits = get_git_log(branch, cutoff, git_path)
        results = []
        with tempfile.TemporaryDirectory() as workdir:
            for commit in commits:
                b_results, valid = get_benchmark_results(client, commit, workdir)
                if not b_results or not valid:
                    continue
                results.append((commit, b_results))
        branch_results[branch] = results

    # The PDF document. The context manager closes it even when a plot fails.
    with PdfPages(report_name) as pdf_pages:
        for b_type in BENCHMARK_TYPES:
            latest_values = {}
            max_len = 0

            # Find the maximum size for a series inside our series dataset.
            # This is used later to compute the size of the actual plot (pdf).
            # While there gather the comparison value used to draw comparison
            # line between branches.
            for branch, results in branch_results.items():
                max_len = max(max_len, len(results))
                if results:
                    latest_values[branch] = mean(
                        sanitize_dataset(results[-1][1][b_type])[0]
                    )
                else:
                    latest_values[branch] = None

            # Same page width for all the branches of a benchmark type.
            width = 0.16 * max_len if max_len > 10 else 11.69

            for branch, results in branch_results.items():
                x_data = list(range(len(results)))
                labels = [c[0][:8] for c in results]
                raw_data = [c[1][b_type] for c in results]
                # Use the mean of each sanitize dataset here, we do not care
                # for variance for ratio. At least not yet.
                mean_data = [mean(sanitize_dataset(samples)[0]) for samples in raw_data]

                pages = (
                    (plot_raw_value, raw_data),
                    (plot_ratio, mean_data),
                    (plot_delta_between_point, mean_data),
                )
                for plot_page, y_data in pages:
                    fig = plt.figure(figsize=(width, 8.27), dpi=100)
                    try:
                        plot_page(branch, b_type, x_data, y_data, labels, latest_values)
                        pdf_pages.savefig(fig)
                    finally:
                        # Release the figure, pyplot keeps it alive otherwise.
                        plt.close(fig)
355
356
def launch_jobs(branches, git_path, wait_for_completion, debug):
    """
    Launch a job for every commit for which results are missing.

    branches maps a branch name to its cutoff commit. A commit is submitted
    when get_benchmark_results() returns no results for it.
    """
    minio_client = get_client()
    for branch_name, cutoff_commit in branches.items():
        candidate_commits = get_git_log(branch_name, cutoff_commit, git_path)

        with tempfile.TemporaryDirectory() as scratch_dir:
            for commit_id in candidate_commits:
                existing_results, _ = get_benchmark_results(
                    minio_client, commit_id, scratch_dir
                )
                if not existing_results:
                    lava_submit.submit(
                        commit_id, wait_for_completion=wait_for_completion, debug=debug
                    )
373
374
def main():
    """
    Parse arguments and execute as needed.

    Return the exit status: 1 when the repository path does not exist,
    0 otherwise.
    """
    # Branch name -> cutoff commit; only commits after the cutoff are used.
    bt_branches = {
        "master": "31976fe2d70a8b6b7f8b31b9e0b3bc004d415575",
        "stable-2.0": "07f585356018b4ddfbd0e09c49a14e38977c6973",
        "stable-1.5": "49e98b837a5667130e0d1e062a6bd7985c7c4582",
    }

    parser = argparse.ArgumentParser(description="Babeltrace benchmark utility")
    parser.add_argument(
        "--generate-jobs", action="store_true", help="Generate and send jobs"
    )
    parser.add_argument(
        "--do-not-wait-on-completion",
        action="store_true",
        default=False,
        help="Do not wait for the completion of each job sent. Waiting "
        "(the default) is useful for the CI; otherwise we could end up "
        "spamming the LAVA instance.",
    )
    parser.add_argument(
        "--generate-report",
        action="store_true",
        help="Generate graphs and save them to pdf",
    )
    parser.add_argument(
        "--report-name", default="report.pdf", help="The name of the pdf report."
    )
    parser.add_argument(
        "--debug", action="store_true", default=False, help="Do not send jobs to lava."
    )
    parser.add_argument(
        "--repo-path", help="The location of the git repo to use.", required=True
    )

    args = parser.parse_args()

    if not os.path.exists(args.repo_path):
        print("Repository location does not exists.")
        return 1

    if args.generate_jobs:
        print("Launching jobs for:")
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        # The command line flag is negative, launch_jobs() expects the
        # positive form.
        launch_jobs(
            bt_branches, args.repo_path, not args.do_not_wait_on_completion, args.debug
        )

    if args.generate_report:
        print("Generating pdf report ({}) for:".format(args.report_name))
        for branch, cutoff in bt_branches.items():
            print("\t Branch {} with cutoff {}".format(branch, cutoff))
        generate_graph(bt_branches, args.report_name, args.repo_path)

    return 0
432
433
def sanitize_dataset(dataset):
    """
    Use IQR 1.5 [1] to remove outlier from the dataset. This is useful to get a
    representative mean without outlier in it.
    [1] https://en.wikipedia.org/wiki/Interquartile_range#Outliers

    Return a (new_dataset, outliers) tuple of lists, both keeping the order
    of the input. An empty dataset yields two empty lists.
    """
    # There are no quartiles to compute on an empty dataset.
    if len(dataset) == 0:
        return [], []

    # numpy.percentile does not need a sorted input.
    q1, q3 = numpy.percentile(dataset, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - (1.5 * iqr)
    upper_bound = q3 + (1.5 * iqr)

    new_dataset = []
    outliers = []
    for value in dataset:
        if lower_bound <= value <= upper_bound:
            new_dataset.append(value)
        else:
            outliers.append(value)
    return new_dataset, outliers
453
454
# When run as a script, use the return value of main() as the exit status.
if __name__ == "__main__":
    sys.exit(main())
This page took 0.037937 seconds and 4 git commands to generate.