Use MAP_POPULATE to reduce pagefault when available
[lttng-ust.git] / libringbuffer / shm.c
CommitLineData
1d498196
MD
1/*
2 * libringbuffer/shm.c
3 *
e92f3e28 4 * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
1d498196 5 *
e92f3e28
MD
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; only
9 * version 2.1 of the License.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1d498196
MD
19 */
20
3fbec7dc 21#define _LGPL_SOURCE
bfcda6ce 22#include <config.h>
1d498196
MD
23#include "shm.h"
24#include <unistd.h>
25#include <fcntl.h>
26#include <sys/mman.h>
a9ff648c 27#include <sys/types.h>
1d498196
MD
28#include <sys/stat.h> /* For mode constants */
29#include <fcntl.h> /* For O_* constants */
30#include <assert.h>
8da6cd6d
MD
31#include <stdio.h>
32#include <signal.h>
33#include <dirent.h>
4318ae1b 34#include <lttng/align.h>
96e80018 35#include <limits.h>
8a208943 36#include <stdbool.h>
bfcda6ce 37#ifdef HAVE_LIBNUMA
4b68c31f 38#include <numa.h>
8a208943 39#include <numaif.h>
bfcda6ce 40#endif
3a81f31d 41#include <helper.h>
6548fca4 42#include <ust-fd.h>
31dccd4e 43#include "mmap.h"
3a81f31d
MD
44
/*
 * zero_file - write @len zero bytes to @fd, starting at its current offset.
 *
 * Ensure we have the required amount of space available by writing 0
 * into the entire buffer. Not doing so can trigger SIGBUS when going
 * beyond the available shm space.
 *
 * The data is written at most one page at a time from a zeroed scratch
 * page. Writes interrupted by a signal (EINTR) are retried, and short
 * writes are continued until @len bytes have been written.
 *
 * Returns 0 on success. On failure, returns a negative value: -ENOMEM
 * if the scratch page cannot be allocated, otherwise the -1 returned by
 * the failing sysconf(3) or write(2) call.
 */
static
int zero_file(int fd, size_t len)
{
	size_t written = 0;
	char *zeropage;
	long pagelen;
	int ret = 0;

	pagelen = sysconf(_SC_PAGESIZE);
	if (pagelen < 0)
		return (int) pagelen;
	zeropage = calloc(pagelen, 1);
	if (!zeropage)
		return -ENOMEM;

	while (written < len) {
		size_t chunk = len - written;
		ssize_t retlen;

		/* Never write more than one scratch page per call. */
		if (chunk > (size_t) pagelen)
			chunk = (size_t) pagelen;
		do {
			retlen = write(fd, zeropage, chunk);
		} while (retlen < 0 && errno == EINTR);
		if (retlen < 0) {
			ret = (int) retlen;
			break;
		}
		written += (size_t) retlen;
	}
	free(zeropage);
	return ret;
}
1d498196
MD
82
83struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
84{
85 struct shm_object_table *table;
86
87 table = zmalloc(sizeof(struct shm_object_table) +
88 max_nb_obj * sizeof(table->objects[0]));
74d48abe
MD
89 if (!table)
90 return NULL;
1d498196
MD
91 table->size = max_nb_obj;
92 return table;
93}
94
74d81a6c
MD
95static
96struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
a9ff648c 97 size_t memory_map_size,
5ea386c3 98 int stream_fd)
1d498196 99{
5ea386c3 100 int shmfd, waitfd[2], ret, i;
1d498196
MD
101 struct shm_object *obj;
102 char *memory_map;
103
5ea386c3
MD
104 if (stream_fd < 0)
105 return NULL;
1d498196
MD
106 if (table->allocated_len >= table->size)
107 return NULL;
7a9c21bd 108 obj = &table->objects[table->allocated_len];
1d498196
MD
109
110 /* wait_fd: create pipe */
111 ret = pipe(waitfd);
112 if (ret < 0) {
113 PERROR("pipe");
114 goto error_pipe;
115 }
116 for (i = 0; i < 2; i++) {
117 ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
118 if (ret < 0) {
119 PERROR("fcntl");
120 goto error_fcntl;
121 }
122 }
5d61a504
MD
123 /* The write end of the pipe needs to be non-blocking */
124 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
125 if (ret < 0) {
126 PERROR("fcntl");
127 goto error_fcntl;
128 }
7a9c21bd 129 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
1d498196 130
5ea386c3 131 /* create shm */
a9ff648c 132
5ea386c3 133 shmfd = stream_fd;
3a81f31d
MD
134 ret = zero_file(shmfd, memory_map_size);
135 if (ret) {
136 PERROR("zero_file");
137 goto error_zero_file;
138 }
1d498196
MD
139 ret = ftruncate(shmfd, memory_map_size);
140 if (ret) {
141 PERROR("ftruncate");
142 goto error_ftruncate;
143 }
d0f6cf57
MD
144 /*
145 * Also ensure the file metadata is synced with the storage by using
146 * fsync(2).
147 */
148 ret = fsync(shmfd);
149 if (ret) {
150 PERROR("fsync");
151 goto error_fsync;
152 }
5ea386c3 153 obj->shm_fd_ownership = 0;
1d498196
MD
154 obj->shm_fd = shmfd;
155
156 /* memory_map: mmap */
157 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
31dccd4e 158 MAP_SHARED | LTTNG_MAP_POPULATE, shmfd, 0);
1d498196
MD
159 if (memory_map == MAP_FAILED) {
160 PERROR("mmap");
161 goto error_mmap;
162 }
74d81a6c 163 obj->type = SHM_OBJECT_SHM;
1d498196
MD
164 obj->memory_map = memory_map;
165 obj->memory_map_size = memory_map_size;
166 obj->allocated_len = 0;
dc613eb9 167 obj->index = table->allocated_len++;
7a9c21bd 168
1d498196
MD
169 return obj;
170
171error_mmap:
d0f6cf57 172error_fsync:
1d498196 173error_ftruncate:
3a81f31d 174error_zero_file:
1d498196
MD
175error_fcntl:
176 for (i = 0; i < 2; i++) {
177 ret = close(waitfd[i]);
178 if (ret) {
179 PERROR("close");
180 assert(0);
181 }
182 }
183error_pipe:
1d498196 184 return NULL;
1d498196
MD
185}
186
74d81a6c
MD
187static
188struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
189 size_t memory_map_size)
190{
191 struct shm_object *obj;
192 void *memory_map;
ff0f5728 193 int waitfd[2], i, ret;
74d81a6c
MD
194
195 if (table->allocated_len >= table->size)
196 return NULL;
197 obj = &table->objects[table->allocated_len];
198
199 memory_map = zmalloc(memory_map_size);
200 if (!memory_map)
201 goto alloc_error;
202
ff0f5728
MD
203 /* wait_fd: create pipe */
204 ret = pipe(waitfd);
205 if (ret < 0) {
206 PERROR("pipe");
207 goto error_pipe;
208 }
209 for (i = 0; i < 2; i++) {
210 ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
211 if (ret < 0) {
212 PERROR("fcntl");
213 goto error_fcntl;
214 }
215 }
216 /* The write end of the pipe needs to be non-blocking */
217 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
218 if (ret < 0) {
219 PERROR("fcntl");
220 goto error_fcntl;
221 }
222 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
223
224 /* no shm_fd */
74d81a6c 225 obj->shm_fd = -1;
5ea386c3 226 obj->shm_fd_ownership = 0;
74d81a6c
MD
227
228 obj->type = SHM_OBJECT_MEM;
229 obj->memory_map = memory_map;
230 obj->memory_map_size = memory_map_size;
231 obj->allocated_len = 0;
232 obj->index = table->allocated_len++;
233
234 return obj;
235
ff0f5728
MD
236error_fcntl:
237 for (i = 0; i < 2; i++) {
238 ret = close(waitfd[i]);
239 if (ret) {
240 PERROR("close");
241 assert(0);
242 }
243 }
244error_pipe:
245 free(memory_map);
74d81a6c
MD
246alloc_error:
247 return NULL;
248}
249
8a208943
MD
/*
 * libnuma prints errors on the console even for numa_available().
 * Work-around this limitation by using get_mempolicy() directly to
 * check whether the kernel supports mempolicy.
 *
 * Returns true when the NUMA API can be used, false otherwise.
 */
#ifdef HAVE_LIBNUMA
static bool lttng_is_numa_available(void)
{
	int ret;

	ret = get_mempolicy(NULL, NULL, 0, NULL, 0);
	if (ret && errno == ENOSYS) {
		return false;
	}
	/*
	 * numa(3) documents -1 as the only "not available" return value
	 * of numa_available(); any other value means the library can be
	 * used. Testing for "> 0" would reject the regular success value.
	 */
	return numa_available() != -1;
}
#endif
267
74d81a6c
MD
268struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
269 size_t memory_map_size,
a9ff648c 270 enum shm_object_type type,
4b68c31f
MD
271 int stream_fd,
272 int cpu)
74d81a6c 273{
4b68c31f 274 struct shm_object *shm_object;
bfcda6ce 275#ifdef HAVE_LIBNUMA
8a208943
MD
276 int oldnode = 0, node;
277 bool numa_avail;
4b68c31f 278
8a208943
MD
279 numa_avail = lttng_is_numa_available();
280 if (numa_avail) {
281 oldnode = numa_preferred();
282 if (cpu >= 0) {
283 node = numa_node_of_cpu(cpu);
284 if (node >= 0)
285 numa_set_preferred(node);
286 }
287 if (cpu < 0 || node < 0)
288 numa_set_localalloc();
4b68c31f 289 }
bfcda6ce 290#endif /* HAVE_LIBNUMA */
74d81a6c
MD
291 switch (type) {
292 case SHM_OBJECT_SHM:
4b68c31f 293 shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
5ea386c3 294 stream_fd);
4b68c31f 295 break;
74d81a6c 296 case SHM_OBJECT_MEM:
4b68c31f
MD
297 shm_object = _shm_object_table_alloc_mem(table, memory_map_size);
298 break;
74d81a6c
MD
299 default:
300 assert(0);
301 }
bfcda6ce 302#ifdef HAVE_LIBNUMA
8a208943
MD
303 if (numa_avail)
304 numa_set_preferred(oldnode);
bfcda6ce 305#endif /* HAVE_LIBNUMA */
4b68c31f 306 return shm_object;
74d81a6c
MD
307}
308
309struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
310 int shm_fd, int wakeup_fd, uint32_t stream_nr,
311 size_t memory_map_size)
193183fb
MD
312{
313 struct shm_object *obj;
314 char *memory_map;
74d81a6c 315 int ret;
193183fb
MD
316
317 if (table->allocated_len >= table->size)
318 return NULL;
74d81a6c
MD
319 /* streams _must_ be received in sequential order, else fail. */
320 if (stream_nr + 1 != table->allocated_len)
321 return NULL;
322
193183fb
MD
323 obj = &table->objects[table->allocated_len];
324
74d81a6c
MD
325 /* wait_fd: set write end of the pipe. */
326 obj->wait_fd[0] = -1; /* read end is unset */
327 obj->wait_fd[1] = wakeup_fd;
193183fb 328 obj->shm_fd = shm_fd;
5ea386c3 329 obj->shm_fd_ownership = 1;
193183fb 330
74d81a6c
MD
331 ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
332 if (ret < 0) {
333 PERROR("fcntl");
334 goto error_fcntl;
335 }
336 /* The write end of the pipe needs to be non-blocking */
337 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
338 if (ret < 0) {
339 PERROR("fcntl");
340 goto error_fcntl;
341 }
342
193183fb
MD
343 /* memory_map: mmap */
344 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
31dccd4e 345 MAP_SHARED | LTTNG_MAP_POPULATE, shm_fd, 0);
193183fb
MD
346 if (memory_map == MAP_FAILED) {
347 PERROR("mmap");
348 goto error_mmap;
349 }
74d81a6c 350 obj->type = SHM_OBJECT_SHM;
193183fb
MD
351 obj->memory_map = memory_map;
352 obj->memory_map_size = memory_map_size;
353 obj->allocated_len = memory_map_size;
354 obj->index = table->allocated_len++;
355
356 return obj;
357
74d81a6c 358error_fcntl:
193183fb
MD
359error_mmap:
360 return NULL;
361}
362
74d81a6c
MD
363/*
364 * Passing ownership of mem to object.
365 */
366struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
ff0f5728 367 void *mem, size_t memory_map_size, int wakeup_fd)
74d81a6c
MD
368{
369 struct shm_object *obj;
ff0f5728 370 int ret;
74d81a6c
MD
371
372 if (table->allocated_len >= table->size)
373 return NULL;
374 obj = &table->objects[table->allocated_len];
375
ff0f5728
MD
376 obj->wait_fd[0] = -1; /* read end is unset */
377 obj->wait_fd[1] = wakeup_fd;
74d81a6c 378 obj->shm_fd = -1;
5ea386c3 379 obj->shm_fd_ownership = 0;
74d81a6c 380
ff0f5728
MD
381 ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
382 if (ret < 0) {
383 PERROR("fcntl");
384 goto error_fcntl;
385 }
386 /* The write end of the pipe needs to be non-blocking */
387 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
388 if (ret < 0) {
389 PERROR("fcntl");
390 goto error_fcntl;
391 }
392
74d81a6c
MD
393 obj->type = SHM_OBJECT_MEM;
394 obj->memory_map = mem;
395 obj->memory_map_size = memory_map_size;
396 obj->allocated_len = memory_map_size;
397 obj->index = table->allocated_len++;
398
399 return obj;
ff0f5728
MD
400
401error_fcntl:
402 return NULL;
74d81a6c
MD
403}
404
1d498196 405static
6548fca4 406void shmp_object_destroy(struct shm_object *obj, int consumer)
1d498196 407{
74d81a6c
MD
408 switch (obj->type) {
409 case SHM_OBJECT_SHM:
410 {
411 int ret, i;
1d498196 412
7a784989
MD
413 ret = munmap(obj->memory_map, obj->memory_map_size);
414 if (ret) {
415 PERROR("umnmap");
416 assert(0);
417 }
6548fca4 418
5ea386c3 419 if (obj->shm_fd_ownership) {
6548fca4
MD
420 /* Delete FDs only if called from app (not consumer). */
421 if (!consumer) {
422 lttng_ust_lock_fd_tracker();
423 ret = close(obj->shm_fd);
424 if (!ret) {
425 lttng_ust_delete_fd_from_tracker(obj->shm_fd);
426 } else {
427 PERROR("close");
428 assert(0);
429 }
430 lttng_ust_unlock_fd_tracker();
431 } else {
432 ret = close(obj->shm_fd);
433 if (ret) {
434 PERROR("close");
435 assert(0);
436 }
a9ff648c
MD
437 }
438 }
74d81a6c
MD
439 for (i = 0; i < 2; i++) {
440 if (obj->wait_fd[i] < 0)
441 continue;
6548fca4
MD
442 if (!consumer) {
443 lttng_ust_lock_fd_tracker();
444 ret = close(obj->wait_fd[i]);
445 if (!ret) {
446 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
447 } else {
448 PERROR("close");
449 assert(0);
450 }
451 lttng_ust_unlock_fd_tracker();
452 } else {
453 ret = close(obj->wait_fd[i]);
454 if (ret) {
455 PERROR("close");
456 assert(0);
457 }
74d81a6c 458 }
1d498196 459 }
74d81a6c
MD
460 break;
461 }
462 case SHM_OBJECT_MEM:
ff0f5728
MD
463 {
464 int ret, i;
465
466 for (i = 0; i < 2; i++) {
467 if (obj->wait_fd[i] < 0)
468 continue;
6548fca4
MD
469 if (!consumer) {
470 lttng_ust_lock_fd_tracker();
471 ret = close(obj->wait_fd[i]);
472 if (!ret) {
473 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
474 } else {
475 PERROR("close");
476 assert(0);
477 }
478 lttng_ust_unlock_fd_tracker();
479 } else {
480 ret = close(obj->wait_fd[i]);
481 if (ret) {
482 PERROR("close");
483 assert(0);
484 }
ff0f5728
MD
485 }
486 }
74d81a6c
MD
487 free(obj->memory_map);
488 break;
ff0f5728 489 }
74d81a6c
MD
490 default:
491 assert(0);
1d498196
MD
492 }
493}
494
6548fca4 495void shm_object_table_destroy(struct shm_object_table *table, int consumer)
1d498196
MD
496{
497 int i;
498
499 for (i = 0; i < table->allocated_len; i++)
6548fca4 500 shmp_object_destroy(&table->objects[i], consumer);
1d498196
MD
501 free(table);
502}
503
504/*
505 * zalloc_shm - allocate memory within a shm object.
506 *
507 * Shared memory is already zeroed by shmget.
508 * *NOT* multithread-safe (should be protected by mutex).
509 * Returns a -1, -1 tuple on error.
510 */
511struct shm_ref zalloc_shm(struct shm_object *obj, size_t len)
512{
513 struct shm_ref ref;
514 struct shm_ref shm_ref_error = { -1, -1 };
515
516 if (obj->memory_map_size - obj->allocated_len < len)
517 return shm_ref_error;
518 ref.index = obj->index;
519 ref.offset = obj->allocated_len;
520 obj->allocated_len += len;
521 return ref;
522}
523
524void align_shm(struct shm_object *obj, size_t align)
525{
526 size_t offset_len = offset_align(obj->allocated_len, align);
527 obj->allocated_len += offset_len;
528}
This page took 0.090538 seconds and 4 git commands to generate.