/* lttng-ust: src/common/ringbuffer/shm.c */
1d498196 1/*
c0c0989a 2 * SPDX-License-Identifier: LGPL-2.1-only
1d498196 3 *
e92f3e28 4 * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
1d498196
MD
5 */
6
3fbec7dc 7#define _LGPL_SOURCE
1d498196
MD
8#include "shm.h"
9#include <unistd.h>
10#include <fcntl.h>
11#include <sys/mman.h>
a9ff648c 12#include <sys/types.h>
1d498196
MD
13#include <sys/stat.h> /* For mode constants */
14#include <fcntl.h> /* For O_* constants */
15#include <assert.h>
8da6cd6d
MD
16#include <stdio.h>
17#include <signal.h>
18#include <dirent.h>
96e80018 19#include <limits.h>
8a208943 20#include <stdbool.h>
fb31eb73 21#include <stdint.h>
3d3a2bb8 22
bfcda6ce 23#ifdef HAVE_LIBNUMA
4b68c31f 24#include <numa.h>
8a208943 25#include <numaif.h>
bfcda6ce 26#endif
3d3a2bb8 27
eae3c729 28#include <lttng/ust-utils.h>
3d3a2bb8 29
9d315d6d
MJ
30#include "common/macros.h"
31#include "common/ust-fd.h"
1be43539 32#include "common/compat/mmap.h"
3a81f31d
MD
33
/*
 * Ensure we have the required amount of space available by writing 0
 * into the entire buffer. Not doing so can trigger SIGBUS when going
 * beyond the available shm space.
 *
 * Writes `len` zero bytes to `fd` starting at its current offset, one
 * page at a time, retrying short interruptions (EINTR).
 *
 * Returns 0 on success, a negative value on error (-1 from a failed
 * sysconf()/write(), -ENOMEM on allocation failure).
 */
static
int zero_file(int fd, size_t len)
{
	ssize_t retlen;
	size_t written = 0;
	char *zeropage;
	long pagelen;
	int ret;

	pagelen = sysconf(_SC_PAGESIZE);
	if (pagelen < 0)
		return (int) pagelen;
	/* One zeroed page is reused as the write source for every chunk. */
	zeropage = calloc(pagelen, 1);
	if (!zeropage)
		return -ENOMEM;

	while (len > written) {
		/* Write at most one page per iteration. */
		size_t count = len - written;

		if (count > (size_t) pagelen)
			count = (size_t) pagelen;
		do {
			retlen = write(fd, zeropage, count);
			/*
			 * Retry on EINTR. Check retlen < 0 (not == -1UL):
			 * comparing a ssize_t against an unsigned constant
			 * relies on implementation-defined conversions.
			 */
		} while (retlen < 0 && errno == EINTR);
		if (retlen < 0) {
			ret = (int) retlen;
			goto error;
		}
		written += retlen;
	}
	ret = 0;
error:
	free(zeropage);
	return ret;
}
1d498196
MD
71
72struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
73{
74 struct shm_object_table *table;
75
76 table = zmalloc(sizeof(struct shm_object_table) +
77 max_nb_obj * sizeof(table->objects[0]));
74d48abe
MD
78 if (!table)
79 return NULL;
1d498196
MD
80 table->size = max_nb_obj;
81 return table;
82}
83
74d81a6c
MD
/*
 * Allocate the next object slot in the table, backed by the POSIX shm
 * file descriptor stream_fd.
 *
 * The object does NOT take ownership of stream_fd (shm_fd_ownership is
 * cleared); the caller keeps responsibility for closing it on the error
 * path. A wakeup pipe is created for the object; its fds are owned by
 * the object and closed on error.
 *
 * Returns the initialized shm_object, or NULL on error (invalid
 * stream_fd, full table, pipe/ftruncate/zero/fsync/mmap failure).
 */
static
struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
		size_t memory_map_size,
		int stream_fd)
{
	int shmfd, waitfd[2], ret, i;
	struct shm_object *obj;
	char *memory_map;

	if (stream_fd < 0)
		return NULL;
	if (table->allocated_len >= table->size)
		return NULL;
	obj = &table->objects[table->allocated_len];

	/* wait_fd: create pipe */
	ret = pipe2(waitfd, O_CLOEXEC);
	if (ret < 0) {
		PERROR("pipe");
		goto error_pipe;
	}
	/* The write end of the pipe needs to be non-blocking */
	ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}
	memcpy(obj->wait_fd, waitfd, sizeof(waitfd));

	/*
	 * Set POSIX shared memory object size
	 *
	 * First, use ftruncate() to set its size, some implementations won't
	 * allow writes past the size set by ftruncate.
	 * Then, use write() to fill it with zeros, this allows us to fully
	 * allocate it and detect a shortage of shm space without dealing with
	 * a SIGBUS.
	 */

	shmfd = stream_fd;
	ret = ftruncate(shmfd, memory_map_size);
	if (ret) {
		PERROR("ftruncate");
		goto error_ftruncate;
	}
	ret = zero_file(shmfd, memory_map_size);
	if (ret) {
		PERROR("zero_file");
		goto error_zero_file;
	}

	/*
	 * Also ensure the file metadata is synced with the storage by using
	 * fsync(2). Some platforms don't allow fsync on POSIX shm fds, ignore
	 * EINVAL accordingly.
	 */
	ret = fsync(shmfd);
	if (ret && errno != EINVAL) {
		PERROR("fsync");
		goto error_fsync;
	}
	/* The stream fd stays owned by the caller. */
	obj->shm_fd_ownership = 0;
	obj->shm_fd = shmfd;

	/* memory_map: mmap */
	memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
			MAP_SHARED | LTTNG_MAP_POPULATE, shmfd, 0);
	if (memory_map == MAP_FAILED) {
		PERROR("mmap");
		goto error_mmap;
	}
	obj->type = SHM_OBJECT_SHM;
	obj->memory_map = memory_map;
	obj->memory_map_size = memory_map_size;
	obj->allocated_len = 0;
	/* Slot is only consumed on full success. */
	obj->index = table->allocated_len++;

	return obj;

	/* All error paths below close both pipe ends; stream_fd is untouched. */
error_mmap:
error_fsync:
error_ftruncate:
error_zero_file:
error_fcntl:
	for (i = 0; i < 2; i++) {
		ret = close(waitfd[i]);
		if (ret) {
			PERROR("close");
			assert(0);
		}
	}
error_pipe:
	return NULL;
}
178
74d81a6c
MD
179static
180struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
181 size_t memory_map_size)
182{
183 struct shm_object *obj;
184 void *memory_map;
ff0f5728 185 int waitfd[2], i, ret;
74d81a6c
MD
186
187 if (table->allocated_len >= table->size)
188 return NULL;
189 obj = &table->objects[table->allocated_len];
190
191 memory_map = zmalloc(memory_map_size);
192 if (!memory_map)
193 goto alloc_error;
194
ff0f5728 195 /* wait_fd: create pipe */
8a8c2117 196 ret = pipe2(waitfd, O_CLOEXEC);
ff0f5728
MD
197 if (ret < 0) {
198 PERROR("pipe");
199 goto error_pipe;
200 }
ff0f5728
MD
201 /* The write end of the pipe needs to be non-blocking */
202 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
203 if (ret < 0) {
204 PERROR("fcntl");
205 goto error_fcntl;
206 }
207 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
208
209 /* no shm_fd */
74d81a6c 210 obj->shm_fd = -1;
5ea386c3 211 obj->shm_fd_ownership = 0;
74d81a6c
MD
212
213 obj->type = SHM_OBJECT_MEM;
214 obj->memory_map = memory_map;
215 obj->memory_map_size = memory_map_size;
216 obj->allocated_len = 0;
217 obj->index = table->allocated_len++;
218
219 return obj;
220
ff0f5728
MD
221error_fcntl:
222 for (i = 0; i < 2; i++) {
223 ret = close(waitfd[i]);
224 if (ret) {
225 PERROR("close");
226 assert(0);
227 }
228 }
229error_pipe:
230 free(memory_map);
74d81a6c
MD
231alloc_error:
232 return NULL;
233}
234
8a208943
MD
/*
 * libnuma prints errors on the console even for numa_available().
 * Work-around this limitation by using get_mempolicy() directly to
 * check whether the kernel supports mempolicy.
 */
#ifdef HAVE_LIBNUMA
static bool lttng_is_numa_available(void)
{
	int ret;

	/* get_mempolicy() fails with ENOSYS when the kernel lacks NUMA support. */
	ret = get_mempolicy(NULL, NULL, 0, NULL, 0);
	if (ret && errno == ENOSYS) {
		return false;
	}
	/*
	 * numa_available() returns -1 when NUMA is unusable and 0 on
	 * success, so test against -1; the previous "> 0" comparison was
	 * always false and disabled NUMA preference entirely.
	 */
	return numa_available() != -1;
}
#endif
252
c494c0f1 253#ifdef HAVE_LIBNUMA
74d81a6c
MD
254struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
255 size_t memory_map_size,
a9ff648c 256 enum shm_object_type type,
4b68c31f
MD
257 int stream_fd,
258 int cpu)
c494c0f1
MJ
259#else
260struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
261 size_t memory_map_size,
262 enum shm_object_type type,
263 int stream_fd,
264 int cpu __attribute__((unused)))
265#endif
74d81a6c 266{
4b68c31f 267 struct shm_object *shm_object;
bfcda6ce 268#ifdef HAVE_LIBNUMA
8a208943
MD
269 int oldnode = 0, node;
270 bool numa_avail;
4b68c31f 271
8a208943
MD
272 numa_avail = lttng_is_numa_available();
273 if (numa_avail) {
274 oldnode = numa_preferred();
275 if (cpu >= 0) {
276 node = numa_node_of_cpu(cpu);
277 if (node >= 0)
278 numa_set_preferred(node);
279 }
280 if (cpu < 0 || node < 0)
281 numa_set_localalloc();
4b68c31f 282 }
bfcda6ce 283#endif /* HAVE_LIBNUMA */
74d81a6c
MD
284 switch (type) {
285 case SHM_OBJECT_SHM:
4b68c31f 286 shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
5ea386c3 287 stream_fd);
4b68c31f 288 break;
74d81a6c 289 case SHM_OBJECT_MEM:
4b68c31f
MD
290 shm_object = _shm_object_table_alloc_mem(table, memory_map_size);
291 break;
74d81a6c
MD
292 default:
293 assert(0);
294 }
bfcda6ce 295#ifdef HAVE_LIBNUMA
8a208943
MD
296 if (numa_avail)
297 numa_set_preferred(oldnode);
bfcda6ce 298#endif /* HAVE_LIBNUMA */
4b68c31f 299 return shm_object;
74d81a6c
MD
300}
301
/*
 * Append an shm object received from a peer to the table (consumer
 * side), mapping the provided shm_fd and taking ownership of it
 * (shm_fd_ownership is set).
 *
 * Streams _must_ be received in sequential order: stream_nr + 1 must
 * equal the current allocated length (index 0 being the preceding
 * channel object), otherwise NULL is returned.
 *
 * NOTE(review): on the error paths below, shm_fd and wakeup_fd are not
 * closed — presumably the caller retains ownership on failure; confirm
 * against callers before changing.
 */
struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
		int shm_fd, int wakeup_fd, uint32_t stream_nr,
		size_t memory_map_size)
{
	struct shm_object *obj;
	char *memory_map;
	int ret;

	if (table->allocated_len >= table->size)
		return NULL;
	/* streams _must_ be received in sequential order, else fail. */
	if (stream_nr + 1 != table->allocated_len)
		return NULL;

	obj = &table->objects[table->allocated_len];

	/* wait_fd: set write end of the pipe. */
	obj->wait_fd[0] = -1; /* read end is unset */
	obj->wait_fd[1] = wakeup_fd;
	obj->shm_fd = shm_fd;
	/* This object is responsible for closing shm_fd at destroy time. */
	obj->shm_fd_ownership = 1;

	/* The write end of the pipe needs to be non-blocking */
	ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}

	/* memory_map: mmap */
	memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
			MAP_SHARED | LTTNG_MAP_POPULATE, shm_fd, 0);
	if (memory_map == MAP_FAILED) {
		PERROR("mmap");
		goto error_mmap;
	}
	obj->type = SHM_OBJECT_SHM;
	obj->memory_map = memory_map;
	obj->memory_map_size = memory_map_size;
	/* Appended objects arrive fully allocated. */
	obj->allocated_len = memory_map_size;
	obj->index = table->allocated_len++;

	return obj;

error_fcntl:
error_mmap:
	return NULL;
}
350
74d81a6c
MD
351/*
352 * Passing ownership of mem to object.
353 */
354struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
ff0f5728 355 void *mem, size_t memory_map_size, int wakeup_fd)
74d81a6c
MD
356{
357 struct shm_object *obj;
ff0f5728 358 int ret;
74d81a6c
MD
359
360 if (table->allocated_len >= table->size)
361 return NULL;
362 obj = &table->objects[table->allocated_len];
363
ff0f5728
MD
364 obj->wait_fd[0] = -1; /* read end is unset */
365 obj->wait_fd[1] = wakeup_fd;
74d81a6c 366 obj->shm_fd = -1;
5ea386c3 367 obj->shm_fd_ownership = 0;
74d81a6c 368
ff0f5728
MD
369 /* The write end of the pipe needs to be non-blocking */
370 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
371 if (ret < 0) {
372 PERROR("fcntl");
373 goto error_fcntl;
374 }
375
74d81a6c
MD
376 obj->type = SHM_OBJECT_MEM;
377 obj->memory_map = mem;
378 obj->memory_map_size = memory_map_size;
379 obj->allocated_len = memory_map_size;
380 obj->index = table->allocated_len++;
381
382 return obj;
ff0f5728
MD
383
384error_fcntl:
385 return NULL;
74d81a6c
MD
386}
387
1d498196 388static
6548fca4 389void shmp_object_destroy(struct shm_object *obj, int consumer)
1d498196 390{
74d81a6c
MD
391 switch (obj->type) {
392 case SHM_OBJECT_SHM:
393 {
394 int ret, i;
1d498196 395
7a784989
MD
396 ret = munmap(obj->memory_map, obj->memory_map_size);
397 if (ret) {
398 PERROR("umnmap");
399 assert(0);
400 }
6548fca4 401
5ea386c3 402 if (obj->shm_fd_ownership) {
6548fca4
MD
403 /* Delete FDs only if called from app (not consumer). */
404 if (!consumer) {
405 lttng_ust_lock_fd_tracker();
406 ret = close(obj->shm_fd);
407 if (!ret) {
408 lttng_ust_delete_fd_from_tracker(obj->shm_fd);
409 } else {
410 PERROR("close");
411 assert(0);
412 }
413 lttng_ust_unlock_fd_tracker();
414 } else {
415 ret = close(obj->shm_fd);
416 if (ret) {
417 PERROR("close");
418 assert(0);
419 }
a9ff648c
MD
420 }
421 }
74d81a6c
MD
422 for (i = 0; i < 2; i++) {
423 if (obj->wait_fd[i] < 0)
424 continue;
6548fca4
MD
425 if (!consumer) {
426 lttng_ust_lock_fd_tracker();
427 ret = close(obj->wait_fd[i]);
428 if (!ret) {
429 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
430 } else {
431 PERROR("close");
432 assert(0);
433 }
434 lttng_ust_unlock_fd_tracker();
435 } else {
436 ret = close(obj->wait_fd[i]);
437 if (ret) {
438 PERROR("close");
439 assert(0);
440 }
74d81a6c 441 }
1d498196 442 }
74d81a6c
MD
443 break;
444 }
445 case SHM_OBJECT_MEM:
ff0f5728
MD
446 {
447 int ret, i;
448
449 for (i = 0; i < 2; i++) {
450 if (obj->wait_fd[i] < 0)
451 continue;
6548fca4
MD
452 if (!consumer) {
453 lttng_ust_lock_fd_tracker();
454 ret = close(obj->wait_fd[i]);
455 if (!ret) {
456 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
457 } else {
458 PERROR("close");
459 assert(0);
460 }
461 lttng_ust_unlock_fd_tracker();
462 } else {
463 ret = close(obj->wait_fd[i]);
464 if (ret) {
465 PERROR("close");
466 assert(0);
467 }
ff0f5728
MD
468 }
469 }
74d81a6c
MD
470 free(obj->memory_map);
471 break;
ff0f5728 472 }
74d81a6c
MD
473 default:
474 assert(0);
1d498196
MD
475 }
476}
477
6548fca4 478void shm_object_table_destroy(struct shm_object_table *table, int consumer)
1d498196
MD
479{
480 int i;
481
482 for (i = 0; i < table->allocated_len; i++)
6548fca4 483 shmp_object_destroy(&table->objects[i], consumer);
1d498196
MD
484 free(table);
485}
486
487/*
488 * zalloc_shm - allocate memory within a shm object.
489 *
490 * Shared memory is already zeroed by shmget.
491 * *NOT* multithread-safe (should be protected by mutex).
492 * Returns a -1, -1 tuple on error.
493 */
494struct shm_ref zalloc_shm(struct shm_object *obj, size_t len)
495{
496 struct shm_ref ref;
497 struct shm_ref shm_ref_error = { -1, -1 };
498
499 if (obj->memory_map_size - obj->allocated_len < len)
500 return shm_ref_error;
501 ref.index = obj->index;
502 ref.offset = obj->allocated_len;
503 obj->allocated_len += len;
504 return ref;
505}
506
507void align_shm(struct shm_object *obj, size_t align)
508{
b72687b8 509 size_t offset_len = lttng_ust_offset_align(obj->allocated_len, align);
1d498196
MD
510 obj->allocated_len += offset_len;
511}