Fix: set FD_CLOEXEC on incoming FDs.
[lttng-ust.git] / libringbuffer / shm.c
CommitLineData
1d498196
MD
1/*
2 * libringbuffer/shm.c
3 *
e92f3e28 4 * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
1d498196 5 *
e92f3e28
MD
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; only
9 * version 2.1 of the License.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1d498196
MD
19 */
20
3fbec7dc 21#define _LGPL_SOURCE
bfcda6ce 22#include <config.h>
1d498196
MD
23#include "shm.h"
24#include <unistd.h>
25#include <fcntl.h>
26#include <sys/mman.h>
a9ff648c 27#include <sys/types.h>
1d498196
MD
28#include <sys/stat.h> /* For mode constants */
29#include <fcntl.h> /* For O_* constants */
30#include <assert.h>
8da6cd6d
MD
31#include <stdio.h>
32#include <signal.h>
33#include <dirent.h>
4318ae1b 34#include <lttng/align.h>
96e80018 35#include <limits.h>
8a208943 36#include <stdbool.h>
fb31eb73 37#include <stdint.h>
bfcda6ce 38#ifdef HAVE_LIBNUMA
4b68c31f 39#include <numa.h>
8a208943 40#include <numaif.h>
bfcda6ce 41#endif
3a81f31d 42#include <helper.h>
6548fca4 43#include <ust-fd.h>
4d4838ba 44#include "mmap.h"
3a81f31d
MD
45
/*
 * zero_file - write @len zero bytes to @fd.
 *
 * Ensure we have the required amount of space available by writing 0
 * into the entire buffer. Not doing so can trigger SIGBUS when going
 * beyond the available shm space.
 *
 * Data is written from the current file offset of @fd, at most one
 * page per write(2) call. Writes interrupted by a signal (EINTR) are
 * retried.
 *
 * Returns 0 on success, -ENOMEM if the zero page cannot be allocated,
 * or the negative value returned by the failing sysconf(3)/write(2)
 * call otherwise.
 */
static
int zero_file(int fd, size_t len)
{
	ssize_t retlen;
	size_t written = 0;
	size_t chunk;
	char *zeropage;
	long pagelen;
	int ret;

	pagelen = sysconf(_SC_PAGESIZE);
	if (pagelen < 0)
		return (int) pagelen;
	zeropage = calloc(pagelen, 1);
	if (!zeropage)
		return -ENOMEM;

	while (len > written) {
		/* Never write more than one page, nor more than what is left. */
		chunk = len - written;
		if (chunk > (size_t) pagelen)
			chunk = (size_t) pagelen;
		do {
			retlen = write(fd, zeropage, chunk);
			/*
			 * Signed comparison: do not rely on the implicit
			 * conversion of retlen to an unsigned type.
			 */
		} while (retlen < 0 && errno == EINTR);
		if (retlen < 0) {
			ret = (int) retlen;
			goto error;
		}
		written += (size_t) retlen;
	}
	ret = 0;
error:
	free(zeropage);
	return ret;
}
1d498196
MD
83
84struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
85{
86 struct shm_object_table *table;
87
88 table = zmalloc(sizeof(struct shm_object_table) +
89 max_nb_obj * sizeof(table->objects[0]));
74d48abe
MD
90 if (!table)
91 return NULL;
1d498196
MD
92 table->size = max_nb_obj;
93 return table;
94}
95
74d81a6c
MD
96static
97struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
a9ff648c 98 size_t memory_map_size,
5ea386c3 99 int stream_fd)
1d498196 100{
5ea386c3 101 int shmfd, waitfd[2], ret, i;
1d498196
MD
102 struct shm_object *obj;
103 char *memory_map;
104
5ea386c3
MD
105 if (stream_fd < 0)
106 return NULL;
1d498196
MD
107 if (table->allocated_len >= table->size)
108 return NULL;
7a9c21bd 109 obj = &table->objects[table->allocated_len];
1d498196
MD
110
111 /* wait_fd: create pipe */
112 ret = pipe(waitfd);
113 if (ret < 0) {
114 PERROR("pipe");
115 goto error_pipe;
116 }
117 for (i = 0; i < 2; i++) {
118 ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
119 if (ret < 0) {
120 PERROR("fcntl");
121 goto error_fcntl;
122 }
123 }
5d61a504
MD
124 /* The write end of the pipe needs to be non-blocking */
125 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
126 if (ret < 0) {
127 PERROR("fcntl");
128 goto error_fcntl;
129 }
7a9c21bd 130 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
1d498196 131
5ea386c3 132 /* create shm */
a9ff648c 133
5ea386c3 134 shmfd = stream_fd;
3a81f31d
MD
135 ret = zero_file(shmfd, memory_map_size);
136 if (ret) {
137 PERROR("zero_file");
138 goto error_zero_file;
139 }
1d498196
MD
140 ret = ftruncate(shmfd, memory_map_size);
141 if (ret) {
142 PERROR("ftruncate");
143 goto error_ftruncate;
144 }
d0f6cf57
MD
145 /*
146 * Also ensure the file metadata is synced with the storage by using
147 * fsync(2).
148 */
149 ret = fsync(shmfd);
150 if (ret) {
151 PERROR("fsync");
152 goto error_fsync;
153 }
5ea386c3 154 obj->shm_fd_ownership = 0;
1d498196
MD
155 obj->shm_fd = shmfd;
156
157 /* memory_map: mmap */
158 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
4d4838ba 159 MAP_SHARED | LTTNG_MAP_POPULATE, shmfd, 0);
1d498196
MD
160 if (memory_map == MAP_FAILED) {
161 PERROR("mmap");
162 goto error_mmap;
163 }
74d81a6c 164 obj->type = SHM_OBJECT_SHM;
1d498196
MD
165 obj->memory_map = memory_map;
166 obj->memory_map_size = memory_map_size;
167 obj->allocated_len = 0;
dc613eb9 168 obj->index = table->allocated_len++;
7a9c21bd 169
1d498196
MD
170 return obj;
171
172error_mmap:
d0f6cf57 173error_fsync:
1d498196 174error_ftruncate:
3a81f31d 175error_zero_file:
1d498196
MD
176error_fcntl:
177 for (i = 0; i < 2; i++) {
178 ret = close(waitfd[i]);
179 if (ret) {
180 PERROR("close");
181 assert(0);
182 }
183 }
184error_pipe:
1d498196 185 return NULL;
1d498196
MD
186}
187
74d81a6c
MD
188static
189struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
190 size_t memory_map_size)
191{
192 struct shm_object *obj;
193 void *memory_map;
ff0f5728 194 int waitfd[2], i, ret;
74d81a6c
MD
195
196 if (table->allocated_len >= table->size)
197 return NULL;
198 obj = &table->objects[table->allocated_len];
199
200 memory_map = zmalloc(memory_map_size);
201 if (!memory_map)
202 goto alloc_error;
203
ff0f5728
MD
204 /* wait_fd: create pipe */
205 ret = pipe(waitfd);
206 if (ret < 0) {
207 PERROR("pipe");
208 goto error_pipe;
209 }
210 for (i = 0; i < 2; i++) {
211 ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
212 if (ret < 0) {
213 PERROR("fcntl");
214 goto error_fcntl;
215 }
216 }
217 /* The write end of the pipe needs to be non-blocking */
218 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
219 if (ret < 0) {
220 PERROR("fcntl");
221 goto error_fcntl;
222 }
223 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
224
225 /* no shm_fd */
74d81a6c 226 obj->shm_fd = -1;
5ea386c3 227 obj->shm_fd_ownership = 0;
74d81a6c
MD
228
229 obj->type = SHM_OBJECT_MEM;
230 obj->memory_map = memory_map;
231 obj->memory_map_size = memory_map_size;
232 obj->allocated_len = 0;
233 obj->index = table->allocated_len++;
234
235 return obj;
236
ff0f5728
MD
237error_fcntl:
238 for (i = 0; i < 2; i++) {
239 ret = close(waitfd[i]);
240 if (ret) {
241 PERROR("close");
242 assert(0);
243 }
244 }
245error_pipe:
246 free(memory_map);
74d81a6c
MD
247alloc_error:
248 return NULL;
249}
250
8a208943
MD
/*
 * libnuma prints errors on the console even for numa_available().
 * Work-around this limitation by using get_mempolicy() directly to
 * check whether the kernel supports mempolicy.
 *
 * Returns false when get_mempolicy() fails with ENOSYS. Otherwise the
 * answer comes from numa_available(), which per numa(3) returns -1 when
 * the NUMA API cannot be used; any other return value means it is
 * available.
 */
#ifdef HAVE_LIBNUMA
static bool lttng_is_numa_available(void)
{
	int ret;

	ret = get_mempolicy(NULL, NULL, 0, NULL, 0);
	if (ret && errno == ENOSYS) {
		return false;
	}
	/* -1 is the only "unavailable" value; success is not positive. */
	return numa_available() != -1;
}
#endif
268
74d81a6c
MD
269struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
270 size_t memory_map_size,
a9ff648c 271 enum shm_object_type type,
4b68c31f
MD
272 int stream_fd,
273 int cpu)
74d81a6c 274{
4b68c31f 275 struct shm_object *shm_object;
bfcda6ce 276#ifdef HAVE_LIBNUMA
8a208943
MD
277 int oldnode = 0, node;
278 bool numa_avail;
4b68c31f 279
8a208943
MD
280 numa_avail = lttng_is_numa_available();
281 if (numa_avail) {
282 oldnode = numa_preferred();
283 if (cpu >= 0) {
284 node = numa_node_of_cpu(cpu);
285 if (node >= 0)
286 numa_set_preferred(node);
287 }
288 if (cpu < 0 || node < 0)
289 numa_set_localalloc();
4b68c31f 290 }
bfcda6ce 291#endif /* HAVE_LIBNUMA */
74d81a6c
MD
292 switch (type) {
293 case SHM_OBJECT_SHM:
4b68c31f 294 shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
5ea386c3 295 stream_fd);
4b68c31f 296 break;
74d81a6c 297 case SHM_OBJECT_MEM:
4b68c31f
MD
298 shm_object = _shm_object_table_alloc_mem(table, memory_map_size);
299 break;
74d81a6c
MD
300 default:
301 assert(0);
302 }
bfcda6ce 303#ifdef HAVE_LIBNUMA
8a208943
MD
304 if (numa_avail)
305 numa_set_preferred(oldnode);
bfcda6ce 306#endif /* HAVE_LIBNUMA */
4b68c31f 307 return shm_object;
74d81a6c
MD
308}
309
310struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
311 int shm_fd, int wakeup_fd, uint32_t stream_nr,
312 size_t memory_map_size)
193183fb
MD
313{
314 struct shm_object *obj;
315 char *memory_map;
74d81a6c 316 int ret;
193183fb
MD
317
318 if (table->allocated_len >= table->size)
319 return NULL;
74d81a6c
MD
320 /* streams _must_ be received in sequential order, else fail. */
321 if (stream_nr + 1 != table->allocated_len)
322 return NULL;
323
193183fb
MD
324 obj = &table->objects[table->allocated_len];
325
74d81a6c
MD
326 /* wait_fd: set write end of the pipe. */
327 obj->wait_fd[0] = -1; /* read end is unset */
328 obj->wait_fd[1] = wakeup_fd;
193183fb 329 obj->shm_fd = shm_fd;
5ea386c3 330 obj->shm_fd_ownership = 1;
193183fb 331
74d81a6c
MD
332 /* The write end of the pipe needs to be non-blocking */
333 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
334 if (ret < 0) {
335 PERROR("fcntl");
336 goto error_fcntl;
337 }
338
193183fb
MD
339 /* memory_map: mmap */
340 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
4d4838ba 341 MAP_SHARED | LTTNG_MAP_POPULATE, shm_fd, 0);
193183fb
MD
342 if (memory_map == MAP_FAILED) {
343 PERROR("mmap");
344 goto error_mmap;
345 }
74d81a6c 346 obj->type = SHM_OBJECT_SHM;
193183fb
MD
347 obj->memory_map = memory_map;
348 obj->memory_map_size = memory_map_size;
349 obj->allocated_len = memory_map_size;
350 obj->index = table->allocated_len++;
351
352 return obj;
353
74d81a6c 354error_fcntl:
193183fb
MD
355error_mmap:
356 return NULL;
357}
358
74d81a6c
MD
359/*
360 * Passing ownership of mem to object.
361 */
362struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
ff0f5728 363 void *mem, size_t memory_map_size, int wakeup_fd)
74d81a6c
MD
364{
365 struct shm_object *obj;
ff0f5728 366 int ret;
74d81a6c
MD
367
368 if (table->allocated_len >= table->size)
369 return NULL;
370 obj = &table->objects[table->allocated_len];
371
ff0f5728
MD
372 obj->wait_fd[0] = -1; /* read end is unset */
373 obj->wait_fd[1] = wakeup_fd;
74d81a6c 374 obj->shm_fd = -1;
5ea386c3 375 obj->shm_fd_ownership = 0;
74d81a6c 376
ff0f5728
MD
377 ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
378 if (ret < 0) {
379 PERROR("fcntl");
380 goto error_fcntl;
381 }
382 /* The write end of the pipe needs to be non-blocking */
383 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
384 if (ret < 0) {
385 PERROR("fcntl");
386 goto error_fcntl;
387 }
388
74d81a6c
MD
389 obj->type = SHM_OBJECT_MEM;
390 obj->memory_map = mem;
391 obj->memory_map_size = memory_map_size;
392 obj->allocated_len = memory_map_size;
393 obj->index = table->allocated_len++;
394
395 return obj;
ff0f5728
MD
396
397error_fcntl:
398 return NULL;
74d81a6c
MD
399}
400
1d498196 401static
6548fca4 402void shmp_object_destroy(struct shm_object *obj, int consumer)
1d498196 403{
74d81a6c
MD
404 switch (obj->type) {
405 case SHM_OBJECT_SHM:
406 {
407 int ret, i;
1d498196 408
7a784989
MD
409 ret = munmap(obj->memory_map, obj->memory_map_size);
410 if (ret) {
411 PERROR("umnmap");
412 assert(0);
413 }
6548fca4 414
5ea386c3 415 if (obj->shm_fd_ownership) {
6548fca4
MD
416 /* Delete FDs only if called from app (not consumer). */
417 if (!consumer) {
418 lttng_ust_lock_fd_tracker();
419 ret = close(obj->shm_fd);
420 if (!ret) {
421 lttng_ust_delete_fd_from_tracker(obj->shm_fd);
422 } else {
423 PERROR("close");
424 assert(0);
425 }
426 lttng_ust_unlock_fd_tracker();
427 } else {
428 ret = close(obj->shm_fd);
429 if (ret) {
430 PERROR("close");
431 assert(0);
432 }
a9ff648c
MD
433 }
434 }
74d81a6c
MD
435 for (i = 0; i < 2; i++) {
436 if (obj->wait_fd[i] < 0)
437 continue;
6548fca4
MD
438 if (!consumer) {
439 lttng_ust_lock_fd_tracker();
440 ret = close(obj->wait_fd[i]);
441 if (!ret) {
442 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
443 } else {
444 PERROR("close");
445 assert(0);
446 }
447 lttng_ust_unlock_fd_tracker();
448 } else {
449 ret = close(obj->wait_fd[i]);
450 if (ret) {
451 PERROR("close");
452 assert(0);
453 }
74d81a6c 454 }
1d498196 455 }
74d81a6c
MD
456 break;
457 }
458 case SHM_OBJECT_MEM:
ff0f5728
MD
459 {
460 int ret, i;
461
462 for (i = 0; i < 2; i++) {
463 if (obj->wait_fd[i] < 0)
464 continue;
6548fca4
MD
465 if (!consumer) {
466 lttng_ust_lock_fd_tracker();
467 ret = close(obj->wait_fd[i]);
468 if (!ret) {
469 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
470 } else {
471 PERROR("close");
472 assert(0);
473 }
474 lttng_ust_unlock_fd_tracker();
475 } else {
476 ret = close(obj->wait_fd[i]);
477 if (ret) {
478 PERROR("close");
479 assert(0);
480 }
ff0f5728
MD
481 }
482 }
74d81a6c
MD
483 free(obj->memory_map);
484 break;
ff0f5728 485 }
74d81a6c
MD
486 default:
487 assert(0);
1d498196
MD
488 }
489}
490
6548fca4 491void shm_object_table_destroy(struct shm_object_table *table, int consumer)
1d498196
MD
492{
493 int i;
494
495 for (i = 0; i < table->allocated_len; i++)
6548fca4 496 shmp_object_destroy(&table->objects[i], consumer);
1d498196
MD
497 free(table);
498}
499
500/*
501 * zalloc_shm - allocate memory within a shm object.
502 *
503 * Shared memory is already zeroed by shmget.
504 * *NOT* multithread-safe (should be protected by mutex).
505 * Returns a -1, -1 tuple on error.
506 */
507struct shm_ref zalloc_shm(struct shm_object *obj, size_t len)
508{
509 struct shm_ref ref;
510 struct shm_ref shm_ref_error = { -1, -1 };
511
512 if (obj->memory_map_size - obj->allocated_len < len)
513 return shm_ref_error;
514 ref.index = obj->index;
515 ref.offset = obj->allocated_len;
516 obj->allocated_len += len;
517 return ref;
518}
519
520void align_shm(struct shm_object *obj, size_t align)
521{
522 size_t offset_len = offset_align(obj->allocated_len, align);
523 obj->allocated_len += offset_len;
524}
This page took 0.053286 seconds and 4 git commands to generate.