Cleanup: Move lib/ringbuffer/ headers to include/ringbuffer/
lib/ringbuffer/ring_buffer_backend.c
index 8e1a796a55071abb6f388b2c9ed11cc34b7ed845..d6547d7de9c9cef6f45a6fb426c864a1b8ee3347 100644
@@ -1,21 +1,8 @@
-/*
+/* SPDX-License-Identifier: (GPL-2.0-only OR LGPL-2.1-only)
+ *
  * ring_buffer_backend.c
  *
  * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; only
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 
-#include "../../wrapper/vmalloc.h"     /* for wrapper_vmalloc_sync_all() */
-#include "../../wrapper/ringbuffer/config.h"
-#include "../../wrapper/ringbuffer/backend.h"
-#include "../../wrapper/ringbuffer/frontend.h"
+#include <wrapper/mm.h>
+#include <wrapper/vmalloc.h>   /* for wrapper_vmalloc_sync_mappings() */
+#include <ringbuffer/config.h>
+#include <ringbuffer/backend.h>
+#include <ringbuffer/frontend.h>
 
 /**
  * lib_ring_buffer_backend_allocate - allocate a channel buffer
@@ -52,10 +41,28 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
        unsigned long subbuf_size, mmap_offset = 0;
        unsigned long num_subbuf_alloc;
        struct page **pages;
-       void **virt;
        unsigned long i;
 
        num_pages = size >> PAGE_SHIFT;
+
+       /*
+        * Verify that there are enough free pages available on the system
+        * for the current allocation request.
+        * wrapper_check_enough_free_pages() uses si_mem_available() when
+        * available and returns whether there should be enough free pages
+        * based on the current estimate.
+        */
+       if (!wrapper_check_enough_free_pages(num_pages))
+               goto not_enough_pages;
+
+       /*
+        * Set the current user thread as the first target of the OOM killer.
+        * If the estimate returned by si_mem_available() was off, and we do
+        * end up running out of memory because of this buffer allocation, we
+        * want to kill the offending app first.
+        */
+       wrapper_set_current_oom_origin();
+
        num_pages_per_subbuf = num_pages >> get_count_order(num_subbuf);
        subbuf_size = chanb->subbuf_size;
        num_subbuf_alloc = num_subbuf;
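The allocation path now starts with a free-memory sanity check and marks the calling thread as the preferred OOM-killer victim. Neither wrapper is defined in this file; the sketch below shows what wrapper/mm.h plausibly provides, assuming it builds on si_mem_available() (Linux 4.6+) and on set_current_oom_origin()/clear_current_oom_origin() from <linux/oom.h>. The bodies are assumptions, not the actual lttng-modules code.

/* Hypothetical sketch of the wrapper/mm.h helpers used above; not the actual wrappers. */
#include <linux/mm.h>
#include <linux/oom.h>
#include <linux/version.h>

static inline bool wrapper_check_enough_free_pages(unsigned long num_pages)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0))
	/* si_mem_available() estimates how many pages are allocatable without swapping. */
	return si_mem_available() >= num_pages;
#else
	/* No usable estimate on older kernels: optimistically allow the allocation. */
	return true;
#endif
}

static inline void wrapper_set_current_oom_origin(void)
{
	/* Flag the current task as the first target of the OOM killer. */
	set_current_oom_origin();
}

static inline void wrapper_clear_current_oom_origin(void)
{
	clear_current_oom_origin();
}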
@@ -65,53 +72,49 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
                num_subbuf_alloc++;
        }
 
-       pages = kmalloc_node(ALIGN(sizeof(*pages) * num_pages,
+       pages = vmalloc_node(ALIGN(sizeof(*pages) * num_pages,
                                   1 << INTERNODE_CACHE_SHIFT),
-                       GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+                       cpu_to_node(max(bufb->cpu, 0)));
        if (unlikely(!pages))
                goto pages_error;
 
-       virt = kmalloc_node(ALIGN(sizeof(*virt) * num_pages,
-                                 1 << INTERNODE_CACHE_SHIFT),
-                       GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
-       if (unlikely(!virt))
-               goto virt_error;
-
-       bufb->array = kmalloc_node(ALIGN(sizeof(*bufb->array)
+       bufb->array = lttng_kvmalloc_node(ALIGN(sizeof(*bufb->array)
                                         * num_subbuf_alloc,
                                  1 << INTERNODE_CACHE_SHIFT),
-                       GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+                       GFP_KERNEL | __GFP_NOWARN,
+                       cpu_to_node(max(bufb->cpu, 0)));
        if (unlikely(!bufb->array))
                goto array_error;
 
        for (i = 0; i < num_pages; i++) {
                pages[i] = alloc_pages_node(cpu_to_node(max(bufb->cpu, 0)),
-                                           GFP_KERNEL | __GFP_ZERO, 0);
+                               GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 0);
                if (unlikely(!pages[i]))
                        goto depopulate;
-               virt[i] = page_address(pages[i]);
        }
        bufb->num_pages_per_subbuf = num_pages_per_subbuf;
 
        /* Allocate backend pages array elements */
        for (i = 0; i < num_subbuf_alloc; i++) {
                bufb->array[i] =
-                       kzalloc_node(ALIGN(
+                       lttng_kvzalloc_node(ALIGN(
                                sizeof(struct lib_ring_buffer_backend_pages) +
                                sizeof(struct lib_ring_buffer_backend_page)
                                * num_pages_per_subbuf,
                                1 << INTERNODE_CACHE_SHIFT),
-                               GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+                               GFP_KERNEL | __GFP_NOWARN,
+                               cpu_to_node(max(bufb->cpu, 0)));
                if (!bufb->array[i])
                        goto free_array;
        }
 
        /* Allocate write-side subbuffer table */
-       bufb->buf_wsb = kzalloc_node(ALIGN(
+       bufb->buf_wsb = lttng_kvzalloc_node(ALIGN(
                                sizeof(struct lib_ring_buffer_backend_subbuffer)
                                * num_subbuf,
                                1 << INTERNODE_CACHE_SHIFT),
-                               GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+                               GFP_KERNEL | __GFP_NOWARN,
+                               cpu_to_node(max(bufb->cpu, 0)));
        if (unlikely(!bufb->buf_wsb))
                goto free_array;
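The backend arrays are now allocated with lttng_kvmalloc_node()/lttng_kvzalloc_node() and freed with lttng_kvfree(), so that large allocations can fall back to vmalloc instead of failing on fragmented physical memory; __GFP_NOWARN suppresses allocation-failure warnings that the error path already handles. The wrappers themselves live in wrapper/vmalloc.h and are not part of this diff; below is a sketch of the usual kvmalloc-style pattern they are expected to follow, assuming a kernel that provides kvmalloc_node()/kvfree() (4.12+).

/* Hypothetical sketch of the allocation wrappers; assumes kvmalloc_node()/kvfree() (Linux >= 4.12). */
#include <linux/mm.h>
#include <linux/slab.h>

static inline void *lttng_kvmalloc_node(unsigned long size, gfp_t flags, int node)
{
	/* Tries kmalloc first, transparently falls back to vmalloc for large sizes. */
	return kvmalloc_node(size, flags, node);
}

static inline void *lttng_kvzalloc_node(unsigned long size, gfp_t flags, int node)
{
	return lttng_kvmalloc_node(size, flags | __GFP_ZERO, node);
}

static inline void lttng_kvfree(const void *addr)
{
	/* kvfree() accepts both kmalloc'd and vmalloc'd pointers. */
	kvfree(addr);
}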
 
@@ -125,12 +128,22 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
        else
                bufb->buf_rsb.id = subbuffer_id(config, 0, 1, 0);
 
+       /* Allocate subbuffer packet counter table */
+       bufb->buf_cnt = lttng_kvzalloc_node(ALIGN(
+                               sizeof(struct lib_ring_buffer_backend_counts)
+                               * num_subbuf,
+                               1 << INTERNODE_CACHE_SHIFT),
+                       GFP_KERNEL | __GFP_NOWARN,
+                       cpu_to_node(max(bufb->cpu, 0)));
+       if (unlikely(!bufb->buf_cnt))
+               goto free_wsb;
+
        /* Assign pages to page index */
        for (i = 0; i < num_subbuf_alloc; i++) {
                for (j = 0; j < num_pages_per_subbuf; j++) {
                        CHAN_WARN_ON(chanb, page_idx > num_pages);
-                       bufb->array[i]->p[j].virt = virt[page_idx];
-                       bufb->array[i]->p[j].page = pages[page_idx];
+                       bufb->array[i]->p[j].virt = page_address(pages[page_idx]);
+                       bufb->array[i]->p[j].pfn = page_to_pfn(pages[page_idx]);
                        page_idx++;
                }
                if (config->output == RING_BUFFER_MMAP) {
@@ -143,24 +156,26 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
         * If kmalloc ever uses vmalloc underneath, make sure the buffer pages
         * will not fault.
         */
-       wrapper_vmalloc_sync_all();
-       kfree(virt);
-       kfree(pages);
+       wrapper_vmalloc_sync_mappings();
+       wrapper_clear_current_oom_origin();
+       vfree(pages);
        return 0;
 
+free_wsb:
+       lttng_kvfree(bufb->buf_wsb);
 free_array:
        for (i = 0; (i < num_subbuf_alloc && bufb->array[i]); i++)
-               kfree(bufb->array[i]);
+               lttng_kvfree(bufb->array[i]);
 depopulate:
        /* Free all allocated pages */
        for (i = 0; (i < num_pages && pages[i]); i++)
                __free_page(pages[i]);
-       kfree(bufb->array);
+       lttng_kvfree(bufb->array);
 array_error:
-       kfree(virt);
-virt_error:
-       kfree(pages);
+       vfree(pages);
 pages_error:
+       wrapper_clear_current_oom_origin();
+not_enough_pages:
        return -ENOMEM;
 }
 
@@ -186,13 +201,14 @@ void lib_ring_buffer_backend_free(struct lib_ring_buffer_backend *bufb)
        if (chanb->extra_reader_sb)
                num_subbuf_alloc++;
 
-       kfree(bufb->buf_wsb);
+       lttng_kvfree(bufb->buf_wsb);
+       lttng_kvfree(bufb->buf_cnt);
        for (i = 0; i < num_subbuf_alloc; i++) {
                for (j = 0; j < bufb->num_pages_per_subbuf; j++)
-                       __free_page(bufb->array[i]->p[j].page);
-               kfree(bufb->array[i]);
+                       __free_page(pfn_to_page(bufb->array[i]->p[j].pfn));
+               lttng_kvfree(bufb->array[i]);
        }
-       kfree(bufb->array);
+       lttng_kvfree(bufb->array);
        bufb->allocated = 0;
 }
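Backend page descriptors now record the page frame number (pfn) instead of a struct page pointer: the allocation path stores page_to_pfn() and the free path recovers the page with pfn_to_page(), while the kernel virtual address is still cached separately for fast copies. The two forms are interchangeable, as this generic illustration (not part of the patch) shows:

/* Generic pfn <-> struct page round trip; illustration only, not from this patch. */
#include <linux/mm.h>

static void pfn_roundtrip_example(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);	/* stored at allocation time */
	struct page *same = pfn_to_page(pfn);	/* recovered at free time */
	void *virt = page_address(page);	/* cached for fast buffer writes */

	WARN_ON(same != page || virt != page_address(same));
}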
 
@@ -243,7 +259,42 @@ void channel_backend_reset(struct channel_backend *chanb)
        chanb->start_tsc = config->cb.ring_buffer_clock_read(chan);
 }
 
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+
+/*
+ * No need to implement a "dead" callback to do a buffer switch here,
+ * because it will happen when tracing is stopped, or will be done by the
+ * switch timer CPU DEAD callback.
+ * We don't free buffers when CPUs go away, because it would make trace
+ * data vanish, which is unwanted.
+ */
+int lttng_cpuhp_rb_backend_prepare(unsigned int cpu,
+               struct lttng_cpuhp_node *node)
+{
+       struct channel_backend *chanb = container_of(node,
+                       struct channel_backend, cpuhp_prepare);
+       const struct lib_ring_buffer_config *config = &chanb->config;
+       struct lib_ring_buffer *buf;
+       int ret;
+
+       CHAN_WARN_ON(chanb, config->alloc == RING_BUFFER_ALLOC_GLOBAL);
+
+       buf = per_cpu_ptr(chanb->buf, cpu);
+       ret = lib_ring_buffer_create(buf, chanb, cpu);
+       if (ret) {
+               printk(KERN_ERR
+                 "ring_buffer_cpu_hp_callback: cpu %d "
+                 "buffer creation failed\n", cpu);
+               return ret;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(lttng_cpuhp_rb_backend_prepare);
+
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
 #ifdef CONFIG_HOTPLUG_CPU
+
 /**
  *     lib_ring_buffer_cpu_hp_callback - CPU hotplug callback
  *     @nb: notifier block
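lttng_cpuhp_rb_backend_prepare() above is a multi-instance "prepare" callback for the CPU hotplug state machine introduced in Linux 4.10. The lttng_rb_hp_prepare state it attaches to is registered elsewhere in lttng-modules, outside this file; a minimal sketch of how such a dynamic multi-instance state is typically set up follows. The trampoline, state name string and init function are illustrative assumptions, as is the assumption that struct lttng_cpuhp_node embeds a struct hlist_node member named node.

/*
 * Hypothetical registration of the dynamic multi-instance prepare state
 * (Linux >= 4.10); not the actual lttng-modules registration code.
 */
#include <linux/cpuhotplug.h>
#include <linux/kernel.h>

static enum cpuhp_state lttng_rb_hp_prepare;

/* Assumes struct lttng_cpuhp_node embeds a struct hlist_node named 'node'. */
static int example_rb_prepare_cb(unsigned int cpu, struct hlist_node *node)
{
	struct lttng_cpuhp_node *lttng_node =
		container_of(node, struct lttng_cpuhp_node, node);

	return lttng_cpuhp_rb_backend_prepare(cpu, lttng_node);
}

static int __init example_rb_cpuhp_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_BP_PREPARE_DYN,
			"lib/ringbuffer/backend:prepare",	/* illustrative name */
			example_rb_prepare_cb,
			NULL);	/* no teardown: buffers are kept on CPU removal */
	if (ret < 0)
		return ret;
	lttng_rb_hp_prepare = ret;	/* dynamic state number */
	return 0;
}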
@@ -287,8 +338,11 @@ int lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb,
        }
        return NOTIFY_OK;
 }
+
 #endif
 
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
 /**
  * channel_backend_init - initialize a channel backend
  * @chanb: channel backend
@@ -331,6 +385,12 @@ int channel_backend_init(struct channel_backend *chanb,
                return -EINVAL;
        if (!num_subbuf || (num_subbuf & (num_subbuf - 1)))
                return -EINVAL;
+       /*
+        * Overwrite mode buffers require at least 2 subbuffers per
+        * buffer.
+        */
+       if (config->mode == RING_BUFFER_OVERWRITE && num_subbuf < 2)
+               return -EINVAL;
 
        ret = subbuffer_id_check_index(config, num_subbuf);
        if (ret)
@@ -359,39 +419,50 @@ int channel_backend_init(struct channel_backend *chanb,
                if (!chanb->buf)
                        goto free_cpumask;
 
-               /*
-                * In case of non-hotplug cpu, if the ring-buffer is allocated
-                * in early initcall, it will not be notified of secondary cpus.
-                * In that off case, we need to allocate for all possible cpus.
-                */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+               chanb->cpuhp_prepare.component = LTTNG_RING_BUFFER_BACKEND;
+               ret = cpuhp_state_add_instance(lttng_rb_hp_prepare,
+                       &chanb->cpuhp_prepare.node);
+               if (ret)
+                       goto free_bufs;
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
+               {
+                       /*
+                        * In case of non-hotplug cpu, if the ring-buffer is allocated
+                        * in early initcall, it will not be notified of secondary cpus.
+                        * In that off case, we need to allocate for all possible cpus.
+                        */
 #ifdef CONFIG_HOTPLUG_CPU
-               /*
-                * buf->backend.allocated test takes care of concurrent CPU
-                * hotplug.
-                * Priority higher than frontend, so we create the ring buffer
-                * before we start the timer.
-                */
-               chanb->cpu_hp_notifier.notifier_call =
-                               lib_ring_buffer_cpu_hp_callback;
-               chanb->cpu_hp_notifier.priority = 5;
-               register_hotcpu_notifier(&chanb->cpu_hp_notifier);
-
-               get_online_cpus();
-               for_each_online_cpu(i) {
-                       ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
-                                                chanb, i);
-                       if (ret)
-                               goto free_bufs; /* cpu hotplug locked */
-               }
-               put_online_cpus();
+                       /*
+                        * buf->backend.allocated test takes care of concurrent CPU
+                        * hotplug.
+                        * Priority higher than frontend, so we create the ring buffer
+                        * before we start the timer.
+                        */
+                       chanb->cpu_hp_notifier.notifier_call =
+                                       lib_ring_buffer_cpu_hp_callback;
+                       chanb->cpu_hp_notifier.priority = 5;
+                       register_hotcpu_notifier(&chanb->cpu_hp_notifier);
+
+                       get_online_cpus();
+                       for_each_online_cpu(i) {
+                               ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
+                                                        chanb, i);
+                               if (ret)
+                                       goto free_bufs; /* cpu hotplug locked */
+                       }
+                       put_online_cpus();
 #else
-               for_each_possible_cpu(i) {
-                       ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
-                                                chanb, i);
-                       if (ret)
-                               goto free_bufs; /* cpu hotplug locked */
-               }
+                       for_each_possible_cpu(i) {
+                               ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
+                                                        chanb, i);
+                               if (ret)
+                                       goto free_bufs;
+                       }
 #endif
+               }
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
        } else {
                chanb->buf = kzalloc(sizeof(struct lib_ring_buffer), GFP_KERNEL);
                if (!chanb->buf)
@@ -406,16 +477,26 @@ int channel_backend_init(struct channel_backend *chanb,
 
 free_bufs:
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+               /*
+                * Teardown of the lttng_rb_hp_prepare instance on
+                * "add" error is handled within cpu hotplug; there is
+                * no teardown to do from the caller.
+                */
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+#ifdef CONFIG_HOTPLUG_CPU
+               put_online_cpus();
+               unregister_hotcpu_notifier(&chanb->cpu_hp_notifier);
+#endif
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
                for_each_possible_cpu(i) {
-                       struct lib_ring_buffer *buf = per_cpu_ptr(chanb->buf, i);
+                       struct lib_ring_buffer *buf =
+                               per_cpu_ptr(chanb->buf, i);
 
                        if (!buf->backend.allocated)
                                continue;
                        lib_ring_buffer_free(buf);
                }
-#ifdef CONFIG_HOTPLUG_CPU
-               put_online_cpus();
-#endif
                free_percpu(chanb->buf);
        } else
                kfree(chanb->buf);
@@ -435,8 +516,17 @@ void channel_backend_unregister_notifiers(struct channel_backend *chanb)
 {
        const struct lib_ring_buffer_config *config = &chanb->config;
 
-       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
+       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+               int ret;
+
+               ret = cpuhp_state_remove_instance(lttng_rb_hp_prepare,
+                               &chanb->cpuhp_prepare.node);
+               WARN_ON(ret);
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
                unregister_hotcpu_notifier(&chanb->cpu_hp_notifier);
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+       }
 }
 
 /**
@@ -478,7 +568,7 @@ void channel_backend_free(struct channel_backend *chanb)
  * @pagecpy : page size copied so far
  */
 void _lib_ring_buffer_write(struct lib_ring_buffer_backend *bufb, size_t offset,
-                           const void *src, size_t len, ssize_t pagecpy)
+                           const void *src, size_t len, size_t pagecpy)
 {
        struct channel_backend *chanb = &bufb->chan->backend;
        const struct lib_ring_buffer_config *config = &chanb->config;
@@ -524,7 +614,7 @@ EXPORT_SYMBOL_GPL(_lib_ring_buffer_write);
  */
 void _lib_ring_buffer_memset(struct lib_ring_buffer_backend *bufb,
                             size_t offset,
-                            int c, size_t len, ssize_t pagecpy)
+                            int c, size_t len, size_t pagecpy)
 {
        struct channel_backend *chanb = &bufb->chan->backend;
        const struct lib_ring_buffer_config *config = &chanb->config;
@@ -654,7 +744,7 @@ EXPORT_SYMBOL_GPL(_lib_ring_buffer_strcpy);
 void _lib_ring_buffer_copy_from_user_inatomic(struct lib_ring_buffer_backend *bufb,
                                      size_t offset,
                                      const void __user *src, size_t len,
-                                     ssize_t pagecpy)
+                                     size_t pagecpy)
 {
        struct channel_backend *chanb = &bufb->chan->backend;
        const struct lib_ring_buffer_config *config = &chanb->config;
@@ -686,8 +776,7 @@ void _lib_ring_buffer_copy_from_user_inatomic(struct lib_ring_buffer_backend *bu
                                                        + (offset & ~PAGE_MASK),
                                                        src, pagecpy) != 0;
                if (ret > 0) {
-                       offset += (pagecpy - ret);
-                       len -= (pagecpy - ret);
+                       /* Copy failed. */
                        _lib_ring_buffer_memset(bufb, offset, 0, len, 0);
                        break; /* stop copy */
                }
@@ -795,8 +884,7 @@ size_t lib_ring_buffer_read(struct lib_ring_buffer_backend *bufb, size_t offset,
 {
        struct channel_backend *chanb = &bufb->chan->backend;
        const struct lib_ring_buffer_config *config = &chanb->config;
-       size_t index;
-       ssize_t pagecpy, orig_len;
+       size_t index, pagecpy, orig_len;
        struct lib_ring_buffer_backend_pages *rpages;
        unsigned long sb_bindex, id;
 
@@ -944,15 +1032,15 @@ int lib_ring_buffer_read_cstr(struct lib_ring_buffer_backend *bufb, size_t offse
 EXPORT_SYMBOL_GPL(lib_ring_buffer_read_cstr);
 
 /**
- * lib_ring_buffer_read_get_page - Get a whole page to read from
+ * lib_ring_buffer_read_get_pfn - Get a page frame number to read from
  * @bufb : buffer backend
  * @offset : offset within the buffer
  * @virt : pointer to page address (output)
  *
  * Should be protected by get_subbuf/put_subbuf.
- * Returns the pointer to the page struct pointer.
+ * Returns a pointer to the page frame number (an unsigned long).
  */
-struct page **lib_ring_buffer_read_get_page(struct lib_ring_buffer_backend *bufb,
+unsigned long *lib_ring_buffer_read_get_pfn(struct lib_ring_buffer_backend *bufb,
                                            size_t offset, void ***virt)
 {
        size_t index;
@@ -969,9 +1057,9 @@ struct page **lib_ring_buffer_read_get_page(struct lib_ring_buffer_backend *bufb
        CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
                     && subbuffer_id_is_noref(config, id));
        *virt = &rpages->p[index].virt;
-       return &rpages->p[index].page;
+       return &rpages->p[index].pfn;
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_read_get_page);
+EXPORT_SYMBOL_GPL(lib_ring_buffer_read_get_pfn);
 
 /**
  * lib_ring_buffer_read_offset_address - get address of a buffer location
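With lib_ring_buffer_read_get_pfn() returning a page frame number rather than a struct page pointer, a reader that needs the struct page (for instance to build a splice pipe) now converts it explicitly. An assumed usage sketch (the helper name is hypothetical; the caller must hold the sub-buffer via get_subbuf()/put_subbuf()):

/* Illustrative caller of lib_ring_buffer_read_get_pfn(); not from this patch. */
static struct page *example_read_page(struct lib_ring_buffer_backend *bufb,
		size_t offset)
{
	void **virt;
	unsigned long *pfn;

	/* Must be protected by get_subbuf()/put_subbuf(). */
	pfn = lib_ring_buffer_read_get_pfn(bufb, offset, &virt);
	return pfn_to_page(*pfn);
}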