Cleanup: Move lib/ringbuffer/ headers to include/ringbuffer/
lib/ringbuffer/ring_buffer_backend.c
index 8e1a796a55071abb6f388b2c9ed11cc34b7ed845..d6547d7de9c9cef6f45a6fb426c864a1b8ee3347 100644
@@ -1,21 +1,8 @@
-/*
+/* SPDX-License-Identifier: (GPL-2.0-only OR LGPL-2.1-only)
+ *
  * ring_buffer_backend.c
  *
  * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; only
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 
-#include "../../wrapper/vmalloc.h"     /* for wrapper_vmalloc_sync_all() */
-#include "../../wrapper/ringbuffer/config.h"
-#include "../../wrapper/ringbuffer/backend.h"
-#include "../../wrapper/ringbuffer/frontend.h"
+#include <wrapper/mm.h>
+#include <wrapper/vmalloc.h>   /* for wrapper_vmalloc_sync_mappings() */
+#include <ringbuffer/config.h>
+#include <ringbuffer/backend.h>
+#include <ringbuffer/frontend.h>
 
 /**
  * lib_ring_buffer_backend_allocate - allocate a channel buffer
@@ -52,10 +41,28 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
        unsigned long subbuf_size, mmap_offset = 0;
        unsigned long num_subbuf_alloc;
        struct page **pages;
-       void **virt;
        unsigned long i;
 
        num_pages = size >> PAGE_SHIFT;
+
+       /*
+        * Verify that there are enough free pages available on the system
+        * for the current allocation request.
+        * wrapper_check_enough_free_pages() uses si_mem_available() when
+        * available and returns whether there should be enough free pages
+        * based on the current estimate.
+        */
+       if (!wrapper_check_enough_free_pages(num_pages))
+               goto not_enough_pages;
+
+       /*
+        * Set the current user thread as the first target of the OOM killer.
+        * If the estimate returned by si_mem_available() was off, and we do
+        * end up running out of memory because of this buffer allocation, we
+        * want to kill the offending app first.
+        */
+       wrapper_set_current_oom_origin();
+
        num_pages_per_subbuf = num_pages >> get_count_order(num_subbuf);
        subbuf_size = chanb->subbuf_size;
        num_subbuf_alloc = num_subbuf;
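The allocation path now starts with a free-memory sanity check and marks the calling thread as the preferred OOM-killer victim. Neither wrapper is defined in this file; the sketch below shows what wrapper/mm.h plausibly provides, assuming it builds on si_mem_available() (Linux 4.6+) and on set_current_oom_origin()/clear_current_oom_origin() from <linux/oom.h>. The bodies are assumptions, not the actual lttng-modules code.

/* Hypothetical sketch of the wrapper/mm.h helpers used above; not the actual wrappers. */
#include <linux/mm.h>
#include <linux/oom.h>
#include <linux/version.h>

static inline bool wrapper_check_enough_free_pages(unsigned long num_pages)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0))
	/* si_mem_available() estimates how many pages are allocatable without swapping. */
	return si_mem_available() >= num_pages;
#else
	/* No usable estimate on older kernels: optimistically allow the allocation. */
	return true;
#endif
}

static inline void wrapper_set_current_oom_origin(void)
{
	/* Flag the current task as the first target of the OOM killer. */
	set_current_oom_origin();
}

static inline void wrapper_clear_current_oom_origin(void)
{
	clear_current_oom_origin();
}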
@@ -65,53 +72,49 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
                num_subbuf_alloc++;
        }
 
-       pages = kmalloc_node(ALIGN(sizeof(*pages) * num_pages,
+       pages = vmalloc_node(ALIGN(sizeof(*pages) * num_pages,
                                   1 << INTERNODE_CACHE_SHIFT),
-                       GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+                       cpu_to_node(max(bufb->cpu, 0)));
        if (unlikely(!pages))
                goto pages_error;
 
-       virt = kmalloc_node(ALIGN(sizeof(*virt) * num_pages,
-                                 1 << INTERNODE_CACHE_SHIFT),
-                       GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
-       if (unlikely(!virt))
-               goto virt_error;
-
-       bufb->array = kmalloc_node(ALIGN(sizeof(*bufb->array)
+       bufb->array = lttng_kvmalloc_node(ALIGN(sizeof(*bufb->array)
                                         * num_subbuf_alloc,
                                  1 << INTERNODE_CACHE_SHIFT),
-                       GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+                       GFP_KERNEL | __GFP_NOWARN,
+                       cpu_to_node(max(bufb->cpu, 0)));
        if (unlikely(!bufb->array))
                goto array_error;
 
        for (i = 0; i < num_pages; i++) {
                pages[i] = alloc_pages_node(cpu_to_node(max(bufb->cpu, 0)),
-                                           GFP_KERNEL | __GFP_ZERO, 0);
+                               GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 0);
                if (unlikely(!pages[i]))
                        goto depopulate;
-               virt[i] = page_address(pages[i]);
        }
        bufb->num_pages_per_subbuf = num_pages_per_subbuf;
 
        /* Allocate backend pages array elements */
        for (i = 0; i < num_subbuf_alloc; i++) {
                bufb->array[i] =
-                       kzalloc_node(ALIGN(
+                       lttng_kvzalloc_node(ALIGN(
                                sizeof(struct lib_ring_buffer_backend_pages) +
                                sizeof(struct lib_ring_buffer_backend_page)
                                * num_pages_per_subbuf,
                                1 << INTERNODE_CACHE_SHIFT),
-                               GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+                               GFP_KERNEL | __GFP_NOWARN,
+                               cpu_to_node(max(bufb->cpu, 0)));
                if (!bufb->array[i])
                        goto free_array;
        }
 
        /* Allocate write-side subbuffer table */
-       bufb->buf_wsb = kzalloc_node(ALIGN(
+       bufb->buf_wsb = lttng_kvzalloc_node(ALIGN(
                                sizeof(struct lib_ring_buffer_backend_subbuffer)
                                * num_subbuf,
                                1 << INTERNODE_CACHE_SHIFT),
-                               GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+                               GFP_KERNEL | __GFP_NOWARN,
+                               cpu_to_node(max(bufb->cpu, 0)));
        if (unlikely(!bufb->buf_wsb))
                goto free_array;
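The backend arrays are now allocated with lttng_kvmalloc_node()/lttng_kvzalloc_node() and freed with lttng_kvfree(), so that large allocations can fall back to vmalloc instead of failing on fragmented physical memory; __GFP_NOWARN suppresses allocation-failure warnings that the error path already handles. The wrappers themselves live in wrapper/vmalloc.h and are not part of this diff; below is a sketch of the usual kvmalloc-style pattern they are expected to follow, assuming a kernel that provides kvmalloc_node()/kvfree() (4.12+).

/* Hypothetical sketch of the allocation wrappers; assumes kvmalloc_node()/kvfree() (Linux >= 4.12). */
#include <linux/mm.h>
#include <linux/slab.h>

static inline void *lttng_kvmalloc_node(unsigned long size, gfp_t flags, int node)
{
	/* Tries kmalloc first, transparently falls back to vmalloc for large sizes. */
	return kvmalloc_node(size, flags, node);
}

static inline void *lttng_kvzalloc_node(unsigned long size, gfp_t flags, int node)
{
	return lttng_kvmalloc_node(size, flags | __GFP_ZERO, node);
}

static inline void lttng_kvfree(const void *addr)
{
	/* kvfree() accepts both kmalloc'd and vmalloc'd pointers. */
	kvfree(addr);
}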
 
@@ -125,12 +128,22 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
        else
                bufb->buf_rsb.id = subbuffer_id(config, 0, 1, 0);
 
+       /* Allocate subbuffer packet counter table */
+       bufb->buf_cnt = lttng_kvzalloc_node(ALIGN(
+                               sizeof(struct lib_ring_buffer_backend_counts)
+                               * num_subbuf,
+                               1 << INTERNODE_CACHE_SHIFT),
+                       GFP_KERNEL | __GFP_NOWARN,
+                       cpu_to_node(max(bufb->cpu, 0)));
+       if (unlikely(!bufb->buf_cnt))
+               goto free_wsb;
+
        /* Assign pages to page index */
        for (i = 0; i < num_subbuf_alloc; i++) {
                for (j = 0; j < num_pages_per_subbuf; j++) {
                        CHAN_WARN_ON(chanb, page_idx > num_pages);
-                       bufb->array[i]->p[j].virt = virt[page_idx];
-                       bufb->array[i]->p[j].page = pages[page_idx];
+                       bufb->array[i]->p[j].virt = page_address(pages[page_idx]);
+                       bufb->array[i]->p[j].pfn = page_to_pfn(pages[page_idx]);
                        page_idx++;
                }
                if (config->output == RING_BUFFER_MMAP) {
@@ -143,24 +156,26 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
         * If kmalloc ever uses vmalloc underneath, make sure the buffer pages
         * will not fault.
         */
-       wrapper_vmalloc_sync_all();
-       kfree(virt);
-       kfree(pages);
+       wrapper_vmalloc_sync_mappings();
+       wrapper_clear_current_oom_origin();
+       vfree(pages);
        return 0;
 
+free_wsb:
+       lttng_kvfree(bufb->buf_wsb);
 free_array:
        for (i = 0; (i < num_subbuf_alloc && bufb->array[i]); i++)
-               kfree(bufb->array[i]);
+               lttng_kvfree(bufb->array[i]);
 depopulate:
        /* Free all allocated pages */
        for (i = 0; (i < num_pages && pages[i]); i++)
                __free_page(pages[i]);
-       kfree(bufb->array);
+       lttng_kvfree(bufb->array);
 array_error:
-       kfree(virt);
-virt_error:
-       kfree(pages);
+       vfree(pages);
 pages_error:
+       wrapper_clear_current_oom_origin();
+not_enough_pages:
        return -ENOMEM;
 }
 
@@ -186,13 +201,14 @@ void lib_ring_buffer_backend_free(struct lib_ring_buffer_backend *bufb)
        if (chanb->extra_reader_sb)
                num_subbuf_alloc++;
 
-       kfree(bufb->buf_wsb);
+       lttng_kvfree(bufb->buf_wsb);
+       lttng_kvfree(bufb->buf_cnt);
        for (i = 0; i < num_subbuf_alloc; i++) {
                for (j = 0; j < bufb->num_pages_per_subbuf; j++)
-                       __free_page(bufb->array[i]->p[j].page);
-               kfree(bufb->array[i]);
+                       __free_page(pfn_to_page(bufb->array[i]->p[j].pfn));
+               lttng_kvfree(bufb->array[i]);
        }
-       kfree(bufb->array);
+       lttng_kvfree(bufb->array);
        bufb->allocated = 0;
 }
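Backend page descriptors now record the page frame number (pfn) instead of a struct page pointer: the allocation path stores page_to_pfn() and the free path recovers the page with pfn_to_page(), while the kernel virtual address is still cached separately for fast copies. The two forms are interchangeable, as this generic illustration (not part of the patch) shows:

/* Generic pfn <-> struct page round trip; illustration only, not from this patch. */
#include <linux/mm.h>

static void pfn_roundtrip_example(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);	/* stored at allocation time */
	struct page *same = pfn_to_page(pfn);	/* recovered at free time */
	void *virt = page_address(page);	/* cached for fast buffer writes */

	WARN_ON(same != page || virt != page_address(same));
}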
 
@@ -243,7 +259,42 @@ void channel_backend_reset(struct channel_backend *chanb)
        chanb->start_tsc = config->cb.ring_buffer_clock_read(chan);
 }
 
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+
+/*
+ * No need to implement a "dead" callback to do a buffer switch here,
+ * because it will happen when tracing is stopped, or will be done by the
+ * switch timer CPU DEAD callback.
+ * We don't free buffers when CPUs go away, because it would make trace
+ * data vanish, which is unwanted.
+ */
+int lttng_cpuhp_rb_backend_prepare(unsigned int cpu,
+               struct lttng_cpuhp_node *node)
+{
+       struct channel_backend *chanb = container_of(node,
+                       struct channel_backend, cpuhp_prepare);
+       const struct lib_ring_buffer_config *config = &chanb->config;
+       struct lib_ring_buffer *buf;
+       int ret;
+
+       CHAN_WARN_ON(chanb, config->alloc == RING_BUFFER_ALLOC_GLOBAL);
+
+       buf = per_cpu_ptr(chanb->buf, cpu);
+       ret = lib_ring_buffer_create(buf, chanb, cpu);
+       if (ret) {
+               printk(KERN_ERR
+                 "ring_buffer_cpu_hp_callback: cpu %d "
+                 "buffer creation failed\n", cpu);
+               return ret;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(lttng_cpuhp_rb_backend_prepare);
+
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
 #ifdef CONFIG_HOTPLUG_CPU
+
 /**
  *     lib_ring_buffer_cpu_hp_callback - CPU hotplug callback
  *     @nb: notifier block
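lttng_cpuhp_rb_backend_prepare() above is a multi-instance "prepare" callback for the CPU hotplug state machine introduced in Linux 4.10. The lttng_rb_hp_prepare state it attaches to is registered elsewhere in lttng-modules, outside this file; a minimal sketch of how such a dynamic multi-instance state is typically set up follows. The trampoline, state name string and init function are illustrative assumptions, as is the assumption that struct lttng_cpuhp_node embeds a struct hlist_node member named node.

/*
 * Hypothetical registration of the dynamic multi-instance prepare state
 * (Linux >= 4.10); not the actual lttng-modules registration code.
 */
#include <linux/cpuhotplug.h>
#include <linux/kernel.h>

static enum cpuhp_state lttng_rb_hp_prepare;

/* Assumes struct lttng_cpuhp_node embeds a struct hlist_node named 'node'. */
static int example_rb_prepare_cb(unsigned int cpu, struct hlist_node *node)
{
	struct lttng_cpuhp_node *lttng_node =
		container_of(node, struct lttng_cpuhp_node, node);

	return lttng_cpuhp_rb_backend_prepare(cpu, lttng_node);
}

static int __init example_rb_cpuhp_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_BP_PREPARE_DYN,
			"lib/ringbuffer/backend:prepare",	/* illustrative name */
			example_rb_prepare_cb,
			NULL);	/* no teardown: buffers are kept on CPU removal */
	if (ret < 0)
		return ret;
	lttng_rb_hp_prepare = ret;	/* dynamic state number */
	return 0;
}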
@@ -287,8 +338,11 @@ int lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb,
        }
        return NOTIFY_OK;
 }
+
 #endif
 
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
 /**
  * channel_backend_init - initialize a channel backend
  * @chanb: channel backend
@@ -331,6 +385,12 @@ int channel_backend_init(struct channel_backend *chanb,
                return -EINVAL;
        if (!num_subbuf || (num_subbuf & (num_subbuf - 1)))
                return -EINVAL;
+       /*
+        * Overwrite mode buffers require at least 2 subbuffers per
+        * buffer.
+        */
+       if (config->mode == RING_BUFFER_OVERWRITE && num_subbuf < 2)
+               return -EINVAL;
 
        ret = subbuffer_id_check_index(config, num_subbuf);
        if (ret)
@@ -359,39 +419,50 @@ int channel_backend_init(struct channel_backend *chanb,
                if (!chanb->buf)
                        goto free_cpumask;
 
-               /*
-                * In case of non-hotplug cpu, if the ring-buffer is allocated
-                * in early initcall, it will not be notified of secondary cpus.
-                * In that off case, we need to allocate for all possible cpus.
-                */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+               chanb->cpuhp_prepare.component = LTTNG_RING_BUFFER_BACKEND;
+               ret = cpuhp_state_add_instance(lttng_rb_hp_prepare,
+                       &chanb->cpuhp_prepare.node);
+               if (ret)
+                       goto free_bufs;
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
+               {
+                       /*
+                        * In case of non-hotplug cpu, if the ring-buffer is allocated
+                        * in early initcall, it will not be notified of secondary cpus.
+                        * In that off case, we need to allocate for all possible cpus.
+                        */
 #ifdef CONFIG_HOTPLUG_CPU
-               /*
-                * buf->backend.allocated test takes care of concurrent CPU
-                * hotplug.
-                * Priority higher than frontend, so we create the ring buffer
-                * before we start the timer.
-                */
-               chanb->cpu_hp_notifier.notifier_call =
-                               lib_ring_buffer_cpu_hp_callback;
-               chanb->cpu_hp_notifier.priority = 5;
-               register_hotcpu_notifier(&chanb->cpu_hp_notifier);
-
-               get_online_cpus();
-               for_each_online_cpu(i) {
-                       ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
-                                                chanb, i);
-                       if (ret)
-                               goto free_bufs; /* cpu hotplug locked */
-               }
-               put_online_cpus();
+                       /*
+                        * buf->backend.allocated test takes care of concurrent CPU
+                        * hotplug.
+                        * Priority higher than frontend, so we create the ring buffer
+                        * before we start the timer.
+                        */
+                       chanb->cpu_hp_notifier.notifier_call =
+                                       lib_ring_buffer_cpu_hp_callback;
+                       chanb->cpu_hp_notifier.priority = 5;
+                       register_hotcpu_notifier(&chanb->cpu_hp_notifier);
+
+                       get_online_cpus();
+                       for_each_online_cpu(i) {
+                               ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
+                                                        chanb, i);
+                               if (ret)
+                                       goto free_bufs; /* cpu hotplug locked */
+                       }
+                       put_online_cpus();
 #else
-               for_each_possible_cpu(i) {
-                       ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
-                                                chanb, i);
-                       if (ret)
-                               goto free_bufs; /* cpu hotplug locked */
-               }
+                       for_each_possible_cpu(i) {
+                               ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
+                                                        chanb, i);
+                               if (ret)
+                                       goto free_bufs;
+                       }
 #endif
+               }
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
        } else {
                chanb->buf = kzalloc(sizeof(struct lib_ring_buffer), GFP_KERNEL);
                if (!chanb->buf)
@@ -406,16 +477,26 @@ int channel_backend_init(struct channel_backend *chanb,
 
 free_bufs:
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+               /*
+                * Teardown of the lttng_rb_hp_prepare instance on
+                * "add" error is handled within cpu hotplug; there is
+                * no teardown to do from the caller.
+                */
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+#ifdef CONFIG_HOTPLUG_CPU
+               put_online_cpus();
+               unregister_hotcpu_notifier(&chanb->cpu_hp_notifier);
+#endif
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
                for_each_possible_cpu(i) {
-                       struct lib_ring_buffer *buf = per_cpu_ptr(chanb->buf, i);
+                       struct lib_ring_buffer *buf =
+                               per_cpu_ptr(chanb->buf, i);
 
                        if (!buf->backend.allocated)
                                continue;
                        lib_ring_buffer_free(buf);
                }
-#ifdef CONFIG_HOTPLUG_CPU
-               put_online_cpus();
-#endif
                free_percpu(chanb->buf);
        } else
                kfree(chanb->buf);
@@ -435,8 +516,17 @@ void channel_backend_unregister_notifiers(struct channel_backend *chanb)
 {
        const struct lib_ring_buffer_config *config = &chanb->config;
 
-       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
+       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+               int ret;
+
+               ret = cpuhp_state_remove_instance(lttng_rb_hp_prepare,
+                               &chanb->cpuhp_prepare.node);
+               WARN_ON(ret);
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
                unregister_hotcpu_notifier(&chanb->cpu_hp_notifier);
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+       }
 }
 
 /**
@@ -478,7 +568,7 @@ void channel_backend_free(struct channel_backend *chanb)
  * @pagecpy : page size copied so far
  */
 void _lib_ring_buffer_write(struct lib_ring_buffer_backend *bufb, size_t offset,
-                           const void *src, size_t len, ssize_t pagecpy)
+                           const void *src, size_t len, size_t pagecpy)
 {
        struct channel_backend *chanb = &bufb->chan->backend;
        const struct lib_ring_buffer_config *config = &chanb->config;
@@ -524,7 +614,7 @@ EXPORT_SYMBOL_GPL(_lib_ring_buffer_write);
  */
 void _lib_ring_buffer_memset(struct lib_ring_buffer_backend *bufb,
                             size_t offset,
-                            int c, size_t len, ssize_t pagecpy)
+                            int c, size_t len, size_t pagecpy)
 {
        struct channel_backend *chanb = &bufb->chan->backend;
        const struct lib_ring_buffer_config *config = &chanb->config;
@@ -654,7 +744,7 @@ EXPORT_SYMBOL_GPL(_lib_ring_buffer_strcpy);
 void _lib_ring_buffer_copy_from_user_inatomic(struct lib_ring_buffer_backend *bufb,
                                      size_t offset,
                                      const void __user *src, size_t len,
-                                     ssize_t pagecpy)
+                                     size_t pagecpy)
 {
        struct channel_backend *chanb = &bufb->chan->backend;
        const struct lib_ring_buffer_config *config = &chanb->config;
@@ -686,8 +776,7 @@ void _lib_ring_buffer_copy_from_user_inatomic(struct lib_ring_buffer_backend *bu
                                                        + (offset & ~PAGE_MASK),
                                                        src, pagecpy) != 0;
                if (ret > 0) {
-                       offset += (pagecpy - ret);
-                       len -= (pagecpy - ret);
+                       /* Copy failed. */
                        _lib_ring_buffer_memset(bufb, offset, 0, len, 0);
                        break; /* stop copy */
                }
@@ -795,8 +884,7 @@ size_t lib_ring_buffer_read(struct lib_ring_buffer_backend *bufb, size_t offset,
 {
        struct channel_backend *chanb = &bufb->chan->backend;
        const struct lib_ring_buffer_config *config = &chanb->config;
-       size_t index;
-       ssize_t pagecpy, orig_len;
+       size_t index, pagecpy, orig_len;
        struct lib_ring_buffer_backend_pages *rpages;
        unsigned long sb_bindex, id;
 
@@ -944,15 +1032,15 @@ int lib_ring_buffer_read_cstr(struct lib_ring_buffer_backend *bufb, size_t offse
 EXPORT_SYMBOL_GPL(lib_ring_buffer_read_cstr);
 
 /**
- * lib_ring_buffer_read_get_page - Get a whole page to read from
+ * lib_ring_buffer_read_get_pfn - Get a page frame number to read from
  * @bufb : buffer backend
  * @offset : offset within the buffer
  * @virt : pointer to page address (output)
  *
  * Should be protected by get_subbuf/put_subbuf.
- * Returns the pointer to the page struct pointer.
+ * Returns a pointer to the page frame number (an unsigned long).
  */
-struct page **lib_ring_buffer_read_get_page(struct lib_ring_buffer_backend *bufb,
+unsigned long *lib_ring_buffer_read_get_pfn(struct lib_ring_buffer_backend *bufb,
                                            size_t offset, void ***virt)
 {
        size_t index;
@@ -969,9 +1057,9 @@ struct page **lib_ring_buffer_read_get_page(struct lib_ring_buffer_backend *bufb
        CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
                     && subbuffer_id_is_noref(config, id));
        *virt = &rpages->p[index].virt;
-       return &rpages->p[index].page;
+       return &rpages->p[index].pfn;
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_read_get_page);
+EXPORT_SYMBOL_GPL(lib_ring_buffer_read_get_pfn);
 
 /**
  * lib_ring_buffer_read_offset_address - get address of a buffer location
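With lib_ring_buffer_read_get_pfn() returning a page frame number rather than a struct page pointer, a reader that needs the struct page (for instance to build a splice pipe) now converts it explicitly. An assumed usage sketch (the helper name is hypothetical; the caller must hold the sub-buffer via get_subbuf()/put_subbuf()):

/* Illustrative caller of lib_ring_buffer_read_get_pfn(); not from this patch. */
static struct page *example_read_page(struct lib_ring_buffer_backend *bufb,
		size_t offset)
{
	void **virt;
	unsigned long *pfn;

	/* Must be protected by get_subbuf()/put_subbuf(). */
	pfn = lib_ring_buffer_read_get_pfn(bufb, offset, &virt);
	return pfn_to_page(*pfn);
}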