X-Git-Url: http://git.liburcu.org/?a=blobdiff_plain;f=lib%2Fringbuffer%2Fring_buffer_backend.c;h=2a47948e3efa6597d12018364e58c0c7e739d9d9;hb=263b6c88138c3354d63dba3c70a965de94becd22;hp=f7ed20df19869b767fd23f568c87e5831956ead2;hpb=bfe529f90b1354246b5d8a18f49e30595c612041;p=lttng-modules.git

diff --git a/lib/ringbuffer/ring_buffer_backend.c b/lib/ringbuffer/ring_buffer_backend.c
index f7ed20df..2a47948e 100644
--- a/lib/ringbuffer/ring_buffer_backend.c
+++ b/lib/ringbuffer/ring_buffer_backend.c
@@ -1,21 +1,8 @@
-/*
+/* SPDX-License-Identifier: (GPL-2.0-only OR LGPL-2.1-only)
+ *
  * ring_buffer_backend.c
  *
  * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; only
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <linux/stddef.h>
@@ -27,11 +14,13 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/mm.h>
+#include <linux/vmalloc.h>
 
-#include "../../wrapper/vmalloc.h"	/* for wrapper_vmalloc_sync_all() */
-#include "../../wrapper/ringbuffer/config.h"
-#include "../../wrapper/ringbuffer/backend.h"
-#include "../../wrapper/ringbuffer/frontend.h"
+#include <wrapper/mm.h>
+#include <wrapper/vmalloc.h>	/* for wrapper_vmalloc_sync_mappings() */
+#include <wrapper/ringbuffer/config.h>
+#include <wrapper/ringbuffer/backend.h>
+#include <wrapper/ringbuffer/frontend.h>
 
 /**
  * lib_ring_buffer_backend_allocate - allocate a channel buffer
@@ -52,10 +41,28 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
 	unsigned long subbuf_size, mmap_offset = 0;
 	unsigned long num_subbuf_alloc;
 	struct page **pages;
-	void **virt;
 	unsigned long i;
 
 	num_pages = size >> PAGE_SHIFT;
+
+	/*
+	 * Verify that there are enough free pages available on the system for
+	 * the current allocation request.
+	 * wrapper_check_enough_free_pages uses si_mem_available() if available
+	 * and returns whether there should be enough free pages based on the
+	 * current estimate.
+	 */
+	if (!wrapper_check_enough_free_pages(num_pages))
+		goto not_enough_pages;
+
+	/*
+	 * Set the current user thread as the first target of the OOM killer.
+	 * If the estimate received from si_mem_available() was off, and we do
+	 * end up running out of memory because of this buffer allocation, we
+	 * want to kill the offending app first.
+	 */
+	wrapper_set_current_oom_origin();
+
 	num_pages_per_subbuf = num_pages >> get_count_order(num_subbuf);
 	subbuf_size = chanb->subbuf_size;
 	num_subbuf_alloc = num_subbuf;
@@ -65,53 +72,49 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
 		num_subbuf_alloc++;
 	}
 
-	pages = kmalloc_node(ALIGN(sizeof(*pages) * num_pages,
+	pages = vmalloc_node(ALIGN(sizeof(*pages) * num_pages,
 				   1 << INTERNODE_CACHE_SHIFT),
-			GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+			cpu_to_node(max(bufb->cpu, 0)));
 	if (unlikely(!pages))
 		goto pages_error;
 
-	virt = kmalloc_node(ALIGN(sizeof(*virt) * num_pages,
-				  1 << INTERNODE_CACHE_SHIFT),
-			GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
-	if (unlikely(!virt))
-		goto virt_error;
-
-	bufb->array = kmalloc_node(ALIGN(sizeof(*bufb->array)
+	bufb->array = lttng_kvmalloc_node(ALIGN(sizeof(*bufb->array)
 					 * num_subbuf_alloc,
 				  1 << INTERNODE_CACHE_SHIFT),
-			GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+			GFP_KERNEL | __GFP_NOWARN,
+			cpu_to_node(max(bufb->cpu, 0)));
 	if (unlikely(!bufb->array))
 		goto array_error;
 
 	for (i = 0; i < num_pages; i++) {
 		pages[i] = alloc_pages_node(cpu_to_node(max(bufb->cpu, 0)),
-					    GFP_KERNEL | __GFP_ZERO, 0);
+				GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 0);
 		if (unlikely(!pages[i]))
 			goto depopulate;
-		virt[i] = page_address(pages[i]);
 	}
 	bufb->num_pages_per_subbuf = num_pages_per_subbuf;
 
 	/* Allocate backend pages array elements */
 	for (i = 0; i < num_subbuf_alloc; i++) {
 		bufb->array[i] =
-			kzalloc_node(ALIGN(
+			lttng_kvzalloc_node(ALIGN(
 				sizeof(struct lib_ring_buffer_backend_pages) +
 				sizeof(struct lib_ring_buffer_backend_page)
 				* num_pages_per_subbuf,
 				1 << INTERNODE_CACHE_SHIFT),
-				GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+				GFP_KERNEL | __GFP_NOWARN,
+				cpu_to_node(max(bufb->cpu, 0)));
 		if (!bufb->array[i])
 			goto free_array;
 	}
 
 	/* Allocate write-side subbuffer table */
-	bufb->buf_wsb = kzalloc_node(ALIGN(
+	bufb->buf_wsb = lttng_kvzalloc_node(ALIGN(
 				sizeof(struct lib_ring_buffer_backend_subbuffer)
 				* num_subbuf,
 				1 << INTERNODE_CACHE_SHIFT),
-				GFP_KERNEL, cpu_to_node(max(bufb->cpu, 0)));
+				GFP_KERNEL | __GFP_NOWARN,
+				cpu_to_node(max(bufb->cpu, 0)));
 	if (unlikely(!bufb->buf_wsb))
 		goto free_array;
 
@@ -125,12 +128,22 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
 	else
 		bufb->buf_rsb.id = subbuffer_id(config, 0, 1, 0);
 
+	/* Allocate subbuffer packet counter table */
+	bufb->buf_cnt = lttng_kvzalloc_node(ALIGN(
+				sizeof(struct lib_ring_buffer_backend_counts)
+				* num_subbuf,
+				1 << INTERNODE_CACHE_SHIFT),
+			GFP_KERNEL | __GFP_NOWARN,
+			cpu_to_node(max(bufb->cpu, 0)));
+	if (unlikely(!bufb->buf_cnt))
+		goto free_wsb;
+
 	/* Assign pages to page index */
 	for (i = 0; i < num_subbuf_alloc; i++) {
 		for (j = 0; j < num_pages_per_subbuf; j++) {
 			CHAN_WARN_ON(chanb, page_idx > num_pages);
-			bufb->array[i]->p[j].virt = virt[page_idx];
-			bufb->array[i]->p[j].page = pages[page_idx];
+			bufb->array[i]->p[j].virt = page_address(pages[page_idx]);
+			bufb->array[i]->p[j].pfn = page_to_pfn(pages[page_idx]);
 			page_idx++;
 		}
 		if (config->output == RING_BUFFER_MMAP) {
@@ -143,24 +156,26 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
 	 * If kmalloc ever uses vmalloc underneath, make sure the buffer pages
 	 * will not fault.
 	 */
-	wrapper_vmalloc_sync_all();
-	kfree(virt);
-	kfree(pages);
+	wrapper_vmalloc_sync_mappings();
+	wrapper_clear_current_oom_origin();
+	vfree(pages);
 	return 0;
 
+free_wsb:
+	lttng_kvfree(bufb->buf_wsb);
 free_array:
 	for (i = 0; (i < num_subbuf_alloc && bufb->array[i]); i++)
-		kfree(bufb->array[i]);
+		lttng_kvfree(bufb->array[i]);
 depopulate:
 	/* Free all allocated pages */
 	for (i = 0; (i < num_pages && pages[i]); i++)
 		__free_page(pages[i]);
-	kfree(bufb->array);
+	lttng_kvfree(bufb->array);
 array_error:
-	kfree(virt);
-virt_error:
-	kfree(pages);
+	vfree(pages);
 pages_error:
+	wrapper_clear_current_oom_origin();
+not_enough_pages:
 	return -ENOMEM;
 }
 
@@ -186,13 +201,14 @@ void lib_ring_buffer_backend_free(struct lib_ring_buffer_backend *bufb)
 	if (chanb->extra_reader_sb)
 		num_subbuf_alloc++;
 
-	kfree(bufb->buf_wsb);
+	lttng_kvfree(bufb->buf_wsb);
+	lttng_kvfree(bufb->buf_cnt);
 	for (i = 0; i < num_subbuf_alloc; i++) {
 		for (j = 0; j < bufb->num_pages_per_subbuf; j++)
-			__free_page(bufb->array[i]->p[j].page);
-		kfree(bufb->array[i]);
+			__free_page(pfn_to_page(bufb->array[i]->p[j].pfn));
+		lttng_kvfree(bufb->array[i]);
 	}
-	kfree(bufb->array);
+	lttng_kvfree(bufb->array);
 	bufb->allocated = 0;
 }
 
@@ -243,7 +259,42 @@ void channel_backend_reset(struct channel_backend *chanb)
 	chanb->start_tsc = config->cb.ring_buffer_clock_read(chan);
 }
 
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+
+/*
+ * CPU hotplug "prepare" callback: creates the ring buffer of @cpu for the
+ * channel backend embedding @node; returns 0 or the creation error code.
+ * No "dead" callback is needed to do a buffer switch: it happens when
+ * tracing is stopped, or is done by the switch timer CPU DEAD callback.
+ * Buffers are not freed when a CPU goes away: trace data would vanish.
+ */
+int lttng_cpuhp_rb_backend_prepare(unsigned int cpu,
+		struct lttng_cpuhp_node *node)
+{
+	struct channel_backend *chanb = container_of(node,
+			struct channel_backend, cpuhp_prepare);
+	const struct lib_ring_buffer_config *config = &chanb->config;
+	struct lib_ring_buffer *buf;
+	int ret;
+
+	CHAN_WARN_ON(chanb, config->alloc == RING_BUFFER_ALLOC_GLOBAL);
+
+	buf = per_cpu_ptr(chanb->buf, cpu);
+	ret = lib_ring_buffer_create(buf, chanb, cpu);
+	if (ret) {
+		printk(KERN_ERR
+		  "ring_buffer_cpu_hp_callback: cpu %d "
+		  "buffer creation failed\n", cpu);
+		return ret;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(lttng_cpuhp_rb_backend_prepare);
+
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
 #ifdef CONFIG_HOTPLUG_CPU
+
 /**
  *	lib_ring_buffer_cpu_hp_callback - CPU hotplug callback
  *	@nb: notifier block
@@ -287,8 +338,11 @@ int lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb,
 	}
 	return NOTIFY_OK;
 }
+
 #endif
 
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
 /**
  * channel_backend_init - initialize a channel backend
  * @chanb: channel backend
@@ -331,6 +385,12 @@ int channel_backend_init(struct channel_backend *chanb,
 		return -EINVAL;
 	if (!num_subbuf || (num_subbuf & (num_subbuf - 1)))
 		return -EINVAL;
+	/*
+	 * Overwrite mode buffers require at least 2 subbuffers per
+	 * buffer.
+	 */
+	if (config->mode == RING_BUFFER_OVERWRITE && num_subbuf < 2)
+		return -EINVAL;
 
 	ret = subbuffer_id_check_index(config, num_subbuf);
 	if (ret)
@@ -359,39 +419,50 @@ int channel_backend_init(struct channel_backend *chanb,
 		if (!chanb->buf)
 			goto free_cpumask;
 
-		/*
-		 * In case of non-hotplug cpu, if the ring-buffer is allocated
-		 * in early initcall, it will not be notified of secondary cpus.
-		 * In that off case, we need to allocate for all possible cpus.
-		 */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+		chanb->cpuhp_prepare.component = LTTNG_RING_BUFFER_BACKEND;
+		ret = cpuhp_state_add_instance(lttng_rb_hp_prepare,
+			&chanb->cpuhp_prepare.node);
+		if (ret)
+			goto free_bufs;
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+
+		{
+			/*
+			 * In case of non-hotplug cpu, if the ring-buffer is allocated
+			 * in early initcall, it will not be notified of secondary cpus.
+			 * In that case, we need to allocate for all possible cpus.
+			 */
 #ifdef CONFIG_HOTPLUG_CPU
-		/*
-		 * buf->backend.allocated test takes care of concurrent CPU
-		 * hotplug.
-		 * Priority higher than frontend, so we create the ring buffer
-		 * before we start the timer.
-		 */
-		chanb->cpu_hp_notifier.notifier_call =
-				lib_ring_buffer_cpu_hp_callback;
-		chanb->cpu_hp_notifier.priority = 5;
-		register_hotcpu_notifier(&chanb->cpu_hp_notifier);
-
-		get_online_cpus();
-		for_each_online_cpu(i) {
-			ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
-						 chanb, i);
-			if (ret)
-				goto free_bufs;	/* cpu hotplug locked */
-		}
-		put_online_cpus();
+			/*
+			 * buf->backend.allocated test takes care of concurrent CPU
+			 * hotplug.
+			 * Priority higher than frontend, so we create the ring buffer
+			 * before we start the timer.
+			 */
+			chanb->cpu_hp_notifier.notifier_call =
+					lib_ring_buffer_cpu_hp_callback;
+			chanb->cpu_hp_notifier.priority = 5;
+			register_hotcpu_notifier(&chanb->cpu_hp_notifier);
+
+			get_online_cpus();
+			for_each_online_cpu(i) {
+				ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
+							 chanb, i);
+				if (ret)
+					goto free_bufs;	/* cpu hotplug locked */
+			}
+			put_online_cpus();
 #else
-		for_each_possible_cpu(i) {
-			ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
-						 chanb, i);
-			if (ret)
-				goto free_bufs;	/* cpu hotplug locked */
-		}
+			for_each_possible_cpu(i) {
+				ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
+							 chanb, i);
+				if (ret)
+					goto free_bufs;
+			}
 #endif
+		}
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
 	} else {
 		chanb->buf = kzalloc(sizeof(struct lib_ring_buffer), GFP_KERNEL);
 		if (!chanb->buf)
@@ -406,16 +477,26 @@ int channel_backend_init(struct channel_backend *chanb,
 
 free_bufs:
 	if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+		/*
+		 * Teardown of lttng_rb_hp_prepare instance
+		 * on "add" error is handled within cpu hotplug,
+		 * no teardown to do from the caller.
+		 */
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+#ifdef CONFIG_HOTPLUG_CPU
+		put_online_cpus();
+		unregister_hotcpu_notifier(&chanb->cpu_hp_notifier);
+#endif
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
 		for_each_possible_cpu(i) {
-			struct lib_ring_buffer *buf = per_cpu_ptr(chanb->buf, i);
+			struct lib_ring_buffer *buf =
+				per_cpu_ptr(chanb->buf, i);
 
 			if (!buf->backend.allocated)
 				continue;
 			lib_ring_buffer_free(buf);
 		}
-#ifdef CONFIG_HOTPLUG_CPU
-		put_online_cpus();
-#endif
 		free_percpu(chanb->buf);
 	} else
 		kfree(chanb->buf);
@@ -435,8 +516,17 @@ void channel_backend_unregister_notifiers(struct channel_backend *chanb)
 {
 	const struct lib_ring_buffer_config *config = &chanb->config;
 
-	if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
+	if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0))
+		int ret;
+
+		ret = cpuhp_state_remove_instance(lttng_rb_hp_prepare,
+				&chanb->cpuhp_prepare.node);
+		WARN_ON(ret);
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
 		unregister_hotcpu_notifier(&chanb->cpu_hp_notifier);
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,10,0)) */
+	}
 }
 
 /**
@@ -686,8 +776,7 @@ void _lib_ring_buffer_copy_from_user_inatomic(struct lib_ring_buffer_backend *bu
 							+ (offset & ~PAGE_MASK),
 							src, pagecpy) != 0;
 		if (ret > 0) {
-			offset += (pagecpy - ret);
-			len -= (pagecpy - ret);
+			/* Copy failed. */
 			_lib_ring_buffer_memset(bufb, offset, 0, len, 0);
 			break; /* stop copy */
 		}
@@ -943,15 +1032,15 @@ int lib_ring_buffer_read_cstr(struct lib_ring_buffer_backend *bufb, size_t offse
 EXPORT_SYMBOL_GPL(lib_ring_buffer_read_cstr);
 
 /**
- * lib_ring_buffer_read_get_page - Get a whole page to read from
+ * lib_ring_buffer_read_get_pfn - Get a page frame number to read from
  * @bufb : buffer backend
  * @offset : offset within the buffer
  * @virt : pointer to page address (output)
  *
  * Should be protected by get_subbuf/put_subbuf.
- * Returns the pointer to the page struct pointer.
+ * Returns a pointer to the page frame number (an unsigned long).
  */
-struct page **lib_ring_buffer_read_get_page(struct lib_ring_buffer_backend *bufb,
+unsigned long *lib_ring_buffer_read_get_pfn(struct lib_ring_buffer_backend *bufb,
 					    size_t offset, void ***virt)
 {
 	size_t index;
@@ -968,9 +1057,9 @@ struct page **lib_ring_buffer_read_get_page(struct lib_ring_buffer_backend *bufb
 	CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
 		     && subbuffer_id_is_noref(config, id));
 	*virt = &rpages->p[index].virt;
-	return &rpages->p[index].page;
+	return &rpages->p[index].pfn;
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_read_get_page);
+EXPORT_SYMBOL_GPL(lib_ring_buffer_read_get_pfn);
 
 /**
  * lib_ring_buffer_read_offset_address - get address of a buffer location