From 7ead8b8313d92b3a69a1a61b0dcbc4cd66c960dc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 17 Aug 2009 16:56:28 +0800 Subject: tracing/events: Add module tracepoints Add trace points to trace module_load, module_free, module_get, module_put and module_request, and use trace_event facility to get the trace output. Here's the sample output: TASK-PID CPU# TIMESTAMP FUNCTION | | | | | <...>-42 [000] 1.758380: module_request: fb0 wait=1 call_site=fb_open ... <...>-60 [000] 3.269403: module_load: scsi_wait_scan <...>-60 [000] 3.269432: module_put: scsi_wait_scan call_site=sys_init_module refcnt=0 <...>-61 [001] 3.273168: module_free: scsi_wait_scan ... <...>-1021 [000] 13.836081: module_load: sunrpc <...>-1021 [000] 13.840589: module_put: sunrpc call_site=sys_init_module refcnt=-1 <...>-1027 [000] 13.848098: module_get: sunrpc call_site=try_module_get refcnt=0 <...>-1027 [000] 13.848308: module_get: sunrpc call_site=get_filesystem refcnt=1 <...>-1027 [000] 13.848692: module_put: sunrpc call_site=put_filesystem refcnt=0 ... modprobe-2587 [001] 1088.437213: module_load: trace_events_sample F modprobe-2587 [001] 1088.437786: module_put: trace_events_sample call_site=sys_init_module refcnt=0 Note: - the taints flag can be 'F', 'C' and/or 'P' if mod->taints != 0 - the module refcnt is percpu, so it can be negative in a specific cpu Signed-off-by: Li Zefan Acked-by: Rusty Russell Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Rusty Russell LKML-Reference: <4A891B3C.5030608@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/module.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 098bdb7bfacf..f8f92d015efe 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,10 +17,12 @@ #include #include #include -#include +#include #include +#include + /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) @@ -462,7 +464,10 @@ static inline local_t *__module_ref_addr(struct module *mod, int cpu) static inline void __module_get(struct module *module) { if (module) { - local_inc(__module_ref_addr(module, get_cpu())); + unsigned int cpu = get_cpu(); + local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); put_cpu(); } } @@ -473,8 +478,11 @@ static inline int try_module_get(struct module *module) if (module) { unsigned int cpu = get_cpu(); - if (likely(module_is_live(module))) + if (likely(module_is_live(module))) { local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); + } else ret = 0; put_cpu(); -- cgit v1.2.3-71-gd317 From fc5377668c3d808e1d53c4aee152c836f55c3490 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Sep 2009 19:35:28 +0200 Subject: tracing: Remove markers Now that the last users of markers have migrated to the event tracer we can kill off the (now orphan) support code. Signed-off-by: Christoph Hellwig Acked-by: Mathieu Desnoyers Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20090917173527.GA1699@lst.de> Signed-off-by: Ingo Molnar --- Documentation/markers.txt | 104 ---- arch/powerpc/platforms/cell/spufs/file.c | 1 - arch/powerpc/platforms/cell/spufs/sched.c | 1 - include/linux/kvm_host.h | 1 - include/linux/marker.h | 221 ------- include/linux/module.h | 11 - init/Kconfig | 7 - kernel/Makefile | 1 - kernel/marker.c | 930 ------------------------------ kernel/module.c | 18 - kernel/trace/trace_printk.c | 1 - samples/Kconfig | 6 - samples/Makefile | 2 +- samples/markers/Makefile | 4 - samples/markers/marker-example.c | 53 -- samples/markers/probe-example.c | 92 --- scripts/Makefile.modpost | 12 - 17 files changed, 1 insertion(+), 1464 deletions(-) delete mode 100644 Documentation/markers.txt delete mode 100644 include/linux/marker.h delete mode 100644 kernel/marker.c delete mode 100644 samples/markers/Makefile delete mode 100644 samples/markers/marker-example.c delete mode 100644 samples/markers/probe-example.c (limited to 'include/linux/module.h') diff --git a/Documentation/markers.txt b/Documentation/markers.txt deleted file mode 100644 index d2b3d0e91b26..000000000000 --- a/Documentation/markers.txt +++ /dev/null @@ -1,104 +0,0 @@ - Using the Linux Kernel Markers - - Mathieu Desnoyers - - -This document introduces Linux Kernel Markers and their use. It provides -examples of how to insert markers in the kernel and connect probe functions to -them and provides some examples of probe functions. - - -* Purpose of markers - -A marker placed in code provides a hook to call a function (probe) that you can -provide at runtime. A marker can be "on" (a probe is connected to it) or "off" -(no probe is attached). When a marker is "off" it has no effect, except for -adding a tiny time penalty (checking a condition for a branch) and space -penalty (adding a few bytes for the function call at the end of the -instrumented function and adds a data structure in a separate section). When a -marker is "on", the function you provide is called each time the marker is -executed, in the execution context of the caller. When the function provided -ends its execution, it returns to the caller (continuing from the marker site). - -You can put markers at important locations in the code. Markers are -lightweight hooks that can pass an arbitrary number of parameters, -described in a printk-like format string, to the attached probe function. - -They can be used for tracing and performance accounting. - - -* Usage - -In order to use the macro trace_mark, you should include linux/marker.h. - -#include - -And, - -trace_mark(subsystem_event, "myint %d mystring %s", someint, somestring); -Where : -- subsystem_event is an identifier unique to your event - - subsystem is the name of your subsystem. - - event is the name of the event to mark. -- "myint %d mystring %s" is the formatted string for the serializer. "myint" and - "mystring" are repectively the field names associated with the first and - second parameter. -- someint is an integer. -- somestring is a char pointer. - -Connecting a function (probe) to a marker is done by providing a probe (function -to call) for the specific marker through marker_probe_register() and can be -activated by calling marker_arm(). Marker deactivation can be done by calling -marker_disarm() as many times as marker_arm() has been called. Removing a probe -is done through marker_probe_unregister(); it will disarm the probe. - -marker_synchronize_unregister() must be called between probe unregistration and -the first occurrence of -- the end of module exit function, - to make sure there is no caller left using the probe; -- the free of any resource used by the probes, - to make sure the probes wont be accessing invalid data. -This, and the fact that preemption is disabled around the probe call, make sure -that probe removal and module unload are safe. See the "Probe example" section -below for a sample probe module. - -The marker mechanism supports inserting multiple instances of the same marker. -Markers can be put in inline functions, inlined static functions, and -unrolled loops as well as regular functions. - -The naming scheme "subsystem_event" is suggested here as a convention intended -to limit collisions. Marker names are global to the kernel: they are considered -as being the same whether they are in the core kernel image or in modules. -Conflicting format strings for markers with the same name will cause the markers -to be detected to have a different format string not to be armed and will output -a printk warning which identifies the inconsistency: - -"Format mismatch for probe probe_name (format), marker (format)" - -Another way to use markers is to simply define the marker without generating any -function call to actually call into the marker. This is useful in combination -with tracepoint probes in a scheme like this : - -void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk); - -DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name, - "arg1 %u pid %d"); - -notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk) -{ - struct marker *marker = &GET_MARKER(kernel_irq_entry); - /* write data to trace buffers ... */ -} - -* Probe / marker example - -See the example provided in samples/markers/src - -Compile them with your kernel. - -Run, as root : -modprobe marker-example (insmod order is not important) -modprobe probe-example -cat /proc/marker-example (returns an expected error) -rmmod marker-example probe-example -dmesg diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index ab8aef9bb8ea..8f079b865ad0 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index bb5b77c66d05..4678078fede8 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4af56036a6bf..b7bbb5ddd7ae 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/marker.h b/include/linux/marker.h deleted file mode 100644 index b85e74ca782f..000000000000 --- a/include/linux/marker.h +++ /dev/null @@ -1,221 +0,0 @@ -#ifndef _LINUX_MARKER_H -#define _LINUX_MARKER_H - -/* - * Code markup for dynamic and static tracing. - * - * See Documentation/marker.txt. - * - * (C) Copyright 2006 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include - -struct module; -struct marker; - -/** - * marker_probe_func - Type of a marker probe function - * @probe_private: probe private data - * @call_private: call site private data - * @fmt: format string - * @args: variable argument list pointer. Use a pointer to overcome C's - * inability to pass this around as a pointer in a portable manner in - * the callee otherwise. - * - * Type of marker probe functions. They receive the mdata and need to parse the - * format string to recover the variable argument list. - */ -typedef void marker_probe_func(void *probe_private, void *call_private, - const char *fmt, va_list *args); - -struct marker_probe_closure { - marker_probe_func *func; /* Callback */ - void *probe_private; /* Private probe data */ -}; - -struct marker { - const char *name; /* Marker name */ - const char *format; /* Marker format string, describing the - * variable argument list. - */ - char state; /* Marker state. */ - char ptype; /* probe type : 0 : single, 1 : multi */ - /* Probe wrapper */ - void (*call)(const struct marker *mdata, void *call_private, ...); - struct marker_probe_closure single; - struct marker_probe_closure *multi; - const char *tp_name; /* Optional tracepoint name */ - void *tp_cb; /* Optional tracepoint callback */ -} __attribute__((aligned(8))); - -#ifdef CONFIG_MARKERS - -#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ - NULL, tp_name_str, tp_cb } - -#define DEFINE_MARKER(name, format) \ - _DEFINE_MARKER(name, NULL, NULL, format) - -#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ - _DEFINE_MARKER(name, #tp_name, tp_cb, format) - -/* - * Note : the empty asm volatile with read constraint is used here instead of a - * "used" attribute to fix a gcc 4.1.x bug. - * Make sure the alignment of the structure in the __markers section will - * not add unwanted padding between the beginning of the section and the - * structure. Force alignment to the same alignment as the section start. - * - * The "generic" argument controls which marker enabling mechanism must be used. - * If generic is true, a variable read is used. - * If generic is false, immediate values are used. - */ -#define __trace_mark(generic, name, call_private, format, args...) \ - do { \ - DEFINE_MARKER(name, format); \ - __mark_check_format(format, ## args); \ - if (unlikely(__mark_##name.state)) { \ - (*__mark_##name.call) \ - (&__mark_##name, call_private, ## args);\ - } \ - } while (0) - -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ - __mark_check_format(format, ## args); \ - (*__mark_##name.call)(&__mark_##name, call_private, \ - ## args); \ - } while (0) - -extern void marker_update_probe_range(struct marker *begin, - struct marker *end); - -#define GET_MARKER(name) (__mark_##name) - -#else /* !CONFIG_MARKERS */ -#define DEFINE_MARKER(name, tp_name, tp_cb, format) -#define __trace_mark(generic, name, call_private, format, args...) \ - __mark_check_format(format, ## args) -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - __mark_check_format(format, ## args); \ - } while (0) -static inline void marker_update_probe_range(struct marker *begin, - struct marker *end) -{ } -#define GET_MARKER(name) -#endif /* CONFIG_MARKERS */ - -/** - * trace_mark - Marker using code patching - * @name: marker name, not quoted. - * @format: format string - * @args...: variable argument list - * - * Places a marker using optimized code patching technique (imv_read()) - * to be enabled when immediate values are present. - */ -#define trace_mark(name, format, args...) \ - __trace_mark(0, name, NULL, format, ## args) - -/** - * _trace_mark - Marker using variable read - * @name: marker name, not quoted. - * @format: format string - * @args...: variable argument list - * - * Places a marker using a standard memory read (_imv_read()) to be - * enabled. Should be used for markers in code paths where instruction - * modification based enabling is not welcome. (__init and __exit functions, - * lockdep, some traps, printk). - */ -#define _trace_mark(name, format, args...) \ - __trace_mark(1, name, NULL, format, ## args) - -/** - * trace_mark_tp - Marker in a tracepoint callback - * @name: marker name, not quoted. - * @tp_name: tracepoint name, not quoted. - * @tp_cb: tracepoint callback. Should have an associated global symbol so it - * is not optimized away by the compiler (should not be static). - * @format: format string - * @args...: variable argument list - * - * Places a marker in a tracepoint callback. - */ -#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \ - __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args) - -/** - * MARK_NOARGS - Format string for a marker with no argument. - */ -#define MARK_NOARGS " " - -/* To be used for string format validity checking with gcc */ -static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) -{ -} - -#define __mark_check_format(format, args...) \ - do { \ - if (0) \ - ___mark_check_format(format, ## args); \ - } while (0) - -extern marker_probe_func __mark_empty_function; - -extern void marker_probe_cb(const struct marker *mdata, - void *call_private, ...); - -/* - * Connect a probe to a marker. - * private data pointer must be a valid allocated memory address, or NULL. - */ -extern int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *probe_private); - -/* - * Returns the private data given to marker_probe_register. - */ -extern int marker_probe_unregister(const char *name, - marker_probe_func *probe, void *probe_private); -/* - * Unregister a marker by providing the registered private data. - */ -extern int marker_probe_unregister_private_data(marker_probe_func *probe, - void *probe_private); - -extern void *marker_get_private_data(const char *name, marker_probe_func *probe, - int num); - -/* - * marker_synchronize_unregister must be called between the last marker probe - * unregistration and the first one of - * - the end of module exit function - * - the free of any resource used by the probes - * to ensure the code and data are valid for any possibly running probes. - */ -#define marker_synchronize_unregister() synchronize_sched() - -#endif diff --git a/include/linux/module.h b/include/linux/module.h index f8f92d015efe..1c755b2f937d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -327,10 +326,6 @@ struct module /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; -#ifdef CONFIG_MARKERS - struct marker *markers; - unsigned int num_markers; -#endif #ifdef CONFIG_TRACEPOINTS struct tracepoint *tracepoints; unsigned int num_tracepoints; @@ -535,8 +530,6 @@ int unregister_module_notifier(struct notifier_block * nb); extern void print_modules(void); -extern void module_update_markers(void); - extern void module_update_tracepoints(void); extern int module_get_iter_tracepoints(struct tracepoint_iter *iter); @@ -651,10 +644,6 @@ static inline void print_modules(void) { } -static inline void module_update_markers(void) -{ -} - static inline void module_update_tracepoints(void) { } diff --git a/init/Kconfig b/init/Kconfig index 8e8b76d8a272..4cc0fa13d5eb 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1054,13 +1054,6 @@ config PROFILING config TRACEPOINTS bool -config MARKERS - bool "Activate markers" - select TRACEPOINTS - help - Place an empty function call at each marker site. Can be - dynamically changed for a probe function. - source "arch/Kconfig" config SLOW_WORK diff --git a/kernel/Makefile b/kernel/Makefile index 3d9c7e27e3f9..7c9b0a585502 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -87,7 +87,6 @@ obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o -obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ diff --git a/kernel/marker.c b/kernel/marker.c deleted file mode 100644 index ea54f2647868..000000000000 --- a/kernel/marker.c +++ /dev/null @@ -1,930 +0,0 @@ -/* - * Copyright (C) 2007 Mathieu Desnoyers - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct marker __start___markers[]; -extern struct marker __stop___markers[]; - -/* Set to 1 to enable marker debug output */ -static const int marker_debug; - -/* - * markers_mutex nests inside module_mutex. Markers mutex protects the builtin - * and module markers and the hash table. - */ -static DEFINE_MUTEX(markers_mutex); - -/* - * Marker hash table, containing the active markers. - * Protected by module_mutex. - */ -#define MARKER_HASH_BITS 6 -#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) -static struct hlist_head marker_table[MARKER_TABLE_SIZE]; - -/* - * Note about RCU : - * It is used to make sure every handler has finished using its private data - * between two consecutive operation (add or remove) on a given marker. It is - * also used to delay the free of multiple probes array until a quiescent state - * is reached. - * marker entries modifications are protected by the markers_mutex. - */ -struct marker_entry { - struct hlist_node hlist; - char *format; - /* Probe wrapper */ - void (*call)(const struct marker *mdata, void *call_private, ...); - struct marker_probe_closure single; - struct marker_probe_closure *multi; - int refcount; /* Number of times armed. 0 if disarmed. */ - struct rcu_head rcu; - void *oldptr; - int rcu_pending; - unsigned char ptype:1; - unsigned char format_allocated:1; - char name[0]; /* Contains name'\0'format'\0' */ -}; - -/** - * __mark_empty_function - Empty probe callback - * @probe_private: probe private data - * @call_private: call site private data - * @fmt: format string - * @...: variable argument list - * - * Empty callback provided as a probe to the markers. By providing this to a - * disabled marker, we make sure the execution flow is always valid even - * though the function pointer change and the marker enabling are two distinct - * operations that modifies the execution flow of preemptible code. - */ -notrace void __mark_empty_function(void *probe_private, void *call_private, - const char *fmt, va_list *args) -{ -} -EXPORT_SYMBOL_GPL(__mark_empty_function); - -/* - * marker_probe_cb Callback that prepares the variable argument list for probes. - * @mdata: pointer of type struct marker - * @call_private: caller site private data - * @...: Variable argument list. - * - * Since we do not use "typical" pointer based RCU in the 1 argument case, we - * need to put a full smp_rmb() in this branch. This is why we do not use - * rcu_dereference() for the pointer read. - */ -notrace void marker_probe_cb(const struct marker *mdata, - void *call_private, ...) -{ - va_list args; - char ptype; - - /* - * rcu_read_lock_sched does two things : disabling preemption to make - * sure the teardown of the callbacks can be done correctly when they - * are in modules and they insure RCU read coherency. - */ - rcu_read_lock_sched_notrace(); - ptype = mdata->ptype; - if (likely(!ptype)) { - marker_probe_func *func; - /* Must read the ptype before ptr. They are not data dependant, - * so we put an explicit smp_rmb() here. */ - smp_rmb(); - func = mdata->single.func; - /* Must read the ptr before private data. They are not data - * dependant, so we put an explicit smp_rmb() here. */ - smp_rmb(); - va_start(args, call_private); - func(mdata->single.probe_private, call_private, mdata->format, - &args); - va_end(args); - } else { - struct marker_probe_closure *multi; - int i; - /* - * Read mdata->ptype before mdata->multi. - */ - smp_rmb(); - multi = mdata->multi; - /* - * multi points to an array, therefore accessing the array - * depends on reading multi. However, even in this case, - * we must insure that the pointer is read _before_ the array - * data. Same as rcu_dereference, but we need a full smp_rmb() - * in the fast path, so put the explicit barrier here. - */ - smp_read_barrier_depends(); - for (i = 0; multi[i].func; i++) { - va_start(args, call_private); - multi[i].func(multi[i].probe_private, call_private, - mdata->format, &args); - va_end(args); - } - } - rcu_read_unlock_sched_notrace(); -} -EXPORT_SYMBOL_GPL(marker_probe_cb); - -/* - * marker_probe_cb Callback that does not prepare the variable argument list. - * @mdata: pointer of type struct marker - * @call_private: caller site private data - * @...: Variable argument list. - * - * Should be connected to markers "MARK_NOARGS". - */ -static notrace void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, ...) -{ - va_list args; /* not initialized */ - char ptype; - - rcu_read_lock_sched_notrace(); - ptype = mdata->ptype; - if (likely(!ptype)) { - marker_probe_func *func; - /* Must read the ptype before ptr. They are not data dependant, - * so we put an explicit smp_rmb() here. */ - smp_rmb(); - func = mdata->single.func; - /* Must read the ptr before private data. They are not data - * dependant, so we put an explicit smp_rmb() here. */ - smp_rmb(); - func(mdata->single.probe_private, call_private, mdata->format, - &args); - } else { - struct marker_probe_closure *multi; - int i; - /* - * Read mdata->ptype before mdata->multi. - */ - smp_rmb(); - multi = mdata->multi; - /* - * multi points to an array, therefore accessing the array - * depends on reading multi. However, even in this case, - * we must insure that the pointer is read _before_ the array - * data. Same as rcu_dereference, but we need a full smp_rmb() - * in the fast path, so put the explicit barrier here. - */ - smp_read_barrier_depends(); - for (i = 0; multi[i].func; i++) - multi[i].func(multi[i].probe_private, call_private, - mdata->format, &args); - } - rcu_read_unlock_sched_notrace(); -} - -static void free_old_closure(struct rcu_head *head) -{ - struct marker_entry *entry = container_of(head, - struct marker_entry, rcu); - kfree(entry->oldptr); - /* Make sure we free the data before setting the pending flag to 0 */ - smp_wmb(); - entry->rcu_pending = 0; -} - -static void debug_print_probes(struct marker_entry *entry) -{ - int i; - - if (!marker_debug) - return; - - if (!entry->ptype) { - printk(KERN_DEBUG "Single probe : %p %p\n", - entry->single.func, - entry->single.probe_private); - } else { - for (i = 0; entry->multi[i].func; i++) - printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, - entry->multi[i].func, - entry->multi[i].probe_private); - } -} - -static struct marker_probe_closure * -marker_entry_add_probe(struct marker_entry *entry, - marker_probe_func *probe, void *probe_private) -{ - int nr_probes = 0; - struct marker_probe_closure *old, *new; - - WARN_ON(!probe); - - debug_print_probes(entry); - old = entry->multi; - if (!entry->ptype) { - if (entry->single.func == probe && - entry->single.probe_private == probe_private) - return ERR_PTR(-EBUSY); - if (entry->single.func == __mark_empty_function) { - /* 0 -> 1 probes */ - entry->single.func = probe; - entry->single.probe_private = probe_private; - entry->refcount = 1; - entry->ptype = 0; - debug_print_probes(entry); - return NULL; - } else { - /* 1 -> 2 probes */ - nr_probes = 1; - old = NULL; - } - } else { - /* (N -> N+1), (N != 0, 1) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) - if (old[nr_probes].func == probe - && old[nr_probes].probe_private - == probe_private) - return ERR_PTR(-EBUSY); - } - /* + 2 : one for new probe, one for NULL func */ - new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), - GFP_KERNEL); - if (new == NULL) - return ERR_PTR(-ENOMEM); - if (!old) - new[0] = entry->single; - else - memcpy(new, old, - nr_probes * sizeof(struct marker_probe_closure)); - new[nr_probes].func = probe; - new[nr_probes].probe_private = probe_private; - entry->refcount = nr_probes + 1; - entry->multi = new; - entry->ptype = 1; - debug_print_probes(entry); - return old; -} - -static struct marker_probe_closure * -marker_entry_remove_probe(struct marker_entry *entry, - marker_probe_func *probe, void *probe_private) -{ - int nr_probes = 0, nr_del = 0, i; - struct marker_probe_closure *old, *new; - - old = entry->multi; - - debug_print_probes(entry); - if (!entry->ptype) { - /* 0 -> N is an error */ - WARN_ON(entry->single.func == __mark_empty_function); - /* 1 -> 0 probes */ - WARN_ON(probe && entry->single.func != probe); - WARN_ON(entry->single.probe_private != probe_private); - entry->single.func = __mark_empty_function; - entry->refcount = 0; - entry->ptype = 0; - debug_print_probes(entry); - return NULL; - } else { - /* (N -> M), (N > 1, M >= 0) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if ((!probe || old[nr_probes].func == probe) - && old[nr_probes].probe_private - == probe_private) - nr_del++; - } - } - - if (nr_probes - nr_del == 0) { - /* N -> 0, (N > 1) */ - entry->single.func = __mark_empty_function; - entry->refcount = 0; - entry->ptype = 0; - } else if (nr_probes - nr_del == 1) { - /* N -> 1, (N > 1) */ - for (i = 0; old[i].func; i++) - if ((probe && old[i].func != probe) || - old[i].probe_private != probe_private) - entry->single = old[i]; - entry->refcount = 1; - entry->ptype = 0; - } else { - int j = 0; - /* N -> M, (N > 1, M > 1) */ - /* + 1 for NULL */ - new = kzalloc((nr_probes - nr_del + 1) - * sizeof(struct marker_probe_closure), GFP_KERNEL); - if (new == NULL) - return ERR_PTR(-ENOMEM); - for (i = 0; old[i].func; i++) - if ((probe && old[i].func != probe) || - old[i].probe_private != probe_private) - new[j++] = old[i]; - entry->refcount = nr_probes - nr_del; - entry->ptype = 1; - entry->multi = new; - } - debug_print_probes(entry); - return old; -} - -/* - * Get marker if the marker is present in the marker hash table. - * Must be called with markers_mutex held. - * Returns NULL if not present. - */ -static struct marker_entry *get_marker(const char *name) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - u32 hash = jhash(name, strlen(name), 0); - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) - return e; - } - return NULL; -} - -/* - * Add the marker to the marker hash table. Must be called with markers_mutex - * held. - */ -static struct marker_entry *add_marker(const char *name, const char *format) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - size_t name_len = strlen(name) + 1; - size_t format_len = 0; - u32 hash = jhash(name, name_len-1, 0); - - if (format) - format_len = strlen(format) + 1; - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - printk(KERN_NOTICE - "Marker %s busy\n", name); - return ERR_PTR(-EBUSY); /* Already there */ - } - } - /* - * Using kmalloc here to allocate a variable length element. Could - * cause some memory fragmentation if overused. - */ - e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, - GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - memcpy(&e->name[0], name, name_len); - if (format) { - e->format = &e->name[name_len]; - memcpy(e->format, format, format_len); - if (strcmp(e->format, MARK_NOARGS) == 0) - e->call = marker_probe_cb_noarg; - else - e->call = marker_probe_cb; - trace_mark(core_marker_format, "name %s format %s", - e->name, e->format); - } else { - e->format = NULL; - e->call = marker_probe_cb; - } - e->single.func = __mark_empty_function; - e->single.probe_private = NULL; - e->multi = NULL; - e->ptype = 0; - e->format_allocated = 0; - e->refcount = 0; - e->rcu_pending = 0; - hlist_add_head(&e->hlist, head); - return e; -} - -/* - * Remove the marker from the marker hash table. Must be called with mutex_lock - * held. - */ -static int remove_marker(const char *name) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - int found = 0; - size_t len = strlen(name) + 1; - u32 hash = jhash(name, len-1, 0); - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - found = 1; - break; - } - } - if (!found) - return -ENOENT; - if (e->single.func != __mark_empty_function) - return -EBUSY; - hlist_del(&e->hlist); - if (e->format_allocated) - kfree(e->format); - /* Make sure the call_rcu has been executed */ - if (e->rcu_pending) - rcu_barrier_sched(); - kfree(e); - return 0; -} - -/* - * Set the mark_entry format to the format found in the element. - */ -static int marker_set_format(struct marker_entry *entry, const char *format) -{ - entry->format = kstrdup(format, GFP_KERNEL); - if (!entry->format) - return -ENOMEM; - entry->format_allocated = 1; - - trace_mark(core_marker_format, "name %s format %s", - entry->name, entry->format); - return 0; -} - -/* - * Sets the probe callback corresponding to one marker. - */ -static int set_marker(struct marker_entry *entry, struct marker *elem, - int active) -{ - int ret = 0; - WARN_ON(strcmp(entry->name, elem->name) != 0); - - if (entry->format) { - if (strcmp(entry->format, elem->format) != 0) { - printk(KERN_NOTICE - "Format mismatch for probe %s " - "(%s), marker (%s)\n", - entry->name, - entry->format, - elem->format); - return -EPERM; - } - } else { - ret = marker_set_format(entry, elem->format); - if (ret) - return ret; - } - - /* - * probe_cb setup (statically known) is done here. It is - * asynchronous with the rest of execution, therefore we only - * pass from a "safe" callback (with argument) to an "unsafe" - * callback (does not set arguments). - */ - elem->call = entry->call; - /* - * Sanity check : - * We only update the single probe private data when the ptr is - * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) - */ - WARN_ON(elem->single.func != __mark_empty_function - && elem->single.probe_private != entry->single.probe_private - && !elem->ptype); - elem->single.probe_private = entry->single.probe_private; - /* - * Make sure the private data is valid when we update the - * single probe ptr. - */ - smp_wmb(); - elem->single.func = entry->single.func; - /* - * We also make sure that the new probe callbacks array is consistent - * before setting a pointer to it. - */ - rcu_assign_pointer(elem->multi, entry->multi); - /* - * Update the function or multi probe array pointer before setting the - * ptype. - */ - smp_wmb(); - elem->ptype = entry->ptype; - - if (elem->tp_name && (active ^ elem->state)) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - - if (active) { - /* - * try_module_get should always succeed because we hold - * lock_module() to get the tp_cb address. - */ - ret = try_module_get(__module_text_address( - (unsigned long)elem->tp_cb)); - BUG_ON(!ret); - ret = tracepoint_probe_register_noupdate( - elem->tp_name, - elem->tp_cb); - } else { - ret = tracepoint_probe_unregister_noupdate( - elem->tp_name, - elem->tp_cb); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address( - (unsigned long)elem->tp_cb)); - } - } - elem->state = active; - - return ret; -} - -/* - * Disable a marker and its probe callback. - * Note: only waiting an RCU period after setting elem->call to the empty - * function insures that the original callback is not used anymore. This insured - * by rcu_read_lock_sched around the call site. - */ -static void disable_marker(struct marker *elem) -{ - int ret; - - /* leave "call" as is. It is known statically. */ - if (elem->tp_name && elem->state) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - ret = tracepoint_probe_unregister_noupdate(elem->tp_name, - elem->tp_cb); - WARN_ON(ret); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address((unsigned long)elem->tp_cb)); - } - elem->state = 0; - elem->single.func = __mark_empty_function; - /* Update the function before setting the ptype */ - smp_wmb(); - elem->ptype = 0; /* single probe */ - /* - * Leave the private data and id there, because removal is racy and - * should be done only after an RCU period. These are never used until - * the next initialization anyway. - */ -} - -/** - * marker_update_probe_range - Update a probe range - * @begin: beginning of the range - * @end: end of the range - * - * Updates the probe callback corresponding to a range of markers. - */ -void marker_update_probe_range(struct marker *begin, - struct marker *end) -{ - struct marker *iter; - struct marker_entry *mark_entry; - - mutex_lock(&markers_mutex); - for (iter = begin; iter < end; iter++) { - mark_entry = get_marker(iter->name); - if (mark_entry) { - set_marker(mark_entry, iter, !!mark_entry->refcount); - /* - * ignore error, continue - */ - } else { - disable_marker(iter); - } - } - mutex_unlock(&markers_mutex); -} - -/* - * Update probes, removing the faulty probes. - * - * Internal callback only changed before the first probe is connected to it. - * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 - * transitions. All other transitions will leave the old private data valid. - * This makes the non-atomicity of the callback/private data updates valid. - * - * "special case" updates : - * 0 -> 1 callback - * 1 -> 0 callback - * 1 -> 2 callbacks - * 2 -> 1 callbacks - * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. - * Site effect : marker_set_format may delete the marker entry (creating a - * replacement). - */ -static void marker_update_probes(void) -{ - /* Core kernel markers */ - marker_update_probe_range(__start___markers, __stop___markers); - /* Markers in modules. */ - module_update_markers(); - tracepoint_probe_update_all(); -} - -/** - * marker_probe_register - Connect a probe to a marker - * @name: marker name - * @format: format string - * @probe: probe handler - * @probe_private: probe private data - * - * private data must be a valid allocated memory address, or NULL. - * Returns 0 if ok, error value on error. - * The probe address must at least be aligned on the architecture pointer size. - */ -int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - int ret = 0; - struct marker_probe_closure *old; - - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) { - entry = add_marker(name, format); - if (IS_ERR(entry)) - ret = PTR_ERR(entry); - } else if (format) { - if (!entry->format) - ret = marker_set_format(entry, format); - else if (strcmp(entry->format, format)) - ret = -EPERM; - } - if (ret) - goto end; - - /* - * If we detect that a call_rcu is pending for this marker, - * make sure it's executed now. - */ - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_add_probe(entry, probe, probe_private); - if (IS_ERR(old)) { - ret = PTR_ERR(old); - goto end; - } - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_register); - -/** - * marker_probe_unregister - Disconnect a probe from a marker - * @name: marker name - * @probe: probe function pointer - * @probe_private: probe private data - * - * Returns the private data given to marker_probe_register, or an ERR_PTR(). - * We do not need to call a synchronize_sched to make sure the probes have - * finished running before doing a module unload, because the module unload - * itself uses stop_machine(), which insures that every preempt disabled section - * have finished. - */ -int marker_probe_unregister(const char *name, - marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - struct marker_probe_closure *old; - int ret = -ENOENT; - - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_remove_probe(entry, probe, probe_private); - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); - remove_marker(name); /* Ignore busy error message */ - ret = 0; -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_unregister); - -static struct marker_entry * -get_marker_from_private_data(marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - unsigned int i; - struct hlist_head *head; - struct hlist_node *node; - - for (i = 0; i < MARKER_TABLE_SIZE; i++) { - head = &marker_table[i]; - hlist_for_each_entry(entry, node, head, hlist) { - if (!entry->ptype) { - if (entry->single.func == probe - && entry->single.probe_private - == probe_private) - return entry; - } else { - struct marker_probe_closure *closure; - closure = entry->multi; - for (i = 0; closure[i].func; i++) { - if (closure[i].func == probe && - closure[i].probe_private - == probe_private) - return entry; - } - } - } - } - return NULL; -} - -/** - * marker_probe_unregister_private_data - Disconnect a probe from a marker - * @probe: probe function - * @probe_private: probe private data - * - * Unregister a probe by providing the registered private data. - * Only removes the first marker found in hash table. - * Return 0 on success or error value. - * We do not need to call a synchronize_sched to make sure the probes have - * finished running before doing a module unload, because the module unload - * itself uses stop_machine(), which insures that every preempt disabled section - * have finished. - */ -int marker_probe_unregister_private_data(marker_probe_func *probe, - void *probe_private) -{ - struct marker_entry *entry; - int ret = 0; - struct marker_probe_closure *old; - - mutex_lock(&markers_mutex); - entry = get_marker_from_private_data(probe, probe_private); - if (!entry) { - ret = -ENOENT; - goto end; - } - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_remove_probe(entry, NULL, probe_private); - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker_from_private_data(probe, probe_private); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); - remove_marker(entry->name); /* Ignore busy error message */ -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); - -/** - * marker_get_private_data - Get a marker's probe private data - * @name: marker name - * @probe: probe to match - * @num: get the nth matching probe's private data - * - * Returns the nth private data pointer (starting from 0) matching, or an - * ERR_PTR. - * Returns the private data pointer, or an ERR_PTR. - * The private data pointer should _only_ be dereferenced if the caller is the - * owner of the data, or its content could vanish. This is mostly used to - * confirm that a caller is the owner of a registered probe. - */ -void *marker_get_private_data(const char *name, marker_probe_func *probe, - int num) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - size_t name_len = strlen(name) + 1; - u32 hash = jhash(name, name_len-1, 0); - int i; - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - if (!e->ptype) { - if (num == 0 && e->single.func == probe) - return e->single.probe_private; - } else { - struct marker_probe_closure *closure; - int match = 0; - closure = e->multi; - for (i = 0; closure[i].func; i++) { - if (closure[i].func != probe) - continue; - if (match++ == num) - return closure[i].probe_private; - } - } - break; - } - } - return ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL_GPL(marker_get_private_data); - -#ifdef CONFIG_MODULES - -int marker_module_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct module *mod = data; - - switch (val) { - case MODULE_STATE_COMING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - case MODULE_STATE_GOING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - } - return 0; -} - -struct notifier_block marker_module_nb = { - .notifier_call = marker_module_notify, - .priority = 0, -}; - -static int init_markers(void) -{ - return register_module_notifier(&marker_module_nb); -} -__initcall(init_markers); - -#endif /* CONFIG_MODULES */ diff --git a/kernel/module.c b/kernel/module.c index 05ce49ced8f6..b6ee424245dd 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2237,10 +2237,6 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->ctors), &mod->num_ctors); #endif -#ifdef CONFIG_MARKERS - mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", - sizeof(*mod->markers), &mod->num_markers); -#endif #ifdef CONFIG_TRACEPOINTS mod->tracepoints = section_objs(hdr, sechdrs, secstrings, "__tracepoints", @@ -2958,20 +2954,6 @@ void module_layout(struct module *mod, EXPORT_SYMBOL(module_layout); #endif -#ifdef CONFIG_MARKERS -void module_update_markers(void) -{ - struct module *mod; - - mutex_lock(&module_mutex); - list_for_each_entry(mod, &modules, list) - if (!mod->taints) - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - mutex_unlock(&module_mutex); -} -#endif - #ifdef CONFIG_TRACEPOINTS void module_update_tracepoints(void) { diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 687699d365ae..2547d8813cf0 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/samples/Kconfig b/samples/Kconfig index 428b065ba695..b92bde3c6a89 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -7,12 +7,6 @@ menuconfig SAMPLES if SAMPLES -config SAMPLE_MARKERS - tristate "Build markers examples -- loadable modules only" - depends on MARKERS && m - help - This build markers example modules. - config SAMPLE_TRACEPOINTS tristate "Build tracepoints examples -- loadable modules only" depends on TRACEPOINTS && m diff --git a/samples/Makefile b/samples/Makefile index 13e4b470b539..43343a03b1f4 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,3 +1,3 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/ +obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ diff --git a/samples/markers/Makefile b/samples/markers/Makefile deleted file mode 100644 index 6d7231265f0f..000000000000 --- a/samples/markers/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# builds the kprobes example kernel modules; -# then to use one (as root): insmod - -obj-$(CONFIG_SAMPLE_MARKERS) += probe-example.o marker-example.o diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c deleted file mode 100644 index e9cd9c0bc84f..000000000000 --- a/samples/markers/marker-example.c +++ /dev/null @@ -1,53 +0,0 @@ -/* marker-example.c - * - * Executes a marker when /proc/marker-example is opened. - * - * (C) Copyright 2007 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include -#include -#include - -struct proc_dir_entry *pentry_example; - -static int my_open(struct inode *inode, struct file *file) -{ - int i; - - trace_mark(subsystem_event, "integer %d string %s", 123, - "example string"); - for (i = 0; i < 10; i++) - trace_mark(subsystem_eventb, MARK_NOARGS); - return -EPERM; -} - -static struct file_operations mark_ops = { - .open = my_open, -}; - -static int __init example_init(void) -{ - printk(KERN_ALERT "example init\n"); - pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops); - if (!pentry_example) - return -EPERM; - return 0; -} - -static void __exit example_exit(void) -{ - printk(KERN_ALERT "example exit\n"); - remove_proc_entry("marker-example", NULL); -} - -module_init(example_init) -module_exit(example_exit) - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("Marker example"); diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c deleted file mode 100644 index 2dfb3b32937e..000000000000 --- a/samples/markers/probe-example.c +++ /dev/null @@ -1,92 +0,0 @@ -/* probe-example.c - * - * Connects two functions to marker call sites. - * - * (C) Copyright 2007 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include -#include -#include -#include - -struct probe_data { - const char *name; - const char *format; - marker_probe_func *probe_func; -}; - -void probe_subsystem_event(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - /* Declare args */ - unsigned int value; - const char *mystr; - - /* Assign args */ - value = va_arg(*args, typeof(value)); - mystr = va_arg(*args, typeof(mystr)); - - /* Call printk */ - printk(KERN_INFO "Value %u, string %s\n", value, mystr); - - /* or count, check rights, serialize data in a buffer */ -} - -atomic_t eventb_count = ATOMIC_INIT(0); - -void probe_subsystem_eventb(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - /* Increment counter */ - atomic_inc(&eventb_count); -} - -static struct probe_data probe_array[] = -{ - { .name = "subsystem_event", - .format = "integer %d string %s", - .probe_func = probe_subsystem_event }, - { .name = "subsystem_eventb", - .format = MARK_NOARGS, - .probe_func = probe_subsystem_eventb }, -}; - -static int __init probe_init(void) -{ - int result; - int i; - - for (i = 0; i < ARRAY_SIZE(probe_array); i++) { - result = marker_probe_register(probe_array[i].name, - probe_array[i].format, - probe_array[i].probe_func, &probe_array[i]); - if (result) - printk(KERN_INFO "Unable to register probe %s\n", - probe_array[i].name); - } - return 0; -} - -static void __exit probe_fini(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(probe_array); i++) - marker_probe_unregister(probe_array[i].name, - probe_array[i].probe_func, &probe_array[i]); - printk(KERN_INFO "Number of event b : %u\n", - atomic_read(&eventb_count)); - marker_synchronize_unregister(); -} - -module_init(probe_init); -module_exit(probe_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("SUBSYSTEM Probe"); diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index f4053dc7b5d6..8f14c81abbc7 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -13,7 +13,6 @@ # 2) modpost is then used to # 3) create one .mod.c file pr. module # 4) create one Module.symvers file with CRC for all exported symbols -# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers # 5) compile all .mod.c files # 6) final link of the module to a file @@ -59,10 +58,6 @@ include scripts/Makefile.lib kernelsymfile := $(objtree)/Module.symvers modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers -kernelmarkersfile := $(objtree)/Module.markers -modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers - -markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) # Step 1), find all modules listed in $(MODVERDIR)/ __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) @@ -85,8 +80,6 @@ modpost = scripts/mod/modpost \ $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ - $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ - $(if $(CONFIG_MARKERS),-M $(markersfile)) \ $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \ $(if $(cross_build),-c) @@ -101,17 +94,12 @@ quiet_cmd_kernel-mod = MODPOST $@ cmd_kernel-mod = $(modpost) $@ vmlinux.o: FORCE - @rm -fr $(kernelmarkersfile) $(call cmd,kernel-mod) # Declare generated files as targets for modpost $(symverfile): __modpost ; $(modules:.ko=.mod.c): __modpost ; -ifdef CONFIG_MARKERS -$(markersfile): __modpost ; -endif - # Step 5), compile all *.mod.c files -- cgit v1.2.3-71-gd317 From 4a4962263f07d14660849ec134ee42b63e95ea9a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:50:42 +0100 Subject: module: reduce symbol table for loaded modules (v2) Discard all symbols not interesting for kallsyms use: absolute, section, and in the common case (!KALLSYMS_ALL) data ones. Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- include/linux/module.h | 10 ++++-- kernel/module.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 7 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 1c755b2f937d..1d3ccb173fd6 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -308,9 +308,13 @@ struct module #endif #ifdef CONFIG_KALLSYMS - /* We keep the symbol and string tables for kallsyms. */ - Elf_Sym *symtab; - unsigned int num_symtab; + /* + * We keep the symbol and string tables for kallsyms. + * The core_* fields below are temporary, loader-only (they + * could really be discarded after module init). + */ + Elf_Sym *symtab, *core_symtab; + unsigned int num_symtab, core_num_syms; char *strtab; /* Section attributes */ diff --git a/kernel/module.c b/kernel/module.c index e6bc4b28aa62..97f4d5e15535 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1862,13 +1862,68 @@ static char elf_type(const Elf_Sym *sym, return '?'; } +static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, + unsigned int shnum) +{ + const Elf_Shdr *sec; + + if (src->st_shndx == SHN_UNDEF + || src->st_shndx >= shnum + || !src->st_name) + return false; + + sec = sechdrs + src->st_shndx; + if (!(sec->sh_flags & SHF_ALLOC) +#ifndef CONFIG_KALLSYMS_ALL + || !(sec->sh_flags & SHF_EXECINSTR) +#endif + || (sec->sh_entsize & INIT_OFFSET_MASK)) + return false; + + return true; +} + +static unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + const Elf_Ehdr *hdr, + const char *secstrings) +{ + unsigned long symoffs; + Elf_Shdr *symsect = sechdrs + symindex; + const Elf_Sym *src; + unsigned int i, nsrc, ndst; + + /* Put symbol section at end of init part of module. */ + symsect->sh_flags |= SHF_ALLOC; + symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, + symindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + symsect->sh_name); + + src = (void *)hdr + symsect->sh_offset; + nsrc = symsect->sh_size / sizeof(*src); + for (ndst = i = 1; i < nsrc; ++i, ++src) + if (is_core_symbol(src, sechdrs, hdr->e_shnum)) + ++ndst; + + /* Append room for core symbols at end of core part. */ + symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); + mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + + return symoffs; +} + static void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, + unsigned long symoffs, const char *secstrings) { - unsigned int i; + unsigned int i, ndst; + const Elf_Sym *src; + Elf_Sym *dst; mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); @@ -1878,12 +1933,32 @@ static void add_kallsyms(struct module *mod, for (i = 0; i < mod->num_symtab; i++) mod->symtab[i].st_info = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); + + mod->core_symtab = dst = mod->module_core + symoffs; + src = mod->symtab; + *dst = *src; + for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { + if (!is_core_symbol(src, sechdrs, shnum)) + continue; + dst[ndst] = *src; + ++ndst; + } + mod->core_num_syms = ndst; } #else +static inline unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + const Elf_Hdr *hdr, + const char *secstrings) +{ +} static inline void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, + unsigned long symoffs, const char *secstrings) { } @@ -1959,6 +2034,9 @@ static noinline struct module *load_module(void __user *umod, struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ +#ifdef CONFIG_KALLSYMS + unsigned long symoffs; +#endif mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -2041,8 +2119,7 @@ static noinline struct module *load_module(void __user *umod, sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; #ifdef CONFIG_KALLSYMS - /* Keep symbol and string tables for decoding later. */ - sechdrs[symindex].sh_flags |= SHF_ALLOC; + /* Keep string table for decoding later. */ sechdrs[strindex].sh_flags |= SHF_ALLOC; #endif @@ -2109,6 +2186,7 @@ static noinline struct module *load_module(void __user *umod, this is done generically; there doesn't appear to be any special cases for the architectures. */ layout_sections(mod, hdr, sechdrs, secstrings); + symoffs = layout_symtab(mod, sechdrs, symindex, hdr, secstrings); /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); @@ -2313,7 +2391,8 @@ static noinline struct module *load_module(void __user *umod, percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, + symoffs, secstrings); if (!mod->taints) { struct _ddebug *debug; @@ -2491,6 +2570,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, /* Drop initial reference. */ module_put(mod); trim_init_extable(mod); +#ifdef CONFIG_KALLSYMS + mod->num_symtab = mod->core_num_syms; + mod->symtab = mod->core_symtab; +#endif module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3-71-gd317 From 554bdfe5acf3715e87c8d5e25a4f9a896ac9f014 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:51:44 +0100 Subject: module: reduce string table for loaded modules (v2) Also remove all parts of the string table (referenced by the symbol table) that are not needed for kallsyms use (i.e. which were only referenced by symbols discarded by the previous patch, or not referenced at all for whatever reason). Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- kernel/module.c | 68 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 13 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 1d3ccb173fd6..aca980365956 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -315,7 +315,7 @@ struct module */ Elf_Sym *symtab, *core_symtab; unsigned int num_symtab, core_num_syms; - char *strtab; + char *strtab, *core_strtab; /* Section attributes */ struct module_sect_attrs *sect_attrs; diff --git a/kernel/module.c b/kernel/module.c index 97f4d5e15535..39827c3d9484 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1886,12 +1886,17 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, static unsigned long layout_symtab(struct module *mod, Elf_Shdr *sechdrs, unsigned int symindex, + unsigned int strindex, const Elf_Ehdr *hdr, - const char *secstrings) + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) { unsigned long symoffs; Elf_Shdr *symsect = sechdrs + symindex; + Elf_Shdr *strsect = sechdrs + strindex; const Elf_Sym *src; + const char *strtab; unsigned int i, nsrc, ndst; /* Put symbol section at end of init part of module. */ @@ -1902,14 +1907,31 @@ static unsigned long layout_symtab(struct module *mod, src = (void *)hdr + symsect->sh_offset; nsrc = symsect->sh_size / sizeof(*src); + strtab = (void *)hdr + strsect->sh_offset; for (ndst = i = 1; i < nsrc; ++i, ++src) - if (is_core_symbol(src, sechdrs, hdr->e_shnum)) + if (is_core_symbol(src, sechdrs, hdr->e_shnum)) { + unsigned int j = src->st_name; + + while(!__test_and_set_bit(j, strmap) && strtab[j]) + ++j; ++ndst; + } /* Append room for core symbols at end of core part. */ symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + /* Put string table section at end of init part of module. */ + strsect->sh_flags |= SHF_ALLOC; + strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, + strindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + strsect->sh_name); + + /* Append room for core symbols' strings at end of core part. */ + *pstroffs = mod->core_size; + __set_bit(0, strmap); + mod->core_size += bitmap_weight(strmap, strsect->sh_size); + return symoffs; } @@ -1919,11 +1941,14 @@ static void add_kallsyms(struct module *mod, unsigned int symindex, unsigned int strindex, unsigned long symoffs, - const char *secstrings) + unsigned long stroffs, + const char *secstrings, + unsigned long *strmap) { unsigned int i, ndst; const Elf_Sym *src; Elf_Sym *dst; + char *s; mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); @@ -1941,16 +1966,25 @@ static void add_kallsyms(struct module *mod, if (!is_core_symbol(src, sechdrs, shnum)) continue; dst[ndst] = *src; + dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name); ++ndst; } mod->core_num_syms = ndst; + + mod->core_strtab = s = mod->module_core + stroffs; + for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i) + if (test_bit(i, strmap)) + *++s = mod->strtab[i]; } #else static inline unsigned long layout_symtab(struct module *mod, Elf_Shdr *sechdrs, unsigned int symindex, + unsigned int strindex, const Elf_Hdr *hdr, - const char *secstrings) + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) { } static inline void add_kallsyms(struct module *mod, @@ -1959,7 +1993,9 @@ static inline void add_kallsyms(struct module *mod, unsigned int symindex, unsigned int strindex, unsigned long symoffs, - const char *secstrings) + unsigned long stroffs, + const char *secstrings, + const unsigned long *strmap) { } #endif /* CONFIG_KALLSYMS */ @@ -2035,7 +2071,7 @@ static noinline struct module *load_module(void __user *umod, long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ #ifdef CONFIG_KALLSYMS - unsigned long symoffs; + unsigned long symoffs, stroffs, *strmap; #endif mm_segment_t old_fs; @@ -2118,10 +2154,6 @@ static noinline struct module *load_module(void __user *umod, /* Don't keep modinfo and version sections. */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; -#ifdef CONFIG_KALLSYMS - /* Keep string table for decoding later. */ - sechdrs[strindex].sh_flags |= SHF_ALLOC; -#endif /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(sechdrs, versindex, mod)) { @@ -2157,6 +2189,13 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } + strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size) + * sizeof(long), GFP_KERNEL); + if (!strmap) { + err = -ENOMEM; + goto free_mod; + } + if (find_module(mod->name)) { err = -EEXIST; goto free_mod; @@ -2186,7 +2225,8 @@ static noinline struct module *load_module(void __user *umod, this is done generically; there doesn't appear to be any special cases for the architectures. */ layout_sections(mod, hdr, sechdrs, secstrings); - symoffs = layout_symtab(mod, sechdrs, symindex, hdr, secstrings); + symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr, + secstrings, &stroffs, strmap); /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); @@ -2392,7 +2432,9 @@ static noinline struct module *load_module(void __user *umod, sechdrs[pcpuindex].sh_size); add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, - symoffs, secstrings); + symoffs, stroffs, secstrings, strmap); + kfree(strmap); + strmap = NULL; if (!mod->taints) { struct _ddebug *debug; @@ -2481,6 +2523,7 @@ static noinline struct module *load_module(void __user *umod, percpu_modfree(percpu); free_mod: kfree(args); + kfree(strmap); free_hdr: vfree(hdr); return ERR_PTR(err); @@ -2573,6 +2616,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, #ifdef CONFIG_KALLSYMS mod->num_symtab = mod->core_num_syms; mod->symtab = mod->core_symtab; + mod->strtab = mod->core_strtab; #endif module_free(mod, mod->module_init); mod->module_init = NULL; -- cgit v1.2.3-71-gd317 From 1d7015caa082d465faeae5d6fd1be077ee6dfa87 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Sep 2009 00:32:58 -0600 Subject: module: preferred way to use MODULE_AUTHOR For the longest time now we've been using multiple MODULE_AUTHOR() statements when a module has more than one author, but the comment here disagrees. Signed-off-by: Johannes Berg Cc: Jiri Kosina Cc: Luciano Coelho Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell --- include/linux/module.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index aca980365956..482efc865acf 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -128,7 +128,10 @@ extern struct module __this_module; */ #define MODULE_LICENSE(_license) MODULE_INFO(license, _license) -/* Author, ideally of form NAME[, NAME]*[ and NAME] */ +/* + * Author(s), use "Name " or just "Name", for multiple + * authors use multiple MODULE_AUTHOR() statements/lines. + */ #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) /* What your module does. */ -- cgit v1.2.3-71-gd317 From 9e1b9b80721661bd63b3662453767b22cd614fe7 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Sat, 7 Nov 2009 21:03:54 +0000 Subject: module: make MODULE_SYMBOL_PREFIX into a CONFIG option The next commit will require the use of MODULE_SYMBOL_PREFIX in .tmp_exports-asm.S. Currently it is mixed in with C structure definitions in "asm/module.h". Move the definition of this arch option into Kconfig, so it can be easily accessed by any code. This also lets modpost.c use the same definition. Previously modpost relied on a hardcoded list of architectures in mk_elfconfig.c. A build test for blackfin, one of the two MODULE_SYMBOL_PREFIX archs, showed the generated code was unchanged. vmlinux was identical save for build ids, and an apparently randomized suffix on a single "__key" symbol in the kallsyms data). Signed-off-by: Alan Jenkins Acked-by: Mike Frysinger (blackfin) CC: Sam Ravnborg Signed-off-by: Rusty Russell --- arch/blackfin/Kconfig | 4 ++++ arch/blackfin/include/asm/module.h | 2 -- arch/blackfin/kernel/vmlinux.lds.S | 2 -- arch/h8300/Kconfig | 4 ++++ arch/h8300/include/asm/module.h | 2 -- arch/h8300/kernel/vmlinux.lds.S | 1 - include/asm-generic/vmlinux.lds.h | 8 ++++++-- include/linux/module.h | 6 ++++-- scripts/Makefile.lib | 5 +++++ scripts/mod/Makefile | 2 +- scripts/mod/mk_elfconfig.c | 9 --------- scripts/mod/modpost.c | 9 +++++++++ 12 files changed, 33 insertions(+), 21 deletions(-) (limited to 'include/linux/module.h') diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index ae6a60f10120..2180433213b7 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -5,6 +5,10 @@ mainmenu "Blackfin Kernel Configuration" +config SYMBOL_PREFIX + string + default "_" + config MMU def_bool n diff --git a/arch/blackfin/include/asm/module.h b/arch/blackfin/include/asm/module.h index 9c1cfffddd9b..4282b169ead9 100644 --- a/arch/blackfin/include/asm/module.h +++ b/arch/blackfin/include/asm/module.h @@ -7,8 +7,6 @@ #ifndef _ASM_BFIN_MODULE_H #define _ASM_BFIN_MODULE_H -#define MODULE_SYMBOL_PREFIX "_" - #define Elf_Shdr Elf32_Shdr #define Elf_Sym Elf32_Sym #define Elf_Ehdr Elf32_Ehdr diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 10e12539000e..f39707c6590d 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -4,8 +4,6 @@ * Licensed under the GPL-2 or later */ -#define VMLINUX_SYMBOL(_sym_) _##_sym_ - #include #include #include diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 9420648352b8..53cc669e6d59 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -10,6 +10,10 @@ config H8300 default y select HAVE_IDE +config SYMBOL_PREFIX + string + default "_" + config MMU bool default n diff --git a/arch/h8300/include/asm/module.h b/arch/h8300/include/asm/module.h index de23231f3196..8e46724b7c09 100644 --- a/arch/h8300/include/asm/module.h +++ b/arch/h8300/include/asm/module.h @@ -8,6 +8,4 @@ struct mod_arch_specific { }; #define Elf_Sym Elf32_Sym #define Elf_Ehdr Elf32_Ehdr -#define MODULE_SYMBOL_PREFIX "_" - #endif /* _ASM_H8/300_MODULE_H */ diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index b9e24907e6ea..03d356d96e5d 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -1,4 +1,3 @@ -#define VMLINUX_SYMBOL(_sym_) _##_sym_ #include #include diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b6e818f4b247..67e652068e0e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -52,8 +52,12 @@ #define LOAD_OFFSET 0 #endif -#ifndef VMLINUX_SYMBOL -#define VMLINUX_SYMBOL(_sym_) _sym_ +#ifndef SYMBOL_PREFIX +#define VMLINUX_SYMBOL(sym) sym +#else +#define PASTE2(x,y) x##y +#define PASTE(x,y) PASTE2(x,y) +#define VMLINUX_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym) #endif /* Align . to a 8 byte boundary equals to maximum function alignment. */ diff --git a/include/linux/module.h b/include/linux/module.h index 482efc865acf..6cb1a3cab5d3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -25,8 +25,10 @@ /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) -/* some toolchains uses a `_' prefix for all user symbols */ -#ifndef MODULE_SYMBOL_PREFIX +/* Some toolchains use a `_' prefix for all user symbols. */ +#ifdef CONFIG_SYMBOL_PREFIX +#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX +#else #define MODULE_SYMBOL_PREFIX "" #endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index ffdafb26f539..224d85e72ef1 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -127,6 +127,11 @@ _c_flags += $(if $(patsubst n%,, \ $(CFLAGS_GCOV)) endif +ifdef CONFIG_SYMBOL_PREFIX +_cpp_flags += -DSYMBOL_PREFIX=$(patsubst "%",%,$(CONFIG_SYMBOL_PREFIX)) +endif + + # If building the kernel in a separate objtree expand all occurrences # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/'). diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index 11d69c35e5b4..ff954f8168c1 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -8,7 +8,7 @@ modpost-objs := modpost.o file2alias.o sumversion.o $(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h quiet_cmd_elfconfig = MKELF $@ - cmd_elfconfig = $(obj)/mk_elfconfig $(ARCH) < $< > $@ + cmd_elfconfig = $(obj)/mk_elfconfig < $< > $@ $(obj)/elfconfig.h: $(obj)/empty.o $(obj)/mk_elfconfig FORCE $(call if_changed,elfconfig) diff --git a/scripts/mod/mk_elfconfig.c b/scripts/mod/mk_elfconfig.c index 6a96d47bd1e6..639bca7ba559 100644 --- a/scripts/mod/mk_elfconfig.c +++ b/scripts/mod/mk_elfconfig.c @@ -9,9 +9,6 @@ main(int argc, char **argv) unsigned char ei[EI_NIDENT]; union { short s; char c[2]; } endian_test; - if (argc != 2) { - fprintf(stderr, "Error: no arch\n"); - } if (fread(ei, 1, EI_NIDENT, stdin) != EI_NIDENT) { fprintf(stderr, "Error: input truncated\n"); return 1; @@ -55,12 +52,6 @@ main(int argc, char **argv) else exit(1); - if ((strcmp(argv[1], "h8300") == 0) - || (strcmp(argv[1], "blackfin") == 0)) - printf("#define MODULE_SYMBOL_PREFIX \"_\"\n"); - else - printf("#define MODULE_SYMBOL_PREFIX \"\"\n"); - return 0; } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 801a16a17545..fb0f9b711af3 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -15,8 +15,17 @@ #include #include #include "modpost.h" +#include "../../include/linux/autoconf.h" #include "../../include/linux/license.h" +/* Some toolchains use a `_' prefix for all user symbols. */ +#ifdef CONFIG_SYMBOL_PREFIX +#define MODULE_SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX +#else +#define MODULE_SYMBOL_PREFIX "" +#endif + + /* Are we using CONFIG_MODVERSIONS? */ int modversions = 0; /* Warn about undefined symbols? (do so if we have vmlinux) */ -- cgit v1.2.3-71-gd317 From e1783a240f491fb233f04edc042e16b18a7a79ba Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: module: Use this_cpu_xx to dynamically allocate counters Use cpu ops to deal with the per cpu data instead of a local_t. Reduces memory requirements, cache footprint and decreases cycle counts. The this_cpu_xx operations are also used for !SMP mode. Otherwise we could not drop the use of __module_ref_addr() which would make per cpu data handling complicated. this_cpu_xx operations have their own fallback for !SMP. V8-V9: - Leave include asm/module.h since ringbuffer.c depends on it. Nothing else does though. Another patch will deal with that. - Remove spurious free. Signed-off-by: Christoph Lameter Acked-by: Rusty Russell Signed-off-by: Tejun Heo --- include/linux/module.h | 36 ++++++++++++++---------------------- kernel/module.c | 29 +++++++++++++++-------------- 2 files changed, 29 insertions(+), 36 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 6cb1a3cab5d3..2302f09ea2d9 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -363,11 +364,9 @@ struct module /* Destruction function. */ void (*exit)(void); -#ifdef CONFIG_SMP - char *refptr; -#else - local_t ref; -#endif + struct module_ref { + int count; + } *refptr; #endif #ifdef CONFIG_CONSTRUCTORS @@ -454,25 +453,16 @@ void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x) void symbol_put_addr(void *addr); -static inline local_t *__module_ref_addr(struct module *mod, int cpu) -{ -#ifdef CONFIG_SMP - return (local_t *) (mod->refptr + per_cpu_offset(cpu)); -#else - return &mod->ref; -#endif -} - /* Sometimes we know we already have a refcount, and it's easier not to handle the error case (which only happens with rmmod --wait). */ static inline void __module_get(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_inc(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); - put_cpu(); + __this_cpu_read(module->refptr->count)); + preempt_enable(); } } @@ -481,15 +471,17 @@ static inline int try_module_get(struct module *module) int ret = 1; if (module) { - unsigned int cpu = get_cpu(); + preempt_disable(); + if (likely(module_is_live(module))) { - local_inc(__module_ref_addr(module, cpu)); + __this_cpu_inc(module->refptr->count); trace_module_get(module, _THIS_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); } else ret = 0; - put_cpu(); + + preempt_enable(); } return ret; } diff --git a/kernel/module.c b/kernel/module.c index e96b8ed1cb6a..9bf228052ec5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -474,9 +474,10 @@ static void module_unload_init(struct module *mod) INIT_LIST_HEAD(&mod->modules_which_use_me); for_each_possible_cpu(cpu) - local_set(__module_ref_addr(mod, cpu), 0); + per_cpu_ptr(mod->refptr, cpu)->count = 0; + /* Hold reference count during initialization. */ - local_set(__module_ref_addr(mod, raw_smp_processor_id()), 1); + __this_cpu_write(mod->refptr->count, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -619,7 +620,7 @@ unsigned int module_refcount(struct module *mod) int cpu; for_each_possible_cpu(cpu) - total += local_read(__module_ref_addr(mod, cpu)); + total += per_cpu_ptr(mod->refptr, cpu)->count; return total; } EXPORT_SYMBOL(module_refcount); @@ -796,14 +797,15 @@ static struct module_attribute refcnt = { void module_put(struct module *module) { if (module) { - unsigned int cpu = get_cpu(); - local_dec(__module_ref_addr(module, cpu)); + preempt_disable(); + __this_cpu_dec(module->refptr->count); + trace_module_put(module, _RET_IP_, - local_read(__module_ref_addr(module, cpu))); + __this_cpu_read(module->refptr->count)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); - put_cpu(); + preempt_enable(); } } EXPORT_SYMBOL(module_put); @@ -1394,9 +1396,9 @@ static void free_module(struct module *mod) kfree(mod->args); if (mod->percpu) percpu_modfree(mod->percpu); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) +#if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) - percpu_modfree(mod->refptr); + free_percpu(mod->refptr); #endif /* Free lock-classes: */ lockdep_free_key_range(mod->module_core, mod->core_size); @@ -2159,9 +2161,8 @@ static noinline struct module *load_module(void __user *umod, mod = (void *)sechdrs[modindex].sh_addr; kmemleak_load_module(mod, hdr, sechdrs, secstrings); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), - mod->name); +#if defined(CONFIG_MODULE_UNLOAD) + mod->refptr = alloc_percpu(struct module_ref); if (!mod->refptr) { err = -ENOMEM; goto free_init; @@ -2393,8 +2394,8 @@ static noinline struct module *load_module(void __user *umod, kobject_put(&mod->mkobj.kobj); free_unload: module_unload_free(mod); -#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - percpu_modfree(mod->refptr); +#if defined(CONFIG_MODULE_UNLOAD) + free_percpu(mod->refptr); free_init: #endif module_free(mod, mod->module_init); -- cgit v1.2.3-71-gd317 From 79615760f380ec86cd58204744e774c33fab9211 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: local_t: Move local.h include to ringbuffer.c and ring_buffer_benchmark.c ringbuffer*.c are the last users of local.h. Remove the include from modules.h and add it to ringbuffer files. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/module.h | 1 - kernel/trace/ring_buffer.c | 1 + kernel/trace/ring_buffer_benchmark.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 2302f09ea2d9..7e74ae0051cc 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2326b04c95c4..eb6c8988c31a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -20,6 +20,7 @@ #include #include +#include #include "trace.h" /* diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index b2477caf09c2..df74c7982255 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -8,6 +8,7 @@ #include #include #include +#include struct rb_page { u64 ts; -- cgit v1.2.3-71-gd317 From 43cf38eb5cea91245502df3fcee4dbfc1c74dd1c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Feb 2010 14:38:57 +0900 Subject: percpu: add __percpu sparse annotations to core kernel subsystems Add __percpu sparse annotations to core subsystems. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter Acked-by: Paul E. McKenney Cc: Jens Axboe Cc: linux-mm@kvack.org Cc: Rusty Russell Cc: Dipankar Sarma Cc: Peter Zijlstra Cc: Andrew Morton Cc: Eric Biederman --- include/linux/blktrace_api.h | 4 ++-- include/linux/genhd.h | 2 +- include/linux/kexec.h | 2 +- include/linux/mmzone.h | 2 +- include/linux/module.h | 2 +- include/linux/percpu_counter.h | 2 +- include/linux/srcu.h | 2 +- kernel/kexec.c | 2 +- kernel/sched.c | 4 ++-- kernel/stop_machine.c | 2 +- mm/percpu.c | 18 ++++++++++-------- 11 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3b73b9992b26..416bf62d6d46 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -150,8 +150,8 @@ struct blk_user_trace_setup { struct blk_trace { int trace_state; struct rchan *rchan; - unsigned long *sequence; - unsigned char *msg_data; + unsigned long __percpu *sequence; + unsigned char __percpu *msg_data; u16 act_mask; u64 start_lba; u64 end_lba; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9717081c75ad..56b50514ab25 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -101,7 +101,7 @@ struct hd_struct { unsigned long stamp; int in_flight[2]; #ifdef CONFIG_SMP - struct disk_stats *dkstats; + struct disk_stats __percpu *dkstats; #else struct disk_stats dkstats; #endif diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c356b6914ffd..03e8e8dbc577 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -199,7 +199,7 @@ extern struct kimage *kexec_crash_image; */ extern struct resource crashk_res; typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4]; -extern note_buf_t *crash_notes; +extern note_buf_t __percpu *crash_notes; extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; extern size_t vmcoreinfo_size; extern size_t vmcoreinfo_max_size; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7874201a3556..41acd4bf7664 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -301,7 +301,7 @@ struct zone { unsigned long min_unmapped_pages; unsigned long min_slab_pages; #endif - struct per_cpu_pageset *pageset; + struct per_cpu_pageset __percpu *pageset; /* * free areas of different sizes */ diff --git a/include/linux/module.h b/include/linux/module.h index 7e74ae0051cc..dd618eb026aa 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -365,7 +365,7 @@ struct module struct module_ref { int count; - } *refptr; + } __percpu *refptr; #endif #ifdef CONFIG_CONSTRUCTORS diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index a7684a513994..9bd103c844ee 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -21,7 +21,7 @@ struct percpu_counter { #ifdef CONFIG_HOTPLUG_CPU struct list_head list; /* All percpu_counters are on a list */ #endif - s32 *counters; + s32 __percpu *counters; }; extern int percpu_counter_batch; diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4765d97dcafb..41eedccc962c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -33,7 +33,7 @@ struct srcu_struct_array { struct srcu_struct { int completed; - struct srcu_struct_array *per_cpu_ref; + struct srcu_struct_array __percpu *per_cpu_ref; struct mutex mutex; }; diff --git a/kernel/kexec.c b/kernel/kexec.c index ef077fb73155..87ebe8adc474 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -41,7 +41,7 @@ #include /* Per cpu memory for storing cpu states in case of system crash. */ -note_buf_t* crash_notes; +note_buf_t __percpu *crash_notes; /* vmcoreinfo stuff */ static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; diff --git a/kernel/sched.c b/kernel/sched.c index 3a8fb30a91b1..978edfd35a96 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1566,7 +1566,7 @@ static unsigned long cpu_avg_load_per_task(int cpu) #ifdef CONFIG_FAIR_GROUP_SCHED -static __read_mostly unsigned long *update_shares_data; +static __read_mostly unsigned long __percpu *update_shares_data; static void __set_se_shares(struct sched_entity *se, unsigned long shares); @@ -10683,7 +10683,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { struct cpuacct { struct cgroup_subsys_state css; /* cpuusage holds pointer to a u64-type object on every cpu */ - u64 *cpuusage; + u64 __percpu *cpuusage; struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; struct cpuacct *parent; }; diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 912823e2a11b..9bb9fb1bd79c 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -45,7 +45,7 @@ static int refcount; static struct workqueue_struct *stop_machine_wq; static struct stop_machine_data active, idle; static const struct cpumask *active_cpus; -static void *stop_machine_work; +static void __percpu *stop_machine_work; static void set_state(enum stopmachine_state newstate) { diff --git a/mm/percpu.c b/mm/percpu.c index b336638d20e7..768419d44ad7 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -80,13 +80,15 @@ /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ #ifndef __addr_to_pcpu_ptr #define __addr_to_pcpu_ptr(addr) \ - (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ - + (unsigned long)__per_cpu_start) + (void __percpu *)((unsigned long)(addr) - \ + (unsigned long)pcpu_base_addr + \ + (unsigned long)__per_cpu_start) #endif #ifndef __pcpu_ptr_to_addr #define __pcpu_ptr_to_addr(ptr) \ - (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ - - (unsigned long)__per_cpu_start) + (void __force *)((unsigned long)(ptr) + \ + (unsigned long)pcpu_base_addr - \ + (unsigned long)__per_cpu_start) #endif struct pcpu_chunk { @@ -1065,7 +1067,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -static void *pcpu_alloc(size_t size, size_t align, bool reserved) +static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) { static int warn_limit = 10; struct pcpu_chunk *chunk; @@ -1194,7 +1196,7 @@ fail_unlock_mutex: * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -void *__alloc_percpu(size_t size, size_t align) +void __percpu *__alloc_percpu(size_t size, size_t align) { return pcpu_alloc(size, align, false); } @@ -1215,7 +1217,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -void *__alloc_reserved_percpu(size_t size, size_t align) +void __percpu *__alloc_reserved_percpu(size_t size, size_t align) { return pcpu_alloc(size, align, true); } @@ -1267,7 +1269,7 @@ static void pcpu_reclaim(struct work_struct *work) * CONTEXT: * Can be called from atomic context. */ -void free_percpu(void *ptr) +void free_percpu(void __percpu *ptr) { void *addr; struct pcpu_chunk *chunk; -- cgit v1.2.3-71-gd317 From 5ed109103d73b0bafc92e860cead56725231384d Mon Sep 17 00:00:00 2001 From: Dave Young Date: Wed, 10 Mar 2010 15:24:06 -0800 Subject: sysctl extern cleanup: module Extern declarations in sysctl.c should be moved to their own header file, and then include them in relavant .c files. Move modprobe_path extern declaration to linux/kmod.h Move modules_disabled extern declaration to linux/module.h Signed-off-by: Dave Young Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmod.h | 1 + include/linux/module.h | 1 + kernel/sysctl.c | 4 ---- 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 384ca8bbf1ac..facb27fe7de0 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -27,6 +27,7 @@ #define KMOD_PATH_LEN 256 #ifdef CONFIG_MODULES +extern char modprobe_path[]; /* for sysctl */ /* modprobe exit status on success, -ve on error. Return value * usually useless though. */ extern int __request_module(bool wait, const char *name, ...) \ diff --git a/include/linux/module.h b/include/linux/module.h index dd618eb026aa..5e869ffd34aa 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -175,6 +175,7 @@ struct notifier_block; #ifdef CONFIG_MODULES +extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f18aaa7b0d65..44e9492368fd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -116,10 +116,6 @@ static int min_percpu_pagelist_fract = 8; static int ngroups_max = NGROUPS_MAX; -#ifdef CONFIG_MODULES -extern char modprobe_path[]; -extern int modules_disabled; -#endif #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; #endif -- cgit v1.2.3-71-gd317 From 259354deaaf03d49a02dbb9975d6ec2a54675672 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Mar 2010 18:56:10 +0900 Subject: module: encapsulate percpu handling better and record percpu_size Better encapsulate module static percpu area handling so that code outsidef of CONFIG_SMP ifdef doesn't deal with mod->percpu directly and add mod->percpu_size and record percpu_size in it. Both percpu fields are compiled out on UP. While at it, mark mod->percpu w/ __percpu. This is to prepare for is_module_percpu_address(). Signed-off-by: Tejun Heo Acked-by: Rusty Russell --- include/linux/module.h | 5 +++- kernel/module.c | 66 ++++++++++++++++++++++++++------------------------ 2 files changed, 39 insertions(+), 32 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 5e869ffd34aa..87d247ac6761 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -330,8 +330,11 @@ struct module struct module_notes_attrs *notes_attrs; #endif +#ifdef CONFIG_SMP /* Per-cpu data. */ - void *percpu; + void __percpu *percpu; + unsigned int percpu_size; +#endif /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ diff --git a/kernel/module.c b/kernel/module.c index c968d3606dca..e7a6e53fc73e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -370,27 +370,33 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -static void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { - void *ptr; + return mod->percpu; +} +static int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ if (align > PAGE_SIZE) { printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", - name, align, PAGE_SIZE); + mod->name, align, PAGE_SIZE); align = PAGE_SIZE; } - ptr = __alloc_reserved_percpu(size, align); - if (!ptr) + mod->percpu = __alloc_reserved_percpu(size, align); + if (!mod->percpu) { printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", size); - return ptr; + return -ENOMEM; + } + mod->percpu_size = size; + return 0; } -static void percpu_modfree(void *freeme) +static void percpu_modfree(struct module *mod) { - free_percpu(freeme); + free_percpu(mod->percpu); } static unsigned int find_pcpusec(Elf_Ehdr *hdr, @@ -400,24 +406,28 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); } -static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +static void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { int cpu; for_each_possible_cpu(cpu) - memcpy(pcpudest + per_cpu_offset(cpu), from, size); + memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } #else /* ... !CONFIG_SMP */ -static inline void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { return NULL; } -static inline void percpu_modfree(void *pcpuptr) +static inline int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ + return -ENOMEM; +} +static inline void percpu_modfree(struct module *mod) { - BUG(); } static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -425,8 +435,8 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, { return 0; } -static inline void percpu_modcopy(void *pcpudst, const void *src, - unsigned long size) +static inline void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); @@ -1400,8 +1410,7 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); - if (mod->percpu) - percpu_modfree(mod->percpu); + percpu_modfree(mod); #if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) free_percpu(mod->refptr); @@ -1520,7 +1529,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, default: /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == pcpuindex) - secbase = (unsigned long)mod->percpu; + secbase = (unsigned long)mod_percpu(mod); else secbase = sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; @@ -1954,7 +1963,7 @@ static noinline struct module *load_module(void __user *umod, unsigned int modindex, versindex, infoindex, pcpuindex; struct module *mod; long err = 0; - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ + void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; mm_segment_t old_fs; @@ -2094,15 +2103,11 @@ static noinline struct module *load_module(void __user *umod, if (pcpuindex) { /* We have a special allocation for this section. */ - percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, - sechdrs[pcpuindex].sh_addralign, - mod->name); - if (!percpu) { - err = -ENOMEM; + err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, + sechdrs[pcpuindex].sh_addralign); + if (err) goto free_mod; - } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; - mod->percpu = percpu; } /* Determine total sizes, and put offsets in sh_entsize. For now @@ -2317,7 +2322,7 @@ static noinline struct module *load_module(void __user *umod, sort_extable(mod->extable, mod->extable + mod->num_exentries); /* Finally, copy percpu area over. */ - percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, + percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, @@ -2409,8 +2414,7 @@ static noinline struct module *load_module(void __user *umod, module_free(mod, mod->module_core); /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: - if (percpu) - percpu_modfree(percpu); + percpu_modfree(mod); free_mod: kfree(args); kfree(strmap); -- cgit v1.2.3-71-gd317 From 10fad5e46f6c7bdfb01b1a012380a38e3c6ab346 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 10 Mar 2010 18:57:54 +0900 Subject: percpu, module: implement and use is_kernel/module_percpu_address() lockdep has custom code to check whether a pointer belongs to static percpu area which is somewhat broken. Implement proper is_kernel/module_percpu_address() and replace the custom code. On UP, percpu variables are regular static variables and can't be distinguished from them. Always return %false on UP. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Cc: Rusty Russell Cc: Ingo Molnar --- include/linux/module.h | 1 + include/linux/percpu.h | 7 +++++++ kernel/lockdep.c | 21 +++++---------------- kernel/module.c | 38 ++++++++++++++++++++++++++++++++++++++ mm/percpu.c | 26 ++++++++++++++++++++++++++ 5 files changed, 77 insertions(+), 16 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 87d247ac6761..f0e2659f4e3e 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -395,6 +395,7 @@ static inline int module_is_live(struct module *mod) struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); bool is_module_address(unsigned long addr); +bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); static inline int within_module_core(unsigned long addr, struct module *mod) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a93e5bfdccb8..11d5f834b54a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -137,6 +137,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); +extern bool is_kernel_percpu_address(unsigned long addr); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA @@ -163,6 +164,12 @@ static inline void free_percpu(void __percpu *p) kfree(p); } +/* can't distinguish from other static vars, always false */ +static inline bool is_kernel_percpu_address(unsigned long addr) +{ + return false; +} + static inline phys_addr_t per_cpu_ptr_to_phys(void *addr) { return __pa(addr); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c927a549db2c..9bbb9c841e48 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -582,9 +582,6 @@ static int static_obj(void *obj) unsigned long start = (unsigned long) &_stext, end = (unsigned long) &_end, addr = (unsigned long) obj; -#ifdef CONFIG_SMP - int i; -#endif /* * static variable? @@ -595,24 +592,16 @@ static int static_obj(void *obj) if (arch_is_kernel_data(addr)) return 1; -#ifdef CONFIG_SMP /* - * percpu var? + * in-kernel percpu var? */ - for_each_possible_cpu(i) { - start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); - end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM - + per_cpu_offset(i); - - if ((addr >= start) && (addr < end)) - return 1; - } -#endif + if (is_kernel_percpu_address(addr)) + return 1; /* - * module var? + * module static or percpu var? */ - return is_module_address(addr); + return is_module_address(addr) || is_module_percpu_address(addr); } /* diff --git a/kernel/module.c b/kernel/module.c index e7a6e53fc73e..9f8d23d8b3a8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -415,6 +415,40 @@ static void percpu_modcopy(struct module *mod, memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); } +/** + * is_module_percpu_address - test whether address is from module static percpu + * @addr: address to test + * + * Test whether @addr belongs to module static percpu area. + * + * RETURNS: + * %true if @addr is from module static percpu area + */ +bool is_module_percpu_address(unsigned long addr) +{ + struct module *mod; + unsigned int cpu; + + preempt_disable(); + + list_for_each_entry_rcu(mod, &modules, list) { + if (!mod->percpu_size) + continue; + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(mod->percpu, cpu); + + if ((void *)addr >= start && + (void *)addr < start + mod->percpu_size) { + preempt_enable(); + return true; + } + } + } + + preempt_enable(); + return false; +} + #else /* ... !CONFIG_SMP */ static inline void __percpu *mod_percpu(struct module *mod) @@ -441,6 +475,10 @@ static inline void percpu_modcopy(struct module *mod, /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); } +bool is_module_percpu_address(unsigned long addr) +{ + return false; +} #endif /* CONFIG_SMP */ diff --git a/mm/percpu.c b/mm/percpu.c index 768419d44ad7..6e09741ddc62 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1303,6 +1303,32 @@ void free_percpu(void __percpu *ptr) } EXPORT_SYMBOL_GPL(free_percpu); +/** + * is_kernel_percpu_address - test whether address is from static percpu area + * @addr: address to test + * + * Test whether @addr belongs to in-kernel static percpu area. Module + * static percpu areas are not considered. For those, use + * is_module_percpu_address(). + * + * RETURNS: + * %true if @addr is from in-kernel static percpu area, %false otherwise. + */ +bool is_kernel_percpu_address(unsigned long addr) +{ + const size_t static_size = __per_cpu_end - __per_cpu_start; + void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); + unsigned int cpu; + + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(base, cpu); + + if ((void *)addr >= start && (void *)addr < start + static_size) + return true; + } + return false; +} + /** * per_cpu_ptr_to_phys - convert translated percpu address to physical address * @addr: the address to be converted to physical address -- cgit v1.2.3-71-gd317 From d5e50daf92df8afcb701fd717b301985691e802f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 31 Mar 2010 11:33:42 +0900 Subject: module: add stub for is_module_percpu_address Fix build for CONFIG_MODULES not enabled by providing a stub for is_module_percpu_address(). kernel/lockdep.c:605: error: implicit declaration of function 'is_module_percpu_address' Signed-off-by: Randy Dunlap Signed-off-by: Tejun Heo --- include/linux/module.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index f0e2659f4e3e..8bd399a00343 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -567,6 +567,11 @@ static inline bool is_module_address(unsigned long addr) return false; } +static inline bool is_module_percpu_address(unsigned long addr) +{ + return false; +} + static inline bool is_module_text_address(unsigned long addr) { return false; -- cgit v1.2.3-71-gd317 From ae832d1e03ac9bf09fb8a07fb37908ab40c7cd0e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 24 Mar 2010 10:57:43 +0800 Subject: tracing: Remove side effect from module tracepoints that caused a GPF Remove the @refcnt argument, because it has side-effects, and arguments with side-effects are not skipped by the jump over disabled instrumentation and are executed even when the tracepoint is disabled. This was also causing a GPF as found by Randy Dunlap: Subject: 2.6.33 GP fault only when built with tracing LKML-Reference: <4BA2B69D.3000309@oracle.com> Note, the current 2.6.34-rc has a fix for the actual cause of the GPF, but this fixes one of its triggers. Tested-by: Randy Dunlap Acked-by: Mathieu Desnoyers Signed-off-by: Li Zefan LKML-Reference: <4BA97FA7.6040406@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/module.h | 6 ++---- include/trace/events/module.h | 14 +++++++------- kernel/module.c | 3 +-- 3 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 5e869ffd34aa..393ec39b580a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -460,8 +460,7 @@ static inline void __module_get(struct module *module) if (module) { preempt_disable(); __this_cpu_inc(module->refptr->count); - trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); + trace_module_get(module, _THIS_IP_); preempt_enable(); } } @@ -475,8 +474,7 @@ static inline int try_module_get(struct module *module) if (likely(module_is_live(module))) { __this_cpu_inc(module->refptr->count); - trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); + trace_module_get(module, _THIS_IP_); } else ret = 0; diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 4b0f48ba16a6..a585f8135bd9 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -53,9 +53,9 @@ TRACE_EVENT(module_free, DECLARE_EVENT_CLASS(module_refcnt, - TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + TP_PROTO(struct module *mod, unsigned long ip), - TP_ARGS(mod, ip, refcnt), + TP_ARGS(mod, ip), TP_STRUCT__entry( __field( unsigned long, ip ) @@ -65,7 +65,7 @@ DECLARE_EVENT_CLASS(module_refcnt, TP_fast_assign( __entry->ip = ip; - __entry->refcnt = refcnt; + __entry->refcnt = __this_cpu_read(mod->refptr->count); __assign_str(name, mod->name); ), @@ -75,16 +75,16 @@ DECLARE_EVENT_CLASS(module_refcnt, DEFINE_EVENT(module_refcnt, module_get, - TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + TP_PROTO(struct module *mod, unsigned long ip), - TP_ARGS(mod, ip, refcnt) + TP_ARGS(mod, ip) ); DEFINE_EVENT(module_refcnt, module_put, - TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + TP_PROTO(struct module *mod, unsigned long ip), - TP_ARGS(mod, ip, refcnt) + TP_ARGS(mod, ip) ); TRACE_EVENT(module_request, diff --git a/kernel/module.c b/kernel/module.c index c968d3606dca..21591ad921f3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -800,8 +800,7 @@ void module_put(struct module *module) preempt_disable(); __this_cpu_dec(module->refptr->count); - trace_module_put(module, _RET_IP_, - __this_cpu_read(module->refptr->count)); + trace_module_put(module, _RET_IP_); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); -- cgit v1.2.3-71-gd317 From 5fbfb18d7a5b846946d52c4a10e3aaa213ec31b6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 1 Apr 2010 19:09:40 +1100 Subject: Fix up possibly racy module refcounting Module refcounting is implemented with a per-cpu counter for speed. However there is a race when tallying the counter where a reference may be taken by one CPU and released by another. Reference count summation may then see the decrement without having seen the previous increment, leading to lower than expected count. A module which never has its actual reference drop below 1 may return a reference count of 0 due to this race. Module removal generally runs under stop_machine, which prevents this race causing bugs due to removal of in-use modules. However there are other real bugs in module.c code and driver code (module_refcount is exported) where the callers do not run under stop_machine. Fix this by maintaining running per-cpu counters for the number of module refcount increments and the number of refcount decrements. The increments are tallied after the decrements, so any decrement seen will always have its corresponding increment counted. The final refcount is the difference of the total increments and decrements, preventing a low-refcount from being returned. Signed-off-by: Nick Piggin Acked-by: Rusty Russell Signed-off-by: Linus Torvalds --- include/linux/module.h | 14 +++++++------- kernel/module.c | 35 +++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 15 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 8bd399a00343..515d53ae6a79 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -368,7 +368,8 @@ struct module void (*exit)(void); struct module_ref { - int count; + unsigned int incs; + unsigned int decs; } __percpu *refptr; #endif @@ -463,9 +464,9 @@ static inline void __module_get(struct module *module) { if (module) { preempt_disable(); - __this_cpu_inc(module->refptr->count); + __this_cpu_inc(module->refptr->incs); trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); + __this_cpu_read(module->refptr->incs)); preempt_enable(); } } @@ -478,11 +479,10 @@ static inline int try_module_get(struct module *module) preempt_disable(); if (likely(module_is_live(module))) { - __this_cpu_inc(module->refptr->count); + __this_cpu_inc(module->refptr->incs); trace_module_get(module, _THIS_IP_, - __this_cpu_read(module->refptr->count)); - } - else + __this_cpu_read(module->refptr->incs)); + } else ret = 0; preempt_enable(); diff --git a/kernel/module.c b/kernel/module.c index 9f8d23d8b3a8..1016b75b026a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -521,11 +521,13 @@ static void module_unload_init(struct module *mod) int cpu; INIT_LIST_HEAD(&mod->modules_which_use_me); - for_each_possible_cpu(cpu) - per_cpu_ptr(mod->refptr, cpu)->count = 0; + for_each_possible_cpu(cpu) { + per_cpu_ptr(mod->refptr, cpu)->incs = 0; + per_cpu_ptr(mod->refptr, cpu)->decs = 0; + } /* Hold reference count during initialization. */ - __this_cpu_write(mod->refptr->count, 1); + __this_cpu_write(mod->refptr->incs, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -664,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned int module_refcount(struct module *mod) { - unsigned int total = 0; + unsigned int incs = 0, decs = 0; int cpu; for_each_possible_cpu(cpu) - total += per_cpu_ptr(mod->refptr, cpu)->count; - return total; + decs += per_cpu_ptr(mod->refptr, cpu)->decs; + /* + * ensure the incs are added up after the decs. + * module_put ensures incs are visible before decs with smp_wmb. + * + * This 2-count scheme avoids the situation where the refcount + * for CPU0 is read, then CPU0 increments the module refcount, + * then CPU1 drops that refcount, then the refcount for CPU1 is + * read. We would record a decrement but not its corresponding + * increment so we would see a low count (disaster). + * + * Rare situation? But module_refcount can be preempted, and we + * might be tallying up 4096+ CPUs. So it is not impossible. + */ + smp_rmb(); + for_each_possible_cpu(cpu) + incs += per_cpu_ptr(mod->refptr, cpu)->incs; + return incs - decs; } EXPORT_SYMBOL(module_refcount); @@ -846,10 +864,11 @@ void module_put(struct module *module) { if (module) { preempt_disable(); - __this_cpu_dec(module->refptr->count); + smp_wmb(); /* see comment in module_refcount */ + __this_cpu_inc(module->refptr->decs); trace_module_put(module, _RET_IP_, - __this_cpu_read(module->refptr->count)); + __this_cpu_read(module->refptr->decs)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); -- cgit v1.2.3-71-gd317 From 2c02dfe7fe3fba97a5665d329d039d2415ea5607 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 31 May 2010 12:19:37 -0700 Subject: module: Make the 'usage' lists be two-way When adding a module that depends on another one, we used to create a one-way list of "modules_which_use_me", so that module unloading could see who needs a module. It's actually quite simple to make that list go both ways: so that we not only can see "who uses me", but also see a list of modules that are "used by me". In fact, we always wanted that list in "module_unload_free()": when we unload a module, we want to also release all the other modules that are used by that module. But because we didn't have that list, we used to first iterate over all modules, and then iterate over each "used by me" list of that module. By making the list two-way, we simplify module_unload_free(), and it allows for some trivial fixes later too. Signed-off-by: Linus Torvalds Signed-off-by: Rusty Russell (cleaned & rebased) --- include/linux/module.h | 4 ++- kernel/module.c | 79 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 51 insertions(+), 32 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 6914fcad4673..680db9e2ac36 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -359,7 +359,9 @@ struct module #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ - struct list_head modules_which_use_me; + struct list_head source_list; + /* What modules do I depend on? */ + struct list_head target_list; /* Who is waiting for us to be unloaded */ struct task_struct *waiter; diff --git a/kernel/module.c b/kernel/module.c index 0129769301e3..be18c3e34684 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -523,7 +523,8 @@ static void module_unload_init(struct module *mod) { int cpu; - INIT_LIST_HEAD(&mod->modules_which_use_me); + INIT_LIST_HEAD(&mod->source_list); + INIT_LIST_HEAD(&mod->target_list); for_each_possible_cpu(cpu) { per_cpu_ptr(mod->refptr, cpu)->incs = 0; per_cpu_ptr(mod->refptr, cpu)->decs = 0; @@ -538,8 +539,9 @@ static void module_unload_init(struct module *mod) /* modules using other modules */ struct module_use { - struct list_head list; - struct module *module_which_uses; + struct list_head source_list; + struct list_head target_list; + struct module *source, *target; }; /* Does a already use b? */ @@ -547,8 +549,8 @@ static int already_uses(struct module *a, struct module *b) { struct module_use *use; - list_for_each_entry(use, &b->modules_which_use_me, list) { - if (use->module_which_uses == a) { + list_for_each_entry(use, &b->source_list, source_list) { + if (use->source == a) { DEBUGP("%s uses %s!\n", a->name, b->name); return 1; } @@ -557,6 +559,33 @@ static int already_uses(struct module *a, struct module *b) return 0; } +/* + * Module a uses b + * - we add 'a' as a "source", 'b' as a "target" of module use + * - the module_use is added to the list of 'b' sources (so + * 'b' can walk the list to see who sourced them), and of 'a' + * targets (so 'a' can see what modules it targets). + */ +static int add_module_usage(struct module *a, struct module *b) +{ + int no_warn; + struct module_use *use; + + DEBUGP("Allocating new usage for %s.\n", a->name); + use = kmalloc(sizeof(*use), GFP_ATOMIC); + if (!use) { + printk(KERN_WARNING "%s: out of memory loading\n", a->name); + return -ENOMEM; + } + + use->source = a; + use->target = b; + list_add(&use->source_list, &b->source_list); + list_add(&use->target_list, &a->target_list); + no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); + return 0; +} + /* Module a uses b */ int use_module(struct module *a, struct module *b) { @@ -578,17 +607,11 @@ int use_module(struct module *a, struct module *b) if (err) return 0; - DEBUGP("Allocating new usage for %s.\n", a->name); - use = kmalloc(sizeof(*use), GFP_ATOMIC); - if (!use) { - printk("%s: out of memory loading\n", a->name); + err = add_module_usage(a, b); + if (err) { module_put(b); return 0; } - - use->module_which_uses = a; - list_add(&use->list, &b->modules_which_use_me); - no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name); return 1; } EXPORT_SYMBOL_GPL(use_module); @@ -596,22 +619,16 @@ EXPORT_SYMBOL_GPL(use_module); /* Clear the unload stuff of the module. */ static void module_unload_free(struct module *mod) { - struct module *i; - - list_for_each_entry(i, &modules, list) { - struct module_use *use; + struct module_use *use, *tmp; - list_for_each_entry(use, &i->modules_which_use_me, list) { - if (use->module_which_uses == mod) { - DEBUGP("%s unusing %s\n", mod->name, i->name); - module_put(i); - list_del(&use->list); - kfree(use); - sysfs_remove_link(i->holders_dir, mod->name); - /* There can be at most one match. */ - break; - } - } + list_for_each_entry_safe(use, tmp, &mod->target_list, target_list) { + struct module *i = use->target; + DEBUGP("%s unusing %s\n", mod->name, i->name); + module_put(i); + list_del(&use->source_list); + list_del(&use->target_list); + kfree(use); + sysfs_remove_link(i->holders_dir, mod->name); } } @@ -735,7 +752,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, goto out; } - if (!list_empty(&mod->modules_which_use_me)) { + if (!list_empty(&mod->source_list)) { /* Other modules depend on us: get rid of them first. */ ret = -EWOULDBLOCK; goto out; @@ -799,9 +816,9 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) /* Always include a trailing , so userspace can differentiate between this and the old multi-field proc format. */ - list_for_each_entry(use, &mod->modules_which_use_me, list) { + list_for_each_entry(use, &mod->source_list, source_list) { printed_something = 1; - seq_printf(m, "%s,", use->module_which_uses->name); + seq_printf(m, "%s,", use->source->name); } if (mod->init != NULL && mod->exit == NULL) { -- cgit v1.2.3-71-gd317 From c8e21ced08b39ef8dfe7236fb2a923a95f645262 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:35 -0600 Subject: module: fix kdb's illicit use of struct module_use. Linus changed the structure, and luckily this didn't compile any more. Reported-by: Stephen Rothwell Signed-off-by: Rusty Russell Cc: Jason Wessel Cc: Martin Hicks --- include/linux/module.h | 7 +++++++ kernel/debug/kdb/kdb_main.c | 12 +++--------- kernel/module.c | 11 +---------- 3 files changed, 11 insertions(+), 19 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 680db9e2ac36..5d8fca5dcff5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -181,6 +181,13 @@ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); #define symbol_get(x) ((typeof(&x))(__symbol_get(MODULE_SYMBOL_PREFIX #x))) +/* modules using other modules: kdb wants to see this. */ +struct module_use { + struct list_head source_list; + struct list_head target_list; + struct module *source, *target; +}; + #ifndef __GENKSYMS__ #ifdef CONFIG_MODVERSIONS /* Mark the CRC weak since genksyms apparently decides not to diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index b724c791b6d4..184cd8209c36 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1857,12 +1857,6 @@ static int kdb_ef(int argc, const char **argv) } #if defined(CONFIG_MODULES) -/* modules using other modules */ -struct module_use { - struct list_head list; - struct module *module_which_uses; -}; - /* * kdb_lsmod - This function implements the 'lsmod' command. Lists * currently loaded kernel modules. @@ -1894,9 +1888,9 @@ static int kdb_lsmod(int argc, const char **argv) { struct module_use *use; kdb_printf(" [ "); - list_for_each_entry(use, &mod->modules_which_use_me, - list) - kdb_printf("%s ", use->module_which_uses->name); + list_for_each_entry(use, &mod->source_list, + source_list) + kdb_printf("%s ", use->target->name); kdb_printf("]\n"); } #endif diff --git a/kernel/module.c b/kernel/module.c index be18c3e34684..bbb1d812c79c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -536,14 +536,6 @@ static void module_unload_init(struct module *mod) mod->waiter = current; } -/* modules using other modules */ -struct module_use -{ - struct list_head source_list; - struct list_head target_list; - struct module *source, *target; -}; - /* Does a already use b? */ static int already_uses(struct module *a, struct module *b) { @@ -589,8 +581,7 @@ static int add_module_usage(struct module *a, struct module *b) /* Module a uses b */ int use_module(struct module *a, struct module *b) { - struct module_use *use; - int no_warn, err; + int err; if (b == NULL || already_uses(a, b)) return 1; -- cgit v1.2.3-71-gd317 From 6407ebb271fc34440b306f305e1efb7685eece26 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 5 Jun 2010 11:17:36 -0600 Subject: module: Make module sysfs functions private. These were placed in the header in ef665c1a06 to get the various SYSFS/MODULE config combintations to compile. That may have been necessary then, but it's not now. These functions are all local to module.c. Signed-off-by: Rusty Russell Cc: Randy Dunlap --- include/linux/module.h | 33 --------------------------------- kernel/module.c | 29 +++++++++++++++++++++++++---- 2 files changed, 25 insertions(+), 37 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 5d8fca5dcff5..8a6b9fdc7ffa 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -672,43 +672,10 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter) #endif /* CONFIG_MODULES */ -struct device_driver; #ifdef CONFIG_SYSFS -struct module; - extern struct kset *module_kset; extern struct kobj_type module_ktype; extern int module_sysfs_initialized; - -int mod_sysfs_init(struct module *mod); -int mod_sysfs_setup(struct module *mod, - struct kernel_param *kparam, - unsigned int num_params); -int module_add_modinfo_attrs(struct module *mod); -void module_remove_modinfo_attrs(struct module *mod); - -#else /* !CONFIG_SYSFS */ - -static inline int mod_sysfs_init(struct module *mod) -{ - return 0; -} - -static inline int mod_sysfs_setup(struct module *mod, - struct kernel_param *kparam, - unsigned int num_params) -{ - return 0; -} - -static inline int module_add_modinfo_attrs(struct module *mod) -{ - return 0; -} - -static inline void module_remove_modinfo_attrs(struct module *mod) -{ } - #endif /* CONFIG_SYSFS */ #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) diff --git a/kernel/module.c b/kernel/module.c index c690d9885797..808aa18dd661 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1323,7 +1323,7 @@ static void del_usage_links(struct module *mod) #endif } -int module_add_modinfo_attrs(struct module *mod) +static int module_add_modinfo_attrs(struct module *mod) { struct module_attribute *attr; struct module_attribute *temp_attr; @@ -1349,7 +1349,7 @@ int module_add_modinfo_attrs(struct module *mod) return error; } -void module_remove_modinfo_attrs(struct module *mod) +static void module_remove_modinfo_attrs(struct module *mod) { struct module_attribute *attr; int i; @@ -1365,7 +1365,7 @@ void module_remove_modinfo_attrs(struct module *mod) kfree(mod->modinfo_attrs); } -int mod_sysfs_init(struct module *mod) +static int mod_sysfs_init(struct module *mod) { int err; struct kobject *kobj; @@ -1399,7 +1399,7 @@ out: return err; } -int mod_sysfs_setup(struct module *mod, +static int mod_sysfs_setup(struct module *mod, struct kernel_param *kparam, unsigned int num_params) { @@ -1445,6 +1445,27 @@ static void mod_sysfs_fini(struct module *mod) #else /* CONFIG_SYSFS */ +static inline int mod_sysfs_init(struct module *mod) +{ + return 0; +} + +static inline int mod_sysfs_setup(struct module *mod, + struct kernel_param *kparam, + unsigned int num_params) +{ + return 0; +} + +static inline int module_add_modinfo_attrs(struct module *mod) +{ + return 0; +} + +static inline void module_remove_modinfo_attrs(struct module *mod) +{ +} + static void mod_sysfs_fini(struct module *mod) { } -- cgit v1.2.3-71-gd317 From bf5438fca2950b03c21ad868090cc1a8fcd49536 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 17 Sep 2010 11:09:00 -0400 Subject: jump label: Base patch for jump label base patch to implement 'jump labeling'. Based on a new 'asm goto' inline assembly gcc mechanism, we can now branch to labels from an 'asm goto' statment. This allows us to create a 'no-op' fastpath, which can subsequently be patched with a jump to the slowpath code. This is useful for code which might be rarely used, but which we'd like to be able to call, if needed. Tracepoints are the current usecase that these are being implemented for. Acked-by: David S. Miller Signed-off-by: Jason Baron LKML-Reference: [ cleaned up some formating ] Signed-off-by: Steven Rostedt --- Makefile | 5 + arch/Kconfig | 3 + arch/x86/include/asm/alternative.h | 3 +- arch/x86/kernel/alternative.c | 2 +- include/asm-generic/vmlinux.lds.h | 10 ++ include/linux/jump_label.h | 58 +++++++ include/linux/module.h | 5 +- kernel/Makefile | 2 +- kernel/jump_label.c | 346 +++++++++++++++++++++++++++++++++++++ kernel/kprobes.c | 1 + kernel/module.c | 6 + scripts/gcc-goto.sh | 5 + 12 files changed, 442 insertions(+), 4 deletions(-) create mode 100644 include/linux/jump_label.h create mode 100644 kernel/jump_label.c create mode 100644 scripts/gcc-goto.sh (limited to 'include/linux/module.h') diff --git a/Makefile b/Makefile index 92ab33f16cf0..a906378d505d 100644 --- a/Makefile +++ b/Makefile @@ -591,6 +591,11 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) # conserve stack if available KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) +# check for 'asm goto' +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) + KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO +endif + # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments # But warn user when we do so warn-assign = \ diff --git a/arch/Kconfig b/arch/Kconfig index 4877a8c8ee16..1462d8492d89 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. +config HAVE_ARCH_JUMP_LABEL + bool + source "kernel/gcov/Kconfig" diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 634bf782dca5..76561d20ea2f 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -4,6 +4,7 @@ #include #include #include +#include #include /* @@ -182,7 +183,7 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len); -#if defined(CONFIG_DYNAMIC_FTRACE) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) #define IDEAL_NOP_SIZE_5 5 extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; extern void arch_init_ideal_nop5(void); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 083bd010d92c..cb0e6d385f6d 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -641,7 +641,7 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) return addr; } -#if defined(CONFIG_DYNAMIC_FTRACE) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a92a170fb7d..ef2af9948eac 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -220,6 +220,8 @@ \ BUG_TABLE \ \ + JUMP_TABLE \ + \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -563,6 +565,14 @@ #define BUG_TABLE #endif +#define JUMP_TABLE \ + . = ALIGN(8); \ + __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start___jump_table) = .; \ + *(__jump_table) \ + VMLINUX_SYMBOL(__stop___jump_table) = .; \ + } + #ifdef CONFIG_PM_TRACE #define TRACEDATA \ . = ALIGN(4); \ diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h new file mode 100644 index 000000000000..de58656d28e0 --- /dev/null +++ b/include/linux/jump_label.h @@ -0,0 +1,58 @@ +#ifndef _LINUX_JUMP_LABEL_H +#define _LINUX_JUMP_LABEL_H + +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL) +# include +# define HAVE_JUMP_LABEL +#endif + +enum jump_label_type { + JUMP_LABEL_ENABLE, + JUMP_LABEL_DISABLE +}; + +struct module; + +#ifdef HAVE_JUMP_LABEL + +extern struct jump_entry __start___jump_table[]; +extern struct jump_entry __stop___jump_table[]; + +extern void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type); +extern void jump_label_update(unsigned long key, enum jump_label_type type); +extern void jump_label_apply_nops(struct module *mod); +extern void arch_jump_label_text_poke_early(jump_label_t addr); + +#define enable_jump_label(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); + +#define disable_jump_label(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); + +#else + +#define JUMP_LABEL(key, label) \ +do { \ + if (unlikely(*key)) \ + goto label; \ +} while (0) + +#define enable_jump_label(cond_var) \ +do { \ + *(cond_var) = 1; \ +} while (0) + +#define disable_jump_label(cond_var) \ +do { \ + *(cond_var) = 0; \ +} while (0) + +static inline int jump_label_apply_nops(struct module *mod) +{ + return 0; +} + +#endif + +#endif diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fdc7ffa..403ac26023ce 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -350,7 +350,10 @@ struct module struct tracepoint *tracepoints; unsigned int num_tracepoints; #endif - +#ifdef HAVE_JUMP_LABEL + struct jump_entry *jump_entries; + unsigned int num_jump_entries; +#endif #ifdef CONFIG_TRACING const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; diff --git a/kernel/Makefile b/kernel/Makefile index 0b72d1a74be0..d52b473c99a1 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ - async.o range.o + async.o range.o jump_label.o obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o diff --git a/kernel/jump_label.c b/kernel/jump_label.c new file mode 100644 index 000000000000..460fd40112b3 --- /dev/null +++ b/kernel/jump_label.c @@ -0,0 +1,346 @@ +/* + * jump label support + * + * Copyright (C) 2009 Jason Baron + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_JUMP_LABEL + +#define JUMP_LABEL_HASH_BITS 6 +#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS) +static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE]; + +/* mutex to protect coming/going of the the jump_label table */ +static DEFINE_MUTEX(jump_label_mutex); + +struct jump_label_entry { + struct hlist_node hlist; + struct jump_entry *table; + int nr_entries; + /* hang modules off here */ + struct hlist_head modules; + unsigned long key; +}; + +struct jump_label_module_entry { + struct hlist_node hlist; + struct jump_entry *table; + int nr_entries; + struct module *mod; +}; + +static int jump_label_cmp(const void *a, const void *b) +{ + const struct jump_entry *jea = a; + const struct jump_entry *jeb = b; + + if (jea->key < jeb->key) + return -1; + + if (jea->key > jeb->key) + return 1; + + return 0; +} + +static void +sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop) +{ + unsigned long size; + + size = (((unsigned long)stop - (unsigned long)start) + / sizeof(struct jump_entry)); + sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); +} + +static struct jump_label_entry *get_jump_label_entry(jump_label_t key) +{ + struct hlist_head *head; + struct hlist_node *node; + struct jump_label_entry *e; + u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0); + + head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (key == e->key) + return e; + } + return NULL; +} + +static struct jump_label_entry * +add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table) +{ + struct hlist_head *head; + struct jump_label_entry *e; + u32 hash; + + e = get_jump_label_entry(key); + if (e) + return ERR_PTR(-EEXIST); + + e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + hash = jhash((void *)&key, sizeof(jump_label_t), 0); + head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; + e->key = key; + e->table = table; + e->nr_entries = nr_entries; + INIT_HLIST_HEAD(&(e->modules)); + hlist_add_head(&e->hlist, head); + return e; +} + +static int +build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop) +{ + struct jump_entry *iter, *iter_begin; + struct jump_label_entry *entry; + int count; + + sort_jump_label_entries(start, stop); + iter = start; + while (iter < stop) { + entry = get_jump_label_entry(iter->key); + if (!entry) { + iter_begin = iter; + count = 0; + while ((iter < stop) && + (iter->key == iter_begin->key)) { + iter++; + count++; + } + entry = add_jump_label_entry(iter_begin->key, + count, iter_begin); + if (IS_ERR(entry)) + return PTR_ERR(entry); + } else { + WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n"); + return -1; + } + } + return 0; +} + +/*** + * jump_label_update - update jump label text + * @key - key value associated with a a jump label + * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE + * + * Will enable/disable the jump for jump label @key, depending on the + * value of @type. + * + */ + +void jump_label_update(unsigned long key, enum jump_label_type type) +{ + struct jump_entry *iter; + struct jump_label_entry *entry; + struct hlist_node *module_node; + struct jump_label_module_entry *e_module; + int count; + + mutex_lock(&jump_label_mutex); + entry = get_jump_label_entry((jump_label_t)key); + if (entry) { + count = entry->nr_entries; + iter = entry->table; + while (count--) { + if (kernel_text_address(iter->code)) + arch_jump_label_transform(iter, type); + iter++; + } + /* eanble/disable jump labels in modules */ + hlist_for_each_entry(e_module, module_node, &(entry->modules), + hlist) { + count = e_module->nr_entries; + iter = e_module->table; + while (count--) { + if (kernel_text_address(iter->code)) + arch_jump_label_transform(iter, type); + iter++; + } + } + } + mutex_unlock(&jump_label_mutex); +} + +static __init int init_jump_label(void) +{ + int ret; + struct jump_entry *iter_start = __start___jump_table; + struct jump_entry *iter_stop = __stop___jump_table; + struct jump_entry *iter; + + mutex_lock(&jump_label_mutex); + ret = build_jump_label_hashtable(__start___jump_table, + __stop___jump_table); + iter = iter_start; + while (iter < iter_stop) { + arch_jump_label_text_poke_early(iter->code); + iter++; + } + mutex_unlock(&jump_label_mutex); + return ret; +} +early_initcall(init_jump_label); + +#ifdef CONFIG_MODULES + +static struct jump_label_module_entry * +add_jump_label_module_entry(struct jump_label_entry *entry, + struct jump_entry *iter_begin, + int count, struct module *mod) +{ + struct jump_label_module_entry *e; + + e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + e->mod = mod; + e->nr_entries = count; + e->table = iter_begin; + hlist_add_head(&e->hlist, &entry->modules); + return e; +} + +static int add_jump_label_module(struct module *mod) +{ + struct jump_entry *iter, *iter_begin; + struct jump_label_entry *entry; + struct jump_label_module_entry *module_entry; + int count; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return 0; + + sort_jump_label_entries(mod->jump_entries, + mod->jump_entries + mod->num_jump_entries); + iter = mod->jump_entries; + while (iter < mod->jump_entries + mod->num_jump_entries) { + entry = get_jump_label_entry(iter->key); + iter_begin = iter; + count = 0; + while ((iter < mod->jump_entries + mod->num_jump_entries) && + (iter->key == iter_begin->key)) { + iter++; + count++; + } + if (!entry) { + entry = add_jump_label_entry(iter_begin->key, 0, NULL); + if (IS_ERR(entry)) + return PTR_ERR(entry); + } + module_entry = add_jump_label_module_entry(entry, iter_begin, + count, mod); + if (IS_ERR(module_entry)) + return PTR_ERR(module_entry); + } + return 0; +} + +static void remove_jump_label_module(struct module *mod) +{ + struct hlist_head *head; + struct hlist_node *node, *node_next, *module_node, *module_node_next; + struct jump_label_entry *e; + struct jump_label_module_entry *e_module; + int i; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return; + + for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { + head = &jump_label_table[i]; + hlist_for_each_entry_safe(e, node, node_next, head, hlist) { + hlist_for_each_entry_safe(e_module, module_node, + module_node_next, + &(e->modules), hlist) { + if (e_module->mod == mod) { + hlist_del(&e_module->hlist); + kfree(e_module); + } + } + if (hlist_empty(&e->modules) && (e->nr_entries == 0)) { + hlist_del(&e->hlist); + kfree(e); + } + } + } +} + +static int +jump_label_module_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct module *mod = data; + int ret = 0; + + switch (val) { + case MODULE_STATE_COMING: + mutex_lock(&jump_label_mutex); + ret = add_jump_label_module(mod); + if (ret) + remove_jump_label_module(mod); + mutex_unlock(&jump_label_mutex); + break; + case MODULE_STATE_GOING: + mutex_lock(&jump_label_mutex); + remove_jump_label_module(mod); + mutex_unlock(&jump_label_mutex); + break; + } + return ret; +} + +/*** + * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop() + * @mod: module to patch + * + * Allow for run-time selection of the optimal nops. Before the module + * loads patch these with arch_get_jump_label_nop(), which is specified by + * the arch specific jump label code. + */ +void jump_label_apply_nops(struct module *mod) +{ + struct jump_entry *iter; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return; + + iter = mod->jump_entries; + while (iter < mod->jump_entries + mod->num_jump_entries) { + arch_jump_label_text_poke_early(iter->code); + iter++; + } +} + +struct notifier_block jump_label_module_nb = { + .notifier_call = jump_label_module_notify, + .priority = 0, +}; + +static __init int init_jump_label_module(void) +{ + return register_module_notifier(&jump_label_module_nb); +} +early_initcall(init_jump_label_module); + +#endif /* CONFIG_MODULES */ + +#endif diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6dd5359e1f0e..18904e42a918 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/kernel/module.c b/kernel/module.c index d0b5f8db11b4..eba134157ef6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -55,6 +55,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -2308,6 +2309,11 @@ static void find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->tracepoints), &mod->num_tracepoints); #endif +#ifdef HAVE_JUMP_LABEL + mod->jump_entries = section_objs(info, "__jump_table", + sizeof(*mod->jump_entries), + &mod->num_jump_entries); +#endif #ifdef CONFIG_EVENT_TRACING mod->trace_events = section_objs(info, "_ftrace_events", sizeof(*mod->trace_events), diff --git a/scripts/gcc-goto.sh b/scripts/gcc-goto.sh new file mode 100644 index 000000000000..8e82424be7aa --- /dev/null +++ b/scripts/gcc-goto.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# Test for gcc 'asm goto' suport +# Copyright (C) 2010, Jason Baron + +echo "int main(void) { entry: asm goto (\"\"::::entry); return 0; }" | $1 -x c - -c -o /dev/null >/dev/null 2>&1 && echo "y" -- cgit v1.2.3-71-gd317 From 5336377d6225959624146629ce3fc88ee8ecda3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Oct 2010 11:29:27 -0700 Subject: modules: Fix module_bug_list list corruption race With all the recent module loading cleanups, we've minimized the code that sits under module_mutex, fixing various deadlocks and making it possible to do most of the module loading in parallel. However, that whole conversion totally missed the rather obscure code that adds a new module to the list for BUG() handling. That code was doubly obscure because (a) the code itself lives in lib/bugs.c (for dubious reasons) and (b) it gets called from the architecture-specific "module_finalize()" rather than from generic code. Calling it from arch-specific code makes no sense what-so-ever to begin with, and is now actively wrong since that code isn't protected by the module loading lock any more. So this commit moves the "module_bug_{finalize,cleanup}()" calls away from the arch-specific code, and into the generic code - and in the process protects it with the module_mutex so that the list operations are now safe. Future fixups: - move the module list handling code into kernel/module.c where it belongs. - get rid of 'module_bug_list' and just use the regular list of modules (called 'modules' - imagine that) that we already create and maintain for other reasons. Reported-and-tested-by: Thomas Gleixner Cc: Rusty Russell Cc: Adrian Bunk Cc: Andrew Morton Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/avr32/kernel/module.c | 3 +-- arch/h8300/kernel/module.c | 3 +-- arch/mn10300/kernel/module.c | 3 +-- arch/parisc/kernel/module.c | 3 +-- arch/powerpc/kernel/module.c | 5 ----- arch/s390/kernel/module.c | 3 +-- arch/sh/kernel/module.c | 2 -- arch/x86/kernel/module.c | 3 +-- include/linux/module.h | 5 ++--- kernel/module.c | 4 ++++ lib/bug.c | 6 ++---- 11 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include/linux/module.h') diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 98f94d041d9c..a727f54d64d6 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -314,10 +314,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, vfree(module->arch.syminfo); module->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, module); + return 0; } void module_arch_cleanup(struct module *module) { - module_bug_cleanup(module); } diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c index 0865e291c20d..db4953dc4e1b 100644 --- a/arch/h8300/kernel/module.c +++ b/arch/h8300/kernel/module.c @@ -112,10 +112,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c index 6aea7fd76993..196a111e2e29 100644 --- a/arch/mn10300/kernel/module.c +++ b/arch/mn10300/kernel/module.c @@ -206,7 +206,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } /* @@ -214,5 +214,4 @@ int module_finalize(const Elf_Ehdr *hdr, */ void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 159a2b81e90c..6e81bb596e5b 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -941,11 +941,10 @@ int module_finalize(const Elf_Ehdr *hdr, nsyms = newptr - (Elf_Sym *)symhdr->sh_addr; DEBUGP("NEW num_symtab %lu\n", nsyms); symhdr->sh_size = nsyms * sizeof(Elf_Sym); - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { deregister_unwind_table(mod); - module_bug_cleanup(mod); } diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 477c663e0140..4ef93ae2235f 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -65,10 +65,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sect; int err; - err = module_bug_finalize(hdr, sechdrs, me); - if (err) - return err; - /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); if (sect != NULL) @@ -101,5 +97,4 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 22cfd634c355..f7167ee4604c 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -407,10 +407,9 @@ int module_finalize(const Elf_Ehdr *hdr, { vfree(me->arch.syminfo); me->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index 43adddfe4c04..ae0be697a89e 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -149,13 +149,11 @@ int module_finalize(const Elf_Ehdr *hdr, int ret = 0; ret |= module_dwarf_finalize(hdr, sechdrs, me); - ret |= module_bug_finalize(hdr, sechdrs, me); return ret; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); module_dwarf_cleanup(mod); } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e0bc186d7501..1c355c550960 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -239,11 +239,10 @@ int module_finalize(const Elf_Ehdr *hdr, apply_paravirt(pseg, pseg + para->sh_size); } - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); - module_bug_cleanup(mod); } diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fdc7ffa..aace066bad8f 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -686,17 +686,16 @@ extern int module_sysfs_initialized; #ifdef CONFIG_GENERIC_BUG -int module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, +void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); void module_bug_cleanup(struct module *); #else /* !CONFIG_GENERIC_BUG */ -static inline int module_bug_finalize(const Elf_Ehdr *hdr, +static inline void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - return 0; } static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ diff --git a/kernel/module.c b/kernel/module.c index d0b5f8db11b4..ccd641991842 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1537,6 +1537,7 @@ static int __unlink_module(void *_mod) { struct module *mod = _mod; list_del(&mod->list); + module_bug_cleanup(mod); return 0; } @@ -2625,6 +2626,7 @@ static struct module *load_module(void __user *umod, if (err < 0) goto ddebug; + module_bug_finalize(info.hdr, info.sechdrs, mod); list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2650,6 +2652,8 @@ static struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + module_bug_cleanup(mod); + ddebug: if (!mod->taints) dynamic_debug_remove(info.debug); diff --git a/lib/bug.c b/lib/bug.c index 7cdfad88128f..19552096d16b 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr) return NULL; } -int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *mod) +void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { char *secstrings; unsigned int i; @@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, * could potentially lead to deadlock and thus be counter-productive. */ list_add(&mod->bug_list, &module_bug_list); - - return 0; } void module_bug_cleanup(struct module *mod) -- cgit v1.2.3-71-gd317 From 84e1c6bb38eb318e456558b610396d9f1afaabf0 Mon Sep 17 00:00:00 2001 From: matthieu castet Date: Tue, 16 Nov 2010 22:35:16 +0100 Subject: x86: Add RO/NX protection for loadable kernel modules This patch is a logical extension of the protection provided by CONFIG_DEBUG_RODATA to LKMs. The protection is provided by splitting module_core and module_init into three logical parts each and setting appropriate page access permissions for each individual section: 1. Code: RO+X 2. RO data: RO+NX 3. RW data: RW+NX In order to achieve proper protection, layout_sections() have been modified to align each of the three parts mentioned above onto page boundary. Next, the corresponding page access permissions are set right before successful exit from load_module(). Further, free_module() and sys_init_module have been modified to set module_core and module_init as RW+NX right before calling module_free(). By default, the original section layout and access flags are preserved. When compiled with CONFIG_DEBUG_SET_MODULE_RONX=y, the patch will page-align each group of sections to ensure that each page contains only one type of content and will enforce RO/NX for each group of pages. -v1: Initial proof-of-concept patch. -v2: The patch have been re-written to reduce the number of #ifdefs and to make it architecture-agnostic. Code formatting has also been corrected. -v3: Opportunistic RO/NX protection is now unconditional. Section page-alignment is enabled when CONFIG_DEBUG_RODATA=y. -v4: Removed most macros and improved coding style. -v5: Changed page-alignment and RO/NX section size calculation -v6: Fixed comments. Restricted RO/NX enforcement to x86 only -v7: Introduced CONFIG_DEBUG_SET_MODULE_RONX, added calls to set_all_modules_text_rw() and set_all_modules_text_ro() in ftrace -v8: updated for compatibility with linux 2.6.33-rc5 -v9: coding style fixes -v10: more coding style fixes -v11: minor adjustments for -tip -v12: minor adjustments for v2.6.35-rc2-tip -v13: minor adjustments for v2.6.37-rc1-tip Signed-off-by: Siarhei Liakh Signed-off-by: Xuxian Jiang Acked-by: Arjan van de Ven Reviewed-by: James Morris Signed-off-by: H. Peter Anvin Cc: Andi Kleen Cc: Rusty Russell Cc: Stephen Rothwell Cc: Dave Jones Cc: Kees Cook Cc: Linus Torvalds LKML-Reference: <4CE2F914.9070106@free.fr> [ minor cleanliness edits, -v14: build failure fix ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 11 +++ arch/x86/kernel/ftrace.c | 3 + include/linux/module.h | 11 ++- kernel/module.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 193 insertions(+), 3 deletions(-) (limited to 'include/linux/module.h') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index b59ee765414e..45143bbcfe5e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST feature as well as for the change_page_attr() infrastructure. If in doubt, say "N" +config DEBUG_SET_MODULE_RONX + bool "Set loadable kernel module data as NX and text as RO" + depends on MODULES + ---help--- + This option helps catch unintended modifications to loadable + kernel module's text and read-only data. It also prevents execution + of module data. Such protection may interfere with run-time code + patching and dynamic kernel tracing - and they might also protect + against certain classes of kernel exploits. + If in doubt, say "N". + config DEBUG_NX_TEST tristate "Testcase for the NX non-executable stack feature" depends on DEBUG_KERNEL && m diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3afb33f14d2d..298448656b60 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code); int ftrace_arch_code_modify_prepare(void) { set_kernel_text_rw(); + set_all_modules_text_rw(); modifying_code = 1; return 0; } @@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void) int ftrace_arch_code_modify_post_process(void) { modifying_code = 0; + set_all_modules_text_ro(); set_kernel_text_ro(); return 0; } diff --git a/include/linux/module.h b/include/linux/module.h index b29e7458b966..ddaa689d71bd 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -308,6 +308,9 @@ struct module /* The size of the executable code in each section. */ unsigned int init_text_size, core_text_size; + /* Size of RO sections of the module (text+rodata) */ + unsigned int init_ro_size, core_ro_size; + /* Arch-specific module values */ struct mod_arch_specific arch; @@ -672,7 +675,6 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter) { return 0; } - #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS @@ -687,6 +689,13 @@ extern int module_sysfs_initialized; #define __MODULE_STRING(x) __stringify(x) +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +extern void set_all_modules_text_rw(void); +extern void set_all_modules_text_ro(void); +#else +static inline void set_all_modules_text_rw(void) { } +static inline void set_all_modules_text_ro(void) { } +#endif #ifdef CONFIG_GENERIC_BUG void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, diff --git a/kernel/module.c b/kernel/module.c index 437a74a7524a..ba421e6b4ada 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -56,6 +56,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -70,6 +71,26 @@ #define ARCH_SHF_SMALL 0 #endif +/* + * Modules' sections will be aligned on page boundaries + * to ensure complete separation of code and data, but + * only when CONFIG_DEBUG_SET_MODULE_RONX=y + */ +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +# define debug_align(X) ALIGN(X, PAGE_SIZE) +#else +# define debug_align(X) (X) +#endif + +/* + * Given BASE and SIZE this macro calculates the number of pages the + * memory regions occupies + */ +#define MOD_NUMBER_OF_PAGES(BASE, SIZE) (((SIZE) > 0) ? \ + (PFN_DOWN((unsigned long)(BASE) + (SIZE) - 1) - \ + PFN_DOWN((unsigned long)BASE) + 1) \ + : (0UL)) + /* If this is set, the section belongs in the init part of the module */ #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1)) @@ -1542,6 +1563,115 @@ static int __unlink_module(void *_mod) return 0; } +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +/* + * LKM RO/NX protection: protect module's text/ro-data + * from modification and any data from execution. + */ +void set_page_attributes(void *start, void *end, int (*set)(unsigned long start, int num_pages)) +{ + unsigned long begin_pfn = PFN_DOWN((unsigned long)start); + unsigned long end_pfn = PFN_DOWN((unsigned long)end); + + if (end_pfn > begin_pfn) + set(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); +} + +static void set_section_ro_nx(void *base, + unsigned long text_size, + unsigned long ro_size, + unsigned long total_size) +{ + /* begin and end PFNs of the current subsection */ + unsigned long begin_pfn; + unsigned long end_pfn; + + /* + * Set RO for module text and RO-data: + * - Always protect first page. + * - Do not protect last partial page. + */ + if (ro_size > 0) + set_page_attributes(base, base + ro_size, set_memory_ro); + + /* + * Set NX permissions for module data: + * - Do not protect first partial page. + * - Always protect last page. + */ + if (total_size > text_size) { + begin_pfn = PFN_UP((unsigned long)base + text_size); + end_pfn = PFN_UP((unsigned long)base + total_size); + if (end_pfn > begin_pfn) + set_memory_nx(begin_pfn << PAGE_SHIFT, end_pfn - begin_pfn); + } +} + +/* Setting memory back to RW+NX before releasing it */ +void unset_section_ro_nx(struct module *mod, void *module_region) +{ + unsigned long total_pages; + + if (mod->module_core == module_region) { + /* Set core as NX+RW */ + total_pages = MOD_NUMBER_OF_PAGES(mod->module_core, mod->core_size); + set_memory_nx((unsigned long)mod->module_core, total_pages); + set_memory_rw((unsigned long)mod->module_core, total_pages); + + } else if (mod->module_init == module_region) { + /* Set init as NX+RW */ + total_pages = MOD_NUMBER_OF_PAGES(mod->module_init, mod->init_size); + set_memory_nx((unsigned long)mod->module_init, total_pages); + set_memory_rw((unsigned long)mod->module_init, total_pages); + } +} + +/* Iterate through all modules and set each module's text as RW */ +void set_all_modules_text_rw() +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry_rcu(mod, &modules, list) { + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_rw); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_rw); + } + } + mutex_unlock(&module_mutex); +} + +/* Iterate through all modules and set each module's text as RO */ +void set_all_modules_text_ro() +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry_rcu(mod, &modules, list) { + if ((mod->module_core) && (mod->core_text_size)) { + set_page_attributes(mod->module_core, + mod->module_core + mod->core_text_size, + set_memory_ro); + } + if ((mod->module_init) && (mod->init_text_size)) { + set_page_attributes(mod->module_init, + mod->module_init + mod->init_text_size, + set_memory_ro); + } + } + mutex_unlock(&module_mutex); +} +#else +static inline void set_section_ro_nx(void *base, unsigned long text_size, unsigned long ro_size, unsigned long total_size) { } +static inline void unset_section_ro_nx(struct module *mod, void *module_region) { } +#endif + /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { @@ -1566,6 +1696,7 @@ static void free_module(struct module *mod) destroy_params(mod->kp, mod->num_kp); /* This may be NULL, but that's OK */ + unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -1574,6 +1705,7 @@ static void free_module(struct module *mod) lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ + unset_section_ro_nx(mod, mod->module_core); module_free(mod, mod->module_core); #ifdef CONFIG_MPU @@ -1777,8 +1909,19 @@ static void layout_sections(struct module *mod, struct load_info *info) s->sh_entsize = get_offset(mod, &mod->core_size, s, i); DEBUGP("\t%s\n", name); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->core_size = debug_align(mod->core_size); mod->core_text_size = mod->core_size; + break; + case 1: /* RO: text and ro-data */ + mod->core_size = debug_align(mod->core_size); + mod->core_ro_size = mod->core_size; + break; + case 3: /* whole core */ + mod->core_size = debug_align(mod->core_size); + break; + } } DEBUGP("Init section allocation order:\n"); @@ -1796,8 +1939,19 @@ static void layout_sections(struct module *mod, struct load_info *info) | INIT_OFFSET_MASK); DEBUGP("\t%s\n", sname); } - if (m == 0) + switch (m) { + case 0: /* executable */ + mod->init_size = debug_align(mod->init_size); mod->init_text_size = mod->init_size; + break; + case 1: /* RO: text and ro-data */ + mod->init_size = debug_align(mod->init_size); + mod->init_ro_size = mod->init_size; + break; + case 3: /* whole init */ + mod->init_size = debug_align(mod->init_size); + break; + } } } @@ -2650,6 +2804,18 @@ static struct module *load_module(void __user *umod, kfree(info.strmap); free_copy(&info); + /* Set RO and NX regions for core */ + set_section_ro_nx(mod->module_core, + mod->core_text_size, + mod->core_ro_size, + mod->core_size); + + /* Set RO and NX regions for init */ + set_section_ro_nx(mod->module_init, + mod->init_text_size, + mod->init_ro_size, + mod->init_size); + /* Done! */ trace_module_load(mod); return mod; @@ -2753,6 +2919,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif + unset_section_ro_nx(mod, mod->module_init); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3-71-gd317 From dfd62d1d84d83f9421792c78bcf72de9bc2bb603 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Wed, 24 Nov 2010 15:21:10 -0600 Subject: module: Update prototype for ref_module (formerly use_module) Commit 9bea7f23952d5948f8e5dfdff4de09bb9981fb5f renamed use_module to ref_module (and changed its return value), but forgot to update this prototype in module.h. Signed-off-by: Anders Kaseorg Acked-by: WANG Cong Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index b29e7458b966..7575bbbdf2a2 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -517,7 +517,7 @@ static inline void __module_get(struct module *module) #define symbol_put_addr(p) do { } while(0) #endif /* CONFIG_MODULE_UNLOAD */ -int use_module(struct module *a, struct module *b); +int ref_module(struct module *a, struct module *b); /* This is a #define so the string doesn't get put in every .o file */ #define module_name(mod) \ -- cgit v1.2.3-71-gd317