From 3859a271a003aba01e45b85c9d8b355eb7bf25f9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 28 Oct 2016 01:22:25 -0700 Subject: randstruct: Mark various structs for randomization This marks many critical kernel structures for randomization. These are structures that have been targeted in the past in security exploits, or contain functions pointers, pointers to function pointer tables, lists, workqueues, ref-counters, credentials, permissions, or are otherwise sensitive. This initial list was extracted from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. Left out of this list is task_struct, which requires special handling and will be covered in a subsequent patch. Signed-off-by: Kees Cook --- include/linux/binfmts.h | 4 ++-- include/linux/cdev.h | 2 +- include/linux/cred.h | 4 ++-- include/linux/dcache.h | 2 +- include/linux/fs.h | 17 +++++++++-------- include/linux/fs_struct.h | 2 +- include/linux/ipc.h | 2 +- include/linux/ipc_namespace.h | 2 +- include/linux/key-type.h | 4 ++-- include/linux/kmod.h | 2 +- include/linux/kobject.h | 2 +- include/linux/lsm_hooks.h | 4 ++-- include/linux/mm_types.h | 4 ++-- include/linux/module.h | 4 ++-- include/linux/mount.h | 2 +- include/linux/msg.h | 2 +- include/linux/path.h | 2 +- include/linux/pid_namespace.h | 2 +- include/linux/proc_ns.h | 2 +- include/linux/sched.h | 2 +- include/linux/sched/signal.h | 2 +- include/linux/sem.h | 2 +- include/linux/shm.h | 2 +- include/linux/sysctl.h | 2 +- include/linux/tty.h | 2 +- include/linux/tty_driver.h | 4 ++-- include/linux/user_namespace.h | 2 +- include/linux/utsname.h | 2 +- include/net/af_unix.h | 2 +- include/net/neighbour.h | 2 +- include/net/net_namespace.h | 2 +- include/net/sock.h | 2 +- 32 files changed, 47 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 05488da3aee9..3ae9013eeaaa 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -46,7 +46,7 @@ struct linux_binprm { unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; -}; +} __randomize_layout; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 #define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT) @@ -81,7 +81,7 @@ struct linux_binfmt { int (*load_shlib)(struct file *); int (*core_dump)(struct coredump_params *cprm); unsigned long min_coredump; /* minimal dump size */ -}; +} __randomize_layout; extern void __register_binfmt(struct linux_binfmt *fmt, int insert); diff --git a/include/linux/cdev.h b/include/linux/cdev.h index 408bc09ce497..cb28eb21e3ca 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -17,7 +17,7 @@ struct cdev { struct list_head list; dev_t dev; unsigned int count; -}; +} __randomize_layout; void cdev_init(struct cdev *, const struct file_operations *); diff --git a/include/linux/cred.h b/include/linux/cred.h index b03e7d049a64..82c8a9e1aabb 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -31,7 +31,7 @@ struct group_info { atomic_t usage; int ngroups; kgid_t gid[0]; -}; +} __randomize_layout; /** * get_group_info - Get a reference to a group info structure @@ -145,7 +145,7 @@ struct cred { struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ -}; +} __randomize_layout; extern void __put_cred(struct cred *); extern void exit_creds(struct task_struct *); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d2e38dc6172c..7eb262e13d3c 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -113,7 +113,7 @@ struct dentry { struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ struct rcu_head d_rcu; } d_u; -}; +} __randomize_layout; /* * dentry->d_lock spinlock nesting subclasses: diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..8f28143486c4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -275,7 +275,7 @@ struct kiocb { void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); void *private; int ki_flags; -}; +} __randomize_layout; static inline bool is_sync_kiocb(struct kiocb *kiocb) { @@ -392,7 +392,7 @@ struct address_space { gfp_t gfp_mask; /* implicit gfp mask for allocations */ struct list_head private_list; /* ditto */ void *private_data; /* ditto */ -} __attribute__((aligned(sizeof(long)))); +} __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but * must be enforced here for CRIS, to let the least significant bit @@ -435,7 +435,7 @@ struct block_device { int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; -}; +} __randomize_layout; /* * Radix-tree tags, for tagging dirty and writeback pages within the pagecache @@ -653,7 +653,7 @@ struct inode { #endif void *i_private; /* fs or device private pointer */ -}; +} __randomize_layout; static inline unsigned int i_blocksize(const struct inode *node) { @@ -868,7 +868,8 @@ struct file { struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; -} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ +} __randomize_layout + __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { __u32 handle_bytes; @@ -1005,7 +1006,7 @@ struct file_lock { int state; /* state of grant or error if -ve */ } afs; } fl_u; -}; +} __randomize_layout; struct file_lock_context { spinlock_t flc_lock; @@ -1404,7 +1405,7 @@ struct super_block { spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ -}; +} __randomize_layout; /* Helper functions so that in most cases filesystems will * not need to deal directly with kuid_t and kgid_t and can @@ -1690,7 +1691,7 @@ struct file_operations { u64); ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *, u64); -}; +} __randomize_layout; struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 0efc3e62843a..7a026240cbb1 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -12,7 +12,7 @@ struct fs_struct { int umask; int in_exec; struct path root, pwd; -}; +} __randomize_layout; extern struct kmem_cache *fs_cachep; diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 71fd92d81b26..ea0eb0b5f98c 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -20,6 +20,6 @@ struct kern_ipc_perm { umode_t mode; unsigned long seq; void *security; -} ____cacheline_aligned_in_smp; +} ____cacheline_aligned_in_smp __randomize_layout; #endif /* _LINUX_IPC_H */ diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 848e5796400e..65327ee0936b 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -61,7 +61,7 @@ struct ipc_namespace { struct ucounts *ucounts; struct ns_common ns; -}; +} __randomize_layout; extern struct ipc_namespace init_ipc_ns; extern spinlock_t mq_lock; diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 8496cf64575c..9520fc3c3b9a 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -45,7 +45,7 @@ struct key_preparsed_payload { size_t datalen; /* Raw datalen */ size_t quotalen; /* Quota length for proposed payload */ time_t expiry; /* Expiry time of key */ -}; +} __randomize_layout; typedef int (*request_key_actor_t)(struct key_construction *key, const char *op, void *aux); @@ -158,7 +158,7 @@ struct key_type { /* internal fields */ struct list_head link; /* link in types list */ struct lock_class_key lock_class; /* key->sem lock class */ -}; +} __randomize_layout; extern struct key_type key_type_keyring; diff --git a/include/linux/kmod.h b/include/linux/kmod.h index c4e441e00db5..655082c88fd9 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -64,7 +64,7 @@ struct subprocess_info { int (*init)(struct subprocess_info *info, struct cred *new); void (*cleanup)(struct subprocess_info *info); void *data; -}; +} __randomize_layout; extern int call_usermodehelper(const char *path, char **argv, char **envp, int wait); diff --git a/include/linux/kobject.h b/include/linux/kobject.h index ca85cb80e99a..084513350317 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -172,7 +172,7 @@ struct kset { spinlock_t list_lock; struct kobject kobj; const struct kset_uevent_ops *uevent_ops; -}; +} __randomize_layout; extern void kset_init(struct kset *kset); extern int __must_check kset_register(struct kset *kset); diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 080f34e66017..565163fc9ad4 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1876,7 +1876,7 @@ struct security_hook_heads { struct list_head audit_rule_match; struct list_head audit_rule_free; #endif /* CONFIG_AUDIT */ -}; +} __randomize_layout; /* * Security module hook list structure. @@ -1887,7 +1887,7 @@ struct security_hook_list { struct list_head *head; union security_list_options hook; char *lsm; -}; +} __randomize_layout; /* * Initializing a security_hook_list structure takes diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 45cdb27791a3..ff151814a02d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -342,7 +342,7 @@ struct vm_area_struct { struct mempolicy *vm_policy; /* NUMA policy for the VMA */ #endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; -}; +} __randomize_layout; struct core_thread { struct task_struct *task; @@ -500,7 +500,7 @@ struct mm_struct { atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; -}; +} __randomize_layout; extern struct mm_struct init_mm; diff --git a/include/linux/module.h b/include/linux/module.h index 21f56393602f..d93111d7def6 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -45,7 +45,7 @@ struct module_kobject { struct kobject *drivers_dir; struct module_param_attrs *mp; struct completion *kobj_completion; -}; +} __randomize_layout; struct module_attribute { struct attribute attr; @@ -475,7 +475,7 @@ struct module { ctor_fn_t *ctors; unsigned int num_ctors; #endif -} ____cacheline_aligned; +} ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} #endif diff --git a/include/linux/mount.h b/include/linux/mount.h index 8e0352af06b7..1ce85e6fd95f 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -67,7 +67,7 @@ struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ int mnt_flags; -}; +} __randomize_layout; struct file; /* forward dec */ struct path; diff --git a/include/linux/msg.h b/include/linux/msg.h index f3f302f9c197..a001305f5a79 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -29,7 +29,7 @@ struct msg_queue { struct list_head q_messages; struct list_head q_receivers; struct list_head q_senders; -}; +} __randomize_layout; /* Helper routines for sys_msgsnd and sys_msgrcv */ extern long do_msgsnd(int msqid, long mtype, void __user *mtext, diff --git a/include/linux/path.h b/include/linux/path.h index d1372186f431..cde895cc4af4 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -7,7 +7,7 @@ struct vfsmount; struct path { struct vfsmount *mnt; struct dentry *dentry; -}; +} __randomize_layout; extern void path_get(const struct path *); extern void path_put(const struct path *); diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index c2a989dee876..b09136f88cf4 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -52,7 +52,7 @@ struct pid_namespace { int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; -}; +} __randomize_layout; extern struct pid_namespace init_pid_ns; diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 58ab28d81fc2..06844b54dfc1 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -21,7 +21,7 @@ struct proc_ns_operations { int (*install)(struct nsproxy *nsproxy, struct ns_common *ns); struct user_namespace *(*owner)(struct ns_common *ns); struct ns_common *(*get_parent)(struct ns_common *ns); -}; +} __randomize_layout; extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; diff --git a/include/linux/sched.h b/include/linux/sched.h index 2b69fc650201..f833254fce00 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -408,7 +408,7 @@ struct sched_rt_entity { /* rq "owned" by this entity/group: */ struct rt_rq *my_q; #endif -}; +} __randomize_layout; struct sched_dl_entity { struct rb_node rb_node; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index c06d63b3a583..2a0dd40b15db 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -222,7 +222,7 @@ struct signal_struct { struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations * (notably. ptrace) */ -}; +} __randomize_layout; /* * Bits in flags field of signal_struct. diff --git a/include/linux/sem.h b/include/linux/sem.h index 9edec926e9d9..23bcbdfad4a6 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -21,7 +21,7 @@ struct sem_array { int sem_nsems; /* no. of semaphores in array */ int complex_count; /* pending complex operations */ unsigned int use_global_lock;/* >0: global lock required */ -}; +} __randomize_layout; #ifdef CONFIG_SYSVIPC diff --git a/include/linux/shm.h b/include/linux/shm.h index 04e881829625..0fb7061ec54c 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -22,7 +22,7 @@ struct shmid_kernel /* private to the kernel */ /* The task created the shm object. NULL if the task is dead. */ struct task_struct *shm_creator; struct list_head shm_clist; /* list by creator */ -}; +} __randomize_layout; /* shm_mode upper byte flags */ #define SHM_DEST 01000 /* segment will be destroyed on last detach */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 80d07816def0..9ddeef2c03e2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -117,7 +117,7 @@ struct ctl_table struct ctl_table_poll *poll; void *extra1; void *extra2; -}; +} __randomize_layout; struct ctl_node { struct rb_node node; diff --git a/include/linux/tty.h b/include/linux/tty.h index d07cd2105a6c..73f8d0977bb0 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -333,7 +333,7 @@ struct tty_struct { /* If the tty has a pending do_SAK, queue it here - akpm */ struct work_struct SAK_work; struct tty_port *port; -}; +} __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ struct tty_file_private { diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index b742b5e47cc2..00b2213f6a35 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -291,7 +291,7 @@ struct tty_operations { void (*poll_put_char)(struct tty_driver *driver, int line, char ch); #endif const struct file_operations *proc_fops; -}; +} __randomize_layout; struct tty_driver { int magic; /* magic number for this structure */ @@ -325,7 +325,7 @@ struct tty_driver { const struct tty_operations *ops; struct list_head tty_drivers; -}; +} __randomize_layout; extern struct list_head tty_drivers; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 32354b4b4b2b..b3575ce29148 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -66,7 +66,7 @@ struct user_namespace { #endif struct ucounts *ucounts; int ucount_max[UCOUNT_COUNTS]; -}; +} __randomize_layout; struct ucounts { struct hlist_node node; diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 60f0bb83b313..da826ed059cf 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -26,7 +26,7 @@ struct uts_namespace { struct user_namespace *user_ns; struct ucounts *ucounts; struct ns_common ns; -}; +} __randomize_layout; extern struct uts_namespace init_uts_ns; #ifdef CONFIG_UTS_NS diff --git a/include/net/af_unix.h b/include/net/af_unix.h index fd60eccb59a6..64e2a1e24a2c 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -36,7 +36,7 @@ struct unix_skb_parms { u32 secid; /* Security ID */ #endif u32 consumed; -}; +} __randomize_layout; #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index e4dd3a214034..a62959d2b3f7 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -155,7 +155,7 @@ struct neighbour { struct rcu_head rcu; struct net_device *dev; u8 primary_key[0]; -}; +} __randomize_layout; struct neigh_ops { int family; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fe80bb48ab1f..a224196d16ac 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -147,7 +147,7 @@ struct net { #endif struct sock *diag_nlsk; atomic_t fnhe_genid; -}; +} __randomize_layout; #include diff --git a/include/net/sock.h b/include/net/sock.h index f33e3d134e0b..d349297db9e9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1113,7 +1113,7 @@ struct proto { atomic_t socks; #endif int (*diag_destroy)(struct sock *sk, int err); -}; +} __randomize_layout; int proto_register(struct proto *prot, int alloc_slab); void proto_unregister(struct proto *prot); -- cgit v1.2.3-71-gd317 From 29e48ce87f1eaaa4b1fe3d9af90c586ac2d1fb74 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 5 Apr 2017 22:43:33 -0700 Subject: task_struct: Allow randomized layout This marks most of the layout of task_struct as randomizable, but leaves thread_info and scheduler state untouched at the start, and thread_struct untouched at the end. Other parts of the kernel use unnamed structures, but the 0-day builder using gcc-4.4 blows up on static initializers. Officially, it's documented as only working on gcc 4.6 and later, which further confuses me: https://gcc.gnu.org/wiki/C11Status The structure layout randomization already requires gcc 4.7, but instead of depending on the plugin being enabled, just check the gcc versions for wider build testing. At Linus's suggestion, the marking is hidden in a macro to reduce how ugly it looks. Additionally, indenting is left unchanged since it would make things harder to read. Randomization of task_struct is modified from Brad Spengler/PaX Team's code in the last public patch of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. Cc: Linus Torvalds Signed-off-by: Kees Cook --- include/linux/compiler-gcc.h | 13 ++++++++++++- include/linux/compiler.h | 5 +++++ include/linux/sched.h | 14 ++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7deaae3dc87d..c4a66c036692 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -231,6 +231,7 @@ #endif /* GCC_VERSION >= 40500 */ #if GCC_VERSION >= 40600 + /* * When used with Link Time Optimization, gcc can optimize away C functions or * variables which are referenced only from assembly code. __visible tells the @@ -238,7 +239,17 @@ * this. */ #define __visible __attribute__((externally_visible)) -#endif + +/* + * RANDSTRUCT_PLUGIN wants to use an anonymous struct, but it is only + * possible since GCC 4.6. To provide as much build testing coverage + * as possible, this is used for all GCC 4.6+ builds, and not just on + * RANDSTRUCT_PLUGIN builds. + */ +#define randomized_struct_fields_start struct { +#define randomized_struct_fields_end } __randomize_layout; + +#endif /* GCC_VERSION >= 40600 */ #if GCC_VERSION >= 40900 && !defined(__CHECKER__) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 55ee9ee814f8..0b4ac3e8c63e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -456,6 +456,11 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s # define __no_randomize_layout #endif +#ifndef randomized_struct_fields_start +# define randomized_struct_fields_start +# define randomized_struct_fields_end +#endif + /* * Tell gcc if a function is cold. The compiler will assume any path * directly leading to the call is unlikely. diff --git a/include/linux/sched.h b/include/linux/sched.h index f833254fce00..e2ad3531e7fe 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -490,6 +490,13 @@ struct task_struct { #endif /* -1 unrunnable, 0 runnable, >0 stopped: */ volatile long state; + + /* + * This begins the randomizable portion of task_struct. Only + * scheduling-critical items should be added above here. + */ + randomized_struct_fields_start + void *stack; atomic_t usage; /* Per task flags (PF_*), defined further below: */ @@ -1051,6 +1058,13 @@ struct task_struct { /* Used by LSM modules for access restriction: */ void *security; #endif + + /* + * New fields for task_struct should be added above here, so that + * they are included in the randomized portion of task_struct. + */ + randomized_struct_fields_end + /* CPU-specific state of this task: */ struct thread_struct thread; -- cgit v1.2.3-71-gd317 From 2683701201ddb9a2a4d0fda5d950ab50ca8e77e4 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Thu, 13 Jul 2017 16:46:43 +0200 Subject: netlink: correctly document nla_put_u64_64bit() Signed-off-by: Rolf Eike Beer Signed-off-by: David S. Miller --- include/net/netlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 01709172b3d3..ef8e6c3a80a6 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -98,8 +98,8 @@ * nla_put_u8(skb, type, value) add u8 attribute to skb * nla_put_u16(skb, type, value) add u16 attribute to skb * nla_put_u32(skb, type, value) add u32 attribute to skb - * nla_put_u64_64bits(skb, type, - * value, padattr) add u64 attribute to skb + * nla_put_u64_64bit(skb, type, + * value, padattr) add u64 attribute to skb * nla_put_s8(skb, type, value) add s8 attribute to skb * nla_put_s16(skb, type, value) add s16 attribute to skb * nla_put_s32(skb, type, value) add s32 attribute to skb -- cgit v1.2.3-71-gd317 From 301bfa483016d48b7fb9cbad87c0a04a15c25b90 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jul 2017 17:53:48 -0400 Subject: NFS: Don't run wake_up_bit() when nobody is waiting... "perf lock" shows fairly heavy contention for the bit waitqueue locks when doing an I/O heavy workload. Use a bit to tell whether or not there has been contention for a lock so that we can optimise away the bit waitqueue options in those cases. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pagelist.c | 17 ++++++++++++++++- include/linux/nfs_page.h | 2 ++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 8a23e2b40b04..de9066a92c0d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -155,9 +155,12 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock) if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) return 0; - if (!nonblock) + if (!nonblock) { + set_bit(PG_CONTENDED1, &head->wb_flags); + smp_mb__after_atomic(); return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, TASK_UNINTERRUPTIBLE); + } return -EAGAIN; } @@ -175,6 +178,10 @@ nfs_page_group_lock_wait(struct nfs_page *req) WARN_ON_ONCE(head != head->wb_head); + if (!test_bit(PG_HEADLOCK, &head->wb_flags)) + return; + set_bit(PG_CONTENDED1, &head->wb_flags); + smp_mb__after_atomic(); wait_on_bit(&head->wb_flags, PG_HEADLOCK, TASK_UNINTERRUPTIBLE); } @@ -193,6 +200,8 @@ nfs_page_group_unlock(struct nfs_page *req) smp_mb__before_atomic(); clear_bit(PG_HEADLOCK, &head->wb_flags); smp_mb__after_atomic(); + if (!test_bit(PG_CONTENDED1, &head->wb_flags)) + return; wake_up_bit(&head->wb_flags, PG_HEADLOCK); } @@ -383,6 +392,8 @@ void nfs_unlock_request(struct nfs_page *req) smp_mb__before_atomic(); clear_bit(PG_BUSY, &req->wb_flags); smp_mb__after_atomic(); + if (!test_bit(PG_CONTENDED2, &req->wb_flags)) + return; wake_up_bit(&req->wb_flags, PG_BUSY); } @@ -465,6 +476,10 @@ void nfs_release_request(struct nfs_page *req) int nfs_wait_on_request(struct nfs_page *req) { + if (!test_bit(PG_BUSY, &req->wb_flags)) + return 0; + set_bit(PG_CONTENDED2, &req->wb_flags); + smp_mb__after_atomic(); return wait_on_bit_io(&req->wb_flags, PG_BUSY, TASK_UNINTERRUPTIBLE); } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index abbee2d15dce..d67b67ae6c8b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -33,6 +33,8 @@ enum { PG_UPTODATE, /* page group sync bit in read path */ PG_WB_END, /* page group sync bit in write path */ PG_REMOVE, /* page group sync bit in write path */ + PG_CONTENDED1, /* Is someone waiting for a lock? */ + PG_CONTENDED2, /* Is someone waiting for a lock? */ }; struct nfs_inode; -- cgit v1.2.3-71-gd317 From 2b02c20ce0c28974b44e69a2e2f5ddc6a470ad6f Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Tue, 11 Jul 2017 17:21:52 +0200 Subject: cdc_ncm: Set NTB format again after altsetting switch for Huawei devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some firmwares in Huawei E3372H devices have been observed to switch back to NTB 32-bit format after altsetting switch. This patch implements a driver flag to check for the device settings and set NTB format to 16-bit again if needed. The flag has been activated for devices controlled by the huawei_cdc_ncm.c driver. V1->V2: - fixed broken error checks - some corrections to the commit message V2->V3: - variable name changes, to clarify what's happening - check (and possibly set) the NTB format later in the common bind code path Signed-off-by: Enrico Mioso Reported-and-tested-by: Christian Panton Reviewed-by: Bjørn Mork CC: Bjørn Mork CC: Christian Panton CC: linux-usb@vger.kernel.org CC: netdev@vger.kernel.org CC: Oliver Neukum Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 28 ++++++++++++++++++++++++++++ drivers/net/usb/huawei_cdc_ncm.c | 6 ++++++ include/linux/usb/cdc_ncm.h | 1 + 3 files changed, 35 insertions(+) (limited to 'include') diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index d103a1d4fb36..8f572b9f3625 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -768,8 +768,10 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ u8 *buf; int len; int temp; + int err; u8 iface_no; struct usb_cdc_parsed_header hdr; + u16 curr_ntb_format; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -874,6 +876,32 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ goto error2; } + /* + * Some Huawei devices have been observed to come out of reset in NDP32 mode. + * Let's check if this is the case, and set the device to NDP16 mode again if + * needed. + */ + if (ctx->drvflags & CDC_NCM_FLAG_RESET_NTB16) { + err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_FORMAT, + USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE, + 0, iface_no, &curr_ntb_format, 2); + if (err < 0) { + goto error2; + } + + if (curr_ntb_format == USB_CDC_NCM_NTB32_FORMAT) { + dev_info(&intf->dev, "resetting NTB format to 16-bit"); + err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_FORMAT, + USB_TYPE_CLASS | USB_DIR_OUT + | USB_RECIP_INTERFACE, + USB_CDC_NCM_NTB16_FORMAT, + iface_no, NULL, 0); + + if (err < 0) + goto error2; + } + } + cdc_ncm_find_endpoints(dev, ctx->data); cdc_ncm_find_endpoints(dev, ctx->control); if (!dev->in || !dev->out || !dev->status) { diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c index 2680a65cd5e4..63f28908afda 100644 --- a/drivers/net/usb/huawei_cdc_ncm.c +++ b/drivers/net/usb/huawei_cdc_ncm.c @@ -80,6 +80,12 @@ static int huawei_cdc_ncm_bind(struct usbnet *usbnet_dev, * be at the end of the frame. */ drvflags |= CDC_NCM_FLAG_NDP_TO_END; + + /* Additionally, it has been reported that some Huawei E3372H devices, with + * firmware version 21.318.01.00.541, come out of reset in NTB32 format mode, hence + * needing to be set to the NTB16 one again. + */ + drvflags |= CDC_NCM_FLAG_RESET_NTB16; ret = cdc_ncm_bind_common(usbnet_dev, intf, 1, drvflags); if (ret) goto err; diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 021f7a88f52c..1a59699cf82a 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -83,6 +83,7 @@ /* Driver flags */ #define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ #define CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE 0x04 /* Avoid altsetting toggle during init */ +#define CDC_NCM_FLAG_RESET_NTB16 0x08 /* set NDP16 one more time after altsetting switch */ #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) -- cgit v1.2.3-71-gd317 From 76250f2b743b72cb685cc51ac0cdabb32957180b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 12:40:01 +0000 Subject: dma-buf/fence: Avoid use of uninitialised timestamp [ 236.821534] WARNING: kmemcheck: Caught 64-bit read from uninitialized memory (ffff8802538683d0) [ 236.828642] 420000001e7f0000000000000000000000080000000000000000000000000000 [ 236.839543] i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u [ 236.850420] ^ [ 236.854123] RIP: 0010:[] [] fence_signal+0x17/0xd0 [ 236.861313] RSP: 0018:ffff88024acd7ba0 EFLAGS: 00010282 [ 236.865027] RAX: ffffffff812f6a90 RBX: ffff8802527ca800 RCX: ffff880252cb30e0 [ 236.868801] RDX: ffff88024ac5d918 RSI: ffff880252f780e0 RDI: ffff880253868380 [ 236.872579] RBP: ffff88024acd7bc0 R08: ffff88024acd7be0 R09: 0000000000000000 [ 236.876407] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880253868380 [ 236.880185] R13: ffff8802538684d0 R14: ffff880253868380 R15: ffff88024cd48e00 [ 236.883983] FS: 00007f1646d1a740(0000) GS:ffff88025d000000(0000) knlGS:0000000000000000 [ 236.890959] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 236.894702] CR2: ffff880251360318 CR3: 000000024ad21000 CR4: 00000000001406f0 [ 236.898481] [] i915_gem_request_retire+0x1cd/0x230 [ 236.902439] [] i915_gem_request_alloc+0xa3/0x2f0 [ 236.906435] [] i915_gem_do_execbuffer.isra.41+0xb6d/0x18b0 [ 236.910434] [] i915_gem_execbuffer2+0x95/0x1e0 [ 236.914390] [] drm_ioctl+0x1e5/0x460 [ 236.918275] [] do_vfs_ioctl+0x8f/0x5c0 [ 236.922168] [] SyS_ioctl+0x3c/0x70 [ 236.926090] [] entry_SYSCALL_64_fastpath+0x17/0x93 [ 236.930045] [] 0xffffffffffffffff We only set the timestamp before we mark the fence as signaled. It is done before to avoid observers having a window in which they may see the fence as complete but no timestamp. Having it does incur a potential for the timestamp to be written twice, and even for it to be corrupted if the u64 write is not atomic. Instead use a new bit to record the presence of the timestamp, and teach the readers to wait until it is set if the fence is complete. There still remains a race where the timestamp for the signaled fence may be shown before the fence is reported as signaled, but that's a pre-existing error. Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Gustavo Padovan Cc: Daniel Vetter Reported-by: Rafael Antognolli Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170214124001.1930-1-chris@chris-wilson.co.uk --- drivers/dma-buf/dma-fence.c | 17 ++++++----------- drivers/dma-buf/sync_debug.c | 2 +- drivers/dma-buf/sync_file.c | 8 +++++++- include/linux/dma-fence.h | 2 ++ 4 files changed, 16 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 0918d3f003d6..13556fdda2a5 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -75,11 +75,6 @@ int dma_fence_signal_locked(struct dma_fence *fence) if (WARN_ON(!fence)) return -EINVAL; - if (!ktime_to_ns(fence->timestamp)) { - fence->timestamp = ktime_get(); - smp_mb__before_atomic(); - } - if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { ret = -EINVAL; @@ -87,8 +82,11 @@ int dma_fence_signal_locked(struct dma_fence *fence) * we might have raced with the unlocked dma_fence_signal, * still run through all callbacks */ - } else + } else { + fence->timestamp = ktime_get(); + set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); trace_dma_fence_signaled(fence); + } list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { list_del_init(&cur->node); @@ -115,14 +113,11 @@ int dma_fence_signal(struct dma_fence *fence) if (!fence) return -EINVAL; - if (!ktime_to_ns(fence->timestamp)) { - fence->timestamp = ktime_get(); - smp_mb__before_atomic(); - } - if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return -EINVAL; + fence->timestamp = ktime_get(); + set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); trace_dma_fence_signaled(fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) { diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index c769dc653b34..bfead12390f2 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -84,7 +84,7 @@ static void sync_print_fence(struct seq_file *s, show ? "_" : "", sync_status_str(status)); - if (status) { + if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) { struct timespec64 ts64 = ktime_to_timespec64(fence->timestamp); diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 2321035f6204..95f259b719fc 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -375,7 +375,13 @@ static void sync_fill_fence_info(struct dma_fence *fence, sizeof(info->driver_name)); info->status = dma_fence_get_status(fence); - info->timestamp_ns = ktime_to_ns(fence->timestamp); + while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && + !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) + cpu_relax(); + info->timestamp_ns = + test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ? + ktime_to_ns(fence->timestamp) : + ktime_set(0, 0); } static long sync_file_ioctl_fence_info(struct sync_file *sync_file, diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index a5195a7d6f77..0a186c4f3981 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -55,6 +55,7 @@ struct dma_fence_cb; * of the time. * * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled + * DMA_FENCE_FLAG_TIMESTAMP_BIT - timestamp recorded for fence signaling * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the * implementer of the fence for its own purposes. Can be used in different @@ -84,6 +85,7 @@ struct dma_fence { enum dma_fence_flag_bits { DMA_FENCE_FLAG_SIGNALED_BIT, + DMA_FENCE_FLAG_TIMESTAMP_BIT, DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, DMA_FENCE_FLAG_USER_BITS, /* must always be last member */ }; -- cgit v1.2.3-71-gd317 From b1f5bfc27a19f214006b9b4db7b9126df2dfdf5a Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 14 Jul 2017 18:32:45 +0200 Subject: sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}() If the length field of the iterator (|pos.p| or |err|) is past the end of the chunk, we shouldn't access it. This bug has been detected by KMSAN. For the following pair of system calls: socket(PF_INET6, SOCK_STREAM, 0x84 /* IPPROTO_??? */) = 3 sendto(3, "A", 1, MSG_OOB, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 1 the tool has reported a use of uninitialized memory: ================================================================== BUG: KMSAN: use of uninitialized memory in sctp_rcv+0x17b8/0x43b0 CPU: 1 PID: 2940 Comm: probe Not tainted 4.11.0-rc5+ #2926 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x172/0x1c0 lib/dump_stack.c:52 kmsan_report+0x12a/0x180 mm/kmsan/kmsan.c:927 __msan_warning_32+0x61/0xb0 mm/kmsan/kmsan_instr.c:469 __sctp_rcv_init_lookup net/sctp/input.c:1074 __sctp_rcv_lookup_harder net/sctp/input.c:1233 __sctp_rcv_lookup net/sctp/input.c:1255 sctp_rcv+0x17b8/0x43b0 net/sctp/input.c:170 sctp6_rcv+0x32/0x70 net/sctp/ipv6.c:984 ip6_input_finish+0x82f/0x1ee0 net/ipv6/ip6_input.c:279 NF_HOOK ./include/linux/netfilter.h:257 ip6_input+0x239/0x290 net/ipv6/ip6_input.c:322 dst_input ./include/net/dst.h:492 ip6_rcv_finish net/ipv6/ip6_input.c:69 NF_HOOK ./include/linux/netfilter.h:257 ipv6_rcv+0x1dbd/0x22e0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x2f6f/0x3a20 net/core/dev.c:4208 __netif_receive_skb net/core/dev.c:4246 process_backlog+0x667/0xba0 net/core/dev.c:4866 napi_poll net/core/dev.c:5268 net_rx_action+0xc95/0x1590 net/core/dev.c:5333 __do_softirq+0x485/0x942 kernel/softirq.c:284 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 do_softirq kernel/softirq.c:328 __local_bh_enable_ip+0x25b/0x290 kernel/softirq.c:181 local_bh_enable+0x37/0x40 ./include/linux/bottom_half.h:31 rcu_read_unlock_bh ./include/linux/rcupdate.h:931 ip6_finish_output2+0x19b2/0x1cf0 net/ipv6/ip6_output.c:124 ip6_finish_output+0x764/0x970 net/ipv6/ip6_output.c:149 NF_HOOK_COND ./include/linux/netfilter.h:246 ip6_output+0x456/0x520 net/ipv6/ip6_output.c:163 dst_output ./include/net/dst.h:486 NF_HOOK ./include/linux/netfilter.h:257 ip6_xmit+0x1841/0x1c00 net/ipv6/ip6_output.c:261 sctp_v6_xmit+0x3b7/0x470 net/sctp/ipv6.c:225 sctp_packet_transmit+0x38cb/0x3a20 net/sctp/output.c:632 sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 sctp_side_effects net/sctp/sm_sideeffect.c:1773 sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:246 RIP: 0033:0x401133 RSP: 002b:00007fff6d99cd38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000004002b0 RCX: 0000000000401133 RDX: 0000000000000001 RSI: 0000000000494088 RDI: 0000000000000003 RBP: 00007fff6d99cd90 R08: 00007fff6d99cd50 R09: 000000000000001c R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000 R13: 00000000004063d0 R14: 0000000000406460 R15: 0000000000000000 origin: save_stack_trace+0x37/0x40 arch/x86/kernel/stacktrace.c:59 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:302 kmsan_internal_poison_shadow+0xb1/0x1a0 mm/kmsan/kmsan.c:198 kmsan_poison_shadow+0x6d/0xc0 mm/kmsan/kmsan.c:211 slab_alloc_node mm/slub.c:2743 __kmalloc_node_track_caller+0x200/0x360 mm/slub.c:4351 __kmalloc_reserve net/core/skbuff.c:138 __alloc_skb+0x26b/0x840 net/core/skbuff.c:231 alloc_skb ./include/linux/skbuff.h:933 sctp_packet_transmit+0x31e/0x3a20 net/sctp/output.c:570 sctp_outq_flush+0xeb3/0x46e0 net/sctp/outqueue.c:885 sctp_outq_uncork+0xb2/0xd0 net/sctp/outqueue.c:750 sctp_side_effects net/sctp/sm_sideeffect.c:1773 sctp_do_sm+0x6962/0x6ec0 net/sctp/sm_sideeffect.c:1147 sctp_primitive_ASSOCIATE+0x12c/0x160 net/sctp/primitive.c:88 sctp_sendmsg+0x43e5/0x4f90 net/sctp/socket.c:1954 inet_sendmsg+0x498/0x670 net/ipv4/af_inet.c:762 sock_sendmsg_nosec net/socket.c:633 sock_sendmsg net/socket.c:643 SYSC_sendto+0x608/0x710 net/socket.c:1696 SyS_sendto+0x8a/0xb0 net/socket.c:1664 do_syscall_64+0xe6/0x130 arch/x86/entry/common.c:285 return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:246 ================================================================== Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a9519a06a23b..980807d7506f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -469,6 +469,8 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) #define _sctp_walk_params(pos, chunk, end, member)\ for (pos.v = chunk->member;\ + (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\ + (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\ pos.v += SCTP_PAD4(ntohs(pos.p->length))) @@ -479,6 +481,8 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(struct sctp_chunkhdr));\ + ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\ + (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) -- cgit v1.2.3-71-gd317 From e67ae2b7b23b283e657865b498b151e6a17b919d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Jul 2017 13:17:26 +0200 Subject: libceph: fix old style declaration warnings The new macros don't follow the usual style for declarations, which we get a warning for with 'make W=1': In file included from fs/ceph/mds_client.c:16:0: include/linux/ceph/ceph_features.h:74:1: error: 'static' is not at beginning of declaration [-Werror=old-style-declaration] This moves the 'static' keyword to the front of the declaration. Fixes: f179d3ba8cb9 ("libceph: new features macros") Signed-off-by: Arnd Bergmann Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index f0f6c537b64c..040dd105c3e7 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -10,14 +10,14 @@ #define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL #define DEFINE_CEPH_FEATURE(bit, incarnation, name) \ - const static uint64_t CEPH_FEATURE_##name = (1ULL< Date: Thu, 6 Jul 2017 23:17:44 +0200 Subject: netfilter: remove old pre-netns era hook api no more users in the tree, remove this. The old api is racy wrt. module removal, all users have been converted to the netns-aware api. The old api pretended we still have global hooks but that has not been true for a long time. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 9 --- net/netfilter/core.c | 143 ---------------------------------------------- 2 files changed, 152 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index a4b97be30b28..22f081065d49 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -61,8 +61,6 @@ typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); struct nf_hook_ops { - struct list_head list; - /* User fills in from here down. */ nf_hookfn *hook; struct net_device *dev; @@ -160,13 +158,6 @@ int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n); -int nf_register_hook(struct nf_hook_ops *reg); -void nf_unregister_hook(struct nf_hook_ops *reg); -int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); -void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); -int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); -void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); - /* Functions to register get/setsockopt ranges (non-inclusive). You need to check permissions yourself! */ int nf_register_sockopt(struct nf_sockopt_ops *reg); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 552d606e57ca..368610dbc3c0 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -227,114 +227,6 @@ void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, } EXPORT_SYMBOL(nf_unregister_net_hooks); -static LIST_HEAD(nf_hook_list); - -static int _nf_register_hook(struct nf_hook_ops *reg) -{ - struct net *net, *last; - int ret; - - for_each_net(net) { - ret = nf_register_net_hook(net, reg); - if (ret && ret != -ENOENT) - goto rollback; - } - list_add_tail(®->list, &nf_hook_list); - - return 0; -rollback: - last = net; - for_each_net(net) { - if (net == last) - break; - nf_unregister_net_hook(net, reg); - } - return ret; -} - -int nf_register_hook(struct nf_hook_ops *reg) -{ - int ret; - - rtnl_lock(); - ret = _nf_register_hook(reg); - rtnl_unlock(); - - return ret; -} -EXPORT_SYMBOL(nf_register_hook); - -static void _nf_unregister_hook(struct nf_hook_ops *reg) -{ - struct net *net; - - list_del(®->list); - for_each_net(net) - nf_unregister_net_hook(net, reg); -} - -void nf_unregister_hook(struct nf_hook_ops *reg) -{ - rtnl_lock(); - _nf_unregister_hook(reg); - rtnl_unlock(); -} -EXPORT_SYMBOL(nf_unregister_hook); - -int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n) -{ - unsigned int i; - int err = 0; - - for (i = 0; i < n; i++) { - err = nf_register_hook(®[i]); - if (err) - goto err; - } - return err; - -err: - if (i > 0) - nf_unregister_hooks(reg, i); - return err; -} -EXPORT_SYMBOL(nf_register_hooks); - -/* Caller MUST take rtnl_lock() */ -int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n) -{ - unsigned int i; - int err = 0; - - for (i = 0; i < n; i++) { - err = _nf_register_hook(®[i]); - if (err) - goto err; - } - return err; - -err: - if (i > 0) - _nf_unregister_hooks(reg, i); - return err; -} -EXPORT_SYMBOL(_nf_register_hooks); - -void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) -{ - while (n-- > 0) - nf_unregister_hook(®[n]); -} -EXPORT_SYMBOL(nf_unregister_hooks); - -/* Caller MUST take rtnl_lock */ -void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) -{ - while (n-- > 0) - _nf_unregister_hook(®[n]); -} -EXPORT_SYMBOL(_nf_unregister_hooks); - /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, @@ -450,37 +342,6 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif -static int nf_register_hook_list(struct net *net) -{ - struct nf_hook_ops *elem; - int ret; - - rtnl_lock(); - list_for_each_entry(elem, &nf_hook_list, list) { - ret = nf_register_net_hook(net, elem); - if (ret && ret != -ENOENT) - goto out_undo; - } - rtnl_unlock(); - return 0; - -out_undo: - list_for_each_entry_continue_reverse(elem, &nf_hook_list, list) - nf_unregister_net_hook(net, elem); - rtnl_unlock(); - return ret; -} - -static void nf_unregister_hook_list(struct net *net) -{ - struct nf_hook_ops *elem; - - rtnl_lock(); - list_for_each_entry(elem, &nf_hook_list, list) - nf_unregister_net_hook(net, elem); - rtnl_unlock(); -} - static int __net_init netfilter_net_init(struct net *net) { int i, h, ret; @@ -500,16 +361,12 @@ static int __net_init netfilter_net_init(struct net *net) return -ENOMEM; } #endif - ret = nf_register_hook_list(net); - if (ret) - remove_proc_entry("netfilter", net->proc_net); return ret; } static void __net_exit netfilter_net_exit(struct net *net) { - nf_unregister_hook_list(net); remove_proc_entry("netfilter", net->proc_net); } -- cgit v1.2.3-71-gd317 From c6325179238f1d4683edbec53d8322575d76d7e2 Mon Sep 17 00:00:00 2001 From: Gleb Fotengauer-Malinovskiy Date: Mon, 17 Jul 2017 16:29:46 +0300 Subject: tty: Fix TIOCGPTPEER ioctl definition This ioctl does nothing to justify an _IOC_READ or _IOC_WRITE flag because it doesn't copy anything from/to userspace to access the argument. Fixes: 54ebbfb16034 ("tty: add TIOCGPTPEER ioctl") Signed-off-by: Gleb Fotengauer-Malinovskiy Acked-by: Aleksa Sarai Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/alpha/include/uapi/asm/ioctls.h | 2 +- arch/mips/include/uapi/asm/ioctls.h | 2 +- arch/parisc/include/uapi/asm/ioctls.h | 2 +- arch/powerpc/include/uapi/asm/ioctls.h | 2 +- arch/sh/include/uapi/asm/ioctls.h | 2 +- arch/sparc/include/uapi/asm/ioctls.h | 2 +- arch/xtensa/include/uapi/asm/ioctls.h | 2 +- include/uapi/asm-generic/ioctls.h | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h index ff67b8373bf7..1cd7dc7d4870 100644 --- a/arch/alpha/include/uapi/asm/ioctls.h +++ b/arch/alpha/include/uapi/asm/ioctls.h @@ -100,7 +100,7 @@ #define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ -#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */ +#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ #define TIOCSERCONFIG 0x5453 #define TIOCSERGWILD 0x5454 diff --git a/arch/mips/include/uapi/asm/ioctls.h b/arch/mips/include/uapi/asm/ioctls.h index 68e19b689a00..1609cb0907ac 100644 --- a/arch/mips/include/uapi/asm/ioctls.h +++ b/arch/mips/include/uapi/asm/ioctls.h @@ -91,7 +91,7 @@ #define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ -#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */ +#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ /* I hope the range from 0x5480 on is free ... */ #define TIOCSCTTY 0x5480 /* become controlling tty */ diff --git a/arch/parisc/include/uapi/asm/ioctls.h b/arch/parisc/include/uapi/asm/ioctls.h index 674c68a5bbd0..d0e3321403be 100644 --- a/arch/parisc/include/uapi/asm/ioctls.h +++ b/arch/parisc/include/uapi/asm/ioctls.h @@ -60,7 +60,7 @@ #define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ -#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */ +#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ #define FIONCLEX 0x5450 /* these numbers need to be adjusted. */ #define FIOCLEX 0x5451 diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h index bfd609a3e928..e3b10469f787 100644 --- a/arch/powerpc/include/uapi/asm/ioctls.h +++ b/arch/powerpc/include/uapi/asm/ioctls.h @@ -100,7 +100,7 @@ #define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ -#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */ +#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ #define TIOCSERCONFIG 0x5453 #define TIOCSERGWILD 0x5454 diff --git a/arch/sh/include/uapi/asm/ioctls.h b/arch/sh/include/uapi/asm/ioctls.h index eec7901e9e65..787bac9f67da 100644 --- a/arch/sh/include/uapi/asm/ioctls.h +++ b/arch/sh/include/uapi/asm/ioctls.h @@ -93,7 +93,7 @@ #define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ -#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */ +#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ #define TIOCSERCONFIG _IO('T', 83) /* 0x5453 */ #define TIOCSERGWILD _IOR('T', 84, int) /* 0x5454 */ diff --git a/arch/sparc/include/uapi/asm/ioctls.h b/arch/sparc/include/uapi/asm/ioctls.h index 6d27398632ea..f5df72b93bb2 100644 --- a/arch/sparc/include/uapi/asm/ioctls.h +++ b/arch/sparc/include/uapi/asm/ioctls.h @@ -88,7 +88,7 @@ #define TIOCGPTN _IOR('t', 134, unsigned int) /* Get Pty Number */ #define TIOCSPTLCK _IOW('t', 135, int) /* Lock/unlock PTY */ #define TIOCSIG _IOW('t', 136, int) /* Generate signal on Pty slave */ -#define TIOCGPTPEER _IOR('t', 137, int) /* Safely open the slave */ +#define TIOCGPTPEER _IO('t', 137) /* Safely open the slave */ /* Little f */ #define FIOCLEX _IO('f', 1) diff --git a/arch/xtensa/include/uapi/asm/ioctls.h b/arch/xtensa/include/uapi/asm/ioctls.h index 98b004e24e85..47d82c09be7b 100644 --- a/arch/xtensa/include/uapi/asm/ioctls.h +++ b/arch/xtensa/include/uapi/asm/ioctls.h @@ -105,7 +105,7 @@ #define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ -#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */ +#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ #define TIOCSERCONFIG _IO('T', 83) #define TIOCSERGWILD _IOR('T', 84, int) diff --git a/include/uapi/asm-generic/ioctls.h b/include/uapi/asm-generic/ioctls.h index 06d5f7ddf84e..14baf9f23a14 100644 --- a/include/uapi/asm-generic/ioctls.h +++ b/include/uapi/asm-generic/ioctls.h @@ -77,7 +77,7 @@ #define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ -#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */ +#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ #define FIONCLEX 0x5450 #define FIOCLEX 0x5451 -- cgit v1.2.3-71-gd317 From 13c401f33e19c20431d9888a91d9ea82e5133bd9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 14 Jul 2017 23:03:49 -0700 Subject: jhash: fix -Wimplicit-fallthrough warnings GCC 7 added a new -Wimplicit-fallthrough warning. It's only enabled with W=1, but since linux/jhash.h is included in over hundred places (including other global headers) it seems worthwhile fixing this warning. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/jhash.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 348c6f47e4cc..8037850f3104 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -85,19 +85,18 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) k += 12; } /* Last block: affect all 32 bits of (c) */ - /* All the case statements fall through */ switch (length) { - case 12: c += (u32)k[11]<<24; - case 11: c += (u32)k[10]<<16; - case 10: c += (u32)k[9]<<8; - case 9: c += k[8]; - case 8: b += (u32)k[7]<<24; - case 7: b += (u32)k[6]<<16; - case 6: b += (u32)k[5]<<8; - case 5: b += k[4]; - case 4: a += (u32)k[3]<<24; - case 3: a += (u32)k[2]<<16; - case 2: a += (u32)k[1]<<8; + case 12: c += (u32)k[11]<<24; /* fall through */ + case 11: c += (u32)k[10]<<16; /* fall through */ + case 10: c += (u32)k[9]<<8; /* fall through */ + case 9: c += k[8]; /* fall through */ + case 8: b += (u32)k[7]<<24; /* fall through */ + case 7: b += (u32)k[6]<<16; /* fall through */ + case 6: b += (u32)k[5]<<8; /* fall through */ + case 5: b += k[4]; /* fall through */ + case 4: a += (u32)k[3]<<24; /* fall through */ + case 3: a += (u32)k[2]<<16; /* fall through */ + case 2: a += (u32)k[1]<<8; /* fall through */ case 1: a += k[0]; __jhash_final(a, b, c); case 0: /* Nothing left to add */ @@ -131,10 +130,10 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) k += 3; } - /* Handle the last 3 u32's: all the case statements fall through */ + /* Handle the last 3 u32's */ switch (length) { - case 3: c += k[2]; - case 2: b += k[1]; + case 3: c += k[2]; /* fall through */ + case 2: b += k[1]; /* fall through */ case 1: a += k[0]; __jhash_final(a, b, c); case 0: /* Nothing left to add */ -- cgit v1.2.3-71-gd317 From df39a9f106d53532443a804352894480ca6ca5fd Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 17 Jul 2017 11:42:55 -0700 Subject: bpf: check NULL for sk_to_full_sk() return value When req->rsk_listener is NULL, sk_to_full_sk() returns NULL too, so we have to check its return value against NULL here. Fixes: 40304b2a1567 ("bpf: BPF support for sock_ops") Reported-by: David Ahern Tested-by: David Ahern Cc: Lawrence Brakmo Cc: Daniel Borkmann Signed-off-by: Cong Wang Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 360c082e885c..d41d40ac3efd 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -85,7 +85,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, int __ret = 0; \ if (cgroup_bpf_enabled && (sock_ops)->sk) { \ typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \ - if (sk_fullsock(__sk)) \ + if (__sk && sk_fullsock(__sk)) \ __ret = __cgroup_bpf_run_filter_sock_ops(__sk, \ sock_ops, \ BPF_CGROUP_SOCK_OPS); \ -- cgit v1.2.3-71-gd317 From a512c2fbef9c700ee1ee0e045b75e140fef8f5ee Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 23 May 2017 11:26:08 +0300 Subject: IB/core: Introduce modify QP operation with udata This patch adds new function ib_modify_qp_with_udata so that uverbs layer can avoid handling L2 mac address at verbs layer and depend on the core layer to resolve the mac address consistently for all required QPs. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 32 ++++++++++++++++++++++++-------- include/rdma/ib_verbs.h | 16 ++++++++++++++++ 2 files changed, 40 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 644fa0d13f02..7f8fe443df46 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1276,20 +1276,36 @@ out: } EXPORT_SYMBOL(ib_resolve_eth_dmac); -int ib_modify_qp(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask) +/** + * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. + * @qp: The QP to modify. + * @attr: On input, specifies the QP attributes to modify. On output, + * the current values of selected QP attributes are returned. + * @attr_mask: A bit-mask used to specify which attributes of the QP + * are being modified. + * @udata: pointer to user's input output buffer information + * are being modified. + * It returns 0 on success and returns appropriate error code on error. + */ +int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) { + int ret; - if (qp_attr_mask & IB_QP_AV) { - int ret; - - ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr); + if (attr_mask & IB_QP_AV) { + ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); if (ret) return ret; } + return ib_security_modify_qp(qp, attr, attr_mask, udata); +} +EXPORT_SYMBOL(ib_modify_qp_with_udata); - return ib_security_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); +int ib_modify_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask) +{ + return ib_modify_qp_with_udata(qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 356953d3dbd1..6e62c9e2c971 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2947,6 +2947,22 @@ static inline int ib_post_srq_recv(struct ib_srq *srq, struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr); +/** + * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. + * @qp: The QP to modify. + * @attr: On input, specifies the QP attributes to modify. On output, + * the current values of selected QP attributes are returned. + * @attr_mask: A bit-mask used to specify which attributes of the QP + * are being modified. + * @udata: pointer to user's input output buffer information + * are being modified. + * It returns 0 on success and returns appropriate error code on error. + */ +int ib_modify_qp_with_udata(struct ib_qp *qp, + struct ib_qp_attr *attr, + int attr_mask, + struct ib_udata *udata); + /** * ib_modify_qp - Modifies the attributes for the specified QP and then * transitions the QP to the given state. -- cgit v1.2.3-71-gd317 From 0f4d027c3b4240ecb314daa948238d459fdc3a00 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:14 +0300 Subject: IB/{rdmavt, qib, hfi1}: Remove gfp flags argument The caller to the driver marks GFP_NOIO allocations with help of memalloc_noio-* calls now. This makes redundant to pass down to the driver gfp flags, which can be GFP_KERNEL only. The patch removes the gfp flags argument and updates all driver paths. Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qp.c | 7 +++-- drivers/infiniband/hw/hfi1/qp.h | 3 +-- drivers/infiniband/hw/qib/qib_qp.c | 15 +++++------ drivers/infiniband/hw/qib/qib_verbs.h | 4 +-- drivers/infiniband/sw/rdmavt/qp.c | 48 ++++++++++------------------------- include/rdma/rdma_vt.h | 5 ++-- 6 files changed, 28 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 650305cc0373..1a7af9f60c13 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -647,18 +647,17 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) qp->pid); } -void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, - gfp_t gfp) +void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) { struct hfi1_qp_priv *priv; - priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node); + priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node); if (!priv) return ERR_PTR(-ENOMEM); priv->owner = qp; - priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp, + priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL, rdi->dparms.node); if (!priv->s_ahg) { kfree(priv); diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h index 1eb9cd7b8c19..6fe542b6a927 100644 --- a/drivers/infiniband/hw/hfi1/qp.h +++ b/drivers/infiniband/hw/hfi1/qp.h @@ -123,8 +123,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp); /* * Functions provided by hfi1 driver for rdmavt to use */ -void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, - gfp_t gfp); +void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp); void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); unsigned free_all_qps(struct rvt_dev_info *rdi); void notify_qp_reset(struct rvt_qp *qp); diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 5984981e7dd4..a343e3b5d4cb 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -104,10 +104,9 @@ const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = { }; -static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, - gfp_t gfp) +static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) { - unsigned long page = get_zeroed_page(gfp); + unsigned long page = get_zeroed_page(GFP_KERNEL); /* * Free the page if someone raced with us installing it. @@ -126,7 +125,7 @@ static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp) + enum ib_qp_type type, u8 port) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; @@ -160,7 +159,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map, gfp); + get_map_page(qpt, map); if (unlikely(!map->page)) break; } @@ -317,16 +316,16 @@ u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu) return ib_mtu_enum_to_int(pmtu); } -void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp) +void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) { struct qib_qp_priv *priv; - priv = kzalloc(sizeof(*priv), gfp); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return ERR_PTR(-ENOMEM); priv->owner = qp; - priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp); + priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), GFP_KERNEL); if (!priv->s_hdr) { kfree(priv); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index da0db5485ddc..a52fc67b40d7 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -274,11 +274,11 @@ int qib_get_counters(struct qib_pportdata *ppd, * Functions provided by qib driver for rdmavt to use */ unsigned qib_free_all_qps(struct rvt_dev_info *rdi); -void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); +void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp); void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); void qib_notify_qp_reset(struct rvt_qp *qp); int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port, gfp_t gfp); + enum ib_qp_type type, u8 port); void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 727e81cc2c8f..459865439a0b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -118,10 +118,9 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { EXPORT_SYMBOL(ib_rvt_state_ops); static void get_map_page(struct rvt_qpn_table *qpt, - struct rvt_qpn_map *map, - gfp_t gfp) + struct rvt_qpn_map *map) { - unsigned long page = get_zeroed_page(gfp); + unsigned long page = get_zeroed_page(GFP_KERNEL); /* * Free the page if someone raced with us installing it. @@ -173,7 +172,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end); for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) { if (!map->page) { - get_map_page(qpt, map, GFP_KERNEL); + get_map_page(qpt, map); if (!map->page) { ret = -ENOMEM; break; @@ -342,14 +341,14 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, * Return: The queue pair number */ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port_num, gfp_t gfp) + enum ib_qp_type type, u8 port_num) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; if (rdi->driver_f.alloc_qpn) - return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp); + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num); if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -374,7 +373,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map, gfp); + get_map_page(qpt, map); if (unlikely(!map->page)) break; } @@ -672,7 +671,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp *ret = ERR_PTR(-ENOMEM); struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; - gfp_t gfp; size_t sqsize; if (!rdi) @@ -680,18 +678,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) + init_attr->create_flags) return ERR_PTR(-EINVAL); - /* GFP_NOIO is applicable to RC QP's only */ - - if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && - init_attr->qp_type != IB_QPT_RC) - return ERR_PTR(-EINVAL); - - gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? - GFP_NOIO : GFP_KERNEL; - /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || @@ -719,14 +708,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, sz = sizeof(struct rvt_sge) * init_attr->cap.max_send_sge + sizeof(struct rvt_swqe); - if (gfp == GFP_NOIO) - swq = __vmalloc( - sqsize * sz, - gfp | __GFP_ZERO, PAGE_KERNEL); - else - swq = vzalloc_node( - sqsize * sz, - rdi->dparms.node); + swq = vzalloc_node(sqsize * sz, rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); @@ -741,7 +723,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node); + qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL, + rdi->dparms.node); if (!qp) goto bail_swq; @@ -751,7 +734,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, kzalloc_node( sizeof(*qp->s_ack_queue) * rvt_max_atomic(rdi), - gfp, + GFP_KERNEL, rdi->dparms.node); if (!qp->s_ack_queue) goto bail_qp; @@ -766,7 +749,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * Driver needs to set up it's private QP structure and do any * initialization that is needed. */ - priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); + priv = rdi->driver_f.qp_priv_alloc(rdi, qp); if (IS_ERR(priv)) { ret = priv; goto bail_qp; @@ -786,11 +769,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.wq = vmalloc_user( sizeof(struct rvt_rwq) + qp->r_rq.size * sz); - else if (gfp == GFP_NOIO) - qp->r_rq.wq = __vmalloc( - sizeof(struct rvt_rwq) + - qp->r_rq.size * sz, - gfp | __GFP_ZERO, PAGE_KERNEL); else qp->r_rq.wq = vzalloc_node( sizeof(struct rvt_rwq) + @@ -824,7 +802,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, - init_attr->port_num, gfp); + init_attr->port_num); if (err < 0) { ret = ERR_PTR(err); goto bail_rq_wq; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4878aaf7bdff..55af69271053 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -229,8 +229,7 @@ struct rvt_driver_provided { * ERR_PTR(err). The driver is free to return NULL or a valid * pointer. */ - void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, - gfp_t gfp); + void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); /* * Free the driver's private qp structure. @@ -319,7 +318,7 @@ struct rvt_driver_provided { /* Let the driver pick the next queue pair number*/ int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port_num, gfp_t gfp); + enum ib_qp_type type, u8 port_num); /* Determine if its safe or allowed to modify the qp */ int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, -- cgit v1.2.3-71-gd317 From 8900b894e769dd88b53e519e3502e0e3c349fe95 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:15 +0300 Subject: {net, IB}/mlx4: Remove gfp flags argument The caller to the driver marks GFP_NOIO allocations with help of memalloc_noio-* calls now. This makes redundant to pass down to the driver gfp flags, which can be GFP_KERNEL only. The patch removes the gfp flags argument and updates all driver paths. Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 6 ++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 - drivers/infiniband/hw/mlx4/qp.c | 40 +++++++++------------- drivers/infiniband/hw/mlx4/srq.c | 8 ++--- drivers/net/ethernet/mellanox/mlx4/alloc.c | 29 ++++++++-------- drivers/net/ethernet/mellanox/mlx4/cq.c | 4 +-- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 7 ++-- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx4/icm.c | 7 ++-- drivers/net/ethernet/mellanox/mlx4/icm.h | 3 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 +-- drivers/net/ethernet/mellanox/mlx4/mr.c | 17 +++++---- drivers/net/ethernet/mellanox/mlx4/qp.c | 20 +++++------ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 4 +-- drivers/net/ethernet/mellanox/mlx4/srq.c | 4 +-- include/linux/mlx4/device.h | 10 +++--- 16 files changed, 76 insertions(+), 90 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 4f5a143fc0a7..ff931c580557 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * int err; err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, - PAGE_SIZE * 2, &buf->buf, GFP_KERNEL); + PAGE_SIZE * 2, &buf->buf); if (err) goto out; @@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL); + err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); if (err) goto err_mtt; @@ -219,7 +219,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, uar = &to_mucontext(context)->uar; } else { - err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL); + err = mlx4_db_alloc(dev->dev, &cq->db, 1); if (err) goto err_cq; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index c2b9cbf4da05..9db82e67e959 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -185,7 +185,6 @@ enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, - MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO, /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */ MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 996e9058e515..75c0e6c5dd56 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -634,8 +634,8 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev, static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, - gfp_t gfp) + struct ib_udata *udata, int sqpn, + struct mlx4_ib_qp **caller_qp) { int qpn; int err; @@ -691,14 +691,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { - sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp); + sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL); if (!sqp) return -ENOMEM; qp = &sqp->qp; qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; } else { - qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp); + qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL); if (!qp) return -ENOMEM; qp->pri.vid = 0xFFFF; @@ -780,7 +780,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; if (qp_has_rq(init_attr)) { - err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp); + err = mlx4_db_alloc(dev->dev, &qp->db, 0); if (err) goto err; @@ -788,7 +788,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size, - &qp->buf, gfp)) { + &qp->buf)) { memcpy(&init_attr->cap, &backup_cap, sizeof(backup_cap)); err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, @@ -797,7 +797,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_db; if (mlx4_buf_alloc(dev->dev, qp->buf_size, - PAGE_SIZE * 2, &qp->buf, gfp)) { + PAGE_SIZE * 2, &qp->buf)) { err = -ENOMEM; goto err_db; } @@ -808,20 +808,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp); + err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); if (err) goto err_mtt; qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64), - gfp | __GFP_NOWARN); + GFP_KERNEL | __GFP_NOWARN); if (!qp->sq.wrid) qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64), - gfp, PAGE_KERNEL); + GFP_KERNEL, PAGE_KERNEL); qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64), - gfp | __GFP_NOWARN); + GFP_KERNEL | __GFP_NOWARN); if (!qp->rq.wrid) qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64), - gfp, PAGE_KERNEL); + GFP_KERNEL, PAGE_KERNEL); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; @@ -859,7 +859,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; - err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); if (err) goto err_qpn; @@ -1127,10 +1127,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, int err; int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; - gfp_t gfp; - gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ? - GFP_NOIO : GFP_KERNEL; /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. @@ -1140,8 +1137,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | - MLX4_IB_QP_CREATE_ROCE_V2_GSI | - MLX4_IB_QP_CREATE_USE_GFP_NOIO)) + MLX4_IB_QP_CREATE_ROCE_V2_GSI)) return ERR_PTR(-EINVAL); if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { @@ -1154,7 +1150,6 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | - MLX4_IB_QP_CREATE_USE_GFP_NOIO | MLX4_IB_QP_CREATE_ROCE_V2_GSI | MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) && init_attr->qp_type != IB_QPT_UD) || @@ -1179,7 +1174,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: - qp = kzalloc(sizeof *qp, gfp); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); qp->pri.vid = 0xFFFF; @@ -1188,7 +1183,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_UD: { err = create_qp_common(to_mdev(pd->device), pd, init_attr, - udata, 0, &qp, gfp); + udata, 0, &qp); if (err) { kfree(qp); return ERR_PTR(err); @@ -1217,8 +1212,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, } err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, - sqpn, - &qp, gfp); + sqpn, &qp); if (err) return ERR_PTR(err); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index e32dd58937a8..0facaf5f6d23 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -135,14 +135,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; } else { - err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL); + err = mlx4_db_alloc(dev->dev, &srq->db, 0); if (err) goto err_srq; *srq->db.db = 0; - if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf, - GFP_KERNEL)) { + if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, + &srq->buf)) { err = -ENOMEM; goto err_db; } @@ -167,7 +167,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL); + err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf); if (err) goto err_mtt; diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 249a4584401a..d94b3744a5b9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -578,7 +578,7 @@ out: } static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, - struct mlx4_buf *buf, gfp_t gfp) + struct mlx4_buf *buf) { dma_addr_t t; @@ -587,7 +587,7 @@ static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, buf->page_shift = get_order(size) + PAGE_SHIFT; buf->direct.buf = dma_zalloc_coherent(&dev->persist->pdev->dev, - size, &t, gfp); + size, &t, GFP_KERNEL); if (!buf->direct.buf) return -ENOMEM; @@ -607,10 +607,10 @@ static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, * multiple pages, so we don't require too much contiguous memory. */ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf, gfp_t gfp) + struct mlx4_buf *buf) { if (size <= max_direct) { - return mlx4_buf_direct_alloc(dev, size, buf, gfp); + return mlx4_buf_direct_alloc(dev, size, buf); } else { dma_addr_t t; int i; @@ -620,14 +620,14 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - gfp); + GFP_KERNEL); if (!buf->page_list) return -ENOMEM; for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = dma_zalloc_coherent(&dev->persist->pdev->dev, - PAGE_SIZE, &t, gfp); + PAGE_SIZE, &t, GFP_KERNEL); if (!buf->page_list[i].buf) goto err_free; @@ -663,12 +663,11 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) } EXPORT_SYMBOL_GPL(mlx4_buf_free); -static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device, - gfp_t gfp) +static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) { struct mlx4_db_pgdir *pgdir; - pgdir = kzalloc(sizeof *pgdir, gfp); + pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); if (!pgdir) return NULL; @@ -676,7 +675,7 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device, pgdir->bits[0] = pgdir->order0; pgdir->bits[1] = pgdir->order1; pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, - &pgdir->db_dma, gfp); + &pgdir->db_dma, GFP_KERNEL); if (!pgdir->db_page) { kfree(pgdir); return NULL; @@ -716,7 +715,7 @@ found: return 0; } -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp) +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_db_pgdir *pgdir; @@ -728,7 +727,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; - pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp); + pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev); if (!pgdir) { ret = -ENOMEM; goto out; @@ -780,13 +779,13 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, { int err; - err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL); + err = mlx4_db_alloc(dev, &wqres->db, 1); if (err) return err; *wqres->db.db = 0; - err = mlx4_buf_direct_alloc(dev, size, &wqres->buf, GFP_KERNEL); + err = mlx4_buf_direct_alloc(dev, size, &wqres->buf); if (err) goto err_db; @@ -795,7 +794,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL); + err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf); if (err) goto err_mtt; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index fa6d2354a0e9..c56a511b918e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -224,11 +224,11 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn) if (*cqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL); + err = mlx4_table_get(dev, &cq_table->table, *cqn); if (err) goto err_out; - err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL); + err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn); if (err) goto err_put; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index e5fb89505a13..436f7689a032 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1042,7 +1042,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, if (!context) return -ENOMEM; - err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL); + err = mlx4_qp_alloc(mdev->dev, qpn, qp); if (err) { en_err(priv, "Failed to allocate qp #%x\n", qpn); goto out; @@ -1086,7 +1086,7 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) en_err(priv, "Failed reserving drop qpn\n"); return err; } - err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL); + err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp); if (err) { en_err(priv, "Failed allocating drop qp\n"); mlx4_qp_release_range(priv->mdev->dev, qpn, 1); @@ -1158,8 +1158,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) } /* Configure RSS indirection qp */ - err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp, - GFP_KERNEL); + err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp); if (err) { en_err(priv, "Failed to allocate RSS indirection QP\n"); goto rss_err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 4f3a9b27ce4a..73faa3d77921 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -111,7 +111,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, goto err_hwq_res; } - err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp, GFP_KERNEL); + err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp); if (err) { en_err(priv, "Failed allocating qp %d\n", ring->qpn); goto err_reserve; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index e1f9e7cebf8f..5a7816e7c7b4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -251,8 +251,7 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev) MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, - gfp_t gfp) +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj) { u32 i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size); @@ -266,7 +265,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, } table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT, - (table->lowmem ? gfp : GFP_HIGHUSER) | + (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN, table->coherent); if (!table->icm[i]) { ret = -ENOMEM; @@ -363,7 +362,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 i; for (i = start; i <= end; i += inc) { - err = mlx4_table_get(dev, table, i, GFP_KERNEL); + err = mlx4_table_get(dev, table, i); if (err) goto fail; } diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h index 0c7364550150..dee67fa39107 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.h +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -71,8 +71,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, gfp_t gfp_mask, int coherent); void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent); -int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj, - gfp_t gfp); +int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj); int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 start, u32 end); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 30616cd0140d..706d7f21ac5c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -969,7 +969,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev); void mlx4_cleanup_qp_table(struct mlx4_dev *dev); void mlx4_cleanup_srq_table(struct mlx4_dev *dev); void mlx4_cleanup_mcg_table(struct mlx4_dev *dev); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp); +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn); void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn); int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn); void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn); @@ -977,7 +977,7 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn); void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn); int __mlx4_mpt_reserve(struct mlx4_dev *dev); void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index); -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp); +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index); void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index); u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order); void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index ce852ca22a96..24282cd017d3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -479,14 +479,14 @@ static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) __mlx4_mpt_release(dev, index); } -int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; - return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp); + return mlx4_table_get(dev, &mr_table->dmpt_table, index); } -static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) +static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { u64 param = 0; @@ -497,7 +497,7 @@ static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp) MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_mpt_alloc_icm(dev, index, gfp); + return __mlx4_mpt_alloc_icm(dev, index); } void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) @@ -629,7 +629,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key)); if (err) return err; @@ -787,14 +787,13 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, EXPORT_SYMBOL_GPL(mlx4_write_mtt); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf, gfp_t gfp) + struct mlx4_buf *buf) { u64 *page_list; int err; int i; - page_list = kmalloc(buf->npages * sizeof *page_list, - gfp); + page_list = kcalloc(buf->npages, sizeof(*page_list), GFP_KERNEL); if (!page_list) return -ENOMEM; @@ -841,7 +840,7 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key)); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 5a310d313e94..26747212526b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -301,29 +301,29 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) } EXPORT_SYMBOL_GPL(mlx4_qp_release_range); -int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) +int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; int err; - err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->qp_table, qpn); if (err) goto err_out; - err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->auxc_table, qpn); if (err) goto err_put_qp; - err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->altc_table, qpn); if (err) goto err_put_auxc; - err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn); if (err) goto err_put_altc; - err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp); + err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn); if (err) goto err_put_rdmarc; @@ -345,7 +345,7 @@ err_out: return err; } -static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) +static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn) { u64 param = 0; @@ -355,7 +355,7 @@ static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp) MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_qp_alloc_icm(dev, qpn, gfp); + return __mlx4_qp_alloc_icm(dev, qpn); } void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) @@ -397,7 +397,7 @@ struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) return qp; } -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; @@ -408,7 +408,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) qp->qpn = qpn; - err = mlx4_qp_alloc_icm(dev, qpn, gfp); + err = mlx4_qp_alloc_icm(dev, qpn); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 812783865205..215e21c3dc8a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1822,7 +1822,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, return err; if (!fw_reserved(dev, qpn)) { - err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL); + err = __mlx4_qp_alloc_icm(dev, qpn); if (err) { res_abort_move(dev, slave, RES_QP, qpn); return err; @@ -1909,7 +1909,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL); + err = __mlx4_mpt_alloc_icm(dev, mpt->key); if (err) { res_abort_move(dev, slave, RES_MPT, id); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index f44d089e2ca6..bedf52126824 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -100,11 +100,11 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn) if (*srqn == -1) return -ENOMEM; - err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL); + err = mlx4_table_get(dev, &srq_table->table, *srqn); if (err) goto err_out; - err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL); + err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn); if (err) goto err_put; return 0; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d5bed0875d30..aad5d81dfb44 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1068,7 +1068,7 @@ static inline int mlx4_is_eth(struct mlx4_dev *dev, int port) } int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf, gfp_t gfp); + struct mlx4_buf *buf); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf); static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset) { @@ -1105,10 +1105,9 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, - struct mlx4_buf *buf, gfp_t gfp); + struct mlx4_buf *buf); -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, - gfp_t gfp); +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, @@ -1124,8 +1123,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base, u8 flags); void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); -int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, - gfp_t gfp); +int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp); int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn, -- cgit v1.2.3-71-gd317 From 7855f5842741e5835b9be073079780c444dca898 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 23 May 2017 14:38:16 +0300 Subject: IB/core: Remove NOIO QP create flag There are no users for IB_QP_CREATE_USE_GFP_NOIO flag, so let's remove it. Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_verbs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6e62c9e2c971..b5732432bb29 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1056,7 +1056,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_SIGNATURE_EN = 1 << 6, - IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, + /* FREE = 1 << 7, */ IB_QP_CREATE_SCATTER_FCS = 1 << 8, IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, /* reserve bits 26-31 for low level drivers' internal use */ -- cgit v1.2.3-71-gd317 From 8bd226f9a7dc18740a916dcba3112f2bfc3ad9e8 Mon Sep 17 00:00:00 2001 From: Ruslan Bilovol Date: Sun, 25 Jun 2017 16:23:45 +0300 Subject: include: usb: audio: specify exact endiannes of descriptors USB spec says that multiple byte fields are stored in little-endian order (see chapter 8.1 of USB2.0 spec and chapter 7.1 of USB3.0 spec), thus mark such fields as LE for UAC1 and UAC2 headers Signed-off-by: Ruslan Bilovol Signed-off-by: Felipe Balbi --- include/linux/usb/audio-v2.h | 14 +++++++------- include/uapi/linux/usb/audio.h | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index c5f2158ab00e..fd73bc0e9027 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -115,13 +115,13 @@ struct uac2_input_terminal_descriptor { __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bTerminalID; - __u16 wTerminalType; + __le16 wTerminalType; __u8 bAssocTerminal; __u8 bCSourceID; __u8 bNrChannels; - __u32 bmChannelConfig; + __le32 bmChannelConfig; __u8 iChannelNames; - __u16 bmControls; + __le16 bmControls; __u8 iTerminal; } __attribute__((packed)); @@ -132,11 +132,11 @@ struct uac2_output_terminal_descriptor { __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bTerminalID; - __u16 wTerminalType; + __le16 wTerminalType; __u8 bAssocTerminal; __u8 bSourceID; __u8 bCSourceID; - __u16 bmControls; + __le16 bmControls; __u8 iTerminal; } __attribute__((packed)); @@ -164,9 +164,9 @@ struct uac2_as_header_descriptor { __u8 bTerminalLink; __u8 bmControls; __u8 bFormatType; - __u32 bmFormats; + __le32 bmFormats; __u8 bNrChannels; - __u32 bmChannelConfig; + __le32 bmChannelConfig; __u8 iChannelNames; } __attribute__((packed)); diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h index d2314be4f0c0..a4680a5bf5dd 100644 --- a/include/uapi/linux/usb/audio.h +++ b/include/uapi/linux/usb/audio.h @@ -333,7 +333,7 @@ struct uac_processing_unit_descriptor { __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bUnitID; - __u16 wProcessType; + __le16 wProcessType; __u8 bNrInPins; __u8 baSourceID[]; } __attribute__ ((packed)); @@ -491,8 +491,8 @@ struct uac_format_type_ii_ext_descriptor { __u8 bDescriptorType; __u8 bDescriptorSubtype; __u8 bFormatType; - __u16 wMaxBitRate; - __u16 wSamplesPerFrame; + __le16 wMaxBitRate; + __le16 wSamplesPerFrame; __u8 bHeaderLength; __u8 bSideBandProtocol; } __attribute__((packed)); -- cgit v1.2.3-71-gd317 From beaec533fc2701a28a4d667f67c9f59c6e4e0d13 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 19 Jul 2017 20:27:30 +0200 Subject: llist: clang: introduce member_address_is_nonnull() Currently llist_for_each_entry() and llist_for_each_entry_safe() iterate until &pos->member != NULL. But when building the kernel with Clang, the compiler assumes &pos->member cannot be NULL if the member's offset is greater than 0 (which would be equivalent to the object being non-contiguous in memory). Therefore the loop condition is always true, and the loops become infinite. To work around this, introduce the member_address_is_nonnull() macro, which casts object pointer to uintptr_t, thus letting the member pointer to be NULL. Signed-off-by: Alexander Potapenko Tested-by: Sodagudi Prasad Signed-off-by: Linus Torvalds --- include/linux/llist.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/llist.h b/include/linux/llist.h index d11738110a7a..1957635e6d5f 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -92,6 +92,23 @@ static inline void init_llist_head(struct llist_head *list) #define llist_entry(ptr, type, member) \ container_of(ptr, type, member) +/** + * member_address_is_nonnull - check whether the member address is not NULL + * @ptr: the object pointer (struct type * that contains the llist_node) + * @member: the name of the llist_node within the struct. + * + * This macro is conceptually the same as + * &ptr->member != NULL + * but it works around the fact that compilers can decide that taking a member + * address is never a NULL pointer. + * + * Real objects that start at a high address and have a member at NULL are + * unlikely to exist, but such pointers may be returned e.g. by the + * container_of() macro. + */ +#define member_address_is_nonnull(ptr, member) \ + ((uintptr_t)(ptr) + offsetof(typeof(*(ptr)), member) != 0) + /** * llist_for_each - iterate over some deleted entries of a lock-less list * @pos: the &struct llist_node to use as a loop cursor @@ -145,7 +162,7 @@ static inline void init_llist_head(struct llist_head *list) */ #define llist_for_each_entry(pos, node, member) \ for ((pos) = llist_entry((node), typeof(*(pos)), member); \ - &(pos)->member != NULL; \ + member_address_is_nonnull(pos, member); \ (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) /** @@ -167,7 +184,7 @@ static inline void init_llist_head(struct llist_head *list) */ #define llist_for_each_entry_safe(pos, n, node, member) \ for (pos = llist_entry((node), typeof(*pos), member); \ - &pos->member != NULL && \ + member_address_is_nonnull(pos, member) && \ (n = llist_entry(pos->member.next, typeof(*n), member), true); \ pos = n) -- cgit v1.2.3-71-gd317 From f86f418059b94aa01f9342611a272ca60c583e89 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 7 Jun 2017 16:12:51 +0800 Subject: trace: fix the errors caused by incompatible type of RCU variables The variables which are processed by RCU functions should be annotated as RCU, otherwise sparse will report the errors like below: "error: incompatible types in comparison expression (different address spaces)" Link: http://lkml.kernel.org/r/1496823171-7758-1-git-send-email-zhang.chunyan@linaro.org Signed-off-by: Chunyan Zhang [ Updated to not be 100% 80 column strict ] Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 6 +++--- include/linux/trace_events.h | 2 +- kernel/trace/ftrace.c | 41 +++++++++++++++++++++++++++-------------- kernel/trace/trace.h | 6 +++--- 4 files changed, 34 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 5857390ac35a..6383115e9d2c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -145,8 +145,8 @@ enum { #ifdef CONFIG_DYNAMIC_FTRACE /* The hash used to know what functions callbacks trace */ struct ftrace_ops_hash { - struct ftrace_hash *notrace_hash; - struct ftrace_hash *filter_hash; + struct ftrace_hash __rcu *notrace_hash; + struct ftrace_hash __rcu *filter_hash; struct mutex regex_lock; }; @@ -168,7 +168,7 @@ static inline void ftrace_free_init_mem(void) { } */ struct ftrace_ops { ftrace_func_t func; - struct ftrace_ops *next; + struct ftrace_ops __rcu *next; unsigned long flags; void *private; ftrace_func_t saved_func; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index f73cedfa2e0b..536c80ff7ad9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -338,7 +338,7 @@ enum { struct trace_event_file { struct list_head list; struct trace_event_call *event_call; - struct event_filter *filter; + struct event_filter __rcu *filter; struct dentry *dir; struct trace_array *tr; struct trace_subsystem_dir *system; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 53f6b6401cf0..02004ae91860 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -113,7 +113,7 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; +static struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; @@ -169,8 +169,11 @@ int ftrace_nr_registered_ops(void) mutex_lock(&ftrace_lock); - for (ops = ftrace_ops_list; - ops != &ftrace_list_end; ops = ops->next) + for (ops = rcu_dereference_protected(ftrace_ops_list, + lockdep_is_held(&ftrace_lock)); + ops != &ftrace_list_end; + ops = rcu_dereference_protected(ops->next, + lockdep_is_held(&ftrace_lock))) cnt++; mutex_unlock(&ftrace_lock); @@ -275,10 +278,11 @@ static void update_ftrace_function(void) * If there's only one ftrace_ops registered, the ftrace_ops_list * will point to the ops we want. */ - set_function_trace_op = ftrace_ops_list; + set_function_trace_op = rcu_dereference_protected(ftrace_ops_list, + lockdep_is_held(&ftrace_lock)); /* If there's no ftrace_ops registered, just call the stub function */ - if (ftrace_ops_list == &ftrace_list_end) { + if (set_function_trace_op == &ftrace_list_end) { func = ftrace_stub; /* @@ -286,7 +290,8 @@ static void update_ftrace_function(void) * recursion safe and not dynamic and the arch supports passing ops, * then have the mcount trampoline call the function directly. */ - } else if (ftrace_ops_list->next == &ftrace_list_end) { + } else if (rcu_dereference_protected(ftrace_ops_list->next, + lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { func = ftrace_ops_get_list_func(ftrace_ops_list); } else { @@ -348,9 +353,11 @@ int using_ftrace_ops_list_func(void) return ftrace_trace_function == ftrace_ops_list_func; } -static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) +static void add_ftrace_ops(struct ftrace_ops __rcu **list, + struct ftrace_ops *ops) { - ops->next = *list; + rcu_assign_pointer(ops->next, *list); + /* * We are entering ops into the list but another * CPU might be walking that list. We need to make sure @@ -360,7 +367,8 @@ static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) rcu_assign_pointer(*list, ops); } -static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) +static int remove_ftrace_ops(struct ftrace_ops __rcu **list, + struct ftrace_ops *ops) { struct ftrace_ops **p; @@ -368,7 +376,10 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) * If we are removing the last function, then simply point * to the ftrace_stub. */ - if (*list == ops && ops->next == &ftrace_list_end) { + if (rcu_dereference_protected(*list, + lockdep_is_held(&ftrace_lock)) == ops && + rcu_dereference_protected(ops->next, + lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { *list = &ftrace_list_end; return 0; } @@ -1569,8 +1580,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) return 0; #endif - hash.filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash); - hash.notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash); + rcu_assign_pointer(hash.filter_hash, ops->func_hash->filter_hash); + rcu_assign_pointer(hash.notrace_hash, ops->func_hash->notrace_hash); if (hash_contains_ip(ip, &hash)) ret = 1; @@ -2840,7 +2851,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * If there's no more ops registered with ftrace, run a * sanity check to make sure all rec flags are cleared. */ - if (ftrace_ops_list == &ftrace_list_end) { + if (rcu_dereference_protected(ftrace_ops_list, + lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) { struct ftrace_page *pg; struct dyn_ftrace *rec; @@ -6453,7 +6465,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, if (ftrace_enabled) { /* we are starting ftrace again */ - if (ftrace_ops_list != &ftrace_list_end) + if (rcu_dereference_protected(ftrace_ops_list, + lockdep_is_held(&ftrace_lock)) != &ftrace_list_end) update_ftrace_function(); ftrace_startup_sysctl(); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6ade1c55cc3a..490ba229931d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1210,9 +1210,9 @@ struct ftrace_event_field { struct event_filter { int n_preds; /* Number assigned */ int a_preds; /* allocated */ - struct filter_pred *preds; - struct filter_pred *root; - char *filter_string; + struct filter_pred __rcu *preds; + struct filter_pred __rcu *root; + char *filter_string; }; struct event_subsystem { -- cgit v1.2.3-71-gd317 From dc1a0afbacaeaced8f5679a99047c0467f1099e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Jul 2017 11:12:09 +0200 Subject: nvme: fix byte swapping in the streams code Signed-off-by: Christoph Hellwig Reviewed-by: Jens Axboe Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- include/linux/nvme.h | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cb96f4a7ae3a..3b77cfe5aa1e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -336,7 +336,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl, c.directive.opcode = nvme_admin_directive_recv; c.directive.nsid = cpu_to_le32(nsid); - c.directive.numd = sizeof(*s); + c.directive.numd = cpu_to_le32(sizeof(*s)); c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM; c.directive.dtype = NVME_DIR_STREAMS; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 6b8ee9e628e1..bc74da018bdc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -963,14 +963,14 @@ struct nvme_dbbuf { }; struct streams_directive_params { - __u16 msl; - __u16 nssa; - __u16 nsso; + __le16 msl; + __le16 nssa; + __le16 nsso; __u8 rsvd[10]; - __u32 sws; - __u16 sgs; - __u16 nsa; - __u16 nso; + __le32 sws; + __le16 sgs; + __le16 nsa; + __le16 nso; __u8 rsvd2[6]; }; -- cgit v1.2.3-71-gd317 From a25ce4270bfdd522207b02f81a594c7d1746b697 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Sat, 17 Jun 2017 10:37:26 -0700 Subject: IB/rdmavt: Setting of QP timeout can overflow jiffies computation Current computation of qp->timeout_jiffies in rvt_modify_qp() will cause overflow due to the fact that the input to the function usecs_to_jiffies is only 32-bit ( unsigned int). Overflow will occur when attr->timeout is equal to or greater than 30. The consequence is unnecessarily excessive retry and thus degradation of the system performance. This patch fixes the problem by limiting the input to 5-bit and calling usecs_to_jiffies() before multiplying the scaling factor. Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 4 +--- include/rdma/rdmavt_qp.h | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 459865439a0b..8876ee7bc326 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1258,9 +1258,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_TIMEOUT) { qp->timeout = attr->timeout; - qp->timeout_jiffies = - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / - 1000UL); + qp->timeout_jiffies = rvt_timeout_to_jiffies(qp->timeout); } if (attr_mask & IB_QP_QKEY) diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index be6472e5b06b..d664d2e76280 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -647,6 +647,20 @@ static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len) return len >> qp->log_pmtu; } +/** + * rvt_timeout_to_jiffies - Convert a ULP timeout input into jiffies + * @timeout - timeout input(0 - 31). + * + * Return a timeout value in jiffies. + */ +static inline unsigned long rvt_timeout_to_jiffies(u8 timeout) +{ + if (timeout > 31) + timeout = 31; + + return usecs_to_jiffies(1U << timeout) * 4096UL / 1000UL; +} + extern const int ib_rvt_state_ops[]; struct rvt_dev_info; -- cgit v1.2.3-71-gd317 From 963916fdb3e5ad4af57ac959b5a03bf23f7568ca Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Thu, 6 Jul 2017 23:22:11 +0300 Subject: IB/cma: Fix reference count leak when no ipv4 addresses are set Once in_dev_get is called to receive in_device pointer, the in_device reference counter is increased, but if there are no ipv4 addresses configured on the net-device the ifa_list will be null, resulting in a flow that doesn't call in_dev_put to decrease the ref_cnt. This was exposed when running RoCE over ipv6 without any ipv4 addresses configured Fixes: commit 8e3867310c90 ("IB/cma: Fix a race condition in iboe_addr_get_sgid()") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: Doug Ledford --- include/rdma/ib_addr.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 4b34c51f859e..b73a14edc85e 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -205,11 +205,13 @@ static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (dev) { ip4 = in_dev_get(dev); - if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) { + if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address, (struct in6_addr *)gid); + + if (ip4) in_dev_put(ip4); - } + dev_put(dev); } } -- cgit v1.2.3-71-gd317 From 4cabc5b186b5427b9ee5a7495172542af105f02b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Jul 2017 00:00:21 +0200 Subject: bpf: fix mixed signed/unsigned derived min/max value bounds Edward reported that there's an issue in min/max value bounds tracking when signed and unsigned compares both provide hints on limits when having unknown variables. E.g. a program such as the following should have been rejected: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff8a94cda93400 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+7 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = -1 10: (2d) if r1 > r2 goto pc+3 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 11: (65) if r1 s> 0x1 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=1 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 12: (0f) r0 += r1 13: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=0,max_value=1 R1=inv,min_value=0,max_value=1 R2=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 14: (b7) r0 = 0 15: (95) exit What happens is that in the first part ... 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = -1 10: (2d) if r1 > r2 goto pc+3 ... r1 carries an unsigned value, and is compared as unsigned against a register carrying an immediate. Verifier deduces in reg_set_min_max() that since the compare is unsigned and operation is greater than (>), that in the fall-through/false case, r1's minimum bound must be 0 and maximum bound must be r2. Latter is larger than the bound and thus max value is reset back to being 'invalid' aka BPF_REGISTER_MAX_RANGE. Thus, r1 state is now 'R1=inv,min_value=0'. The subsequent test ... 11: (65) if r1 s> 0x1 goto pc+2 ... is a signed compare of r1 with immediate value 1. Here, verifier deduces in reg_set_min_max() that since the compare is signed this time and operation is greater than (>), that in the fall-through/false case, we can deduce that r1's maximum bound must be 1, meaning with prior test, we result in r1 having the following state: R1=inv,min_value=0,max_value=1. Given that the actual value this holds is -8, the bounds are wrongly deduced. When this is being added to r0 which holds the map_value(_adj) type, then subsequent store access in above case will go through check_mem_access() which invokes check_map_access_adj(), that will then probe whether the map memory is in bounds based on the min_value and max_value as well as access size since the actual unknown value is min_value <= x <= max_value; commit fce366a9dd0d ("bpf, verifier: fix alu ops against map_value{, _adj} register types") provides some more explanation on the semantics. It's worth to note in this context that in the current code, min_value and max_value tracking are used for two things, i) dynamic map value access via check_map_access_adj() and since commit 06c1c049721a ("bpf: allow helpers access to variable memory") ii) also enforced at check_helper_mem_access() when passing a memory address (pointer to packet, map value, stack) and length pair to a helper and the length in this case is an unknown value defining an access range through min_value/max_value in that case. The min_value/max_value tracking is /not/ used in the direct packet access case to track ranges. However, the issue also affects case ii), for example, the following crafted program based on the same principle must be rejected as well: 0: (b7) r2 = 0 1: (bf) r3 = r10 2: (07) r3 += -512 3: (7a) *(u64 *)(r10 -16) = -8 4: (79) r4 = *(u64 *)(r10 -16) 5: (b7) r6 = -1 6: (2d) if r4 > r6 goto pc+5 R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 R4=inv,min_value=0 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 7: (65) if r4 s> 0x1 goto pc+4 R1=ctx R2=imm0,min_value=0,max_value=0,min_align=2147483648 R3=fp-512 R4=inv,min_value=0,max_value=1 R6=imm-1,max_value=18446744073709551615,min_align=1 R10=fp 8: (07) r4 += 1 9: (b7) r5 = 0 10: (6a) *(u16 *)(r10 -512) = 0 11: (85) call bpf_skb_load_bytes#26 12: (b7) r0 = 0 13: (95) exit Meaning, while we initialize the max_value stack slot that the verifier thinks we access in the [1,2] range, in reality we pass -7 as length which is interpreted as u32 in the helper. Thus, this issue is relevant also for the case of helper ranges. Resetting both bounds in check_reg_overflow() in case only one of them exceeds limits is also not enough as similar test can be created that uses values which are within range, thus also here learned min value in r1 is incorrect when mixed with later signed test to create a range: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff880ad081fa00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+7 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = 2 10: (3d) if r2 >= r1 goto pc+3 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 11: (65) if r1 s> 0x4 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 12: (0f) r0 += r1 13: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=3,max_value=4 R1=inv,min_value=3,max_value=4 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 14: (b7) r0 = 0 15: (95) exit This leaves us with two options for fixing this: i) to invalidate all prior learned information once we switch signed context, ii) to track min/max signed and unsigned boundaries separately as done in [0]. (Given latter introduces major changes throughout the whole verifier, it's rather net-next material, thus this patch follows option i), meaning we can derive bounds either from only signed tests or only unsigned tests.) There is still the case of adjust_reg_min_max_vals(), where we adjust bounds on ALU operations, meaning programs like the following where boundaries on the reg get mixed in context later on when bounds are merged on the dst reg must get rejected, too: 0: (7a) *(u64 *)(r10 -8) = 0 1: (bf) r2 = r10 2: (07) r2 += -8 3: (18) r1 = 0xffff89b2bf87ce00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+6 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R10=fp 7: (7a) *(u64 *)(r10 -16) = -8 8: (79) r1 = *(u64 *)(r10 -16) 9: (b7) r2 = 2 10: (3d) if r2 >= r1 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R10=fp 11: (b7) r7 = 1 12: (65) if r7 s> 0x0 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,max_value=0 R10=fp 13: (b7) r0 = 0 14: (95) exit from 12 to 15: R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=imm1,min_value=1 R10=fp 15: (0f) r7 += r1 16: (65) if r7 s> 0x4 goto pc+2 R0=map_value(ks=8,vs=8,id=0),min_value=0,max_value=0 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp 17: (0f) r0 += r7 18: (72) *(u8 *)(r0 +0) = 0 R0=map_value_adj(ks=8,vs=8,id=0),min_value=4,max_value=4 R1=inv,min_value=3 R2=imm2,min_value=2,max_value=2,min_align=2 R7=inv,min_value=4,max_value=4 R10=fp 19: (b7) r0 = 0 20: (95) exit Meaning, in adjust_reg_min_max_vals() we must also reset range values on the dst when src/dst registers have mixed signed/ unsigned derived min/max value bounds with one unbounded value as otherwise they can be added together deducing false boundaries. Once both boundaries are established from either ALU ops or compare operations w/o mixing signed/unsigned insns, then they can safely be added to other regs also having both boundaries established. Adding regs with one unbounded side to a map value where the bounded side has been learned w/o mixing ops is possible, but the resulting map value won't recover from that, meaning such op is considered invalid on the time of actual access. Invalid bounds are set on the dst reg in case i) src reg, or ii) in case dst reg already had them. The only way to recover would be to perform i) ALU ops but only 'add' is allowed on map value types or ii) comparisons, but these are disallowed on pointers in case they span a range. This is fine as only BPF_JEQ and BPF_JNE may be performed on PTR_TO_MAP_VALUE_OR_NULL registers which potentially turn them into PTR_TO_MAP_VALUE type depending on the branch, so only here min/max value cannot be invalidated for them. In terms of state pruning, value_from_signed is considered as well in states_equal() when dealing with adjusted map values. With regards to breaking existing programs, there is a small risk, but use-cases are rather quite narrow where this could occur and mixing compares probably unlikely. Joint work with Josef and Edward. [0] https://lists.iovisor.org/pipermail/iovisor-dev/2017-June/000822.html Fixes: 484611357c19 ("bpf: allow access into map value arrays") Reported-by: Edward Cree Signed-off-by: Daniel Borkmann Signed-off-by: Edward Cree Signed-off-by: Josef Bacik Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 108 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 95 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 621076f56251..8e5d31f6faef 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -43,6 +43,7 @@ struct bpf_reg_state { u32 min_align; u32 aux_off; u32 aux_off_align; + bool value_from_signed; }; enum bpf_stack_slot_type { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6a86723c5b64..af9e84a4944e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -504,6 +504,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) { regs[regno].min_value = BPF_REGISTER_MIN_RANGE; regs[regno].max_value = BPF_REGISTER_MAX_RANGE; + regs[regno].value_from_signed = false; regs[regno].min_align = 0; } @@ -777,12 +778,13 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, return -EACCES; } -static bool is_pointer_value(struct bpf_verifier_env *env, int regno) +static bool __is_pointer_value(bool allow_ptr_leaks, + const struct bpf_reg_state *reg) { - if (env->allow_ptr_leaks) + if (allow_ptr_leaks) return false; - switch (env->cur_state.regs[regno].type) { + switch (reg->type) { case UNKNOWN_VALUE: case CONST_IMM: return false; @@ -791,6 +793,11 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) } } +static bool is_pointer_value(struct bpf_verifier_env *env, int regno) +{ + return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); +} + static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, int off, int size, bool strict) { @@ -1832,10 +1839,24 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, dst_align = dst_reg->min_align; /* We don't know anything about what was done to this register, mark it - * as unknown. + * as unknown. Also, if both derived bounds came from signed/unsigned + * mixed compares and one side is unbounded, we cannot really do anything + * with them as boundaries cannot be trusted. Thus, arithmetic of two + * regs of such kind will get invalidated bounds on the dst side. */ - if (min_val == BPF_REGISTER_MIN_RANGE && - max_val == BPF_REGISTER_MAX_RANGE) { + if ((min_val == BPF_REGISTER_MIN_RANGE && + max_val == BPF_REGISTER_MAX_RANGE) || + (BPF_SRC(insn->code) == BPF_X && + ((min_val != BPF_REGISTER_MIN_RANGE && + max_val == BPF_REGISTER_MAX_RANGE) || + (min_val == BPF_REGISTER_MIN_RANGE && + max_val != BPF_REGISTER_MAX_RANGE) || + (dst_reg->min_value != BPF_REGISTER_MIN_RANGE && + dst_reg->max_value == BPF_REGISTER_MAX_RANGE) || + (dst_reg->min_value == BPF_REGISTER_MIN_RANGE && + dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) && + regs[insn->dst_reg].value_from_signed != + regs[insn->src_reg].value_from_signed)) { reset_reg_range_values(regs, insn->dst_reg); return; } @@ -2023,6 +2044,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) regs[insn->dst_reg].max_value = insn->imm; regs[insn->dst_reg].min_value = insn->imm; regs[insn->dst_reg].min_align = calc_align(insn->imm); + regs[insn->dst_reg].value_from_signed = false; } } else if (opcode > BPF_END) { @@ -2198,40 +2220,63 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u8 opcode) { + bool value_from_signed = true; + bool is_range = true; + switch (opcode) { case BPF_JEQ: /* If this is false then we know nothing Jon Snow, but if it is * true then we know for sure. */ true_reg->max_value = true_reg->min_value = val; + is_range = false; break; case BPF_JNE: /* If this is true we know nothing Jon Snow, but if it is false * we know the value for sure; */ false_reg->max_value = false_reg->min_value = val; + is_range = false; break; case BPF_JGT: - /* Unsigned comparison, the minimum value is 0. */ - false_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGT: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGT) { + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + } /* If this is false then we know the maximum val is val, * otherwise we know the min val is val+1. */ false_reg->max_value = val; + false_reg->value_from_signed = value_from_signed; true_reg->min_value = val + 1; + true_reg->value_from_signed = value_from_signed; break; case BPF_JGE: - /* Unsigned comparison, the minimum value is 0. */ - false_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGE: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGE) { + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + } /* If this is false then we know the maximum value is val - 1, * otherwise we know the mimimum value is val. */ false_reg->max_value = val - 1; + false_reg->value_from_signed = value_from_signed; true_reg->min_value = val; + true_reg->value_from_signed = value_from_signed; break; default: break; @@ -2239,6 +2284,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, check_reg_overflow(false_reg); check_reg_overflow(true_reg); + if (is_range) { + if (__is_pointer_value(false, false_reg)) + reset_reg_range_values(false_reg, 0); + if (__is_pointer_value(false, true_reg)) + reset_reg_range_values(true_reg, 0); + } } /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg @@ -2248,41 +2299,64 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u8 opcode) { + bool value_from_signed = true; + bool is_range = true; + switch (opcode) { case BPF_JEQ: /* If this is false then we know nothing Jon Snow, but if it is * true then we know for sure. */ true_reg->max_value = true_reg->min_value = val; + is_range = false; break; case BPF_JNE: /* If this is true we know nothing Jon Snow, but if it is false * we know the value for sure; */ false_reg->max_value = false_reg->min_value = val; + is_range = false; break; case BPF_JGT: - /* Unsigned comparison, the minimum value is 0. */ - true_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGT: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGT) { + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + } /* * If this is false, then the val is <= the register, if it is * true the register <= to the val. */ false_reg->min_value = val; + false_reg->value_from_signed = value_from_signed; true_reg->max_value = val - 1; + true_reg->value_from_signed = value_from_signed; break; case BPF_JGE: - /* Unsigned comparison, the minimum value is 0. */ - true_reg->min_value = 0; + value_from_signed = false; /* fallthrough */ case BPF_JSGE: + if (true_reg->value_from_signed != value_from_signed) + reset_reg_range_values(true_reg, 0); + if (false_reg->value_from_signed != value_from_signed) + reset_reg_range_values(false_reg, 0); + if (opcode == BPF_JGE) { + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + } /* If this is false then constant < register, if it is true then * the register < constant. */ false_reg->min_value = val + 1; + false_reg->value_from_signed = value_from_signed; true_reg->max_value = val; + true_reg->value_from_signed = value_from_signed; break; default: break; @@ -2290,6 +2364,12 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, check_reg_overflow(false_reg); check_reg_overflow(true_reg); + if (is_range) { + if (__is_pointer_value(false, false_reg)) + reset_reg_range_values(false_reg, 0); + if (__is_pointer_value(false, true_reg)) + reset_reg_range_values(true_reg, 0); + } } static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, -- cgit v1.2.3-71-gd317 From bd8b2441742b49c76bec707757bd9c028ea9838e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jul 2017 17:54:34 -0400 Subject: NFS: Store the raw NFS access mask in the inode's access cache Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 9 ++++++--- include/linux/nfs_fs.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 24b3a6748062..8fae8b00b8f5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2399,7 +2399,7 @@ nfs_access_calc_mask(u32 access_result) void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) { - entry->mask = nfs_access_calc_mask(access_result); + entry->mask = access_result; } EXPORT_SYMBOL_GPL(nfs_access_set_mask); @@ -2407,6 +2407,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) { struct nfs_access_entry cache; bool may_block = (mask & MAY_NOT_BLOCK) == 0; + int cache_mask; int status; trace_nfs_access_enter(inode); @@ -2422,7 +2423,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) goto out; /* Be clever: ask server to check for all possible rights */ - cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE + | NFS_MAY_WRITE | NFS_MAY_READ; cache.cred = cred; cache.jiffies = jiffies; status = NFS_PROTO(inode)->access(inode, &cache); @@ -2436,7 +2438,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) } nfs_access_add_cache(inode, &cache); out_cached: - if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) + cache_mask = nfs_access_calc_mask(cache.mask); + if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) status = -EACCES; out: trace_nfs_access_exit(inode, status); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index e52cc55ac300..5cc91d6381a3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -51,7 +51,7 @@ struct nfs_access_entry { struct list_head lru; unsigned long jiffies; struct rpc_cred * cred; - int mask; + __u32 mask; struct rcu_head rcu_head; }; -- cgit v1.2.3-71-gd317 From 96edd61dcf44362d3ef0bed1a5361e0ac7886a63 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 10 Jul 2017 10:10:45 +0200 Subject: xen/balloon: don't online new memory initially When setting up the Xenstore watch for the memory target size the new watch will fire at once. Don't try to reach the configured target size by onlining new memory in this case, as the current memory size will be smaller in almost all cases due to e.g. BIOS reserved pages. Onlining new memory will lead to more problems e.g. undesired conflicts with NVMe devices meant to be operated as block devices. Instead remember the difference between target size and current size when the watch fires for the first time and apply it to any further size changes, too. In order to avoid races between balloon.c and xen-balloon.c init calls do the xen-balloon.c initialization from balloon.c. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/balloon.c | 3 +++ drivers/xen/xen-balloon.c | 22 ++++++++++++---------- include/xen/balloon.h | 8 ++++++++ 3 files changed, 23 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 50dcb68d8070..ab609255a0f3 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -780,6 +780,9 @@ static int __init balloon_init(void) } #endif + /* Init the xen-balloon driver. */ + xen_balloon_init(); + return 0; } subsys_initcall(balloon_init); diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index e7715cb62eef..e89136ab851e 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -59,6 +59,8 @@ static void watch_target(struct xenbus_watch *watch, { unsigned long long new_target; int err; + static bool watch_fired; + static long target_diff; err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); if (err != 1) { @@ -69,7 +71,14 @@ static void watch_target(struct xenbus_watch *watch, /* The given memory/target value is in KiB, so it needs converting to * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. */ - balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); + new_target >>= PAGE_SHIFT - 10; + if (watch_fired) { + balloon_set_new_target(new_target - target_diff); + return; + } + + watch_fired = true; + target_diff = new_target - balloon_stats.target_pages; } static struct xenbus_watch target_watch = { .node = "memory/target", @@ -94,22 +103,15 @@ static struct notifier_block xenstore_notifier = { .notifier_call = balloon_init_watcher, }; -static int __init balloon_init(void) +void xen_balloon_init(void) { - if (!xen_domain()) - return -ENODEV; - - pr_info("Initialising balloon driver\n"); - register_balloon(&balloon_dev); register_xen_selfballooning(&balloon_dev); register_xenstore_notifier(&xenstore_notifier); - - return 0; } -subsys_initcall(balloon_init); +EXPORT_SYMBOL_GPL(xen_balloon_init); #define BALLOON_SHOW(name, format, args...) \ static ssize_t show_##name(struct device *dev, \ diff --git a/include/xen/balloon.h b/include/xen/balloon.h index d1767dfb0d95..8906361bb50c 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -35,3 +35,11 @@ static inline int register_xen_selfballooning(struct device *dev) return -ENOSYS; } #endif + +#ifdef CONFIG_XEN_BALLOON +void xen_balloon_init(void); +#else +static inline void xen_balloon_init(void) +{ +} +#endif -- cgit v1.2.3-71-gd317