From 25559c22cef879c5cf7119540bfe21fb379d29f3 Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Mon, 9 Jul 2018 08:15:49 +0200 Subject: tee: add kernel internal client interface Adds a kernel internal TEE client interface to be used by other drivers. Reviewed-by: Sumit Garg Tested-by: Sumit Garg Tested-by: Zeng Tao Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index a2b3dfcee0b5..6cfe05893a76 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -453,6 +453,79 @@ static inline int tee_shm_get_id(struct tee_shm *shm) */ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id); +/** + * tee_client_open_context() - Open a TEE context + * @start: if not NULL, continue search after this context + * @match: function to check TEE device + * @data: data for match function + * @vers: if not NULL, version data of TEE device of the context returned + * + * This function does an operation similar to open("/dev/teeX") in user space. + * A returned context must be released with tee_client_close_context(). + * + * Returns a TEE context of the first TEE device matched by the match() + * callback or an ERR_PTR. + */ +struct tee_context * +tee_client_open_context(struct tee_context *start, + int (*match)(struct tee_ioctl_version_data *, + const void *), + const void *data, struct tee_ioctl_version_data *vers); + +/** + * tee_client_close_context() - Close a TEE context + * @ctx: TEE context to close + * + * Note that all sessions previously opened with this context will be + * closed when this function is called. + */ +void tee_client_close_context(struct tee_context *ctx); + +/** + * tee_client_get_version() - Query version of TEE + * @ctx: TEE context to TEE to query + * @vers: Pointer to version data + */ +void tee_client_get_version(struct tee_context *ctx, + struct tee_ioctl_version_data *vers); + +/** + * tee_client_open_session() - Open a session to a Trusted Application + * @ctx: TEE context + * @arg: Open session arguments, see description of + * struct tee_ioctl_open_session_arg + * @param: Parameters passed to the Trusted Application + * + * Returns < 0 on error else see @arg->ret for result. If @arg->ret + * is TEEC_SUCCESS the session identifier is available in @arg->session. + */ +int tee_client_open_session(struct tee_context *ctx, + struct tee_ioctl_open_session_arg *arg, + struct tee_param *param); + +/** + * tee_client_close_session() - Close a session to a Trusted Application + * @ctx: TEE Context + * @session: Session id + * + * Return < 0 on error else 0, regardless the session will not be + * valid after this function has returned. + */ +int tee_client_close_session(struct tee_context *ctx, u32 session); + +/** + * tee_client_invoke_func() - Invoke a function in a Trusted Application + * @ctx: TEE Context + * @arg: Invoke arguments, see description of + * struct tee_ioctl_invoke_arg + * @param: Parameters passed to the Trusted Application + * + * Returns < 0 on error else see @arg->ret for result. 
+ */ +int tee_client_invoke_func(struct tee_context *ctx, + struct tee_ioctl_invoke_arg *arg, + struct tee_param *param); + static inline bool tee_param_is_memref(struct tee_param *param) { switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { -- cgit v1.2.3-71-gd317 From 1e6cb72399fd58b38a1c11055ef18fe01f535cda Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 1 Sep 2018 10:41:11 +0300 Subject: fsnotify: add super block object type Add the infrastructure to attach a mark to a super_block struct and detach all attached marks when super block is destroyed. This is going to be used by fanotify backend to setup super block marks. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fdinfo.c | 5 +++++ fs/notify/fsnotify.c | 8 +++++++- fs/notify/fsnotify.h | 11 +++++++++++ fs/notify/mark.c | 4 ++++ fs/super.c | 2 +- include/linux/fs.h | 5 +++++ include/linux/fsnotify_backend.h | 17 ++++++++++++++--- 7 files changed, 47 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 86fcf5814279..25385e336ac7 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -131,6 +131,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) { + struct super_block *sb = fsnotify_conn_sb(mark->connector); + + seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n", + sb->s_dev, mflags, mark->mask, mark->ignored_mask); } } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ababdbfab537..2971803d151c 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -48,7 +48,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) * Called during unmount with no locks held, so needs to be safe against * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. */ -void fsnotify_unmount_inodes(struct super_block *sb) +static void fsnotify_unmount_inodes(struct super_block *sb) { struct inode *inode, *iput_inode = NULL; @@ -98,6 +98,12 @@ void fsnotify_unmount_inodes(struct super_block *sb) iput(iput_inode); } +void fsnotify_sb_delete(struct super_block *sb) +{ + fsnotify_unmount_inodes(sb); + fsnotify_clear_marks_by_sb(sb); +} + /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. 
If we care about any event diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 7902653dd577..5a00121fb219 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -21,6 +21,12 @@ static inline struct mount *fsnotify_conn_mount( return container_of(conn->obj, struct mount, mnt_fsnotify_marks); } +static inline struct super_block *fsnotify_conn_sb( + struct fsnotify_mark_connector *conn) +{ + return container_of(conn->obj, struct super_block, s_fsnotify_marks); +} + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); @@ -43,6 +49,11 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks); } +/* run the list of all marks associated with sb and destroy them */ +static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) +{ + fsnotify_destroy_marks(&sb->s_fsnotify_marks); +} /* Wait until all marks queued for destruction are destroyed */ extern void fsnotify_wait_marks_destroyed(void); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 59cdb27826de..b5172ccb2e60 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -115,6 +115,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) return &fsnotify_conn_inode(conn)->i_fsnotify_mask; else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; + else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) + return &fsnotify_conn_sb(conn)->s_fsnotify_mask; return NULL; } @@ -192,6 +194,8 @@ static struct inode *fsnotify_detach_connector_from_object( inode->i_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; + } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { + fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; } rcu_assign_pointer(*(conn->obj), NULL); diff --git a/fs/super.c b/fs/super.c index f3a8c008e164..ca53a08497ed 100644 --- a/fs/super.c +++ b/fs/super.c @@ -442,7 +442,7 @@ void generic_shutdown_super(struct super_block *sb) sync_filesystem(sb); sb->s_flags &= ~SB_ACTIVE; - fsnotify_unmount_inodes(sb); + fsnotify_sb_delete(sb); cgroup_writeback_umount(); evict_inodes(sb); diff --git a/include/linux/fs.h b/include/linux/fs.h index 33322702c910..2c14801d0aa3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1464,6 +1464,11 @@ struct super_block { spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ + +#ifdef CONFIG_FSNOTIFY + __u32 s_fsnotify_mask; + struct fsnotify_mark_connector __rcu *s_fsnotify_marks; +#endif } __randomize_layout; /* Helper functions so that in most cases filesystems will diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index b8f4182f42f1..81b88fc9df31 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -206,12 +206,14 @@ struct fsnotify_group { enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, FSNOTIFY_OBJ_TYPE_VFSMOUNT, + FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; #define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) +#define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) #define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) static inline bool fsnotify_valid_obj_type(unsigned int type) @@ -255,6 +257,7 @@ static inline struct 
fsnotify_mark *fsnotify_iter_##name##_mark( \ FSNOTIFY_ITER_FUNCS(inode, INODE) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) +FSNOTIFY_ITER_FUNCS(sb, SB) #define fsnotify_foreach_obj_type(type) \ for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++) @@ -267,8 +270,8 @@ struct fsnotify_mark_connector; typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; /* - * Inode / vfsmount point to this structure which tracks all marks attached to - * the inode / vfsmount. The reference to inode / vfsmount is held by this + * Inode/vfsmount/sb point to this structure which tracks all marks attached to + * the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this * structure. We destroy this structure when there are no more marks attached * to it. The structure is protected by fsnotify_mark_srcu. */ @@ -335,6 +338,7 @@ extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int dat extern int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask); extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); +extern void fsnotify_sb_delete(struct super_block *sb); extern u32 fsnotify_get_cookie(void); static inline int fsnotify_inode_watches_children(struct inode *inode) @@ -455,9 +459,13 @@ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *gr { fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL); } +/* run all the marks in a group, and clear all of the sn marks */ +static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL); +} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); -extern void fsnotify_unmount_inodes(struct super_block *sb); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); @@ -484,6 +492,9 @@ static inline void __fsnotify_inode_delete(struct inode *inode) static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) {} +static inline void fsnotify_sb_delete(struct super_block *sb) +{} + static inline void fsnotify_update_flags(struct dentry *dentry) {} -- cgit v1.2.3-71-gd317 From 4d18975c78f2d5c91792356501cf369e67594241 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Sat, 1 Sep 2018 16:08:45 +0200 Subject: fbdev: add remove_conflicting_pci_framebuffers() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Almost all PCI drivers using remove_conflicting_framebuffers() wrap it with the same code. 
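For reference, the duplicated pattern being factored out looks roughly like this (a sketch assembled from the helper added below; the function name, driver name and BAR index are illustrative, not taken from any one driver):

static int example_kick_out_firmware_fb(struct pci_dev *pdev)
{
	struct apertures_struct *ap;
	bool primary = false;
	int err;

	ap = alloc_apertures(1);
	if (!ap)
		return -ENOMEM;

	/* Cover the framebuffer BAR of this device (BAR 0 here). */
	ap->ranges[0].base = pci_resource_start(pdev, 0);
	ap->ranges[0].size = pci_resource_len(pdev, 0);
#ifdef CONFIG_X86
	/* A shadowed ROM implies this device drives the primary display. */
	primary = pdev->resource[PCI_ROM_RESOURCE].flags &
		  IORESOURCE_ROM_SHADOW;
#endif
	err = remove_conflicting_framebuffers(ap, "examplefb", primary);
	kfree(ap);
	return err;
}

The new remove_conflicting_pci_framebuffers() below collapses exactly this sequence into a single call.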
v2: add kerneldoc for DRM helper v3: propagate remove_conflicting_framebuffers() return value + move kerneldoc to where function is implemented Signed-off-by: Michał Mirosław Acked-by: Bartlomiej Zolnierkiewicz Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/7db1c278276de420eb45a1b71d06b5eb6bbd49ef.1535810304.git.mirq-linux@rere.qmqm.pl --- drivers/video/fbdev/core/fbmem.c | 35 +++++++++++++++++++++++++++++++++++ include/drm/drm_fb_helper.h | 12 ++++++++++++ include/linux/fb.h | 2 ++ 3 files changed, 49 insertions(+) (limited to 'include/linux') diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 6e9961b71442..5ffadc8e681d 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -1830,6 +1831,40 @@ int remove_conflicting_framebuffers(struct apertures_struct *a, } EXPORT_SYMBOL(remove_conflicting_framebuffers); +/** + * remove_conflicting_pci_framebuffers - remove firmware-configured framebuffers for PCI devices + * @pdev: PCI device + * @resource_id: index of PCI BAR configuring framebuffer memory + * @name: requesting driver name + * + * This function removes framebuffer devices (eg. initialized by firmware) + * using memory range configured for @pdev's BAR @resource_id. + * + * The function assumes that PCI device with shadowed ROM drives a primary + * display and so kicks out vga16fb. + */ +int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, int res_id, const char *name) +{ + struct apertures_struct *ap; + bool primary = false; + int err; + + ap = alloc_apertures(1); + if (!ap) + return -ENOMEM; + + ap->ranges[0].base = pci_resource_start(pdev, res_id); + ap->ranges[0].size = pci_resource_len(pdev, res_id); +#ifdef CONFIG_X86 + primary = pdev->resource[PCI_ROM_RESOURCE].flags & + IORESOURCE_ROM_SHADOW; +#endif + err = remove_conflicting_framebuffers(ap, name, primary); + kfree(ap); + return err; +} +EXPORT_SYMBOL(remove_conflicting_pci_framebuffers); + /** * register_framebuffer - registers a frame buffer device * @fb_info: frame buffer info structure diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 5db08c8f1d25..8b6ab3200a2c 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -615,4 +615,16 @@ drm_fb_helper_remove_conflicting_framebuffers(struct apertures_struct *a, #endif } +static inline int +drm_fb_helper_remove_conflicting_pci_framebuffers(struct pci_dev *pdev, + int resource_id, + const char *name) +{ +#if IS_REACHABLE(CONFIG_FB) + return remove_conflicting_pci_framebuffers(pdev, resource_id, name); +#else + return 0; +#endif +} + #endif diff --git a/include/linux/fb.h b/include/linux/fb.h index 3e7e75383d32..3cd375dafd0e 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -632,6 +632,8 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, extern int register_framebuffer(struct fb_info *fb_info); extern int unregister_framebuffer(struct fb_info *fb_info); extern int unlink_framebuffer(struct fb_info *fb_info); +extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, int res_id, + const char *name); extern int remove_conflicting_framebuffers(struct apertures_struct *a, const char *name, bool primary); extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); -- cgit v1.2.3-71-gd317 From cc8a4ea182efac95ad4582053f8a51271fab734d Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 18 Jul 2018 
18:09:55 +0200 Subject: platform/chrome: Move mfd/cros_ec_lpc* includes to drivers/platform. The cros-ec-lpc driver lives in drivers/platform because it is platform specific; however, there are two includes (cros_ec_lpc_mec.h and cros_ec_lpc_reg.h) that live in include/linux/mfd. These two includes are only used by the platform driver and are not really related to the MFD subsystem, so move them from include/linux/mfd to drivers/platform/chrome. Signed-off-by: Enric Balletbo i Serra Signed-off-by: Benson Leung --- drivers/platform/chrome/cros_ec_lpc.c | 3 +- drivers/platform/chrome/cros_ec_lpc_mec.c | 3 +- drivers/platform/chrome/cros_ec_lpc_mec.h | 90 +++++++++++++++++++++++++++++++ drivers/platform/chrome/cros_ec_lpc_reg.c | 3 +- drivers/platform/chrome/cros_ec_lpc_reg.h | 61 +++++++++++++++++++++ include/linux/mfd/cros_ec_lpc_mec.h | 90 ------------------------------- include/linux/mfd/cros_ec_lpc_reg.h | 61 --------------------- 7 files changed, 157 insertions(+), 154 deletions(-) create mode 100644 drivers/platform/chrome/cros_ec_lpc_mec.h create mode 100644 drivers/platform/chrome/cros_ec_lpc_reg.h delete mode 100644 include/linux/mfd/cros_ec_lpc_mec.h delete mode 100644 include/linux/mfd/cros_ec_lpc_reg.h (limited to 'include/linux') diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c index 31c8b8c49e45..7ec8789bf161 100644 --- a/drivers/platform/chrome/cros_ec_lpc.c +++ b/drivers/platform/chrome/cros_ec_lpc.c @@ -27,12 +27,13 @@ #include #include #include -#include #include #include #include #include +#include "cros_ec_lpc_reg.h" + #define DRV_NAME "cros_ec_lpcs" #define ACPI_DRV_NAME "GOOG0004" diff --git a/drivers/platform/chrome/cros_ec_lpc_mec.c b/drivers/platform/chrome/cros_ec_lpc_mec.c index 2eda2c2fc210..c4edfa83e493 100644 --- a/drivers/platform/chrome/cros_ec_lpc_mec.c +++ b/drivers/platform/chrome/cros_ec_lpc_mec.c @@ -24,10 +24,11 @@ #include #include #include -#include #include #include +#include "cros_ec_lpc_mec.h" + /* * This mutex must be held while accessing the EMI unit. We can't rely on the * EC mutex because memmap data may be accessed without it being held. */ diff --git a/drivers/platform/chrome/cros_ec_lpc_mec.h b/drivers/platform/chrome/cros_ec_lpc_mec.h new file mode 100644 index 000000000000..105068c0e919 --- /dev/null +++ b/drivers/platform/chrome/cros_ec_lpc_mec.h @@ -0,0 +1,90 @@ +/* + * cros_ec_lpc_mec - LPC variant I/O for Microchip EC + * + * Copyright (C) 2016 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This driver uses the Chrome OS EC byte-level message-based protocol for + * communicating the keyboard state (which keys are pressed) from a keyboard EC + * to the AP over some bus (such as i2c, lpc, spi). The EC does debouncing, + * but everything else (including deghosting) is done here. The main + * motivation for this is to keep the EC firmware as simple as possible, since + * it cannot be easily upgraded and EC flash/IRAM space is relatively + * expensive.
+ */ + +#ifndef __CROS_EC_LPC_MEC_H +#define __CROS_EC_LPC_MEC_H + +#include + +enum cros_ec_lpc_mec_emi_access_mode { + /* 8-bit access */ + ACCESS_TYPE_BYTE = 0x0, + /* 16-bit access */ + ACCESS_TYPE_WORD = 0x1, + /* 32-bit access */ + ACCESS_TYPE_LONG = 0x2, + /* + * 32-bit access, read or write of MEC_EMI_EC_DATA_B3 causes the + * EC data register to be incremented. + */ + ACCESS_TYPE_LONG_AUTO_INCREMENT = 0x3, +}; + +enum cros_ec_lpc_mec_io_type { + MEC_IO_READ, + MEC_IO_WRITE, +}; + +/* Access IO ranges 0x800 thru 0x9ff using EMI interface instead of LPC */ +#define MEC_EMI_RANGE_START EC_HOST_CMD_REGION0 +#define MEC_EMI_RANGE_END (EC_LPC_ADDR_MEMMAP + EC_MEMMAP_SIZE) + +/* EMI registers are relative to base */ +#define MEC_EMI_BASE 0x800 +#define MEC_EMI_HOST_TO_EC (MEC_EMI_BASE + 0) +#define MEC_EMI_EC_TO_HOST (MEC_EMI_BASE + 1) +#define MEC_EMI_EC_ADDRESS_B0 (MEC_EMI_BASE + 2) +#define MEC_EMI_EC_ADDRESS_B1 (MEC_EMI_BASE + 3) +#define MEC_EMI_EC_DATA_B0 (MEC_EMI_BASE + 4) +#define MEC_EMI_EC_DATA_B1 (MEC_EMI_BASE + 5) +#define MEC_EMI_EC_DATA_B2 (MEC_EMI_BASE + 6) +#define MEC_EMI_EC_DATA_B3 (MEC_EMI_BASE + 7) + +/* + * cros_ec_lpc_mec_init + * + * Initialize MEC I/O. + */ +void cros_ec_lpc_mec_init(void); + +/* + * cros_ec_lpc_mec_destroy + * + * Cleanup MEC I/O. + */ +void cros_ec_lpc_mec_destroy(void); + +/** + * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port + * + * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request + * @offset: Base read / write address + * @length: Number of bytes to read / write + * @buf: Destination / source buffer + * + * @return 8-bit checksum of all bytes read / written + */ +u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type, + unsigned int offset, unsigned int length, u8 *buf); + +#endif /* __CROS_EC_LPC_MEC_H */ diff --git a/drivers/platform/chrome/cros_ec_lpc_reg.c b/drivers/platform/chrome/cros_ec_lpc_reg.c index dcc7a3e30604..fc23d535c404 100644 --- a/drivers/platform/chrome/cros_ec_lpc_reg.c +++ b/drivers/platform/chrome/cros_ec_lpc_reg.c @@ -24,7 +24,8 @@ #include #include #include -#include + +#include "cros_ec_lpc_mec.h" static u8 lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest) { diff --git a/drivers/platform/chrome/cros_ec_lpc_reg.h b/drivers/platform/chrome/cros_ec_lpc_reg.h new file mode 100644 index 000000000000..1c12c38b306a --- /dev/null +++ b/drivers/platform/chrome/cros_ec_lpc_reg.h @@ -0,0 +1,61 @@ +/* + * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller + * + * Copyright (C) 2016 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This driver uses the Chrome OS EC byte-level message-based protocol for + * communicating the keyboard state (which keys are pressed) from a keyboard EC + * to the AP over some bus (such as i2c, lpc, spi). The EC does debouncing, + * but everything else (including deghosting) is done here. The main + * motivation for this is to keep the EC firmware as simple as possible, since + * it cannot be easily upgraded and EC flash/IRAM space is relatively + * expensive. 
+ */ + +#ifndef __CROS_EC_LPC_REG_H +#define __CROS_EC_LPC_REG_H + +/** + * cros_ec_lpc_read_bytes - Read bytes from a given LPC-mapped address. + * Returns 8-bit checksum of all bytes read. + * + * @offset: Base read address + * @length: Number of bytes to read + * @dest: Destination buffer + */ +u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest); + +/** + * cros_ec_lpc_write_bytes - Write bytes to a given LPC-mapped address. + * Returns 8-bit checksum of all bytes written. + * + * @offset: Base write address + * @length: Number of bytes to write + * @msg: Write data buffer + */ +u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg); + +/** + * cros_ec_lpc_reg_init + * + * Initialize register I/O. + */ +void cros_ec_lpc_reg_init(void); + +/** + * cros_ec_lpc_reg_destroy + * + * Cleanup reg I/O. + */ +void cros_ec_lpc_reg_destroy(void); + +#endif /* __CROS_EC_LPC_REG_H */ diff --git a/include/linux/mfd/cros_ec_lpc_mec.h b/include/linux/mfd/cros_ec_lpc_mec.h deleted file mode 100644 index 176496ddc66c..000000000000 --- a/include/linux/mfd/cros_ec_lpc_mec.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * cros_ec_lpc_mec - LPC variant I/O for Microchip EC - * - * Copyright (C) 2016 Google, Inc - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * This driver uses the Chrome OS EC byte-level message-based protocol for - * communicating the keyboard state (which keys are pressed) from a keyboard EC - * to the AP over some bus (such as i2c, lpc, spi). The EC does debouncing, - * but everything else (including deghosting) is done here. The main - * motivation for this is to keep the EC firmware as simple as possible, since - * it cannot be easily upgraded and EC flash/IRAM space is relatively - * expensive. - */ - -#ifndef __LINUX_MFD_CROS_EC_MEC_H -#define __LINUX_MFD_CROS_EC_MEC_H - -#include - -enum cros_ec_lpc_mec_emi_access_mode { - /* 8-bit access */ - ACCESS_TYPE_BYTE = 0x0, - /* 16-bit access */ - ACCESS_TYPE_WORD = 0x1, - /* 32-bit access */ - ACCESS_TYPE_LONG = 0x2, - /* - * 32-bit access, read or write of MEC_EMI_EC_DATA_B3 causes the - * EC data register to be incremented. - */ - ACCESS_TYPE_LONG_AUTO_INCREMENT = 0x3, -}; - -enum cros_ec_lpc_mec_io_type { - MEC_IO_READ, - MEC_IO_WRITE, -}; - -/* Access IO ranges 0x800 thru 0x9ff using EMI interface instead of LPC */ -#define MEC_EMI_RANGE_START EC_HOST_CMD_REGION0 -#define MEC_EMI_RANGE_END (EC_LPC_ADDR_MEMMAP + EC_MEMMAP_SIZE) - -/* EMI registers are relative to base */ -#define MEC_EMI_BASE 0x800 -#define MEC_EMI_HOST_TO_EC (MEC_EMI_BASE + 0) -#define MEC_EMI_EC_TO_HOST (MEC_EMI_BASE + 1) -#define MEC_EMI_EC_ADDRESS_B0 (MEC_EMI_BASE + 2) -#define MEC_EMI_EC_ADDRESS_B1 (MEC_EMI_BASE + 3) -#define MEC_EMI_EC_DATA_B0 (MEC_EMI_BASE + 4) -#define MEC_EMI_EC_DATA_B1 (MEC_EMI_BASE + 5) -#define MEC_EMI_EC_DATA_B2 (MEC_EMI_BASE + 6) -#define MEC_EMI_EC_DATA_B3 (MEC_EMI_BASE + 7) - -/* - * cros_ec_lpc_mec_init - * - * Initialize MEC I/O. - */ -void cros_ec_lpc_mec_init(void); - -/* - * cros_ec_lpc_mec_destroy - * - * Cleanup MEC I/O. 
- */ -void cros_ec_lpc_mec_destroy(void); - -/** - * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port - * - * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request - * @offset: Base read / write address - * @length: Number of bytes to read / write - * @buf: Destination / source buffer - * - * @return 8-bit checksum of all bytes read / written - */ -u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type, - unsigned int offset, unsigned int length, u8 *buf); - -#endif /* __LINUX_MFD_CROS_EC_MEC_H */ diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h deleted file mode 100644 index 5560bef63c2b..000000000000 --- a/include/linux/mfd/cros_ec_lpc_reg.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller - * - * Copyright (C) 2016 Google, Inc - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * This driver uses the Chrome OS EC byte-level message-based protocol for - * communicating the keyboard state (which keys are pressed) from a keyboard EC - * to the AP over some bus (such as i2c, lpc, spi). The EC does debouncing, - * but everything else (including deghosting) is done here. The main - * motivation for this is to keep the EC firmware as simple as possible, since - * it cannot be easily upgraded and EC flash/IRAM space is relatively - * expensive. - */ - -#ifndef __LINUX_MFD_CROS_EC_REG_H -#define __LINUX_MFD_CROS_EC_REG_H - -/** - * cros_ec_lpc_read_bytes - Read bytes from a given LPC-mapped address. - * Returns 8-bit checksum of all bytes read. - * - * @offset: Base read address - * @length: Number of bytes to read - * @dest: Destination buffer - */ -u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest); - -/** - * cros_ec_lpc_write_bytes - Write bytes to a given LPC-mapped address. - * Returns 8-bit checksum of all bytes written. - * - * @offset: Base write address - * @length: Number of bytes to write - * @msg: Write data buffer - */ -u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg); - -/** - * cros_ec_lpc_reg_init - * - * Initialize register I/O. - */ -void cros_ec_lpc_reg_init(void); - -/** - * cros_ec_lpc_reg_destroy - * - * Cleanup reg I/O. - */ -void cros_ec_lpc_reg_destroy(void); - -#endif /* __LINUX_MFD_CROS_EC_REG_H */ -- cgit v1.2.3-71-gd317 From e2bbf91cad09118d7500f1fdaaa83d7741d30395 Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Wed, 18 Jul 2018 18:09:56 +0200 Subject: mfd: cros_ec: Fix and improve kerneldoc comments. The cros-ec includes inside the MFD subsystem, especially cros_ec_commands.h, have been modified several times and have grown a lot; unfortunately, we didn't take much care of the documentation. This patch improves the documentation and fixes all the issues reported by the kerneldoc script.
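As an illustration of the structures this patch documents, here is a minimal sketch of a host-command round trip using EC_CMD_HELLO (defined further down in cros_ec_commands.h); the function name, buffer sizing and error handling are illustrative, not taken from the patch:

static int example_ec_hello(struct cros_ec_device *ec_dev)
{
	struct cros_ec_command *msg;
	struct ec_params_hello *params;
	struct ec_response_hello *resp;
	int ret;

	/* Allocate header plus room for the larger of params/response. */
	msg = kzalloc(sizeof(*msg) + sizeof(*resp), GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	msg->version = 0;
	msg->command = EC_CMD_HELLO;
	msg->outsize = sizeof(*params);
	msg->insize = sizeof(*resp);

	params = (struct ec_params_hello *)msg->data;
	params->in_data = 0xa0b0c0d0;

	/* Succeeds only if the transport worked AND the EC reported success. */
	ret = cros_ec_cmd_xfer_status(ec_dev, msg);
	if (ret >= 0) {
		resp = (struct ec_response_hello *)msg->data;
		/* The EC echoes in_data + 0x01020304. */
		ret = (resp->out_data == 0xa1b2c3d4) ? 0 : -EIO;
	}

	kfree(msg);
	return ret;
}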
Signed-off-by: Enric Balletbo i Serra Signed-off-by: Benson Leung --- drivers/mfd/cros_ec_dev.h | 13 +- include/linux/mfd/cros_ec.h | 214 +++++++++++++------------ include/linux/mfd/cros_ec_commands.h | 295 ++++++++++++++++++++++------------- 3 files changed, 310 insertions(+), 212 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/cros_ec_dev.h b/drivers/mfd/cros_ec_dev.h index 45e9453608c5..978d836a0248 100644 --- a/drivers/mfd/cros_ec_dev.h +++ b/drivers/mfd/cros_ec_dev.h @@ -26,12 +26,13 @@ #define CROS_EC_DEV_VERSION "1.0.0" -/* - * @offset: within EC_LPC_ADDR_MEMMAP region - * @bytes: number of bytes to read. zero means "read a string" (including '\0') - * (at most only EC_MEMMAP_SIZE bytes can be read) - * @buffer: where to store the result - * ioctl returns the number of bytes read, negative on error +/** + * struct cros_ec_readmem - Struct used to read mapped memory. + * @offset: Within EC_LPC_ADDR_MEMMAP region. + * @bytes: Number of bytes to read. Zero means "read a string" (including '\0') + * At most only EC_MEMMAP_SIZE bytes can be read. + * @buffer: Where to store the result. The ioctl returns the number of bytes + * read or negative on error. */ struct cros_ec_readmem { uint32_t offset; diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 20949dde35cd..e44e3ec8a9c7 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -36,7 +36,7 @@ * I2C requires 1 additional byte for requests. * I2C requires 2 additional bytes for responses. * SPI requires up to 32 additional bytes for responses. - * */ + */ #define EC_PROTO_VERSION_UNKNOWN 0 #define EC_MAX_REQUEST_OVERHEAD 1 #define EC_MAX_RESPONSE_OVERHEAD 32 @@ -58,13 +58,14 @@ enum { EC_MAX_MSG_BYTES = 64 * 1024, }; -/* - * @version: Command version number (often 0) - * @command: Command to send (EC_CMD_...) - * @outsize: Outgoing length in bytes - * @insize: Max number of bytes to accept from EC - * @result: EC's response to the command (separate from communication failure) - * @data: Where to put the incoming data from EC and outgoing data to EC +/** + * struct cros_ec_command - Information about a ChromeOS EC command. + * @version: Command version number (often 0). + * @command: Command to send (EC_CMD_...). + * @outsize: Outgoing length in bytes. + * @insize: Max number of bytes to accept from the EC. + * @result: EC's response to the command (separate from communication failure). + * @data: Where to put the incoming data from EC and outgoing data to EC. */ struct cros_ec_command { uint32_t version; @@ -76,48 +77,55 @@ struct cros_ec_command { }; /** - * struct cros_ec_device - Information about a ChromeOS EC device - * - * @phys_name: name of physical comms layer (e.g. 'i2c-4') + * struct cros_ec_device - Information about a ChromeOS EC device. + * @phys_name: Name of physical comms layer (e.g. 'i2c-4'). * @dev: Device pointer for physical comms device - * @was_wake_device: true if this device was set to wake the system from - * sleep at the last suspend - * @cmd_readmem: direct read of the EC memory-mapped region, if supported - * @offset is within EC_LPC_ADDR_MEMMAP region. - * @bytes: number of bytes to read. zero means "read a string" (including - * the trailing '\0'). At most only EC_MEMMAP_SIZE bytes can be read. - * Caller must ensure that the buffer is large enough for the result when - * reading a string. 
- * - * @priv: Private data - * @irq: Interrupt to use - * @id: Device id - * @din: input buffer (for data from EC) - * @dout: output buffer (for data to EC) - * \note - * These two buffers will always be dword-aligned and include enough - * space for up to 7 word-alignment bytes also, so we can ensure that - * the body of the message is always dword-aligned (64-bit). - * We use this alignment to keep ARM and x86 happy. Probably word - * alignment would be OK, there might be a small performance advantage - * to using dword. - * @din_size: size of din buffer to allocate (zero to use static din) - * @dout_size: size of dout buffer to allocate (zero to use static dout) - * @wake_enabled: true if this device can wake the system from sleep - * @suspended: true if this device had been suspended - * @cmd_xfer: send command to EC and get response - * Returns the number of bytes received if the communication succeeded, but - * that doesn't mean the EC was happy with the command. The caller - * should check msg.result for the EC's result code. - * @pkt_xfer: send packet to EC and get response - * @lock: one transaction at a time - * @mkbp_event_supported: true if this EC supports the MKBP event protocol. - * @event_notifier: interrupt event notifier for transport devices. - * @event_data: raw payload transferred with the MKBP event. - * @event_size: size in bytes of the event data. + * @was_wake_device: True if this device was set to wake the system from + * sleep at the last suspend. + * @cros_class: The class structure for this device. + * @cmd_readmem: Direct read of the EC memory-mapped region, if supported. + * @offset: Is within EC_LPC_ADDR_MEMMAP region. + * @bytes: Number of bytes to read. zero means "read a string" (including + * the trailing '\0'). At most only EC_MEMMAP_SIZE bytes can be + * read. Caller must ensure that the buffer is large enough for the + * result when reading a string. + * @max_request: Max size of message requested. + * @max_response: Max size of message response. + * @max_passthru: Max size of passthru message. + * @proto_version: The protocol version used for this device. + * @priv: Private data. + * @irq: Interrupt to use. + * @id: Device id. + * @din: Input buffer (for data from EC). This buffer will always be + * dword-aligned and include enough space for up to 7 word-alignment + * bytes also, so we can ensure that the body of the message is always + * dword-aligned (64-bit). We use this alignment to keep ARM and x86 + * happy. Probably word alignment would be OK, there might be a small + * performance advantage to using dword. + * @dout: Output buffer (for data to EC). This buffer will always be + * dword-aligned and include enough space for up to 7 word-alignment + * bytes also, so we can ensure that the body of the message is always + * dword-aligned (64-bit). We use this alignment to keep ARM and x86 + * happy. Probably word alignment would be OK, there might be a small + * performance advantage to using dword. + * @din_size: Size of din buffer to allocate (zero to use static din). + * @dout_size: Size of dout buffer to allocate (zero to use static dout). + * @wake_enabled: True if this device can wake the system from sleep. + * @suspended: True if this device had been suspended. + * @cmd_xfer: Send command to EC and get response. + * Returns the number of bytes received if the communication + * succeeded, but that doesn't mean the EC was happy with the + * command. The caller should check msg.result for the EC's result + * code.
+ * @pkt_xfer: Send packet to EC and get response. + * @lock: One transaction at a time. + * @mkbp_event_supported: True if this EC supports the MKBP event protocol. + * @event_notifier: Interrupt event notifier for transport devices. + * @event_data: Raw payload transferred with the MKBP event. + * @event_size: Size in bytes of the event data. + * @host_event_wake_mask: Mask of host events that cause wake from suspend. */ struct cros_ec_device { - /* These are used by other drivers that want to talk to the EC */ const char *phys_name; struct device *dev; @@ -153,20 +161,19 @@ struct cros_ec_device { }; /** - * struct cros_ec_sensor_platform - ChromeOS EC sensor platform information - * + * struct cros_ec_sensor_platform - ChromeOS EC sensor platform information. * @sensor_num: Id of the sensor, as reported by the EC. */ struct cros_ec_sensor_platform { u8 sensor_num; }; -/* struct cros_ec_platform - ChromeOS EC platform information - * - * @ec_name: name of EC device (e.g. 'cros-ec', 'cros-pd', ...) - * used in /dev/ and sysfs. - * @cmd_offset: offset to apply for each command. Set when - * registering a devicde behind another one. +/** + * struct cros_ec_platform - ChromeOS EC platform information. + * @ec_name: Name of EC device (e.g. 'cros-ec', 'cros-pd', ...) + * used in /dev/ and sysfs. + * @cmd_offset: Offset to apply for each command. Set when + * registering a device behind another one. */ struct cros_ec_platform { const char *ec_name; @@ -175,16 +182,16 @@ struct cros_ec_platform { struct cros_ec_debugfs; -/* - * struct cros_ec_dev - ChromeOS EC device entry point - * - * @class_dev: Device structure used in sysfs - * @cdev: Character device structure in /dev - * @ec_dev: cros_ec_device structure to talk to the physical device - * @dev: pointer to the platform device - * @debug_info: cros_ec_debugfs structure for debugging information - * @has_kb_wake_angle: true if at least 2 accelerometer are connected to the EC. - * @cmd_offset: offset to apply for each command. +/** + * struct cros_ec_dev - ChromeOS EC device entry point. + * @class_dev: Device structure used in sysfs. + * @cdev: Character device structure in /dev. + * @ec_dev: cros_ec_device structure to talk to the physical device. + * @dev: Pointer to the platform device. + * @debug_info: cros_ec_debugfs structure for debugging information. + * @has_kb_wake_angle: True if at least 2 accelerometer are connected to the EC. + * @cmd_offset: Offset to apply for each command. + * @features: Features supported by the EC. */ struct cros_ec_dev { struct device class_dev; @@ -200,124 +207,129 @@ struct cros_ec_dev { #define to_cros_ec_dev(dev) container_of(dev, struct cros_ec_dev, class_dev) /** - * cros_ec_suspend - Handle a suspend operation for the ChromeOS EC device + * cros_ec_suspend() - Handle a suspend operation for the ChromeOS EC device. + * @ec_dev: Device to suspend. * * This can be called by drivers to handle a suspend event. * - * ec_dev: Device to suspend - * @return 0 if ok, -ve on error + * Return: 0 on success or negative error code. */ int cros_ec_suspend(struct cros_ec_device *ec_dev); /** - * cros_ec_resume - Handle a resume operation for the ChromeOS EC device + * cros_ec_resume() - Handle a resume operation for the ChromeOS EC device. + * @ec_dev: Device to resume. * * This can be called by drivers to handle a resume event. * - * @ec_dev: Device to resume - * @return 0 if ok, -ve on error + * Return: 0 on success or negative error code. 
*/ int cros_ec_resume(struct cros_ec_device *ec_dev); /** - * cros_ec_prepare_tx - Prepare an outgoing message in the output buffer + * cros_ec_prepare_tx() - Prepare an outgoing message in the output buffer. + * @ec_dev: Device to register. + * @msg: Message to write. * * This is intended to be used by all ChromeOS EC drivers, but at present * only SPI uses it. Once LPC uses the same protocol it can start using it. * I2C could use it now, with a refactor of the existing code. * - * @ec_dev: Device to register - * @msg: Message to write + * Return: 0 on success or negative error code. */ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); /** - * cros_ec_check_result - Check ec_msg->result + * cros_ec_check_result() - Check ec_msg->result. + * @ec_dev: EC device. + * @msg: Message to check. * * This is used by ChromeOS EC drivers to check the ec_msg->result for * errors and to warn about them. * - * @ec_dev: EC device - * @msg: Message to check + * Return: 0 on success or negative error code. */ int cros_ec_check_result(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); /** - * cros_ec_cmd_xfer - Send a command to the ChromeOS EC + * cros_ec_cmd_xfer() - Send a command to the ChromeOS EC. + * @ec_dev: EC device. + * @msg: Message to write. * * Call this to send a command to the ChromeOS EC. This should be used * instead of calling the EC's cmd_xfer() callback directly. * - * @ec_dev: EC device - * @msg: Message to write + * Return: 0 on success or negative error code. */ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); /** - * cros_ec_cmd_xfer_status - Send a command to the ChromeOS EC + * cros_ec_cmd_xfer_status() - Send a command to the ChromeOS EC. + * @ec_dev: EC device. + * @msg: Message to write. * * This function is identical to cros_ec_cmd_xfer, except it returns success * status only if both the command was transmitted successfully and the EC * replied with success status. It's not necessary to check msg->result when * using this function. * - * @ec_dev: EC device - * @msg: Message to write - * @return: Num. of bytes transferred on success, <0 on failure + * Return: The number of bytes transferred on success or negative error code. */ int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); /** - * cros_ec_remove - Remove a ChromeOS EC + * cros_ec_remove() - Remove a ChromeOS EC. + * @ec_dev: Device to register. * * Call this to deregister a ChromeOS EC, then clean up any private data. * - * @ec_dev: Device to register - * @return 0 if ok, -ve on error + * Return: 0 on success or negative error code. */ int cros_ec_remove(struct cros_ec_device *ec_dev); /** - * cros_ec_register - Register a new ChromeOS EC, using the provided info + * cros_ec_register() - Register a new ChromeOS EC, using the provided info. + * @ec_dev: Device to register. * * Before calling this, allocate a pointer to a new device and then fill * in all the fields up to the --private-- marker. * - * @ec_dev: Device to register - * @return 0 if ok, -ve on error + * Return: 0 on success or negative error code. */ int cros_ec_register(struct cros_ec_device *ec_dev); /** - * cros_ec_query_all - Query the protocol version supported by the ChromeOS EC + * cros_ec_query_all() - Query the protocol version supported by the + * ChromeOS EC. + * @ec_dev: Device to register. * - * @ec_dev: Device to register - * @return 0 if ok, -ve on error + * Return: 0 on success or negative error code. 
*/ int cros_ec_query_all(struct cros_ec_device *ec_dev); /** - * cros_ec_get_next_event - Fetch next event from the ChromeOS EC - * - * @ec_dev: Device to fetch event from + * cros_ec_get_next_event() - Fetch next event from the ChromeOS EC. + * @ec_dev: Device to fetch event from. * @wake_event: Pointer to a bool set to true upon return if the event might be * treated as a wake event. Ignored if null. * - * Returns: 0 on success, Linux error number on failure + * Return: 0 on success or negative error code. */ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event); /** - * cros_ec_get_host_event - Return a mask of event set by the EC. + * cros_ec_get_host_event() - Return a mask of event set by the ChromeOS EC. + * @ec_dev: Device to fetch event from. * - * When MKBP is supported, when the EC raises an interrupt, - * We collect the events raised and call the functions in the ec notifier. + * When MKBP is supported, when the EC raises an interrupt, we collect the + * events raised and call the functions in the ec notifier. This function + * is a helper to know which events are raised. * - * This function is a helper to know which events are raised. + * Return: 0 on success or negative error code. */ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev); diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 6e1ab9bead28..88100bddf1ab 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -306,15 +306,18 @@ enum host_event_code { /* Host event mask */ #define EC_HOST_EVENT_MASK(event_code) (1UL << ((event_code) - 1)) -/* Arguments at EC_LPC_ADDR_HOST_ARGS */ +/** + * struct ec_lpc_host_args - Arguments at EC_LPC_ADDR_HOST_ARGS + * @flags: The host argument flags. + * @command_version: Command version. + * @data_size: The length of data. + * @checksum: Checksum; sum of command + flags + command_version + data_size + + * all params/response data bytes. + */ struct ec_lpc_host_args { uint8_t flags; uint8_t command_version; uint8_t data_size; - /* - * Checksum; sum of command + flags + command_version + data_size + - * all params/response data bytes. - */ uint8_t checksum; } __packed; @@ -468,54 +471,43 @@ struct ec_lpc_host_args { #define EC_HOST_REQUEST_VERSION 3 -/* Version 3 request from host */ +/** + * struct ec_host_request - Version 3 request from host. + * @struct_version: Should be 3. The EC will return EC_RES_INVALID_HEADER if it + * receives a header with a version it doesn't know how to + * parse. + * @checksum: Checksum of request and data; sum of all bytes including checksum + * should total to 0. + * @command: Command to send (EC_CMD_...) + * @command_version: Command version. + * @reserved: Unused byte in current protocol version; set to 0. + * @data_len: Length of data which follows this header. + */ struct ec_host_request { - /* Struct version (=3) - * - * EC will return EC_RES_INVALID_HEADER if it receives a header with a - * version it doesn't know how to parse. - */ uint8_t struct_version; - - /* - * Checksum of request and data; sum of all bytes including checksum - * should total to 0. 
- */ uint8_t checksum; - - /* Command code */ uint16_t command; - - /* Command version */ uint8_t command_version; - - /* Unused byte in current protocol version; set to 0 */ uint8_t reserved; - - /* Length of data which follows this header */ uint16_t data_len; } __packed; #define EC_HOST_RESPONSE_VERSION 3 -/* Version 3 response from EC */ +/** + * struct ec_host_response - Version 3 response from EC. + * @struct_version: Struct version (=3). + * @checksum: Checksum of response and data; sum of all bytes including + * checksum should total to 0. + * @result: EC's response to the command (separate from communication failure) + * @data_len: Length of data which follows this header. + * @reserved: Unused bytes in current protocol version; set to 0. + */ struct ec_host_response { - /* Struct version (=3) */ uint8_t struct_version; - - /* - * Checksum of response and data; sum of all bytes including checksum - * should total to 0. - */ uint8_t checksum; - - /* Result code (EC_RES_*) */ uint16_t result; - - /* Length of data which follows this header */ uint16_t data_len; - - /* Unused bytes in current protocol version; set to 0 */ uint16_t reserved; } __packed; @@ -540,6 +532,10 @@ struct ec_host_response { */ #define EC_CMD_PROTO_VERSION 0x00 +/** + * struct ec_response_proto_version - Response to the proto version command. + * @version: The protocol version. + */ struct ec_response_proto_version { uint32_t version; } __packed; @@ -550,12 +546,20 @@ struct ec_response_proto_version { */ #define EC_CMD_HELLO 0x01 +/** + * struct ec_params_hello - Parameters to the hello command. + * @in_data: Pass anything here. + */ struct ec_params_hello { - uint32_t in_data; /* Pass anything here */ + uint32_t in_data; } __packed; +/** + * struct ec_response_hello - Response to the hello command. + * @out_data: Output will be in_data + 0x01020304. + */ struct ec_response_hello { - uint32_t out_data; /* Output will be in_data + 0x01020304 */ + uint32_t out_data; } __packed; /* Get version number */ @@ -567,22 +571,37 @@ enum ec_current_image { EC_IMAGE_RW }; +/** + * struct ec_response_get_version - Response to the get version command. + * @version_string_ro: Null-terminated RO firmware version string. + * @version_string_rw: Null-terminated RW firmware version string. + * @reserved: Unused bytes; was previously RW-B firmware version string. + * @current_image: One of ec_current_image. + */ struct ec_response_get_version { - /* Null-terminated version strings for RO, RW */ char version_string_ro[32]; char version_string_rw[32]; - char reserved[32]; /* Was previously RW-B string */ - uint32_t current_image; /* One of ec_current_image */ + char reserved[32]; + uint32_t current_image; } __packed; /* Read test */ #define EC_CMD_READ_TEST 0x03 +/** + * struct ec_params_read_test - Parameters for the read test command. + * @offset: Starting value for read buffer. + * @size: Size to read in bytes. + */ struct ec_params_read_test { - uint32_t offset; /* Starting value for read buffer */ - uint32_t size; /* Size to read in bytes */ + uint32_t offset; + uint32_t size; } __packed; +/** + * struct ec_response_read_test - Response to the read test command. + * @data: Data returned by the read test command. + */ struct ec_response_read_test { uint32_t data[32]; } __packed; @@ -597,18 +616,27 @@ struct ec_response_read_test { /* Get chip info */ #define EC_CMD_GET_CHIP_INFO 0x05 +/** + * struct ec_response_get_chip_info - Response to the get chip info command. + * @vendor: Null-terminated string for chip vendor. 
+ * @name: Null-terminated string for chip name. + * @revision: Null-terminated string for chip mask version. + */ struct ec_response_get_chip_info { - /* Null-terminated strings */ char vendor[32]; char name[32]; - char revision[32]; /* Mask version */ + char revision[32]; } __packed; /* Get board HW version */ #define EC_CMD_GET_BOARD_VERSION 0x06 +/** + * struct ec_response_board_version - Response to the board version command. + * @board_version: A monotonously incrementing number. + */ struct ec_response_board_version { - uint16_t board_version; /* A monotonously incrementing number. */ + uint16_t board_version; } __packed; /* @@ -621,27 +649,42 @@ struct ec_response_board_version { */ #define EC_CMD_READ_MEMMAP 0x07 +/** + * struct ec_params_read_memmap - Parameters for the read memory map command. + * @offset: Offset in memmap (EC_MEMMAP_*). + * @size: Size to read in bytes. + */ struct ec_params_read_memmap { - uint8_t offset; /* Offset in memmap (EC_MEMMAP_*) */ - uint8_t size; /* Size to read in bytes */ + uint8_t offset; + uint8_t size; } __packed; /* Read versions supported for a command */ #define EC_CMD_GET_CMD_VERSIONS 0x08 +/** + * struct ec_params_get_cmd_versions - Parameters for the get command versions. + * @cmd: Command to check. + */ struct ec_params_get_cmd_versions { - uint8_t cmd; /* Command to check */ + uint8_t cmd; } __packed; +/** + * struct ec_params_get_cmd_versions_v1 - Parameters for the get command + * versions (v1) + * @cmd: Command to check. + */ struct ec_params_get_cmd_versions_v1 { - uint16_t cmd; /* Command to check */ + uint16_t cmd; } __packed; +/** + * struct ec_response_get_cmd_version - Response to the get command versions. + * @version_mask: Mask of supported versions; use EC_VER_MASK() to compare with + * a desired version. + */ struct ec_response_get_cmd_versions { - /* - * Mask of supported versions; use EC_VER_MASK() to compare with a - * desired version. - */ uint32_t version_mask; } __packed; @@ -659,6 +702,11 @@ enum ec_comms_status { EC_COMMS_STATUS_PROCESSING = 1 << 0, /* Processing cmd */ }; +/** + * struct ec_response_get_comms_status - Response to the get comms status + * command. + * @flags: Mask of enum ec_comms_status. + */ struct ec_response_get_comms_status { uint32_t flags; /* Mask of enum ec_comms_status */ } __packed; @@ -685,19 +733,19 @@ struct ec_response_test_protocol { /* EC_RES_IN_PROGRESS may be returned if a command is slow */ #define EC_PROTOCOL_INFO_IN_PROGRESS_SUPPORTED (1 << 0) +/** + * struct ec_response_get_protocol_info - Response to the get protocol info. + * @protocol_versions: Bitmask of protocol versions supported (1 << n means + * version n). + * @max_request_packet_size: Maximum request packet size in bytes. + * @max_response_packet_size: Maximum response packet size in bytes. 
+ * @flags: see EC_PROTOCOL_INFO_* + */ struct ec_response_get_protocol_info { /* Fields which exist if at least protocol version 3 supported */ - - /* Bitmask of protocol versions supported (1 << n means version n)*/ uint32_t protocol_versions; - - /* Maximum request packet size, in bytes */ uint16_t max_request_packet_size; - - /* Maximum response packet size, in bytes */ uint16_t max_response_packet_size; - - /* Flags; see EC_PROTOCOL_INFO_* */ uint32_t flags; } __packed; @@ -708,8 +756,10 @@ struct ec_response_get_protocol_info { /* The upper byte of .flags tells what to do (nothing means "get") */ #define EC_GSV_SET 0x80000000 -/* The lower three bytes of .flags identifies the parameter, if that has - meaning for an individual command. */ +/* + * The lower three bytes of .flags identifies the parameter, if that has + * meaning for an individual command. + */ #define EC_GSV_PARAM_MASK 0x00ffffff struct ec_params_get_set_value { @@ -810,6 +860,7 @@ enum ec_feature_code { #define EC_FEATURE_MASK_0(event_code) (1UL << (event_code % 32)) #define EC_FEATURE_MASK_1(event_code) (1UL << (event_code - 32)) + struct ec_response_get_features { uint32_t flags[2]; } __packed; @@ -820,24 +871,22 @@ struct ec_response_get_features { /* Get flash info */ #define EC_CMD_FLASH_INFO 0x10 -/* Version 0 returns these fields */ +/** + * struct ec_response_flash_info - Response to the flash info command. + * @flash_size: Usable flash size in bytes. + * @write_block_size: Write block size. Write offset and size must be a + * multiple of this. + * @erase_block_size: Erase block size. Erase offset and size must be a + * multiple of this. + * @protect_block_size: Protection block size. Protection offset and size + * must be a multiple of this. + * + * Version 0 returns these fields. + */ struct ec_response_flash_info { - /* Usable flash size, in bytes */ uint32_t flash_size; - /* - * Write block size. Write offset and size must be a multiple - * of this. - */ uint32_t write_block_size; - /* - * Erase block size. Erase offset and size must be a multiple - * of this. - */ uint32_t erase_block_size; - /* - * Protection block size. Protection offset and size must be a - * multiple of this. - */ uint32_t protect_block_size; } __packed; @@ -845,7 +894,22 @@ struct ec_response_flash_info { /* EC flash erases bits to 0 instead of 1 */ #define EC_FLASH_INFO_ERASE_TO_0 (1 << 0) -/* +/** + * struct ec_response_flash_info_1 - Response to the flash info v1 command. + * @flash_size: Usable flash size in bytes. + * @write_block_size: Write block size. Write offset and size must be a + * multiple of this. + * @erase_block_size: Erase block size. Erase offset and size must be a + * multiple of this. + * @protect_block_size: Protection block size. Protection offset and size + * must be a multiple of this. + * @write_ideal_size: Ideal write size in bytes. Writes will be fastest if + * size is exactly this and offset is a multiple of this. + * For example, an EC may have a write buffer which can do + * half-page operations if data is aligned, and a slower + * word-at-a-time write mode. + * @flags: Flags; see EC_FLASH_INFO_* + * * Version 1 returns the same initial fields as version 0, with additional * fields following. * @@ -860,15 +924,7 @@ struct ec_response_flash_info_1 { uint32_t protect_block_size; /* Version 1 adds these fields: */ - /* - * Ideal write size in bytes. Writes will be fastest if size is - * exactly this and offset is a multiple of this. 
For example, an EC - * may have a write buffer which can do half-page operations if data is - * aligned, and a slower word-at-a-time write mode. - */ uint32_t write_ideal_size; - - /* Flags; see EC_FLASH_INFO_* */ uint32_t flags; } __packed; @@ -879,9 +935,14 @@ struct ec_response_flash_info_1 { */ #define EC_CMD_FLASH_READ 0x11 +/** + * struct ec_params_flash_read - Parameters for the flash read command. + * @offset: Byte offset to read. + * @size: Size to read in bytes. + */ struct ec_params_flash_read { - uint32_t offset; /* Byte offset to read */ - uint32_t size; /* Size to read in bytes */ + uint32_t offset; + uint32_t size; } __packed; /* Write flash */ @@ -891,18 +952,28 @@ struct ec_params_flash_read { /* Version 0 of the flash command supported only 64 bytes of data */ #define EC_FLASH_WRITE_VER0_SIZE 64 +/** + * struct ec_params_flash_write - Parameters for the flash write command. + * @offset: Byte offset to write. + * @size: Size to write in bytes. + */ struct ec_params_flash_write { - uint32_t offset; /* Byte offset to write */ - uint32_t size; /* Size to write in bytes */ + uint32_t offset; + uint32_t size; /* Followed by data to write */ } __packed; /* Erase flash */ #define EC_CMD_FLASH_ERASE 0x13 +/** + * struct ec_params_flash_erase - Parameters for the flash erase command. + * @offset: Byte offset to erase. + * @size: Size to erase in bytes. + */ struct ec_params_flash_erase { - uint32_t offset; /* Byte offset to erase */ - uint32_t size; /* Size to erase in bytes */ + uint32_t offset; + uint32_t size; } __packed; /* @@ -941,21 +1012,28 @@ struct ec_params_flash_erase { /* Entile flash code protected when the EC boots */ #define EC_FLASH_PROTECT_ALL_AT_BOOT (1 << 6) +/** + * struct ec_params_flash_protect - Parameters for the flash protect command. + * @mask: Bits in flags to apply. + * @flags: New flags to apply. + */ struct ec_params_flash_protect { - uint32_t mask; /* Bits in flags to apply */ - uint32_t flags; /* New flags to apply */ + uint32_t mask; + uint32_t flags; } __packed; +/** + * struct ec_response_flash_protect - Response to the flash protect command. + * @flags: Current value of flash protect flags. + * @valid_flags: Flags which are valid on this platform. This allows the + * caller to distinguish between flags which aren't set vs. flags + * which can't be set on this platform. + * @writable_flags: Flags which can be changed given the current protection + * state. + */ struct ec_response_flash_protect { - /* Current value of flash protect flags */ uint32_t flags; - /* - * Flags which are valid on this platform. This allows the caller - * to distinguish between flags which aren't set vs. flags which can't - * be set on this platform. - */ uint32_t valid_flags; - /* Flags which can be changed given the current protection state */ uint32_t writable_flags; } __packed; @@ -982,8 +1060,13 @@ enum ec_flash_region { EC_FLASH_REGION_COUNT, }; +/** + * struct ec_params_flash_region_info - Parameters for the flash region info + * command. + * @region: Flash region; see EC_FLASH_REGION_* + */ struct ec_params_flash_region_info { - uint32_t region; /* enum ec_flash_region */ + uint32_t region; } __packed; struct ec_response_flash_region_info { @@ -1094,7 +1177,9 @@ struct rgb_s { }; #define LB_BATTERY_LEVELS 4 -/* List of tweakable parameters. NOTE: It's __packed so it can be sent in a + +/* + * List of tweakable parameters. NOTE: It's __packed so it can be sent in a * host command, but the alignment is the same regardless. Keep it that way. 
*/ struct lightbar_params_v0 { -- cgit v1.2.3-71-gd317 From 1a63fe9a2b1f47af5b2b7436b41824b14999c17a Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Mon, 10 Sep 2018 17:28:10 +0100 Subject: firmware: arm_scmi: add a getter for power of performance states The SCMI protocol can be used to get power estimates from firmware corresponding to each performance state of a device. Although these power costs are already managed by the SCMI firmware driver, they are not exposed to any external subsystem yet. Fix this by adding a new get_power() interface to the existing perf_ops defined for the SCMI protocol. Signed-off-by: Quentin Perret Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/perf.c | 28 ++++++++++++++++++++++++++++ include/linux/scmi_protocol.h | 4 ++++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 87c99d296ecd..3c8ae7cc35de 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -427,6 +427,33 @@ static int scmi_dvfs_freq_get(const struct scmi_handle *handle, u32 domain, return ret; } +static int scmi_dvfs_est_power_get(const struct scmi_handle *handle, u32 domain, + unsigned long *freq, unsigned long *power) +{ + struct scmi_perf_info *pi = handle->perf_priv; + struct perf_dom_info *dom; + unsigned long opp_freq; + int idx, ret = -EINVAL; + struct scmi_opp *opp; + + dom = pi->dom_info + domain; + if (!dom) + return -EIO; + + for (opp = dom->opp, idx = 0; idx < dom->opp_count; idx++, opp++) { + opp_freq = opp->perf * dom->mult_factor; + if (opp_freq < *freq) + continue; + + *freq = opp_freq; + *power = opp->power; + ret = 0; + break; + } + + return ret; +} + static struct scmi_perf_ops perf_ops = { .limits_set = scmi_perf_limits_set, .limits_get = scmi_perf_limits_get, @@ -437,6 +464,7 @@ static struct scmi_perf_ops perf_ops = { .device_opps_add = scmi_dvfs_device_opps_add, .freq_set = scmi_dvfs_freq_set, .freq_get = scmi_dvfs_freq_get, + .est_power_get = scmi_dvfs_est_power_get, }; static int scmi_perf_protocol_init(struct scmi_handle *handle) diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index f4c9fc0fc755..3105055c00a7 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -91,6 +91,8 @@ struct scmi_clk_ops { * to sustained performance level mapping * @freq_get: gets the frequency for a given device using sustained frequency * to sustained performance level mapping + * @est_power_get: gets the estimated power cost for a given performance domain + * at a given frequency */ struct scmi_perf_ops { int (*limits_set)(const struct scmi_handle *handle, u32 domain, @@ -110,6 +112,8 @@ struct scmi_perf_ops { unsigned long rate, bool poll); int (*freq_get)(const struct scmi_handle *handle, u32 domain, unsigned long *rate, bool poll); + int (*est_power_get)(const struct scmi_handle *handle, u32 domain, + unsigned long *rate, unsigned long *power); }; /** -- cgit v1.2.3-71-gd317 From 04cfcc7ab358e331b32cabde1e853a125f3f8735 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 22 Aug 2018 10:54:02 +0200 Subject: fbdev: Drop FBINFO_CAN_FORCE_OUTPUT flag This was only added for the drm's fbdev emulation support, so that it would try harder to show the Oops. Unfortunately this never really worked reliably, and in practice ended up pushing the real Oops off the screen due to plentiful locking, sleep-while-atomic and other issues.
So we removed all that support from the fbdev emulation a while back. Aside: We've also removed the kgdb support, for similar reasons. Since it's such a small patch I figured I don't split this up into the usual 3-phase removal. Acked-by: Bartlomiej Zolnierkiewicz Cc: Ben Skeggs Cc: Bartlomiej Zolnierkiewicz Cc: Greg Kroah-Hartman Cc: Hans de Goede Cc: Daniel Vetter Cc: Alexander Kapshuk Cc: Kees Cook Cc: Thierry Reding Cc: David Lechner Cc: nouveau@lists.freedesktop.org Cc: linux-fbdev@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180822085405.10787-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 1 - drivers/staging/vboxvideo/vbox_fb.c | 3 +-- drivers/video/fbdev/core/fbcon.c | 1 - include/linux/fb.h | 4 ---- 4 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 844498c4267c..20a260887be3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -379,7 +379,6 @@ nouveau_fbcon_create(struct drm_fb_helper *helper, info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_IMAGEBLIT; - info->flags |= FBINFO_CAN_FORCE_OUTPUT; info->fbops = &nouveau_fbcon_sw_ops; info->fix.smem_start = fb->nvbo->bo.mem.bus.base + fb->nvbo->bo.mem.bus.offset; diff --git a/drivers/staging/vboxvideo/vbox_fb.c b/drivers/staging/vboxvideo/vbox_fb.c index 43c39eca4ae1..034f8ffa8f20 100644 --- a/drivers/staging/vboxvideo/vbox_fb.c +++ b/drivers/staging/vboxvideo/vbox_fb.c @@ -155,8 +155,7 @@ static int vboxfb_create(struct drm_fb_helper *helper, * The last flag forces a mode set on VT switches even if the kernel * does not think it is needed. */ - info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT | - FBINFO_MISC_ALWAYS_SETPAR; + info->flags = FBINFO_DEFAULT | FBINFO_MISC_ALWAYS_SETPAR; info->fbops = &vboxfb_ops; /* diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 75ebbbf0a1fb..9fd99681a7f2 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -1104,7 +1104,6 @@ static void fbcon_init(struct vc_data *vc, int init) if (p->userfont) charcnt = FNTCHARCNT(p->fontdata); - vc->vc_panic_force_write = !!(info->flags & FBINFO_CAN_FORCE_OUTPUT); vc->vc_can_do_color = (fb_get_color_depth(&info->var, &info->fix)!=1); vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800; if (charcnt == 256) { diff --git a/include/linux/fb.h b/include/linux/fb.h index 3cd375dafd0e..b1db5c5eb671 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -457,10 +457,6 @@ struct fb_tile_ops { */ #define FBINFO_BE_MATH 0x100000 -/* report to the VT layer that this fb driver can accept forced console - output like oopses */ -#define FBINFO_CAN_FORCE_OUTPUT 0x200000 - struct fb_info { atomic_t count; int node; -- cgit v1.2.3-71-gd317 From 8d7fc2994f4d1f431e280c9e21a139c18dc435ec Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 22 Aug 2018 10:54:03 +0200 Subject: vt: Remove vc_panic_force_write It was only used by the panic support in fbcon, which is now gone. Remove this now dead code too. 
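A minimal sketch of why the forced-output path was unreliable (illustrative only, not part of either patch; example_fb_oops_write is a made-up name): in an oops the framebuffer lock may already be held by the very CPU that crashed, and sleeping is forbidden in that context, so the best a driver can do is a trylock and silently drop the output it was supposed to force:

static void example_fb_oops_write(struct fb_info *info)
{
	if (oops_in_progress) {
		/* The oopsing CPU may own the lock; blocking would deadlock. */
		if (!mutex_trylock(&info->lock))
			return;	/* the "forced" output is simply lost */
	} else {
		mutex_lock(&info->lock);
	}

	/* ... draw the oops text into the framebuffer ... */

	mutex_unlock(&info->lock);
}

Which is why dropping the flag and its helper loses nothing in practice.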
Acked-by: Greg Kroah-Hartman Cc: Greg Kroah-Hartman Cc: Kees Cook Cc: Joe Perches Cc: Daniel Vetter Cc: Meng Xu Cc: Nicolas Pitre Cc: Thomas Meyer Cc: Mike Frysinger Cc: Bartlomiej Zolnierkiewicz Cc: Hans de Goede Cc: Thierry Reding Cc: David Lechner Cc: Philippe Ombredanne Cc: Thomas Gleixner Cc: Kate Stewart Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180822085405.10787-2-daniel.vetter@ffwll.ch --- drivers/tty/vt/vt.c | 12 ++++-------- drivers/video/fbdev/core/fbcon.c | 3 +-- include/linux/console_struct.h | 1 - include/linux/vt_kern.h | 7 ------- 4 files changed, 5 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 5f1183b0b89d..55370e651db3 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1004,9 +1004,7 @@ void redraw_screen(struct vc_data *vc, int is_switch) clear_buffer_attributes(vc); } - /* Forcibly update if we're panicing */ - if ((update && vc->vc_mode != KD_GRAPHICS) || - vt_force_oops_output(vc)) + if (update && vc->vc_mode != KD_GRAPHICS) do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2); } set_cursor(vc); @@ -1046,7 +1044,6 @@ static void visual_init(struct vc_data *vc, int num, int init) vc->vc_hi_font_mask = 0; vc->vc_complement_mask = 0; vc->vc_can_do_color = 0; - vc->vc_panic_force_write = false; vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS; vc->vc_sw->con_init(vc, init); if (!vc->vc_complement_mask) @@ -2911,7 +2908,7 @@ static void vt_console_print(struct console *co, const char *b, unsigned count) goto quit; } - if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc)) + if (vc->vc_mode != KD_TEXT) goto quit; /* undraw cursor first */ @@ -4229,8 +4226,7 @@ void do_unblank_screen(int leaving_gfx) return; } vc = vc_cons[fg_console].d; - /* Try to unblank in oops case too */ - if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc)) + if (vc->vc_mode != KD_TEXT) return; /* but leave console_blanked != 0 */ if (blankinterval) { @@ -4239,7 +4235,7 @@ void do_unblank_screen(int leaving_gfx) } console_blanked = 0; - if (vc->vc_sw->con_blank(vc, 0, leaving_gfx) || vt_force_oops_output(vc)) + if (vc->vc_sw->con_blank(vc, 0, leaving_gfx)) /* Low-level driver cannot restore -> do it ourselves */ update_screen(vc); if (console_blank_hook) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 9fd99681a7f2..8958ccc8b1ac 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -284,8 +284,7 @@ static inline int fbcon_is_inactive(struct vc_data *vc, struct fb_info *info) struct fbcon_ops *ops = info->fbcon_par; return (info->state != FBINFO_STATE_RUNNING || - vc->vc_mode != KD_TEXT || ops->graphics) && - !vt_force_oops_output(vc); + vc->vc_mode != KD_TEXT || ops->graphics); } static int get_color(struct vc_data *vc, struct fb_info *info, diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index fea64f2692a0..ab137f97ecbd 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -141,7 +141,6 @@ struct vc_data { struct uni_pagedir *vc_uni_pagedir; struct uni_pagedir **vc_uni_pagedir_loc; /* [!] 
Location of uni_pagedir variable for this console */ struct uni_screen *vc_uni_screen; /* unicode screen content */ - bool vc_panic_force_write; /* when oops/panic this VC can accept forced output/blanking */ /* additional information is in vt_kern.h */ }; diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 3fd07912909c..8dc77e40bc03 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -135,13 +135,6 @@ extern int do_unbind_con_driver(const struct consw *csw, int first, int last, int deflt); int vty_init(const struct file_operations *console_fops); -static inline bool vt_force_oops_output(struct vc_data *vc) -{ - if (oops_in_progress && vc->vc_panic_force_write && panic_timeout >= 0) - return true; - return false; -} - extern char vt_dont_switch; extern int default_utf8; extern int global_cursor_default; -- cgit v1.2.3-71-gd317 From da6c7707caf3736c1cf968606bd97c07e79625d4 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 22 Aug 2018 10:54:04 +0200 Subject: fbdev: Add FBINFO_HIDE_SMEM_START flag DRM drivers really, really, really don't want random userspace to share buffers behind its back, bypassing the dma-buf buffer sharing mechanism. For that reason we've ruthlessly rejected any IOCTL exposing the physical address of any graphics buffer. Unfortunately fbdev comes with that built-in. We could just set smem_start to 0, but that means we'd have to hand-roll our own fb_mmap implementation. For good reasons many drivers do that, but smem_start/length is still super convenient. Hence instead just stop the leak in the ioctl, to keep fb mmap working as-is. A second patch will set this flag for all drm drivers. Acked-by: Bartlomiej Zolnierkiewicz Cc: Bartlomiej Zolnierkiewicz Cc: Kees Cook Cc: Daniel Vetter Cc: linux-fbdev@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180822085405.10787-3-daniel.vetter@ffwll.ch --- drivers/video/fbdev/core/fbmem.c | 4 ++++ include/linux/fb.h | 7 +++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 0da75c55660d..861bf8081619 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1117,6 +1117,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, if (!lock_fb_info(info)) return -ENODEV; fix = info->fix; + if (info->flags & FBINFO_HIDE_SMEM_START) + fix.smem_start = 0; unlock_fb_info(info); ret = copy_to_user(argp, &fix, sizeof(fix)) ? -EFAULT : 0; @@ -1327,6 +1329,8 @@ static int fb_get_fscreeninfo(struct fb_info *info, unsigned int cmd, if (!lock_fb_info(info)) return -ENODEV; fix = info->fix; + if (info->flags & FBINFO_HIDE_SMEM_START) + fix.smem_start = 0; unlock_fb_info(info); return do_fscreeninfo_to_user(&fix, compat_ptr(arg)); } diff --git a/include/linux/fb.h b/include/linux/fb.h index b1db5c5eb671..a3cab6dc9b44 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -456,6 +456,13 @@ struct fb_tile_ops { * and host endianness. Drivers should not use this flag. */ #define FBINFO_BE_MATH 0x100000 +/* + * Hide smem_start in the FBIOGET_FSCREENINFO IOCTL. This is used by modern DRM + * drivers to stop userspace from trying to share buffers behind the kernel's + * back. Instead dma-buf based buffer sharing should be used.
+ */ +#define FBINFO_HIDE_SMEM_START 0x200000 + struct fb_info { atomic_t count; -- cgit v1.2.3-71-gd317 From cdc3d7f346476b4a4d1c4738b7dcdda4b3d5d870 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Sun, 5 Aug 2018 23:18:05 -0400 Subject: media: drm: shmobile: convert to SPDX identifiers Signed-off-by: Kuninori Morimoto Reviewed-by: Simon Horman Signed-off-by: Mauro Carvalho Chehab --- include/linux/platform_data/shmob_drm.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index ee495d707f17..fe815d7d9f58 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * shmob_drm.h -- SH Mobile DRM driver * * Copyright (C) 2012 Renesas Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __SHMOB_DRM_H__ -- cgit v1.2.3-71-gd317 From 0789724f86a59fa7078d67dfeb1ee4a15ae3c693 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 26 Jul 2018 15:59:16 +0200 Subject: firmware: meson_sm: Add serial number sysfs entry The Amlogic Meson SoC Secure Monitor implements a call to retrieve a unique SoC ID starting from the GX Family and all new families. The serial number is simply exposed as a sysfs entry under the firmware sysfs directory. Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman --- drivers/firmware/meson/meson_sm.c | 56 +++++++++++++++++++++++++++++++++ include/linux/firmware/meson/meson_sm.h | 1 + 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/meson/meson_sm.c b/drivers/firmware/meson/meson_sm.c index 0ec2ca87318c..29fbc818a573 100644 --- a/drivers/firmware/meson/meson_sm.c +++ b/drivers/firmware/meson/meson_sm.c @@ -24,6 +24,7 @@ #include #include #include + #include #include @@ -48,6 +49,7 @@ struct meson_sm_chip gxbb_chip = { CMD(SM_EFUSE_READ, 0x82000030), CMD(SM_EFUSE_WRITE, 0x82000031), CMD(SM_EFUSE_USER_MAX, 0x82000033), + CMD(SM_GET_CHIP_ID, 0x82000044), { /* sentinel */ }, }, }; @@ -214,6 +216,57 @@ int meson_sm_call_write(void *buffer, unsigned int size, unsigned int cmd_index, } EXPORT_SYMBOL(meson_sm_call_write); +#define SM_CHIP_ID_LENGTH 119 +#define SM_CHIP_ID_OFFSET 4 +#define SM_CHIP_ID_SIZE 12 + +static ssize_t serial_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + uint8_t *id_buf; + int ret; + + id_buf = kmalloc(SM_CHIP_ID_LENGTH, GFP_KERNEL); + if (!id_buf) + return -ENOMEM; + + ret = meson_sm_call_read(id_buf, SM_CHIP_ID_LENGTH, SM_GET_CHIP_ID, + 0, 0, 0, 0, 0); + if (ret < 0) { + kfree(id_buf); + return ret; + } + + ret = sprintf(buf, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + id_buf[SM_CHIP_ID_OFFSET + 0], + id_buf[SM_CHIP_ID_OFFSET + 1], + id_buf[SM_CHIP_ID_OFFSET + 2], + id_buf[SM_CHIP_ID_OFFSET + 3], + id_buf[SM_CHIP_ID_OFFSET + 4], + id_buf[SM_CHIP_ID_OFFSET + 5], + id_buf[SM_CHIP_ID_OFFSET + 6], + id_buf[SM_CHIP_ID_OFFSET + 7], + id_buf[SM_CHIP_ID_OFFSET + 8], + id_buf[SM_CHIP_ID_OFFSET + 9], + id_buf[SM_CHIP_ID_OFFSET + 10], + id_buf[SM_CHIP_ID_OFFSET + 11]); + + kfree(id_buf); + + return ret; +} + +static DEVICE_ATTR_RO(serial); + +static struct
attribute *meson_sm_sysfs_attributes[] = { + &dev_attr_serial.attr, + NULL, +}; + +static const struct attribute_group meson_sm_sysfs_attr_group = { + .attrs = meson_sm_sysfs_attributes, +}; + static const struct of_device_id meson_sm_ids[] = { { .compatible = "amlogic,meson-gxbb-sm", .data = &gxbb_chip }, { /* sentinel */ }, @@ -242,6 +295,9 @@ static int __init meson_sm_probe(struct platform_device *pdev) fw.chip = chip; pr_info("secure-monitor enabled\n"); + if (sysfs_create_group(&pdev->dev.kobj, &meson_sm_sysfs_attr_group)) + goto out_in_base; + return 0; out_in_base: diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h index 37a5eaea69dd..f98c20dd266e 100644 --- a/include/linux/firmware/meson/meson_sm.h +++ b/include/linux/firmware/meson/meson_sm.h @@ -17,6 +17,7 @@ enum { SM_EFUSE_READ, SM_EFUSE_WRITE, SM_EFUSE_USER_MAX, + SM_GET_CHIP_ID, }; struct meson_sm_firmware; -- cgit v1.2.3-71-gd317 From d4983983d98710e4927fdb8de8e987c303b3fba3 Mon Sep 17 00:00:00 2001 From: Maxime Jourdan Date: Thu, 23 Aug 2018 13:49:53 +0200 Subject: soc: amlogic: add meson-canvas driver Amlogic SoCs have a repository of 256 canvas which they use to describe pixel buffers. They contain metadata like width, height, block mode, endianness [..] Many IPs within those SoCs like vdec/vpu rely on those canvas to read/write pixels. Reviewed-by: Jerome Brunet Tested-by: Neil Armstrong Signed-off-by: Maxime Jourdan Signed-off-by: Kevin Hilman --- drivers/soc/amlogic/Kconfig | 7 ++ drivers/soc/amlogic/Makefile | 1 + drivers/soc/amlogic/meson-canvas.c | 185 +++++++++++++++++++++++++++++++ include/linux/soc/amlogic/meson-canvas.h | 65 +++++++++++ 4 files changed, 258 insertions(+) create mode 100644 drivers/soc/amlogic/meson-canvas.c create mode 100644 include/linux/soc/amlogic/meson-canvas.h (limited to 'include/linux') diff --git a/drivers/soc/amlogic/Kconfig b/drivers/soc/amlogic/Kconfig index b04f6e4aedbc..2f282b472912 100644 --- a/drivers/soc/amlogic/Kconfig +++ b/drivers/soc/amlogic/Kconfig @@ -1,5 +1,12 @@ menu "Amlogic SoC drivers" +config MESON_CANVAS + tristate "Amlogic Meson Canvas driver" + depends on ARCH_MESON || COMPILE_TEST + default n + help + Say yes to support the canvas IP for Amlogic SoCs. + config MESON_GX_SOCINFO bool "Amlogic Meson GX SoC Information driver" depends on ARCH_MESON || COMPILE_TEST diff --git a/drivers/soc/amlogic/Makefile b/drivers/soc/amlogic/Makefile index 8fa321893928..0ab16d35ac36 100644 --- a/drivers/soc/amlogic/Makefile +++ b/drivers/soc/amlogic/Makefile @@ -1,3 +1,4 @@ +obj-$(CONFIG_MESON_CANVAS) += meson-canvas.o obj-$(CONFIG_MESON_GX_SOCINFO) += meson-gx-socinfo.o obj-$(CONFIG_MESON_GX_PM_DOMAINS) += meson-gx-pwrc-vpu.o obj-$(CONFIG_MESON_MX_SOCINFO) += meson-mx-socinfo.o diff --git a/drivers/soc/amlogic/meson-canvas.c b/drivers/soc/amlogic/meson-canvas.c new file mode 100644 index 000000000000..fce33ca76bb6 --- /dev/null +++ b/drivers/soc/amlogic/meson-canvas.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2018 BayLibre, SAS + * Copyright (C) 2015 Amlogic, Inc. All rights reserved. 
+ * Copyright (C) 2014 Endless Mobile + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUM_CANVAS 256 + +/* DMC Registers */ +#define DMC_CAV_LUT_DATAL 0x00 + #define CANVAS_WIDTH_LBIT 29 + #define CANVAS_WIDTH_LWID 3 +#define DMC_CAV_LUT_DATAH 0x04 + #define CANVAS_WIDTH_HBIT 0 + #define CANVAS_HEIGHT_BIT 9 + #define CANVAS_WRAP_BIT 22 + #define CANVAS_BLKMODE_BIT 24 + #define CANVAS_ENDIAN_BIT 26 +#define DMC_CAV_LUT_ADDR 0x08 + #define CANVAS_LUT_WR_EN BIT(9) + #define CANVAS_LUT_RD_EN BIT(8) + +struct meson_canvas { + struct device *dev; + void __iomem *reg_base; + spinlock_t lock; /* canvas device lock */ + u8 used[NUM_CANVAS]; +}; + +static void canvas_write(struct meson_canvas *canvas, u32 reg, u32 val) +{ + writel_relaxed(val, canvas->reg_base + reg); +} + +static u32 canvas_read(struct meson_canvas *canvas, u32 reg) +{ + return readl_relaxed(canvas->reg_base + reg); +} + +struct meson_canvas *meson_canvas_get(struct device *dev) +{ + struct device_node *canvas_node; + struct platform_device *canvas_pdev; + + canvas_node = of_parse_phandle(dev->of_node, "amlogic,canvas", 0); + if (!canvas_node) + return ERR_PTR(-ENODEV); + + canvas_pdev = of_find_device_by_node(canvas_node); + if (!canvas_pdev) + return ERR_PTR(-EPROBE_DEFER); + + return dev_get_drvdata(&canvas_pdev->dev); +} +EXPORT_SYMBOL_GPL(meson_canvas_get); + +int meson_canvas_config(struct meson_canvas *canvas, u8 canvas_index, + u32 addr, u32 stride, u32 height, + unsigned int wrap, + unsigned int blkmode, + unsigned int endian) +{ + unsigned long flags; + + spin_lock_irqsave(&canvas->lock, flags); + if (!canvas->used[canvas_index]) { + dev_err(canvas->dev, + "Trying to setup non allocated canvas %u\n", + canvas_index); + spin_unlock_irqrestore(&canvas->lock, flags); + return -EINVAL; + } + + canvas_write(canvas, DMC_CAV_LUT_DATAL, + ((addr + 7) >> 3) | + (((stride + 7) >> 3) << CANVAS_WIDTH_LBIT)); + + canvas_write(canvas, DMC_CAV_LUT_DATAH, + ((((stride + 7) >> 3) >> CANVAS_WIDTH_LWID) << + CANVAS_WIDTH_HBIT) | + (height << CANVAS_HEIGHT_BIT) | + (wrap << CANVAS_WRAP_BIT) | + (blkmode << CANVAS_BLKMODE_BIT) | + (endian << CANVAS_ENDIAN_BIT)); + + canvas_write(canvas, DMC_CAV_LUT_ADDR, + CANVAS_LUT_WR_EN | canvas_index); + + /* Force a read-back to make sure everything is flushed. 
*/ + canvas_read(canvas, DMC_CAV_LUT_DATAH); + spin_unlock_irqrestore(&canvas->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(meson_canvas_config); + +int meson_canvas_alloc(struct meson_canvas *canvas, u8 *canvas_index) +{ + int i; + unsigned long flags; + + spin_lock_irqsave(&canvas->lock, flags); + for (i = 0; i < NUM_CANVAS; ++i) { + if (!canvas->used[i]) { + canvas->used[i] = 1; + spin_unlock_irqrestore(&canvas->lock, flags); + *canvas_index = i; + return 0; + } + } + spin_unlock_irqrestore(&canvas->lock, flags); + + dev_err(canvas->dev, "No more canvas available\n"); + return -ENODEV; +} +EXPORT_SYMBOL_GPL(meson_canvas_alloc); + +int meson_canvas_free(struct meson_canvas *canvas, u8 canvas_index) +{ + unsigned long flags; + + spin_lock_irqsave(&canvas->lock, flags); + if (!canvas->used[canvas_index]) { + dev_err(canvas->dev, + "Trying to free unused canvas %u\n", canvas_index); + spin_unlock_irqrestore(&canvas->lock, flags); + return -EINVAL; + } + canvas->used[canvas_index] = 0; + spin_unlock_irqrestore(&canvas->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(meson_canvas_free); + +static int meson_canvas_probe(struct platform_device *pdev) +{ + struct resource *res; + struct meson_canvas *canvas; + struct device *dev = &pdev->dev; + + canvas = devm_kzalloc(dev, sizeof(*canvas), GFP_KERNEL); + if (!canvas) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + canvas->reg_base = devm_ioremap_resource(dev, res); + if (IS_ERR(canvas->reg_base)) + return PTR_ERR(canvas->reg_base); + + canvas->dev = dev; + spin_lock_init(&canvas->lock); + dev_set_drvdata(dev, canvas); + + return 0; +} + +static const struct of_device_id canvas_dt_match[] = { + { .compatible = "amlogic,canvas" }, + {} +}; +MODULE_DEVICE_TABLE(of, canvas_dt_match); + +static struct platform_driver meson_canvas_driver = { + .probe = meson_canvas_probe, + .driver = { + .name = "amlogic-canvas", + .of_match_table = canvas_dt_match, + }, +}; +module_platform_driver(meson_canvas_driver); + +MODULE_DESCRIPTION("Amlogic Canvas driver"); +MODULE_AUTHOR("Maxime Jourdan "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/soc/amlogic/meson-canvas.h b/include/linux/soc/amlogic/meson-canvas.h new file mode 100644 index 000000000000..b4dde2fbeb3f --- /dev/null +++ b/include/linux/soc/amlogic/meson-canvas.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2018 BayLibre, SAS + */ +#ifndef __SOC_MESON_CANVAS_H +#define __SOC_MESON_CANVAS_H + +#include + +#define MESON_CANVAS_WRAP_NONE 0x00 +#define MESON_CANVAS_WRAP_X 0x01 +#define MESON_CANVAS_WRAP_Y 0x02 + +#define MESON_CANVAS_BLKMODE_LINEAR 0x00 +#define MESON_CANVAS_BLKMODE_32x32 0x01 +#define MESON_CANVAS_BLKMODE_64x64 0x02 + +#define MESON_CANVAS_ENDIAN_SWAP16 0x1 +#define MESON_CANVAS_ENDIAN_SWAP32 0x3 +#define MESON_CANVAS_ENDIAN_SWAP64 0x7 +#define MESON_CANVAS_ENDIAN_SWAP128 0xf + +struct meson_canvas; + +/** + * meson_canvas_get() - get a canvas provider instance + * + * @dev: consumer device pointer + */ +struct meson_canvas *meson_canvas_get(struct device *dev); + +/** + * meson_canvas_alloc() - take ownership of a canvas + * + * @canvas: canvas provider instance retrieved from meson_canvas_get() + * @canvas_index: will be filled with the canvas ID + */ +int meson_canvas_alloc(struct meson_canvas *canvas, u8 *canvas_index); + +/** + * meson_canvas_free() - remove ownership from a canvas + * + * @canvas: canvas provider instance retrieved from meson_canvas_get() + * @canvas_index: canvas ID that was obtained via 
meson_canvas_alloc() + */ +int meson_canvas_free(struct meson_canvas *canvas, u8 canvas_index); + +/** + * meson_canvas_config() - configure a canvas + * + * @canvas: canvas provider instance retrieved from meson_canvas_get() + * @canvas_index: canvas ID that was obtained via meson_canvas_alloc() + * @addr: physical address to the pixel buffer + * @stride: width of the buffer + * @height: height of the buffer + * @wrap: undocumented + * @blkmode: block mode (linear, 32x32, 64x64) + * @endian: byte swapping (swap16, swap32, swap64, swap128) + */ +int meson_canvas_config(struct meson_canvas *canvas, u8 canvas_index, + u32 addr, u32 stride, u32 height, + unsigned int wrap, unsigned int blkmode, + unsigned int endian); + +#endif -- cgit v1.2.3-71-gd317 From 1e479c619b2ac983fdea1a8212c7b822b5098da0 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Wed, 12 Sep 2018 22:22:45 +0200 Subject: rtc: unexport non devm managed registration Ensure the non-managed versions of the un/registration functions are not used anymore. No driver uses them now, and they should not be necessary. Signed-off-by: Alexandre Belloni --- drivers/rtc/class.c | 12 +++++------- include/linux/rtc.h | 5 ----- 2 files changed, 5 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 0fca4d74c76b..3b43787f154b 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -286,9 +286,10 @@ static void rtc_device_get_offset(struct rtc_device *rtc) * * Returns the pointer to the new struct class device. */ -struct rtc_device *rtc_device_register(const char *name, struct device *dev, - const struct rtc_class_ops *ops, - struct module *owner) +static struct rtc_device *rtc_device_register(const char *name, + struct device *dev, + const struct rtc_class_ops *ops, + struct module *owner) { struct rtc_device *rtc; struct rtc_wkalrm alrm; @@ -351,15 +352,13 @@ exit: name, err); return ERR_PTR(err); } -EXPORT_SYMBOL_GPL(rtc_device_register); - /** * rtc_device_unregister - removes the previously registered RTC class device * * @rtc: the RTC class device to destroy */ -void rtc_device_unregister(struct rtc_device *rtc) +static void rtc_device_unregister(struct rtc_device *rtc) { mutex_lock(&rtc->ops_lock); /* @@ -372,7 +371,6 @@ void rtc_device_unregister(struct rtc_device *rtc) mutex_unlock(&rtc->ops_lock); put_device(&rtc->dev); } -EXPORT_SYMBOL_GPL(rtc_device_unregister); static void devm_rtc_device_release(struct device *dev, void *res) { diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 6aedc30003e7..faf00a1472d4 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -167,17 +167,12 @@ struct rtc_device { #define RTC_TIMESTAMP_BEGIN_2000 946684800LL /* 2000-01-01 00:00:00 */ #define RTC_TIMESTAMP_END_2099 4102444799LL /* 2099-12-31 23:59:59 */ -extern struct rtc_device *rtc_device_register(const char *name, - struct device *dev, - const struct rtc_class_ops *ops, - struct module *owner); extern struct rtc_device *devm_rtc_device_register(struct device *dev, const char *name, const struct rtc_class_ops *ops, struct module *owner); struct rtc_device *devm_rtc_allocate_device(struct device *dev); int __rtc_register_device(struct module *owner, struct rtc_device *rtc); -extern void rtc_device_unregister(struct rtc_device *rtc); extern void devm_rtc_device_unregister(struct device *dev, struct rtc_device *rtc); -- cgit v1.2.3-71-gd317 From 7f9c136216c745099f36a4e0c3b2e63eedeb442f Mon Sep 17 00:00:00 2001 From: Venkata Narendra Kumar Gutta Date: Wed,
12 Sep 2018 11:06:32 -0700 Subject: soc: qcom: Add broadcast base for Last Level Cache Controller (LLCC) Currently, the broadcast base is set to the end of the LLCC banks, which may not always be correct: the number of banks may vary for each chipset, and the broadcast base could be at a different address as well. This info depends on the chipset, so get the broadcast base from the device tree (DT). Add the broadcast base in the LLCC driver and use it for broadcast writes. Signed-off-by: Venkata Narendra Kumar Gutta Reviewed-by: Evan Green Signed-off-by: Andy Gross --- drivers/soc/qcom/llcc-slice.c | 55 +++++++++++++++++++++++--------------- include/linux/soc/qcom/llcc-qcom.h | 4 +-- 2 files changed, 35 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/soc/qcom/llcc-slice.c b/drivers/soc/qcom/llcc-slice.c index 54063a31132f..08e3d388e153 100644 --- a/drivers/soc/qcom/llcc-slice.c +++ b/drivers/soc/qcom/llcc-slice.c @@ -106,22 +106,24 @@ static int llcc_update_act_ctrl(u32 sid, u32 slice_status; int ret; - act_ctrl_reg = drv_data->bcast_off + LLCC_TRP_ACT_CTRLn(sid); - status_reg = drv_data->bcast_off + LLCC_TRP_STATUSn(sid); + act_ctrl_reg = LLCC_TRP_ACT_CTRLn(sid); + status_reg = LLCC_TRP_STATUSn(sid); /* Set the ACTIVE trigger */ act_ctrl_reg_val |= ACT_CTRL_ACT_TRIG; - ret = regmap_write(drv_data->regmap, act_ctrl_reg, act_ctrl_reg_val); + ret = regmap_write(drv_data->bcast_regmap, act_ctrl_reg, + act_ctrl_reg_val); if (ret) return ret; /* Clear the ACTIVE trigger */ act_ctrl_reg_val &= ~ACT_CTRL_ACT_TRIG; - ret = regmap_write(drv_data->regmap, act_ctrl_reg, act_ctrl_reg_val); + ret = regmap_write(drv_data->bcast_regmap, act_ctrl_reg, + act_ctrl_reg_val); if (ret) return ret; - ret = regmap_read_poll_timeout(drv_data->regmap, status_reg, + ret = regmap_read_poll_timeout(drv_data->bcast_regmap, status_reg, slice_status, !(slice_status & status), 0, LLCC_STATUS_READ_DELAY); return ret; @@ -226,16 +228,13 @@ static int qcom_llcc_cfg_program(struct platform_device *pdev) int ret; const struct llcc_slice_config *llcc_table; struct llcc_slice_desc desc; - u32 bcast_off = drv_data->bcast_off; sz = drv_data->cfg_size; llcc_table = drv_data->cfg; for (i = 0; i < sz; i++) { - attr1_cfg = bcast_off + - LLCC_TRP_ATTR1_CFGn(llcc_table[i].slice_id); - attr0_cfg = bcast_off + - LLCC_TRP_ATTR0_CFGn(llcc_table[i].slice_id); + attr1_cfg = LLCC_TRP_ATTR1_CFGn(llcc_table[i].slice_id); + attr0_cfg = LLCC_TRP_ATTR0_CFGn(llcc_table[i].slice_id); attr1_val = llcc_table[i].cache_mode; attr1_val |= llcc_table[i].probe_target_ways << @@ -260,10 +259,12 @@ static int qcom_llcc_cfg_program(struct platform_device *pdev) attr0_val = llcc_table[i].res_ways & ATTR0_RES_WAYS_MASK; attr0_val |= llcc_table[i].bonus_ways << ATTR0_BONUS_WAYS_SHIFT; - ret = regmap_write(drv_data->regmap, attr1_cfg, attr1_val); + ret = regmap_write(drv_data->bcast_regmap, attr1_cfg, + attr1_val); if (ret) return ret; - ret = regmap_write(drv_data->regmap, attr0_cfg, attr0_val); + ret = regmap_write(drv_data->bcast_regmap, attr0_cfg, + attr0_val); if (ret) return ret; if (llcc_table[i].activate_on_init) { @@ -279,24 +280,36 @@ int qcom_llcc_probe(struct platform_device *pdev, { u32 num_banks; struct device *dev = &pdev->dev; - struct resource *res; - void __iomem *base; + struct resource *llcc_banks_res, *llcc_bcast_res; + void __iomem *llcc_banks_base, *llcc_bcast_base; int ret, i; drv_data = devm_kzalloc(dev, sizeof(*drv_data), GFP_KERNEL); if (!drv_data) return -ENOMEM; - res = platform_get_resource(pdev,
IORESOURCE_MEM, 0); - base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); + llcc_banks_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "llcc_base"); + llcc_banks_base = devm_ioremap_resource(&pdev->dev, llcc_banks_res); + if (IS_ERR(llcc_banks_base)) + return PTR_ERR(llcc_banks_base); - drv_data->regmap = devm_regmap_init_mmio(dev, base, - &llcc_regmap_config); + drv_data->regmap = devm_regmap_init_mmio(dev, llcc_banks_base, + &llcc_regmap_config); if (IS_ERR(drv_data->regmap)) return PTR_ERR(drv_data->regmap); + llcc_bcast_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "llcc_broadcast_base"); + llcc_bcast_base = devm_ioremap_resource(&pdev->dev, llcc_bcast_res); + if (IS_ERR(llcc_bcast_base)) + return PTR_ERR(llcc_bcast_base); + + drv_data->bcast_regmap = devm_regmap_init_mmio(dev, llcc_bcast_base, + &llcc_regmap_config); + if (IS_ERR(drv_data->bcast_regmap)) + return PTR_ERR(drv_data->bcast_regmap); + ret = regmap_read(drv_data->regmap, LLCC_COMMON_STATUS0, &num_banks); if (ret) @@ -318,8 +331,6 @@ int qcom_llcc_probe(struct platform_device *pdev, for (i = 0; i < num_banks; i++) drv_data->offsets[i] = i * BANK_OFFSET_STRIDE; - drv_data->bcast_off = num_banks * BANK_OFFSET_STRIDE; - drv_data->bitmap = devm_kcalloc(dev, BITS_TO_LONGS(drv_data->max_slices), sizeof(unsigned long), GFP_KERNEL); diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 7e3b9c605ab2..c681e795b587 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -70,22 +70,22 @@ struct llcc_slice_config { /** * llcc_drv_data - Data associated with the llcc driver * @regmap: regmap associated with the llcc device + * @bcast_regmap: regmap associated with llcc broadcast offset * @cfg: pointer to the data structure for slice configuration * @lock: mutex associated with each slice * @cfg_size: size of the config data table * @max_slices: max slices as read from device tree - * @bcast_off: Offset of the broadcast bank * @num_banks: Number of llcc banks * @bitmap: Bit map to track the active slice ids * @offsets: Pointer to the bank offsets array */ struct llcc_drv_data { struct regmap *regmap; + struct regmap *bcast_regmap; const struct llcc_slice_config *cfg; struct mutex lock; u32 cfg_size; u32 max_slices; - u32 bcast_off; u32 num_banks; unsigned long *bitmap; u32 *offsets; -- cgit v1.2.3-71-gd317 From c081f3060fab316fcf103967a24e502d58488849 Mon Sep 17 00:00:00 2001 From: Venkata Narendra Kumar Gutta Date: Wed, 12 Sep 2018 11:06:33 -0700 Subject: soc: qcom: Add support to register LLCC EDAC driver The cache error reporting controller detects and reports single and double bit errors on the Last Level Cache Controller (LLCC) cache. Add the required support to register the LLCC EDAC driver as a platform driver from the LLCC driver.
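The registration pattern this patch uses, condensed into a minimal sketch (the example_* helpers are illustrative, not from the patch): platform_device_register_data() copies the LLCC driver's state into the child device's platform_data, which the EDAC driver's probe then picks up:

static int example_register_edac_child(struct platform_device *llcc_pdev,
				       struct llcc_drv_data *drv_data)
{
	struct platform_device *edac;

	/* copies *drv_data into the child's dev.platform_data */
	edac = platform_device_register_data(&llcc_pdev->dev,
					     "qcom_llcc_edac", -1,
					     drv_data, sizeof(*drv_data));
	return PTR_ERR_OR_ZERO(edac);
}

static int example_edac_child_probe(struct platform_device *pdev)
{
	/* the copy made by platform_device_register_data() above */
	struct llcc_drv_data *drv = pdev->dev.platform_data;

	return drv ? 0 : -ENODEV;
}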
Signed-off-by: Venkata Narendra Kumar Gutta Reviewed-by: Evan Green Signed-off-by: Andy Gross --- drivers/soc/qcom/llcc-slice.c | 18 ++++++++++++++++-- include/linux/soc/qcom/llcc-qcom.h | 2 ++ 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/soc/qcom/llcc-slice.c b/drivers/soc/qcom/llcc-slice.c index 08e3d388e153..d78926742510 100644 --- a/drivers/soc/qcom/llcc-slice.c +++ b/drivers/soc/qcom/llcc-slice.c @@ -225,7 +225,7 @@ static int qcom_llcc_cfg_program(struct platform_device *pdev) u32 attr0_val; u32 max_cap_cacheline; u32 sz; - int ret; + int ret = 0; const struct llcc_slice_config *llcc_table; struct llcc_slice_desc desc; @@ -283,6 +283,7 @@ int qcom_llcc_probe(struct platform_device *pdev, struct resource *llcc_banks_res, *llcc_bcast_res; void __iomem *llcc_banks_base, *llcc_bcast_base; int ret, i; + struct platform_device *llcc_edac; drv_data = devm_kzalloc(dev, sizeof(*drv_data), GFP_KERNEL); if (!drv_data) @@ -342,7 +343,20 @@ int qcom_llcc_probe(struct platform_device *pdev, mutex_init(&drv_data->lock); platform_set_drvdata(pdev, drv_data); - return qcom_llcc_cfg_program(pdev); + ret = qcom_llcc_cfg_program(pdev); + if (ret) + return ret; + + drv_data->ecc_irq = platform_get_irq(pdev, 0); + if (drv_data->ecc_irq >= 0) { + llcc_edac = platform_device_register_data(&pdev->dev, + "qcom_llcc_edac", -1, drv_data, + sizeof(*drv_data)); + if (IS_ERR(llcc_edac)) + dev_err(dev, "Failed to register llcc edac driver\n"); + } + + return ret; } EXPORT_SYMBOL_GPL(qcom_llcc_probe); diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index c681e795b587..2e4b34d2617e 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -78,6 +78,7 @@ struct llcc_drv_data { * @num_banks: Number of llcc banks * @bitmap: Bit map to track the active slice ids * @offsets: Pointer to the bank offsets array + * @ecc_irq: interrupt for llcc cache error detection and reporting */ struct llcc_drv_data { struct regmap *regmap; @@ -89,6 +90,7 @@ struct llcc_drv_data { u32 num_banks; unsigned long *bitmap; u32 *offsets; + int ecc_irq; }; -- cgit v1.2.3-71-gd317 From 27450653f1db0b9d5b5048a246c850c52ee4aa61 Mon Sep 17 00:00:00 2001 From: Channagoud Kadabi Date: Wed, 12 Sep 2018 11:06:34 -0700 Subject: drivers: edac: Add EDAC driver support for QCOM SoCs Add an error reporting driver for Single Bit Errors (SBEs) and Double Bit Errors (DBEs). As of now, this driver supports error reporting for the Tag RAM and Data RAM of the Last Level Cache Controller (LLCC). Interrupts are triggered when errors happen in the cache; the driver handles those interrupts and dumps the syndrome registers.
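Before the driver itself, a worked example of the decode it performs (example_decode_sb_status is an illustrative helper, not from the patch): each status word packs an error count and a bitmask of failing cache ways, both extracted with a mask and a shift. For a single-bit Data RAM error the count sits in bits 23:16 and the ways in bits 15:0; note the driver spells the shift amount 16 as BIT(4):

static void example_decode_sb_status(u32 status)
{
	/* ECC_SB_ERR_COUNT_MASK is GENMASK(23, 16); shift by 16 == BIT(4) */
	u32 count = (status & GENMASK(23, 16)) >> 16;
	/* ECC_SB_ERR_WAYS_MASK is GENMASK(15, 0) */
	u32 ways = status & GENMASK(15, 0);

	pr_info("llcc: %u single-bit error(s), failing ways 0x%04x\n",
		count, ways);
}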
Signed-off-by: Channagoud Kadabi Signed-off-by: Venkata Narendra Kumar Gutta Co-developed-by: Venkata Narendra Kumar Gutta Acked-by: Borislav Petkov Signed-off-by: Andy Gross --- MAINTAINERS | 8 + drivers/edac/Kconfig | 14 ++ drivers/edac/Makefile | 1 + drivers/edac/qcom_edac.c | 414 +++++++++++++++++++++++++++++++++++++ include/linux/soc/qcom/llcc-qcom.h | 24 +++ 5 files changed, 461 insertions(+) create mode 100644 drivers/edac/qcom_edac.c (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index a5b256b25905..f7d7213ca293 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5346,6 +5346,14 @@ L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/ti_edac.c +EDAC-QCOM +M: Channagoud Kadabi +M: Venkata Narendra Kumar Gutta +L: linux-arm-msm@vger.kernel.org +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/qcom_edac.c + EDIROL UA-101/UA-1000 DRIVER M: Clemens Ladisch L: alsa-devel@alsa-project.org (moderated for non-subscribers) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 57304b2e989f..df9467eef32a 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -460,4 +460,18 @@ config EDAC_TI Support for error detection and correction on the TI SoCs. +config EDAC_QCOM + tristate "QCOM EDAC Controller" + depends on ARCH_QCOM && QCOM_LLCC + help + Support for error detection and correction on the + Qualcomm Technologies, Inc. SoCs. + + This driver reports Single Bit Errors (SBEs) and Double Bit Errors (DBEs). + As of now, it supports error reporting for Last Level Cache Controller (LLCC) + of Tag RAM and Data RAM. + + For debugging issues having to do with stability and overall system + health, you should probably say 'Y' here. + endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 02b43a7d8c3e..716096d08ea0 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -77,3 +77,4 @@ obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o obj-$(CONFIG_EDAC_TI) += ti_edac.o +obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c new file mode 100644 index 000000000000..82bd775124f2 --- /dev/null +++ b/drivers/edac/qcom_edac.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "edac_mc.h" +#include "edac_device.h" + +#define EDAC_LLCC "qcom_llcc" + +#define LLCC_ERP_PANIC_ON_UE 1 + +#define TRP_SYN_REG_CNT 6 +#define DRP_SYN_REG_CNT 8 + +#define LLCC_COMMON_STATUS0 0x0003000c +#define LLCC_LB_CNT_MASK GENMASK(31, 28) +#define LLCC_LB_CNT_SHIFT 28 + +/* Single & double bit syndrome register offsets */ +#define TRP_ECC_SB_ERR_SYN0 0x0002304c +#define TRP_ECC_DB_ERR_SYN0 0x00020370 +#define DRP_ECC_SB_ERR_SYN0 0x0004204c +#define DRP_ECC_DB_ERR_SYN0 0x00042070 + +/* Error register offsets */ +#define TRP_ECC_ERROR_STATUS1 0x00020348 +#define TRP_ECC_ERROR_STATUS0 0x00020344 +#define DRP_ECC_ERROR_STATUS1 0x00042048 +#define DRP_ECC_ERROR_STATUS0 0x00042044 + +/* TRP, DRP interrupt register offsets */ +#define DRP_INTERRUPT_STATUS 0x00041000 +#define TRP_INTERRUPT_0_STATUS 0x00020480 +#define DRP_INTERRUPT_CLEAR 0x00041008 +#define DRP_ECC_ERROR_CNTR_CLEAR 0x00040004 +#define TRP_INTERRUPT_0_CLEAR 0x00020484 +#define TRP_ECC_ERROR_CNTR_CLEAR 0x00020440 + +/* Mask and shift macros */ +#define ECC_DB_ERR_COUNT_MASK GENMASK(4, 0) +#define ECC_DB_ERR_WAYS_MASK GENMASK(31, 16) +#define ECC_DB_ERR_WAYS_SHIFT BIT(4) + +#define ECC_SB_ERR_COUNT_MASK GENMASK(23, 16) +#define ECC_SB_ERR_COUNT_SHIFT BIT(4) +#define ECC_SB_ERR_WAYS_MASK GENMASK(15, 0) + +#define SB_ECC_ERROR BIT(0) +#define DB_ECC_ERROR BIT(1) + +#define DRP_TRP_INT_CLEAR GENMASK(1, 0) +#define DRP_TRP_CNT_CLEAR GENMASK(1, 0) + +/* Config registers offsets*/ +#define DRP_ECC_ERROR_CFG 0x00040000 + +/* Tag RAM, Data RAM interrupt register offsets */ +#define CMN_INTERRUPT_0_ENABLE 0x0003001c +#define CMN_INTERRUPT_2_ENABLE 0x0003003c +#define TRP_INTERRUPT_0_ENABLE 0x00020488 +#define DRP_INTERRUPT_ENABLE 0x0004100c + +#define SB_ERROR_THRESHOLD 0x1 +#define SB_ERROR_THRESHOLD_SHIFT 24 +#define SB_DB_TRP_INTERRUPT_ENABLE 0x3 +#define TRP0_INTERRUPT_ENABLE 0x1 +#define DRP0_INTERRUPT_ENABLE BIT(6) +#define SB_DB_DRP_INTERRUPT_ENABLE 0x3 + +enum { + LLCC_DRAM_CE = 0, + LLCC_DRAM_UE, + LLCC_TRAM_CE, + LLCC_TRAM_UE, +}; + +static const struct llcc_edac_reg_data edac_reg_data[] = { + [LLCC_DRAM_CE] = { + .name = "DRAM Single-bit", + .synd_reg = DRP_ECC_SB_ERR_SYN0, + .count_status_reg = DRP_ECC_ERROR_STATUS1, + .ways_status_reg = DRP_ECC_ERROR_STATUS0, + .reg_cnt = DRP_SYN_REG_CNT, + .count_mask = ECC_SB_ERR_COUNT_MASK, + .ways_mask = ECC_SB_ERR_WAYS_MASK, + .count_shift = ECC_SB_ERR_COUNT_SHIFT, + }, + [LLCC_DRAM_UE] = { + .name = "DRAM Double-bit", + .synd_reg = DRP_ECC_DB_ERR_SYN0, + .count_status_reg = DRP_ECC_ERROR_STATUS1, + .ways_status_reg = DRP_ECC_ERROR_STATUS0, + .reg_cnt = DRP_SYN_REG_CNT, + .count_mask = ECC_DB_ERR_COUNT_MASK, + .ways_mask = ECC_DB_ERR_WAYS_MASK, + .ways_shift = ECC_DB_ERR_WAYS_SHIFT, + }, + [LLCC_TRAM_CE] = { + .name = "TRAM Single-bit", + .synd_reg = TRP_ECC_SB_ERR_SYN0, + .count_status_reg = TRP_ECC_ERROR_STATUS1, + .ways_status_reg = TRP_ECC_ERROR_STATUS0, + .reg_cnt = TRP_SYN_REG_CNT, + .count_mask = ECC_SB_ERR_COUNT_MASK, + .ways_mask = ECC_SB_ERR_WAYS_MASK, + .count_shift = ECC_SB_ERR_COUNT_SHIFT, + }, + [LLCC_TRAM_UE] = { + .name = "TRAM Double-bit", + .synd_reg = TRP_ECC_DB_ERR_SYN0, + .count_status_reg = TRP_ECC_ERROR_STATUS1, + .ways_status_reg = TRP_ECC_ERROR_STATUS0, + .reg_cnt = TRP_SYN_REG_CNT, + .count_mask = ECC_DB_ERR_COUNT_MASK, + .ways_mask = ECC_DB_ERR_WAYS_MASK, + .ways_shift = ECC_DB_ERR_WAYS_SHIFT, + }, +}; + +static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap) 
+{ + u32 sb_err_threshold; + int ret; + + /* + * Configure interrupt enable registers such that Tag, Data RAM related + * interrupts are propagated to interrupt controller for servicing + */ + ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE, + TRP0_INTERRUPT_ENABLE, + TRP0_INTERRUPT_ENABLE); + if (ret) + return ret; + + ret = regmap_update_bits(llcc_bcast_regmap, TRP_INTERRUPT_0_ENABLE, + SB_DB_TRP_INTERRUPT_ENABLE, + SB_DB_TRP_INTERRUPT_ENABLE); + if (ret) + return ret; + + sb_err_threshold = (SB_ERROR_THRESHOLD << SB_ERROR_THRESHOLD_SHIFT); + ret = regmap_write(llcc_bcast_regmap, DRP_ECC_ERROR_CFG, + sb_err_threshold); + if (ret) + return ret; + + ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE, + DRP0_INTERRUPT_ENABLE, + DRP0_INTERRUPT_ENABLE); + if (ret) + return ret; + + ret = regmap_write(llcc_bcast_regmap, DRP_INTERRUPT_ENABLE, + SB_DB_DRP_INTERRUPT_ENABLE); + return ret; +} + +/* Clear the error interrupt and counter registers */ +static int +qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv) +{ + int ret = 0; + + switch (err_type) { + case LLCC_DRAM_CE: + case LLCC_DRAM_UE: + ret = regmap_write(drv->bcast_regmap, DRP_INTERRUPT_CLEAR, + DRP_TRP_INT_CLEAR); + if (ret) + return ret; + + ret = regmap_write(drv->bcast_regmap, DRP_ECC_ERROR_CNTR_CLEAR, + DRP_TRP_CNT_CLEAR); + if (ret) + return ret; + break; + case LLCC_TRAM_CE: + case LLCC_TRAM_UE: + ret = regmap_write(drv->bcast_regmap, TRP_INTERRUPT_0_CLEAR, + DRP_TRP_INT_CLEAR); + if (ret) + return ret; + + ret = regmap_write(drv->bcast_regmap, TRP_ECC_ERROR_CNTR_CLEAR, + DRP_TRP_CNT_CLEAR); + if (ret) + return ret; + break; + default: + ret = -EINVAL; + edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n", + err_type); + } + return ret; +} + +/* Dump syndrome registers data for Tag RAM, Data RAM bit errors */ +static int +dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type) +{ + struct llcc_edac_reg_data reg_data = edac_reg_data[err_type]; + int err_cnt, err_ways, ret, i; + u32 synd_reg, synd_val; + + for (i = 0; i < reg_data.reg_cnt; i++) { + synd_reg = reg_data.synd_reg + (i * 4); + ret = regmap_read(drv->regmap, drv->offsets[bank] + synd_reg, + &synd_val); + if (ret) + goto clear; + + edac_printk(KERN_CRIT, EDAC_LLCC, "%s: ECC_SYN%d: 0x%8x\n", + reg_data.name, i, synd_val); + } + + ret = regmap_read(drv->regmap, + drv->offsets[bank] + reg_data.count_status_reg, + &err_cnt); + if (ret) + goto clear; + + err_cnt &= reg_data.count_mask; + err_cnt >>= reg_data.count_shift; + edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error count: 0x%4x\n", + reg_data.name, err_cnt); + + ret = regmap_read(drv->regmap, + drv->offsets[bank] + reg_data.ways_status_reg, + &err_ways); + if (ret) + goto clear; + + err_ways &= reg_data.ways_mask; + err_ways >>= reg_data.ways_shift; + + edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error ways: 0x%4x\n", + reg_data.name, err_ways); + +clear: + return qcom_llcc_clear_error_status(err_type, drv); +} + +static int +dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank) +{ + struct llcc_drv_data *drv = edev_ctl->pvt_info; + int ret; + + ret = dump_syn_reg_values(drv, bank, err_type); + if (ret) + return ret; + + switch (err_type) { + case LLCC_DRAM_CE: + edac_device_handle_ce(edev_ctl, 0, bank, + "LLCC Data RAM correctable Error"); + break; + case LLCC_DRAM_UE: + edac_device_handle_ue(edev_ctl, 0, bank, + "LLCC Data RAM uncorrectable Error"); + break; + case LLCC_TRAM_CE: + edac_device_handle_ce(edev_ctl, 0, bank, + "LLCC Tag RAM correctable Error"); + break; + case LLCC_TRAM_UE: + edac_device_handle_ue(edev_ctl, 0, bank, + "LLCC Tag RAM uncorrectable Error"); + break; + default: + ret = -EINVAL; + edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n", + err_type); + } + + return ret; +} + +static irqreturn_t +llcc_ecc_irq_handler(int irq, void *edev_ctl) +{ + struct edac_device_ctl_info *edac_dev_ctl = edev_ctl; + struct llcc_drv_data *drv = edac_dev_ctl->pvt_info; + irqreturn_t irq_rc = IRQ_NONE; + u32 drp_error, trp_error, i; + bool irq_handled = false; + int ret; + + /* Iterate over the banks and look for Tag RAM or Data RAM errors */ + for (i = 0; i < drv->num_banks; i++) { + ret = regmap_read(drv->regmap, + drv->offsets[i] + DRP_INTERRUPT_STATUS, + &drp_error); + + if (!ret && (drp_error & SB_ECC_ERROR)) { + edac_printk(KERN_CRIT, EDAC_LLCC, + "Single Bit Error detected in Data RAM\n"); + ret = dump_syn_reg(edev_ctl, LLCC_DRAM_CE, i); + } else if (!ret && (drp_error & DB_ECC_ERROR)) { + edac_printk(KERN_CRIT, EDAC_LLCC, + "Double Bit Error detected in Data RAM\n"); + ret = dump_syn_reg(edev_ctl, LLCC_DRAM_UE, i); + } + if (!ret) + irq_handled = true; + + ret = regmap_read(drv->regmap, + drv->offsets[i] + TRP_INTERRUPT_0_STATUS, + &trp_error); + + if (!ret && (trp_error & SB_ECC_ERROR)) { + edac_printk(KERN_CRIT, EDAC_LLCC, + "Single Bit Error detected in Tag RAM\n"); + ret = dump_syn_reg(edev_ctl, LLCC_TRAM_CE, i); + } else if (!ret && (trp_error & DB_ECC_ERROR)) { + edac_printk(KERN_CRIT, EDAC_LLCC, + "Double Bit Error detected in Tag RAM\n"); + ret = dump_syn_reg(edev_ctl, LLCC_TRAM_UE, i); + } + if (!ret) + irq_handled = true; + } + + if (irq_handled) + irq_rc = IRQ_HANDLED; + + return irq_rc; +} + +static int qcom_llcc_edac_probe(struct platform_device *pdev) +{ + struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data; + struct edac_device_ctl_info *edev_ctl; + struct device *dev = &pdev->dev; + int ecc_irq; + int rc; + + rc = qcom_llcc_core_setup(llcc_driv_data->bcast_regmap); + if (rc) + return rc; + + /* Allocate edac control info */ + edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank", + llcc_driv_data->num_banks, 1, + NULL, 0, + edac_device_alloc_index()); + + if (!edev_ctl) + return -ENOMEM; + + edev_ctl->dev = dev; + edev_ctl->mod_name = dev_name(dev); + edev_ctl->dev_name = dev_name(dev); + edev_ctl->ctl_name = "llcc"; + edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE; + edev_ctl->pvt_info = llcc_driv_data; + + rc = edac_device_add_device(edev_ctl); + if (rc) + goto out_mem; + + platform_set_drvdata(pdev, edev_ctl); + + /* Request for ecc irq */ + ecc_irq = llcc_driv_data->ecc_irq; + if (ecc_irq < 0) { + rc = -ENODEV; + goto out_dev; + } + rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler, + IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl); + if (rc) + goto out_dev; + + return rc; + +out_dev: + edac_device_del_device(edev_ctl->dev); +out_mem: + edac_device_free_ctl_info(edev_ctl); + + return rc; +} + +static int qcom_llcc_edac_remove(struct platform_device *pdev) +{ + struct edac_device_ctl_info *edev_ctl = dev_get_drvdata(&pdev->dev); + + edac_device_del_device(edev_ctl->dev); + edac_device_free_ctl_info(edev_ctl); + + return 0; +} + +static struct platform_driver qcom_llcc_edac_driver = { + .probe = qcom_llcc_edac_probe, + .remove = qcom_llcc_edac_remove, + .driver = { + .name = "qcom_llcc_edac", + }, +}; +module_platform_driver(qcom_llcc_edac_driver); + +MODULE_DESCRIPTION("QCOM EDAC driver"); +MODULE_LICENSE("GPL v2"); diff --git
a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 2e4b34d2617e..69c285b1c990 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -93,6 +93,30 @@ struct llcc_drv_data { int ecc_irq; }; +/** + * llcc_edac_reg_data - llcc edac registers data for each error type + * @name: Name of the error + * @synd_reg: Syndrome register address + * @count_status_reg: Status register address to read the error count + * @ways_status_reg: Status register address to read the error ways + * @reg_cnt: Number of registers + * @count_mask: Mask value to get the error count + * @ways_mask: Mask value to get the error ways + * @count_shift: Shift value to get the error count + * @ways_shift: Shift value to get the error ways + */ +struct llcc_edac_reg_data { + char *name; + u64 synd_reg; + u64 count_status_reg; + u64 ways_status_reg; + u32 reg_cnt; + u32 count_mask; + u32 ways_mask; + u8 count_shift; + u8 ways_shift; +}; + #if IS_ENABLED(CONFIG_QCOM_LLCC) /** * llcc_slice_getd - get llcc slice descriptor -- cgit v1.2.3-71-gd317 From f4926ef76e23e291fcd38bd107c0a9bb8e2db505 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 18 May 2018 15:47:50 -0700 Subject: soc: qcom: geni: Make version macros simpler This macro doesn't work, because it hides a local variable inside the macro to hold the version, and that variable is declared as 'ver' but referenced as 'version' in one place. Let's change this to be more explicit. Introduce three macros for the major, minor, and step of the version, and require callers to pass the version in to get the part of the version out. This way we don't hide local variables inside macros and things are less evil overall. Cc: Karthikeyan Ramasubramanian Cc: Sagar Dharia Cc: Girish Mahadevan Signed-off-by: Stephen Boyd Reviewed-by: Douglas Anderson Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/qcom-geni-se.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h index 5d6144977828..3bcd67fd5548 100644 --- a/include/linux/qcom-geni-se.h +++ b/include/linux/qcom-geni-se.h @@ -225,19 +225,14 @@ struct geni_se { #define HW_VER_MINOR_SHFT 16 #define HW_VER_STEP_MASK GENMASK(15, 0) +#define GENI_SE_VERSION_MAJOR(ver) ((ver & HW_VER_MAJOR_MASK) >> HW_VER_MAJOR_SHFT) +#define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT) +#define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK) + #if IS_ENABLED(CONFIG_QCOM_GENI_SE) u32 geni_se_get_qup_hw_version(struct geni_se *se); -#define geni_se_get_wrapper_version(se, major, minor, step) do { \ - u32 ver; \ -\ - ver = geni_se_get_qup_hw_version(se); \ - major = (ver & HW_VER_MAJOR_MASK) >> HW_VER_MAJOR_SHFT; \ - minor = (ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT; \ - step = version & HW_VER_STEP_MASK; \ -} while (0) - /** * geni_se_read_proto() - Read the protocol configured for a serial engine * @se: Pointer to the concerned serial engine.
-- cgit v1.2.3-71-gd317 From 59104f239b9ed6cf3986e4228173ff2f4c95039e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 26 Jul 2018 02:37:49 +0000 Subject: drm: shmobile: convert to SPDX identifiers Signed-off-by: Kuninori Morimoto Acked-by: Laurent Pinchart Reviewed-by: Simon Horman Signed-off-by: Laurent Pinchart --- drivers/gpu/drm/shmobile/Kconfig | 1 + drivers/gpu/drm/shmobile/shmob_drm_backlight.c | 6 +----- drivers/gpu/drm/shmobile/shmob_drm_backlight.h | 6 +----- drivers/gpu/drm/shmobile/shmob_drm_crtc.c | 6 +----- drivers/gpu/drm/shmobile/shmob_drm_crtc.h | 6 +----- drivers/gpu/drm/shmobile/shmob_drm_drv.c | 6 +----- drivers/gpu/drm/shmobile/shmob_drm_drv.h | 6 +----- drivers/gpu/drm/shmobile/shmob_drm_kms.c | 6 +----- drivers/gpu/drm/shmobile/shmob_drm_kms.h | 6 +----- drivers/gpu/drm/shmobile/shmob_drm_plane.c | 6 +----- drivers/gpu/drm/shmobile/shmob_drm_plane.h | 6 +----- drivers/gpu/drm/shmobile/shmob_drm_regs.h | 6 +----- include/linux/platform_data/shmob_drm.h | 6 +----- 13 files changed, 13 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/shmobile/Kconfig b/drivers/gpu/drm/shmobile/Kconfig index 0426d66660d1..61bbe8e8bcc5 100644 --- a/drivers/gpu/drm/shmobile/Kconfig +++ b/drivers/gpu/drm/shmobile/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 config DRM_SHMOBILE tristate "DRM Support for SH Mobile" depends on DRM && ARM diff --git a/drivers/gpu/drm/shmobile/shmob_drm_backlight.c b/drivers/gpu/drm/shmobile/shmob_drm_backlight.c index 33dd41afea0e..f6628a5ee95f 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_backlight.c +++ b/drivers/gpu/drm/shmobile/shmob_drm_backlight.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * shmob_drm_backlight.c -- SH Mobile DRM Backlight * * Copyright (C) 2012 Renesas Electronics Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include diff --git a/drivers/gpu/drm/shmobile/shmob_drm_backlight.h b/drivers/gpu/drm/shmobile/shmob_drm_backlight.h index bac719ecc301..d9abb7a60be5 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_backlight.h +++ b/drivers/gpu/drm/shmobile/shmob_drm_backlight.h @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * shmob_drm_backlight.h -- SH Mobile DRM Backlight * * Copyright (C) 2012 Renesas Electronics Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
*/ #ifndef __SHMOB_DRM_BACKLIGHT_H__ diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c index fc66167b0641..499b5fdb869f 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c +++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * shmob_drm_crtc.c -- SH Mobile DRM CRTCs * * Copyright (C) 2012 Renesas Electronics Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.h b/drivers/gpu/drm/shmobile/shmob_drm_crtc.h index c11f421737dc..9ca6920641d8 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.h +++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.h @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * shmob_drm_crtc.h -- SH Mobile DRM CRTCs * * Copyright (C) 2012 Renesas Electronics Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __SHMOB_DRM_CRTC_H__ diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c index 592572554eb0..6ececad6f845 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c +++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * shmob_drm_drv.c -- SH Mobile DRM driver * * Copyright (C) 2012 Renesas Electronics Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.h b/drivers/gpu/drm/shmobile/shmob_drm_drv.h index 088a6e55fa29..80dc4b1020aa 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_drv.h +++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.h @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * shmob_drm.h -- SH Mobile DRM driver * * Copyright (C) 2012 Renesas Electronics Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
*/ #ifndef __SHMOB_DRM_DRV_H__ diff --git a/drivers/gpu/drm/shmobile/shmob_drm_kms.c b/drivers/gpu/drm/shmobile/shmob_drm_kms.c index 447638581c08..a17268444c6d 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_kms.c +++ b/drivers/gpu/drm/shmobile/shmob_drm_kms.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * shmob_drm_kms.c -- SH Mobile DRM Mode Setting * * Copyright (C) 2012 Renesas Electronics Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include diff --git a/drivers/gpu/drm/shmobile/shmob_drm_kms.h b/drivers/gpu/drm/shmobile/shmob_drm_kms.h index 753e2817dc2c..6ec2b732bb94 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_kms.h +++ b/drivers/gpu/drm/shmobile/shmob_drm_kms.h @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * shmob_drm_kms.h -- SH Mobile DRM Mode Setting * * Copyright (C) 2012 Renesas Electronics Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __SHMOB_DRM_KMS_H__ diff --git a/drivers/gpu/drm/shmobile/shmob_drm_plane.c b/drivers/gpu/drm/shmobile/shmob_drm_plane.c index 1d0359f713ca..1d1ee5e51351 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_plane.c +++ b/drivers/gpu/drm/shmobile/shmob_drm_plane.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * shmob_drm_plane.c -- SH Mobile DRM Planes * * Copyright (C) 2012 Renesas Electronics Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #include diff --git a/drivers/gpu/drm/shmobile/shmob_drm_plane.h b/drivers/gpu/drm/shmobile/shmob_drm_plane.h index a58cc1fc3240..bae67cc8c628 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_plane.h +++ b/drivers/gpu/drm/shmobile/shmob_drm_plane.h @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * shmob_drm_plane.h -- SH Mobile DRM Planes * * Copyright (C) 2012 Renesas Electronics Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
*/ #ifndef __SHMOB_DRM_PLANE_H__ diff --git a/drivers/gpu/drm/shmobile/shmob_drm_regs.h b/drivers/gpu/drm/shmobile/shmob_drm_regs.h index ea17d4415b9e..9eb0b3d01df8 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_regs.h +++ b/drivers/gpu/drm/shmobile/shmob_drm_regs.h @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * shmob_drm_regs.h -- SH Mobile DRM registers * * Copyright (C) 2012 Renesas Electronics Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __SHMOB_DRM_REGS_H__ diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h index ee495d707f17..fe815d7d9f58 100644 --- a/include/linux/platform_data/shmob_drm.h +++ b/include/linux/platform_data/shmob_drm.h @@ -1,14 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * shmob_drm.h -- SH Mobile DRM driver * * Copyright (C) 2012 Renesas Corporation * * Laurent Pinchart (laurent.pinchart@ideasonboard.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __SHMOB_DRM_H__ -- cgit v1.2.3-71-gd317 From 10c63443b74d1ef5c1b3bb104a9e6e40dc2437ff Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Aug 2018 14:54:03 +0200 Subject: Revert "serial: sh-sci: Remove SCIx_RZ_SCIFA_REGTYPE" This reverts commit 7acece71a517cad83a0842a94d94c13f271b680c. Signed-off-by: Geert Uytterhoeven Acked-by: Chris Brandt Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 31 +++++++++++++++++++++++++++++++ include/linux/serial_sci.h | 1 + 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index ac4424bf6b13..5d42c9a63001 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -291,6 +291,33 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = { .error_clear = SCIF_ERROR_CLEAR, }, + /* + * The "SCIFA" that is in RZ/T and RZ/A2. + * It looks like a normal SCIF with FIFO data, but with a + * compressed address space. Also, the break out of interrupts + * are different: ERI/BRI, RXI, TXI, TEI, DRI. + */ + [SCIx_RZ_SCIFA_REGTYPE] = { + .regs = { + [SCSMR] = { 0x00, 16 }, + [SCBRR] = { 0x02, 8 }, + [SCSCR] = { 0x04, 16 }, + [SCxTDR] = { 0x06, 8 }, + [SCxSR] = { 0x08, 16 }, + [SCxRDR] = { 0x0A, 8 }, + [SCFCR] = { 0x0C, 16 }, + [SCFDR] = { 0x0E, 16 }, + [SCSPTR] = { 0x10, 16 }, + [SCLSR] = { 0x12, 16 }, + }, + .fifosize = 16, + .overrun_reg = SCLSR, + .overrun_mask = SCLSR_ORER, + .sampling_rate_mask = SCI_SR(32), + .error_mask = SCIF_DEFAULT_ERROR_MASK, + .error_clear = SCIF_ERROR_CLEAR, + }, + /* * Common SH-3 SCIF definitions. 
*/ @@ -3110,6 +3137,10 @@ static const struct of_device_id of_sci_match[] = { .compatible = "renesas,scif-r7s72100", .data = SCI_OF_DATA(PORT_SCIF, SCIx_SH2_SCIF_FIFODATA_REGTYPE), }, + { + .compatible = "renesas,scif-r7s9210", + .data = SCI_OF_DATA(PORT_SCIF, SCIx_RZ_SCIFA_REGTYPE), + }, /* Family-specific types */ { .compatible = "renesas,rcar-gen1-scif", diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index c0e795d95477..1c89611e0e06 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -36,6 +36,7 @@ enum { SCIx_SH4_SCIF_FIFODATA_REGTYPE, SCIx_SH7705_SCIF_REGTYPE, SCIx_HSCIF_REGTYPE, + SCIx_RZ_SCIFA_REGTYPE, SCIx_NR_REGTYPES, }; -- cgit v1.2.3-71-gd317 From b1078c355d76769b5ddefc67d143fbd9b6e52c05 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 20 Sep 2018 13:41:52 +0200 Subject: of: base: Introduce of_alias_get_alias_list() to check alias IDs The function traverses the lookup table to record alias IDs for the given device match structures and alias stem. This function will be used by serial drivers to check whether a requested alias is allocated or free to use. Signed-off-by: Michal Simek Reviewed-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/base.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/of.h | 10 ++++++++++ 2 files changed, 62 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index 74eaedd5b860..33011b88ed3f 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -16,6 +16,7 @@ #define pr_fmt(fmt) "OF: " fmt +#include #include #include #include @@ -1942,6 +1943,57 @@ int of_alias_get_id(struct device_node *np, const char *stem) } EXPORT_SYMBOL_GPL(of_alias_get_id); +/** + * of_alias_get_alias_list - Get alias list for the given device driver + * @matches: Array of OF device match structures to search in + * @stem: Alias stem of the given device_node + * @bitmap: Bitmap field pointer + * @nbits: Maximum number of alias IDs which can be recorded in the bitmap + * + * The function traverses the lookup table to record alias IDs for the given + * device match structures and alias stem.
+ * + * Return: 0 or -ENOSYS when !CONFIG_OF +*/ +int of_alias_get_alias_list(const struct of_device_id *matches, + const char *stem, unsigned long *bitmap, + unsigned int nbits) +{ + struct alias_prop *app; + + /* Zero the bitmap so that it always starts out clean */ + bitmap_zero(bitmap, nbits); + + mutex_lock(&of_mutex); + pr_debug("%s: Looking for stem: %s\n", __func__, stem); + list_for_each_entry(app, &aliases_lookup, link) { + pr_debug("%s: stem: %s, id: %d\n", + __func__, app->stem, app->id); + + if (strcmp(app->stem, stem) != 0) { + pr_debug("%s: stem comparison failed for %s\n", + __func__, app->stem); + continue; + } + + if (app->id >= nbits) { + pr_debug("%s: ID %d greater than bitmap size %d\n", + __func__, app->id, nbits); + continue; + } + + if (of_match_node(matches, app->np)) { + pr_debug("%s: Allocated ID %d\n", __func__, app->id); + set_bit(app->id, bitmap); + } + /* Alias exists but is not compatible with matches */ + } + mutex_unlock(&of_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(of_alias_get_alias_list); + /** * of_alias_get_highest_id - Get highest alias id for the given stem * @stem: Alias stem to be examined diff --git a/include/linux/of.h b/include/linux/of.h index 99b0ebf49632..d51457b40725 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -392,6 +392,9 @@ extern int of_phandle_iterator_args(struct of_phandle_iterator *it, extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); +extern int of_alias_get_alias_list(const struct of_device_id *matches, + const char *stem, unsigned long *bitmap, + unsigned int nbits); extern int of_machine_is_compatible(const char *compat); @@ -893,6 +896,13 @@ static inline int of_alias_get_highest_id(const char *stem) return -ENOSYS; } +static inline int of_alias_get_alias_list(const struct of_device_id *matches, + const char *stem, unsigned long *bitmap, + unsigned int nbits) +{ + return -ENOSYS; +} + static inline int of_machine_is_compatible(const char *compat) { return 0; -- cgit v1.2.3-71-gd317 From 26683316c92ab2d1b330a3a38548328c9b136ad1 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 10 Sep 2018 19:47:22 +0200 Subject: ARM: OMAP1: ams-delta-fiq: Use linux/platform_data/gpio-omap.h Instead of defining symbols already defined in linux/platform_data/gpio-omap.h, use that header file. Since the header is included from assembler code, prevent the C-only bits from being read in. Signed-off-by: Janusz Krzysztofik Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/ams-delta-fiq-handler.S | 12 +++--------- include/linux/platform_data/gpio-omap.h | 4 ++++ 2 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S index ddc27638ba2a..e3faa0274b56 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S +++ b/arch/arm/mach-omap1/ams-delta-fiq-handler.S @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -24,17 +25,10 @@ #include "soc.h" /* - * GPIO related definitions, copied from arch/arm/plat-omap/gpio.c. - * Unfortunately, those were not placed in a separate header file. + * OMAP1510 GPIO related symbol copied from arch/arm/mach-omap1/gpio15xx.c. + * Unfortunately, it was not placed in a separate header file.
*/ #define OMAP1510_GPIO_BASE 0xFFFCE000 -#define OMAP1510_GPIO_DATA_INPUT 0x00 -#define OMAP1510_GPIO_DATA_OUTPUT 0x04 -#define OMAP1510_GPIO_DIR_CONTROL 0x08 -#define OMAP1510_GPIO_INT_CONTROL 0x0c -#define OMAP1510_GPIO_INT_MASK 0x10 -#define OMAP1510_GPIO_INT_STATUS 0x14 -#define OMAP1510_GPIO_PIN_CONTROL 0x18 /* GPIO register bitmasks */ #define KEYBRD_DATA_MASK (0x1 << AMS_DELTA_GPIO_PIN_KEYBRD_DATA) diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index 8612855691b2..ed071f76b642 100644 --- a/include/linux/platform_data/gpio-omap.h +++ b/include/linux/platform_data/gpio-omap.h @@ -24,8 +24,10 @@ #ifndef __ASM_ARCH_OMAP_GPIO_H #define __ASM_ARCH_OMAP_GPIO_H +#ifndef __ASSEMBLER__ #include #include +#endif #define OMAP1_MPUIO_BASE 0xfffb5000 @@ -157,6 +159,7 @@ #define OMAP_MPUIO(nr) (OMAP_MAX_GPIO_LINES + (nr)) #define OMAP_GPIO_IS_MPUIO(nr) ((nr) >= OMAP_MAX_GPIO_LINES) +#ifndef __ASSEMBLER__ struct omap_gpio_reg_offs { u16 revision; u16 direction; @@ -215,5 +218,6 @@ static inline void omap2_gpio_resume_after_idle(void) { } #endif +#endif /* __ASSEMBLER__ */ #endif -- cgit v1.2.3-71-gd317 From 1cc33161a83d20b5462b1e93f95d3ce6388079ee Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 20 Aug 2018 10:12:47 +0530 Subject: uprobes: Support SDT markers having reference count (semaphore) Userspace Statically Defined Tracepoints[1] are dtrace-style markers inside userspace applications. Applications like PostgreSQL, MySQL, Pthread, Perl, Python, Java, Ruby, Node.js, libvirt, QEMU, glib etc. have these markers embedded in them. These markers are added by developers at important places in the code. Each marker source expands to a single nop instruction in the compiled code, but there may be additional overhead for computing the marker arguments, which expands to a couple of instructions. If that overhead matters, its execution can be skipped with a runtime if() condition when no one is tracing on the marker: if (reference_counter > 0) { Execute marker instructions; } The default value of the reference counter is 0. A tracer has to increment the reference counter before tracing on a marker and decrement it when done with the tracing. Implement the reference counter logic in the core uprobe code. Users will be able to use it from trace_uprobe as well as from kernel modules. The new trace_uprobe definition with a reference counter will now be: :[(ref_ctr_offset)] where ref_ctr_offset is an optional field. For kernel modules, a new variant of uprobe_register() has been introduced: uprobe_register_refctr(inode, offset, ref_ctr_offset, consumer) There is no new variant of uprobe_unregister() because one uprobe is assumed to have only one reference counter. [1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation Note: 'reference counter' is called a 'semaphore' in the original Dtrace (or Systemtap, bcc and even ELF) documentation and code. But the term 'semaphore' is misleading in this context. It is just a counter used to hold the number of tracers tracing on a marker, and it is not really used for any synchronization. So we are calling it a 'reference counter' in kernel / perf code.
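As an illustration (not part of the patch), a minimal kernel-module consumer of the new interface could look like the sketch below. The binary path and the probe/reference-counter offsets are hypothetical placeholders; in practice they would be read from the stapsdt ELF notes of the traced binary.

#include <linux/module.h>
#include <linux/namei.h>
#include <linux/uprobes.h>

static struct path sdt_path;

/* Runs every time the probed SDT marker is hit. */
static int sdt_handler(struct uprobe_consumer *self, struct pt_regs *regs)
{
	pr_info("SDT marker hit\n");
	return 0;
}

static struct uprobe_consumer sdt_consumer = {
	.handler = sdt_handler,
};

static int __init sdt_example_init(void)
{
	int ret;

	ret = kern_path("/usr/bin/example", LOOKUP_FOLLOW, &sdt_path);
	if (ret)
		return ret;

	/* 0x4f30: probe offset, 0x10036: reference counter (semaphore) offset */
	ret = uprobe_register_refctr(d_real_inode(sdt_path.dentry), 0x4f30,
				     0x10036, &sdt_consumer);
	if (ret)
		path_put(&sdt_path);

	return ret;
}

static void __exit sdt_example_exit(void)
{
	uprobe_unregister(d_real_inode(sdt_path.dentry), 0x4f30, &sdt_consumer);
	path_put(&sdt_path);
}

module_init(sdt_example_init);
module_exit(sdt_example_exit);
MODULE_LICENSE("GPL");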
Link: http://lkml.kernel.org/r/20180820044250.11659-2-ravi.bangoria@linux.ibm.com Reviewed-by: Masami Hiramatsu [Only trace_uprobe.c] Reviewed-by: Oleg Nesterov Reviewed-by: Song Liu Tested-by: Song Liu Signed-off-by: Ravi Bangoria Signed-off-by: Steven Rostedt (VMware) --- include/linux/uprobes.h | 5 + kernel/events/uprobes.c | 259 ++++++++++++++++++++++++++++++++++++++++++-- kernel/trace/trace.c | 2 +- kernel/trace/trace_uprobe.c | 38 ++++++- 4 files changed, 293 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index bb9d2084af03..103a48a48872 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -123,6 +123,7 @@ extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); @@ -160,6 +161,10 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { return -ENOSYS; } +static inline int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) +{ + return -ENOSYS; +} static inline int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 3207a4d26849..934feb39f6be 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -73,6 +73,7 @@ struct uprobe { struct uprobe_consumer *consumers; struct inode *inode; /* Also hold a ref to inode */ loff_t offset; + loff_t ref_ctr_offset; unsigned long flags; /* @@ -88,6 +89,15 @@ struct uprobe { struct arch_uprobe arch; }; +struct delayed_uprobe { + struct list_head list; + struct uprobe *uprobe; + struct mm_struct *mm; +}; + +static DEFINE_MUTEX(delayed_uprobe_lock); +static LIST_HEAD(delayed_uprobe_list); + /* * Execute out of line area: anonymous executable mapping installed * by the probed task to execute the copy of the original instruction @@ -282,6 +292,166 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t return 1; } +static struct delayed_uprobe * +delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct delayed_uprobe *du; + + list_for_each_entry(du, &delayed_uprobe_list, list) + if (du->uprobe == uprobe && du->mm == mm) + return du; + return NULL; +} + +static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct delayed_uprobe *du; + + if (delayed_uprobe_check(uprobe, mm)) + return 0; + + du = kzalloc(sizeof(*du), GFP_KERNEL); + if (!du) + return -ENOMEM; + + du->uprobe = uprobe; + du->mm = mm; + list_add(&du->list, &delayed_uprobe_list); + return 0; +} + +static void delayed_uprobe_delete(struct delayed_uprobe *du) +{ + if (WARN_ON(!du)) + return; + list_del(&du->list); + kfree(du); +} + +static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct list_head *pos, *q; + struct delayed_uprobe *du; + + if (!uprobe && 
!mm) + return; + + list_for_each_safe(pos, q, &delayed_uprobe_list) { + du = list_entry(pos, struct delayed_uprobe, list); + + if (uprobe && du->uprobe != uprobe) + continue; + if (mm && du->mm != mm) + continue; + + delayed_uprobe_delete(du); + } +} + +static bool valid_ref_ctr_vma(struct uprobe *uprobe, + struct vm_area_struct *vma) +{ + unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset); + + return uprobe->ref_ctr_offset && + vma->vm_file && + file_inode(vma->vm_file) == uprobe->inode && + (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && + vma->vm_start <= vaddr && + vma->vm_end > vaddr; +} + +static struct vm_area_struct * +find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct vm_area_struct *tmp; + + for (tmp = mm->mmap; tmp; tmp = tmp->vm_next) + if (valid_ref_ctr_vma(uprobe, tmp)) + return tmp; + + return NULL; +} + +static int +__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d) +{ + void *kaddr; + struct page *page; + struct vm_area_struct *vma; + int ret; + short *ptr; + + if (!vaddr || !d) + return -EINVAL; + + ret = get_user_pages_remote(NULL, mm, vaddr, 1, + FOLL_WRITE, &page, &vma, NULL); + if (unlikely(ret <= 0)) { + /* + * We are asking for 1 page. If get_user_pages_remote() fails, + * it may return 0, in that case we have to return error. + */ + return ret == 0 ? -EBUSY : ret; + } + + kaddr = kmap_atomic(page); + ptr = kaddr + (vaddr & ~PAGE_MASK); + + if (unlikely(*ptr + d < 0)) { + pr_warn("ref_ctr going negative. vaddr: 0x%lx, " + "curr val: %d, delta: %d\n", vaddr, *ptr, d); + ret = -EINVAL; + goto out; + } + + *ptr += d; + ret = 0; +out: + kunmap_atomic(kaddr); + put_page(page); + return ret; +} + +static void update_ref_ctr_warn(struct uprobe *uprobe, + struct mm_struct *mm, short d) +{ + pr_warn("ref_ctr %s failed for inode: 0x%lx offset: " + "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n", + d > 0 ? "increment" : "decrement", uprobe->inode->i_ino, + (unsigned long long) uprobe->offset, + (unsigned long long) uprobe->ref_ctr_offset, mm); +} + +static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm, + short d) +{ + struct vm_area_struct *rc_vma; + unsigned long rc_vaddr; + int ret = 0; + + rc_vma = find_ref_ctr_vma(uprobe, mm); + + if (rc_vma) { + rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset); + ret = __update_ref_ctr(mm, rc_vaddr, d); + if (ret) + update_ref_ctr_warn(uprobe, mm, d); + + if (d > 0) + return ret; + } + + mutex_lock(&delayed_uprobe_lock); + if (d > 0) + ret = delayed_uprobe_add(uprobe, mm); + else + delayed_uprobe_remove(uprobe, mm); + mutex_unlock(&delayed_uprobe_lock); + + return ret; +} + /* * NOTE: * Expect the breakpoint instruction to be the smallest size instruction for @@ -302,9 +472,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { + struct uprobe *uprobe; struct page *old_page, *new_page; struct vm_area_struct *vma; - int ret; + int ret, is_register, ref_ctr_updated = 0; + + is_register = is_swbp_insn(&opcode); + uprobe = container_of(auprobe, struct uprobe, arch); retry: /* Read the page with vaddr into memory */ @@ -317,6 +491,15 @@ retry: if (ret <= 0) goto put_old; + /* We are going to replace instruction, update ref_ctr. */ + if (!ref_ctr_updated && uprobe->ref_ctr_offset) { + ret = update_ref_ctr(uprobe, mm, is_register ? 
1 : -1); + if (ret) + goto put_old; + + ref_ctr_updated = 1; + } + ret = anon_vma_prepare(vma); if (ret) goto put_old; @@ -337,6 +520,11 @@ put_old: if (unlikely(ret == -EAGAIN)) goto retry; + + /* Revert back reference counter if instruction update failed. */ + if (ret && is_register && ref_ctr_updated) + update_ref_ctr(uprobe, mm, -1); + return ret; } @@ -378,8 +566,15 @@ static struct uprobe *get_uprobe(struct uprobe *uprobe) static void put_uprobe(struct uprobe *uprobe) { - if (atomic_dec_and_test(&uprobe->ref)) + if (atomic_dec_and_test(&uprobe->ref)) { + /* + * If application munmap(exec_vma) before uprobe_unregister() + * gets called, we don't get a chance to remove uprobe from + * delayed_uprobe_list from remove_breakpoint(). Do it here. + */ + delayed_uprobe_remove(uprobe, NULL); kfree(uprobe); + } } static int match_uprobe(struct uprobe *l, struct uprobe *r) @@ -484,7 +679,8 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe) return u; } -static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) +static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, + loff_t ref_ctr_offset) { struct uprobe *uprobe, *cur_uprobe; @@ -494,6 +690,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) uprobe->inode = inode; uprobe->offset = offset; + uprobe->ref_ctr_offset = ref_ctr_offset; init_rwsem(&uprobe->register_rwsem); init_rwsem(&uprobe->consumer_rwsem); @@ -895,7 +1092,7 @@ EXPORT_SYMBOL_GPL(uprobe_unregister); * else return 0 (success) */ static int __uprobe_register(struct inode *inode, loff_t offset, - struct uprobe_consumer *uc) + loff_t ref_ctr_offset, struct uprobe_consumer *uc) { struct uprobe *uprobe; int ret; @@ -912,7 +1109,7 @@ static int __uprobe_register(struct inode *inode, loff_t offset, return -EINVAL; retry: - uprobe = alloc_uprobe(inode, offset); + uprobe = alloc_uprobe(inode, offset, ref_ctr_offset); if (!uprobe) return -ENOMEM; /* @@ -938,10 +1135,17 @@ static int __uprobe_register(struct inode *inode, loff_t offset, int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { - return __uprobe_register(inode, offset, uc); + return __uprobe_register(inode, offset, 0, uc); } EXPORT_SYMBOL_GPL(uprobe_register); +int uprobe_register_refctr(struct inode *inode, loff_t offset, + loff_t ref_ctr_offset, struct uprobe_consumer *uc) +{ + return __uprobe_register(inode, offset, ref_ctr_offset, uc); +} +EXPORT_SYMBOL_GPL(uprobe_register_refctr); + /* * uprobe_apply - unregister an already registered probe. * @inode: the file in which the probe has to be removed. @@ -1060,6 +1264,35 @@ static void build_probe_list(struct inode *inode, spin_unlock(&uprobes_treelock); } +/* @vma contains reference counter, not the probed instruction. */ +static int delayed_ref_ctr_inc(struct vm_area_struct *vma) +{ + struct list_head *pos, *q; + struct delayed_uprobe *du; + unsigned long vaddr; + int ret = 0, err = 0; + + mutex_lock(&delayed_uprobe_lock); + list_for_each_safe(pos, q, &delayed_uprobe_list) { + du = list_entry(pos, struct delayed_uprobe, list); + + if (du->mm != vma->vm_mm || + !valid_ref_ctr_vma(du->uprobe, vma)) + continue; + + vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset); + ret = __update_ref_ctr(vma->vm_mm, vaddr, 1); + if (ret) { + update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1); + if (!err) + err = ret; + } + delayed_uprobe_delete(du); + } + mutex_unlock(&delayed_uprobe_lock); + return err; +} + /* * Called from mmap_region/vma_adjust with mm->mmap_sem acquired. 
* @@ -1072,7 +1305,15 @@ int uprobe_mmap(struct vm_area_struct *vma) struct uprobe *uprobe, *u; struct inode *inode; - if (no_uprobe_events() || !valid_vma(vma, true)) + if (no_uprobe_events()) + return 0; + + if (vma->vm_file && + (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE && + test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags)) + delayed_ref_ctr_inc(vma); + + if (!valid_vma(vma, true)) return 0; inode = file_inode(vma->vm_file); @@ -1246,6 +1487,10 @@ void uprobe_clear_state(struct mm_struct *mm) { struct xol_area *area = mm->uprobes_state.xol_area; + mutex_lock(&delayed_uprobe_lock); + delayed_uprobe_remove(NULL, mm); + mutex_unlock(&delayed_uprobe_lock); + if (!area) return; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bf6f1d70484d..147be8523560 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4621,7 +4621,7 @@ static const char readme_msg[] = "place (kretprobe): [:][+]|\n" #endif #ifdef CONFIG_UPROBE_EVENTS - "\t place: :\n" + " place (uprobe): :[(ref_ctr_offset)]\n" #endif "\t args: =fetcharg[:type]\n" "\t fetcharg: %, @
, @[+|-],\n" diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index e696667da29a..7b85172beab6 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -47,6 +47,7 @@ struct trace_uprobe { struct inode *inode; char *filename; unsigned long offset; + unsigned long ref_ctr_offset; unsigned long nhit; struct trace_probe tp; }; @@ -352,10 +353,10 @@ end: static int create_trace_uprobe(int argc, char **argv) { struct trace_uprobe *tu; - char *arg, *event, *group, *filename; + char *arg, *event, *group, *filename, *rctr, *rctr_end; char buf[MAX_EVENT_NAME_LEN]; struct path path; - unsigned long offset; + unsigned long offset, ref_ctr_offset; bool is_delete, is_return; int i, ret; @@ -364,6 +365,7 @@ static int create_trace_uprobe(int argc, char **argv) is_return = false; event = NULL; group = NULL; + ref_ctr_offset = 0; /* argc must be >= 1 */ if (argv[0][0] == '-') @@ -438,6 +440,26 @@ static int create_trace_uprobe(int argc, char **argv) goto fail_address_parse; } + /* Parse reference counter offset if specified. */ + rctr = strchr(arg, '('); + if (rctr) { + rctr_end = strchr(rctr, ')'); + if (rctr > rctr_end || *(rctr_end + 1) != 0) { + ret = -EINVAL; + pr_info("Invalid reference counter offset.\n"); + goto fail_address_parse; + } + + *rctr++ = '\0'; + *rctr_end = '\0'; + ret = kstrtoul(rctr, 0, &ref_ctr_offset); + if (ret) { + pr_info("Invalid reference counter offset.\n"); + goto fail_address_parse; + } + } + + /* Parse uprobe offset. */ ret = kstrtoul(arg, 0, &offset); if (ret) goto fail_address_parse; @@ -472,6 +494,7 @@ static int create_trace_uprobe(int argc, char **argv) goto fail_address_parse; } tu->offset = offset; + tu->ref_ctr_offset = ref_ctr_offset; tu->path = path; tu->filename = kstrdup(filename, GFP_KERNEL); @@ -590,6 +613,9 @@ static int probes_seq_show(struct seq_file *m, void *v) trace_event_name(&tu->tp.call), tu->filename, (int)(sizeof(void *) * 2), tu->offset); + if (tu->ref_ctr_offset) + seq_printf(m, "(0x%lx)", tu->ref_ctr_offset); + for (i = 0; i < tu->tp.nr_args; i++) seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm); @@ -905,7 +931,13 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, tu->consumer.filter = filter; tu->inode = d_real_inode(tu->path.dentry); - ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + if (tu->ref_ctr_offset) { + ret = uprobe_register_refctr(tu->inode, tu->offset, + tu->ref_ctr_offset, &tu->consumer); + } else { + ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + } + if (ret) goto err_buffer; -- cgit v1.2.3-71-gd317 From 463659a08d7999d5461fa45b35b17686189a70ca Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 13 Sep 2018 07:47:29 -0400 Subject: media: hdmi.h: rename ADOBE_RGB to OPRGB and ADOBE_YCC to OPYCC These names have been renamed in the CTA-861 standard due to trademark issues. Replace them here as well so they are in sync with the standard. 
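For users of the infoframe API the rename is purely mechanical; a hypothetical driver snippet selecting opRGB colorimetry with the new names could read as follows (sketch only, the frame is assumed to have been initialized elsewhere, e.g. with hdmi_avi_infoframe_init()):

#include <linux/hdmi.h>

static void example_set_oprgb(struct hdmi_avi_infoframe *frame)
{
	/* opRGB is signalled through the extended colorimetry field */
	frame->colorimetry = HDMI_COLORIMETRY_EXTENDED;
	frame->extended_colorimetry = HDMI_EXTENDED_COLORIMETRY_OPRGB;
}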
Signed-off-by: Hans Verkuil Cc: stable@vger.kernel.org Acked-by: Daniel Vetter Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/adv7511.c | 4 ++-- drivers/media/v4l2-core/v4l2-dv-timings.c | 4 ++-- drivers/video/hdmi.c | 8 ++++---- include/linux/hdmi.h | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/i2c/adv7511.c b/drivers/media/i2c/adv7511.c index a1f73d998495..f3899cc84e27 100644 --- a/drivers/media/i2c/adv7511.c +++ b/drivers/media/i2c/adv7511.c @@ -1357,8 +1357,8 @@ static int adv7511_set_fmt(struct v4l2_subdev *sd, switch (format->format.colorspace) { case V4L2_COLORSPACE_OPRGB: c = HDMI_COLORIMETRY_EXTENDED; - ec = y ? HDMI_EXTENDED_COLORIMETRY_ADOBE_YCC_601 : - HDMI_EXTENDED_COLORIMETRY_ADOBE_RGB; + ec = y ? HDMI_EXTENDED_COLORIMETRY_OPYCC_601 : + HDMI_EXTENDED_COLORIMETRY_OPRGB; break; case V4L2_COLORSPACE_SMPTE170M: c = y ? HDMI_COLORIMETRY_ITU_601 : HDMI_COLORIMETRY_NONE; diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index 19aabd1fcd2b..4f23e939ead0 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -877,7 +877,7 @@ v4l2_hdmi_rx_colorimetry(const struct hdmi_avi_infoframe *avi, switch (avi->colorimetry) { case HDMI_COLORIMETRY_EXTENDED: switch (avi->extended_colorimetry) { - case HDMI_EXTENDED_COLORIMETRY_ADOBE_RGB: + case HDMI_EXTENDED_COLORIMETRY_OPRGB: c.colorspace = V4L2_COLORSPACE_OPRGB; c.xfer_func = V4L2_XFER_FUNC_OPRGB; break; @@ -948,7 +948,7 @@ v4l2_hdmi_rx_colorimetry(const struct hdmi_avi_infoframe *avi, c.ycbcr_enc = V4L2_YCBCR_ENC_601; c.xfer_func = V4L2_XFER_FUNC_SRGB; break; - case HDMI_EXTENDED_COLORIMETRY_ADOBE_YCC_601: + case HDMI_EXTENDED_COLORIMETRY_OPYCC_601: c.colorspace = V4L2_COLORSPACE_OPRGB; c.ycbcr_enc = V4L2_YCBCR_ENC_601; c.xfer_func = V4L2_XFER_FUNC_OPRGB; diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c index 38716eb50408..8a3e8f61b991 100644 --- a/drivers/video/hdmi.c +++ b/drivers/video/hdmi.c @@ -592,10 +592,10 @@ hdmi_extended_colorimetry_get_name(enum hdmi_extended_colorimetry ext_col) return "xvYCC 709"; case HDMI_EXTENDED_COLORIMETRY_S_YCC_601: return "sYCC 601"; - case HDMI_EXTENDED_COLORIMETRY_ADOBE_YCC_601: - return "Adobe YCC 601"; - case HDMI_EXTENDED_COLORIMETRY_ADOBE_RGB: - return "Adobe RGB"; + case HDMI_EXTENDED_COLORIMETRY_OPYCC_601: + return "opYCC 601"; + case HDMI_EXTENDED_COLORIMETRY_OPRGB: + return "opRGB"; case HDMI_EXTENDED_COLORIMETRY_BT2020_CONST_LUM: return "BT.2020 Constant Luminance"; case HDMI_EXTENDED_COLORIMETRY_BT2020: diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index d271ff23984f..4f3febc0f971 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -101,8 +101,8 @@ enum hdmi_extended_colorimetry { HDMI_EXTENDED_COLORIMETRY_XV_YCC_601, HDMI_EXTENDED_COLORIMETRY_XV_YCC_709, HDMI_EXTENDED_COLORIMETRY_S_YCC_601, - HDMI_EXTENDED_COLORIMETRY_ADOBE_YCC_601, - HDMI_EXTENDED_COLORIMETRY_ADOBE_RGB, + HDMI_EXTENDED_COLORIMETRY_OPYCC_601, + HDMI_EXTENDED_COLORIMETRY_OPRGB, /* The following EC values are only defined in CEA-861-F. */ HDMI_EXTENDED_COLORIMETRY_BT2020_CONST_LUM, -- cgit v1.2.3-71-gd317 From 40d9f9124822013331367fb4ab59936c3ac944d6 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 24 Sep 2018 12:16:54 -0700 Subject: bus: ti-sysc: Defer suspend as needed We don't care when we suspend but some of our children do.
In order to avoid tagging various modules with SYSC_QUIRK_RESOURCE_PROVIDER, let's do it automatically by tagging modules that are busy on suspend for noirq suspend. This way we can limit module detection to builds with DEBUG defined. Note that we still need to keep the SYSC_QUIRK_LEGACY_IDLE flag around so that our legacy single-child devices that set pm_runtime_irq_safe() can manage the interconnect target module themselves. Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 118 ++++++++++++++++++---------------- include/linux/platform_data/ti-sysc.h | 1 - 2 files changed, 61 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index c9bac9dc4637..087a67617eef 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -87,6 +87,7 @@ struct sysc { u32 revision; bool enabled; bool needs_resume; + unsigned int noirq_suspend:1; bool child_needs_resume; struct delayed_work idle_work; }; @@ -712,19 +713,25 @@ static int sysc_suspend(struct device *dev) ddata = dev_get_drvdata(dev); - if (ddata->cfg.quirks & (SYSC_QUIRK_RESOURCE_PROVIDER | - SYSC_QUIRK_LEGACY_IDLE)) + if (ddata->cfg.quirks & SYSC_QUIRK_LEGACY_IDLE) return 0; - if (!ddata->enabled) + if (!ddata->enabled || ddata->noirq_suspend) return 0; dev_dbg(ddata->dev, "%s %s\n", __func__, ddata->name ? ddata->name : ""); error = pm_runtime_put_sync_suspend(dev); - if (error < 0) { - dev_warn(ddata->dev, "%s not idle %i %s\n", + if (error == -EBUSY) { + dev_dbg(ddata->dev, "%s busy, tagging for noirq suspend %s\n", + __func__, ddata->name ? ddata->name : ""); + + ddata->noirq_suspend = true; + + return 0; + } else if (error < 0) { + dev_warn(ddata->dev, "%s cannot suspend %i %s\n", __func__, error, ddata->name ? ddata->name : ""); @@ -743,73 +750,86 @@ static int sysc_resume(struct device *dev) ddata = dev_get_drvdata(dev); - if (ddata->cfg.quirks & (SYSC_QUIRK_RESOURCE_PROVIDER | - SYSC_QUIRK_LEGACY_IDLE)) + if (ddata->cfg.quirks & SYSC_QUIRK_LEGACY_IDLE) return 0; - if (ddata->needs_resume) { - dev_dbg(ddata->dev, "%s %s\n", __func__, - ddata->name ? ddata->name : ""); + if (!ddata->needs_resume || ddata->noirq_suspend) + return 0; - error = pm_runtime_get_sync(dev); - if (error < 0) { - dev_err(ddata->dev, "%s error %i %s\n", - __func__, error, - ddata->name ? ddata->name : ""); + dev_dbg(ddata->dev, "%s %s\n", __func__, + ddata->name ? ddata->name : ""); - return error; - } + error = pm_runtime_get_sync(dev); + if (error < 0) { + dev_err(ddata->dev, "%s error %i %s\n", + __func__, error, + ddata->name ? ddata->name : ""); - ddata->needs_resume = false; + return error; } + ddata->needs_resume = false; + return 0; } static int sysc_noirq_suspend(struct device *dev) { struct sysc *ddata; + int error; ddata = dev_get_drvdata(dev); if (ddata->cfg.quirks & SYSC_QUIRK_LEGACY_IDLE) return 0; - if (!(ddata->cfg.quirks & SYSC_QUIRK_RESOURCE_PROVIDER)) - return 0; - - if (!ddata->enabled) + if (!ddata->enabled || !ddata->noirq_suspend) return 0; dev_dbg(ddata->dev, "%s %s\n", __func__, ddata->name ? ddata->name : ""); + error = sysc_runtime_suspend(dev); + if (error) { + dev_warn(ddata->dev, "%s busy %i %s\n", + __func__, error, ddata->name ?
ddata->name : ""); + + return 0; + } + ddata->needs_resume = true; - return sysc_runtime_suspend(dev); + return 0; } static int sysc_noirq_resume(struct device *dev) { struct sysc *ddata; + int error; ddata = dev_get_drvdata(dev); if (ddata->cfg.quirks & SYSC_QUIRK_LEGACY_IDLE) return 0; - if (!(ddata->cfg.quirks & SYSC_QUIRK_RESOURCE_PROVIDER)) + if (!ddata->needs_resume || !ddata->noirq_suspend) return 0; - if (ddata->needs_resume) { - dev_dbg(ddata->dev, "%s %s\n", __func__, - ddata->name ? ddata->name : ""); + dev_dbg(ddata->dev, "%s %s\n", __func__, + ddata->name ? ddata->name : ""); - ddata->needs_resume = false; + error = sysc_runtime_resume(dev); + if (error) { + dev_warn(ddata->dev, "%s cannot resume %i %s\n", + __func__, error, + ddata->name ? ddata->name : ""); - return sysc_runtime_resume(dev); + return error; } + /* Maybe also reconsider clearing noirq_suspend at some point */ + ddata->needs_resume = false; + return 0; } #endif @@ -848,26 +868,6 @@ struct sysc_revision_quirk { } static const struct sysc_revision_quirk sysc_revision_quirks[] = { - /* These need to use noirq_suspend */ - SYSC_QUIRK("control", 0, 0, 0x10, -1, 0x40000900, 0xffffffff, - SYSC_QUIRK_RESOURCE_PROVIDER), - SYSC_QUIRK("i2c", 0, 0, 0x10, 0x90, 0x5040000a, 0xffffffff, - SYSC_QUIRK_RESOURCE_PROVIDER), - SYSC_QUIRK("mcspi", 0, 0, 0x10, -1, 0x40300a0b, 0xffffffff, - SYSC_QUIRK_RESOURCE_PROVIDER), - SYSC_QUIRK("prcm", 0, 0, -1, -1, 0x40000100, 0xffffffff, - SYSC_QUIRK_RESOURCE_PROVIDER), - SYSC_QUIRK("ocp2scp", 0, 0, 0x10, 0x14, 0x50060005, 0xffffffff, - SYSC_QUIRK_RESOURCE_PROVIDER), - SYSC_QUIRK("padconf", 0, 0, 0x10, -1, 0x4fff0800, 0xffffffff, - SYSC_QUIRK_RESOURCE_PROVIDER), - SYSC_QUIRK("scm", 0, 0, 0x10, -1, 0x40000900, 0xffffffff, - SYSC_QUIRK_RESOURCE_PROVIDER), - SYSC_QUIRK("scrm", 0, 0, -1, -1, 0x00000010, 0xffffffff, - SYSC_QUIRK_RESOURCE_PROVIDER), - SYSC_QUIRK("sdma", 0, 0, 0x2c, 0x28, 0x00010900, 0xffffffff, - SYSC_QUIRK_RESOURCE_PROVIDER), - /* These drivers need to be fixed to not use pm_runtime_irq_safe() */ SYSC_QUIRK("gpio", 0, 0, 0x10, 0x114, 0x50600801, 0xffffffff, SYSC_QUIRK_LEGACY_IDLE | SYSC_QUIRK_OPT_CLKS_IN_RESET), @@ -892,23 +892,25 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { SYSC_QUIRK("uart", 0, 0x50, 0x54, 0x58, 0x50411e03, 0xffffffff, SYSC_QUIRK_LEGACY_IDLE), - /* These devices don't yet suspend properly without legacy setting */ - SYSC_QUIRK("sdio", 0, 0, 0x10, -1, 0x40202301, 0xffffffff, - SYSC_QUIRK_LEGACY_IDLE), - SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xffffffff, - SYSC_QUIRK_LEGACY_IDLE), - SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0d00, 0xffffffff, - SYSC_QUIRK_LEGACY_IDLE), - #ifdef DEBUG SYSC_QUIRK("aess", 0, 0, 0x10, -1, 0x40000000, 0xffffffff, 0), + SYSC_QUIRK("control", 0, 0, 0x10, -1, 0x40000900, 0xffffffff, 0), SYSC_QUIRK("gpu", 0, 0x1fc00, 0x1fc10, -1, 0, 0, 0), SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x00000006, 0xffffffff, 0), SYSC_QUIRK("hsi", 0, 0, 0x10, 0x14, 0x50043101, 0xffffffff, 0), SYSC_QUIRK("iss", 0, 0, 0x10, -1, 0x40000101, 0xffffffff, 0), + SYSC_QUIRK("i2c", 0, 0, 0x10, 0x90, 0x5040000a, 0xffffffff, 0), SYSC_QUIRK("mcasp", 0, 0, 0x4, -1, 0x44306302, 0xffffffff, 0), SYSC_QUIRK("mcbsp", 0, -1, 0x8c, -1, 0, 0, 0), + SYSC_QUIRK("mcspi", 0, 0, 0x10, -1, 0x40300a0b, 0xffffffff, 0), SYSC_QUIRK("mailbox", 0, 0, 0x10, -1, 0x00000400, 0xffffffff, 0), + SYSC_QUIRK("ocp2scp", 0, 0, 0x10, 0x14, 0x50060005, 0xffffffff, 0), + SYSC_QUIRK("padconf", 0, 0, 0x10, -1, 0x4fff0800, 0xffffffff, 0), + SYSC_QUIRK("prcm", 0, 0, -1, -1, 
0x40000100, 0xffffffff, 0), + SYSC_QUIRK("scm", 0, 0, 0x10, -1, 0x40000900, 0xffffffff, 0), + SYSC_QUIRK("scrm", 0, 0, -1, -1, 0x00000010, 0xffffffff, 0), + SYSC_QUIRK("sdio", 0, 0, 0x10, -1, 0x40202301, 0xffffffff, 0), + SYSC_QUIRK("sdma", 0, 0, 0x2c, 0x28, 0x00010900, 0xffffffff, 0), SYSC_QUIRK("slimbus", 0, 0, 0x10, -1, 0x40000902, 0xffffffff, 0), SYSC_QUIRK("slimbus", 0, 0, 0x10, -1, 0x40002903, 0xffffffff, 0), SYSC_QUIRK("spinlock", 0, 0, 0x10, -1, 0x50020000, 0xffffffff, 0), @@ -916,6 +918,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { SYSC_QUIRK("usb_host_hs", 0, 0, 0x10, 0x14, 0x50700100, 0xffffffff, 0), SYSC_QUIRK("usb_otg_hs", 0, 0x400, 0x404, 0x408, 0x00000050, 0xffffffff, 0), + SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xffffffff, 0), + SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0d00, 0xffffffff, 0), #endif }; diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 2efa3470a451..1ea3aab972b4 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -46,7 +46,6 @@ struct sysc_regbits { s8 emufree_shift; }; -#define SYSC_QUIRK_RESOURCE_PROVIDER BIT(9) #define SYSC_QUIRK_LEGACY_IDLE BIT(8) #define SYSC_QUIRK_RESET_STATUS BIT(7) #define SYSC_QUIRK_NO_IDLE_ON_INIT BIT(6) -- cgit v1.2.3-71-gd317 From 76582671eb5d006a78420776cc5f73195b867e81 Mon Sep 17 00:00:00 2001 From: Rajan Vaja Date: Wed, 12 Sep 2018 12:38:36 -0700 Subject: firmware: xilinx: Add Zynqmp firmware driver This patch is adding communication layer with firmware. Firmware driver provides an interface to firmware APIs. Interface APIs can be used by any driver to communicate to PMUFW(Platform Management Unit). All requests go through ATF. Signed-off-by: Rajan Vaja Signed-off-by: Jolly Shah Signed-off-by: Michal Simek --- arch/arm64/Kconfig.platforms | 1 + drivers/firmware/Kconfig | 1 + drivers/firmware/Makefile | 1 + drivers/firmware/xilinx/Kconfig | 16 ++ drivers/firmware/xilinx/Makefile | 4 + drivers/firmware/xilinx/zynqmp.c | 322 +++++++++++++++++++++++++++++++++++ include/linux/firmware/xlnx-zynqmp.h | 63 +++++++ 7 files changed, 408 insertions(+) create mode 100644 drivers/firmware/xilinx/Kconfig create mode 100644 drivers/firmware/xilinx/Makefile create mode 100644 drivers/firmware/xilinx/zynqmp.c create mode 100644 include/linux/firmware/xlnx-zynqmp.h (limited to 'include/linux') diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 393d2b524284..23f3928743c9 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -301,6 +301,7 @@ config ARCH_ZX config ARCH_ZYNQMP bool "Xilinx ZynqMP Family" + select ZYNQMP_FIRMWARE help This enables support for Xilinx ZynqMP Family diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 6e83880046d7..36344cba764e 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -291,5 +291,6 @@ source "drivers/firmware/google/Kconfig" source "drivers/firmware/efi/Kconfig" source "drivers/firmware/meson/Kconfig" source "drivers/firmware/tegra/Kconfig" +source "drivers/firmware/xilinx/Kconfig" endmenu diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index e18a041cfc53..99583d3df52f 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -32,3 +32,4 @@ obj-$(CONFIG_GOOGLE_FIRMWARE) += google/ obj-$(CONFIG_EFI) += efi/ obj-$(CONFIG_UEFI_CPER) += efi/ obj-y += tegra/ +obj-y += xilinx/ diff --git a/drivers/firmware/xilinx/Kconfig b/drivers/firmware/xilinx/Kconfig new file 
mode 100644 index 000000000000..64d976e31010 --- /dev/null +++ b/drivers/firmware/xilinx/Kconfig @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +# Kconfig for Xilinx firmwares + +menu "Zynq MPSoC Firmware Drivers" + depends on ARCH_ZYNQMP + +config ZYNQMP_FIRMWARE + bool "Enable Xilinx Zynq MPSoC firmware interface" + help + Firmware interface driver is used by different + drivers to communicate with the firmware for + various platform management services. + Say yes to enable ZynqMP firmware interface driver. + If in doubt, say N. + +endmenu diff --git a/drivers/firmware/xilinx/Makefile b/drivers/firmware/xilinx/Makefile new file mode 100644 index 000000000000..29f7bf2fd094 --- /dev/null +++ b/drivers/firmware/xilinx/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for Xilinx firmwares + +obj-$(CONFIG_ZYNQMP_FIRMWARE) += zynqmp.o diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c new file mode 100644 index 000000000000..5bf64acc7b44 --- /dev/null +++ b/drivers/firmware/xilinx/zynqmp.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Xilinx Zynq MPSoC Firmware layer + * + * Copyright (C) 2014-2018 Xilinx, Inc. + * + * Michal Simek + * Davorin Mista + * Jolly Shah + * Rajan Vaja + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/** + * zynqmp_pm_ret_code() - Convert PMU-FW error codes to Linux error codes + * @ret_status: PMUFW return code + * + * Return: corresponding Linux error code + */ +static int zynqmp_pm_ret_code(u32 ret_status) +{ + switch (ret_status) { + case XST_PM_SUCCESS: + case XST_PM_DOUBLE_REQ: + return 0; + case XST_PM_NO_ACCESS: + return -EACCES; + case XST_PM_ABORT_SUSPEND: + return -ECANCELED; + case XST_PM_INTERNAL: + case XST_PM_CONFLICT: + case XST_PM_INVALID_NODE: + default: + return -EINVAL; + } +} + +static noinline int do_fw_call_fail(u64 arg0, u64 arg1, u64 arg2, + u32 *ret_payload) +{ + return -ENODEV; +} + +/* + * PM function call wrapper + * Invoke do_fw_call_smc or do_fw_call_hvc, depending on the configuration + */ +static int (*do_fw_call)(u64, u64, u64, u32 *ret_payload) = do_fw_call_fail; + +/** + * do_fw_call_smc() - Call system-level platform management layer (SMC) + * @arg0: Argument 0 to SMC call + * @arg1: Argument 1 to SMC call + * @arg2: Argument 2 to SMC call + * @ret_payload: Returned value array + * + * Invoke platform management function via SMC call (no hypervisor present). + * + * Return: Returns status, either success or error+reason + */ +static noinline int do_fw_call_smc(u64 arg0, u64 arg1, u64 arg2, + u32 *ret_payload) +{ + struct arm_smccc_res res; + + arm_smccc_smc(arg0, arg1, arg2, 0, 0, 0, 0, 0, &res); + + if (ret_payload) { + ret_payload[0] = lower_32_bits(res.a0); + ret_payload[1] = upper_32_bits(res.a0); + ret_payload[2] = lower_32_bits(res.a1); + ret_payload[3] = upper_32_bits(res.a1); + } + + return zynqmp_pm_ret_code((enum pm_ret_status)res.a0); +} + +/** + * do_fw_call_hvc() - Call system-level platform management layer (HVC) + * @arg0: Argument 0 to HVC call + * @arg1: Argument 1 to HVC call + * @arg2: Argument 2 to HVC call + * @ret_payload: Returned value array + * + * Invoke platform management function via HVC + * HVC-based for communication through hypervisor + * (no direct communication with ATF). 
+ * + * Return: Returns status, either success or error+reason + */ +static noinline int do_fw_call_hvc(u64 arg0, u64 arg1, u64 arg2, + u32 *ret_payload) +{ + struct arm_smccc_res res; + + arm_smccc_hvc(arg0, arg1, arg2, 0, 0, 0, 0, 0, &res); + + if (ret_payload) { + ret_payload[0] = lower_32_bits(res.a0); + ret_payload[1] = upper_32_bits(res.a0); + ret_payload[2] = lower_32_bits(res.a1); + ret_payload[3] = upper_32_bits(res.a1); + } + + return zynqmp_pm_ret_code((enum pm_ret_status)res.a0); +} + +/** + * zynqmp_pm_invoke_fn() - Invoke the system-level platform management layer + * caller function depending on the configuration + * @pm_api_id: Requested PM-API call + * @arg0: Argument 0 to requested PM-API call + * @arg1: Argument 1 to requested PM-API call + * @arg2: Argument 2 to requested PM-API call + * @arg3: Argument 3 to requested PM-API call + * @ret_payload: Returned value array + * + * Invoke platform management function for SMC or HVC call, depending on + * configuration. + * Following SMC Calling Convention (SMCCC) for SMC64: + * PM Function Identifier, + * PM_SIP_SVC + PM_API_ID = + * ((SMC_TYPE_FAST << FUNCID_TYPE_SHIFT) + * ((SMC_64) << FUNCID_CC_SHIFT) + * ((SIP_START) << FUNCID_OEN_SHIFT) + * ((PM_API_ID) & FUNCID_NUM_MASK)) + * + * PM_SIP_SVC - Registered ZynqMP SIP Service Call. + * PM_API_ID - Platform Management API ID. + * + * Return: Returns status, either success or error+reason + */ +int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1, + u32 arg2, u32 arg3, u32 *ret_payload) +{ + /* + * Added SIP service call Function Identifier + * Make sure to stay in x0 register + */ + u64 smc_arg[4]; + + smc_arg[0] = PM_SIP_SVC | pm_api_id; + smc_arg[1] = ((u64)arg1 << 32) | arg0; + smc_arg[2] = ((u64)arg3 << 32) | arg2; + + return do_fw_call(smc_arg[0], smc_arg[1], smc_arg[2], ret_payload); +} + +static u32 pm_api_version; +static u32 pm_tz_version; + +/** + * zynqmp_pm_get_api_version() - Get version number of PMU PM firmware + * @version: Returned version value + * + * Return: Returns status, either success or error+reason + */ +static int zynqmp_pm_get_api_version(u32 *version) +{ + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + if (!version) + return -EINVAL; + + /* Check if the PM API version has already been verified */ + if (pm_api_version > 0) { + *version = pm_api_version; + return 0; + } + ret = zynqmp_pm_invoke_fn(PM_GET_API_VERSION, 0, 0, 0, 0, ret_payload); + *version = ret_payload[1]; + + return ret; +} + +/** + * zynqmp_pm_get_trustzone_version() - Get secure trustzone firmware version + * @version: Returned version value + * + * Return: Returns status, either success or error+reason + */ +static int zynqmp_pm_get_trustzone_version(u32 *version) +{ + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + if (!version) + return -EINVAL; + + /* Check if the PM trustzone version has already been verified */ + if (pm_tz_version > 0) { + *version = pm_tz_version; + return 0; + } + ret = zynqmp_pm_invoke_fn(PM_GET_TRUSTZONE_VERSION, 0, 0, + 0, 0, ret_payload); + *version = ret_payload[1]; + + return ret; +} + +/** + * get_set_conduit_method() - Choose SMC or HVC based communication + * @np: Pointer to the device_node structure + * + * Use SMC or HVC-based functions to communicate with EL2/EL3.
+ * + * Return: Returns 0 on success or error code + */ +static int get_set_conduit_method(struct device_node *np) +{ + const char *method; + + if (of_property_read_string(np, "method", &method)) { + pr_warn("%s missing \"method\" property\n", __func__); + return -ENXIO; + } + + if (!strcmp("hvc", method)) { + do_fw_call = do_fw_call_hvc; + } else if (!strcmp("smc", method)) { + do_fw_call = do_fw_call_smc; + } else { + pr_warn("%s Invalid \"method\" property: %s\n", + __func__, method); + return -EINVAL; + } + + return 0; +} + +static const struct zynqmp_eemi_ops eemi_ops = { + .get_api_version = zynqmp_pm_get_api_version, +}; + +/** + * zynqmp_pm_get_eemi_ops - Get eemi ops functions + * + * Return: Pointer of eemi_ops structure + */ +const struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void) +{ + return &eemi_ops; +} +EXPORT_SYMBOL_GPL(zynqmp_pm_get_eemi_ops); + +static int zynqmp_firmware_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np; + int ret; + + np = of_find_compatible_node(NULL, NULL, "xlnx,zynqmp"); + if (!np) + return 0; + of_node_put(np); + + ret = get_set_conduit_method(dev->of_node); + if (ret) + return ret; + + /* Check PM API version number */ + zynqmp_pm_get_api_version(&pm_api_version); + if (pm_api_version < ZYNQMP_PM_VERSION) { + panic("%s Platform Management API version error. Expected: v%d.%d - Found: v%d.%d\n", + __func__, + ZYNQMP_PM_VERSION_MAJOR, ZYNQMP_PM_VERSION_MINOR, + pm_api_version >> 16, pm_api_version & 0xFFFF); + } + + pr_info("%s Platform Management API v%d.%d\n", __func__, + pm_api_version >> 16, pm_api_version & 0xFFFF); + + /* Check trustzone version number */ + ret = zynqmp_pm_get_trustzone_version(&pm_tz_version); + if (ret) + panic("Legacy trustzone found without version support\n"); + + if (pm_tz_version < ZYNQMP_TZ_VERSION) + panic("%s Trustzone version error. 
Expected: v%d.%d - Found: v%d.%d\n", + __func__, + ZYNQMP_TZ_VERSION_MAJOR, ZYNQMP_TZ_VERSION_MINOR, + pm_tz_version >> 16, pm_tz_version & 0xFFFF); + + pr_info("%s Trustzone version v%d.%d\n", __func__, + pm_tz_version >> 16, pm_tz_version & 0xFFFF); + + return of_platform_populate(dev->of_node, NULL, NULL, dev); +} + +static int zynqmp_firmware_remove(struct platform_device *pdev) +{ + return 0; +} + +static const struct of_device_id zynqmp_firmware_of_match[] = { + {.compatible = "xlnx,zynqmp-firmware"}, + {}, +}; +MODULE_DEVICE_TABLE(of, zynqmp_firmware_of_match); + +static struct platform_driver zynqmp_firmware_driver = { + .driver = { + .name = "zynqmp_firmware", + .of_match_table = zynqmp_firmware_of_match, + }, + .probe = zynqmp_firmware_probe, + .remove = zynqmp_firmware_remove, +}; +module_platform_driver(zynqmp_firmware_driver); diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h new file mode 100644 index 000000000000..cb63bedf3dcf --- /dev/null +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Xilinx Zynq MPSoC Firmware layer + * + * Copyright (C) 2014-2018 Xilinx + * + * Michal Simek + * Davorin Mista + * Jolly Shah + * Rajan Vaja + */ + +#ifndef __FIRMWARE_ZYNQMP_H__ +#define __FIRMWARE_ZYNQMP_H__ + +#define ZYNQMP_PM_VERSION_MAJOR 1 +#define ZYNQMP_PM_VERSION_MINOR 0 + +#define ZYNQMP_PM_VERSION ((ZYNQMP_PM_VERSION_MAJOR << 16) | \ + ZYNQMP_PM_VERSION_MINOR) + +#define ZYNQMP_TZ_VERSION_MAJOR 1 +#define ZYNQMP_TZ_VERSION_MINOR 0 + +#define ZYNQMP_TZ_VERSION ((ZYNQMP_TZ_VERSION_MAJOR << 16) | \ + ZYNQMP_TZ_VERSION_MINOR) + +/* SMC SIP service Call Function Identifier Prefix */ +#define PM_SIP_SVC 0xC2000000 +#define PM_GET_TRUSTZONE_VERSION 0xa03 + +/* Number of 32bits values in payload */ +#define PAYLOAD_ARG_CNT 4U + +enum pm_api_id { + PM_GET_API_VERSION = 1, +}; + +/* PMU-FW return status codes */ +enum pm_ret_status { + XST_PM_SUCCESS = 0, + XST_PM_INTERNAL = 2000, + XST_PM_CONFLICT, + XST_PM_NO_ACCESS, + XST_PM_INVALID_NODE, + XST_PM_DOUBLE_REQ, + XST_PM_ABORT_SUSPEND, +}; + +struct zynqmp_eemi_ops { + int (*get_api_version)(u32 *version); +}; + +#if IS_REACHABLE(CONFIG_ARCH_ZYNQMP) +const struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void); +#else +static inline const struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void) +{ + return NULL; +} +#endif + +#endif /* __FIRMWARE_ZYNQMP_H__ */ -- cgit v1.2.3-71-gd317 From 59ecdd778879f171072b663f91de6c3a595e2ed4 Mon Sep 17 00:00:00 2001 From: Rajan Vaja Date: Wed, 12 Sep 2018 12:38:37 -0700 Subject: firmware: xilinx: Add query data API Add a ZynqMP firmware query data API to query platform-specific information (clocks, pins) from firmware.
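As a hedged illustration (not part of the patch), a consumer driver would reach this call through the eemi ops table exposed by zynqmp_pm_get_eemi_ops(); the query ID below is a placeholder, since only PM_QID_INVALID is defined in the enum at this point in the series.

#include <linux/errno.h>
#include <linux/firmware/xlnx-zynqmp.h>

static int example_query(void)
{
	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
	struct zynqmp_pm_query_data qdata = {
		.qid = PM_QID_INVALID,	/* placeholder: substitute a real query ID */
	};
	u32 ret_payload[PAYLOAD_ARG_CNT];

	if (!eemi_ops || !eemi_ops->query_data)
		return -ENODEV;

	/* The firmware's answer is returned in ret_payload */
	return eemi_ops->query_data(qdata, ret_payload);
}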
Signed-off-by: Rajan Vaja
Signed-off-by: Jolly Shah
Signed-off-by: Michal Simek
---
 drivers/firmware/xilinx/zynqmp.c     | 14 ++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h | 20 ++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 5bf64acc7b44..2a333c04955b 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -241,8 +241,22 @@ static int get_set_conduit_method(struct device_node *np)
 	return 0;
 }

+/**
+ * zynqmp_pm_query_data() - Get query data from firmware
+ * @qdata: Query data structure holding the query ID and arguments
+ * @out: Returned output value
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out)
+{
+	return zynqmp_pm_invoke_fn(PM_QUERY_DATA, qdata.qid, qdata.arg1,
+				   qdata.arg2, qdata.arg3, out);
+}
+
 static const struct zynqmp_eemi_ops eemi_ops = {
 	.get_api_version = zynqmp_pm_get_api_version,
+	.query_data = zynqmp_pm_query_data,
 };

 /**
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index cb63bedf3dcf..287f42caa623 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -34,6 +34,7 @@
 enum pm_api_id {
 	PM_GET_API_VERSION = 1,
+	PM_QUERY_DATA = 35,
 };

 /* PMU-FW return status codes */
@@ -47,8 +48,27 @@ enum pm_ret_status {
 	XST_PM_ABORT_SUSPEND,
 };

+enum pm_query_id {
+	PM_QID_INVALID,
+};
+
+/**
+ * struct zynqmp_pm_query_data - PM query data
+ * @qid: query ID
+ * @arg1: Argument 1 of query data
+ * @arg2: Argument 2 of query data
+ * @arg3: Argument 3 of query data
+ */
+struct zynqmp_pm_query_data {
+	u32 qid;
+	u32 arg1;
+	u32 arg2;
+	u32 arg3;
+};
+
 struct zynqmp_eemi_ops {
 	int (*get_api_version)(u32 *version);
+	int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out);
 };

 #if IS_REACHABLE(CONFIG_ARCH_ZYNQMP)
-- cgit v1.2.3-71-gd317

From f9627312e20721681ea326bd2b7935bf8034b288 Mon Sep 17 00:00:00 2001
From: Rajan Vaja
Date: Wed, 12 Sep 2018 12:38:38 -0700
Subject: firmware: xilinx: Add clock APIs

Add clock APIs to control clocks through the firmware interface.

Signed-off-by: Rajan Vaja
Signed-off-by: Jolly Shah
Signed-off-by: Michal Simek
---
 drivers/firmware/xilinx/zynqmp.c     | 186 ++++++++++++++++++++++++++++++++++-
 include/linux/firmware/xlnx-zynqmp.h |  30 ++++++
 2 files changed, 214 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 2a333c04955b..697f4fa23a45 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -250,13 +250,195 @@ static int get_set_conduit_method(struct device_node *np)
  */
 static int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out)
 {
-	return zynqmp_pm_invoke_fn(PM_QUERY_DATA, qdata.qid, qdata.arg1,
-				   qdata.arg2, qdata.arg3, out);
+	int ret;
+
+	ret = zynqmp_pm_invoke_fn(PM_QUERY_DATA, qdata.qid, qdata.arg1,
+				  qdata.arg2, qdata.arg3, out);
+
+	/*
+	 * For clock name query, all bytes in SMC response are clock name
+	 * characters and return code is always success. For invalid clocks,
+	 * clock name bytes would be zeros.
+	 */
+	return qdata.qid == PM_QID_CLOCK_GET_NAME ? 0 : ret;
+}
+
+/**
+ * zynqmp_pm_clock_enable() - Enable the clock for given id
+ * @clock_id: ID of the clock to be enabled
+ *
+ * This function is used by master to enable the clock
+ * including peripherals and PLL clocks.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_enable(u32 clock_id)
+{
+	return zynqmp_pm_invoke_fn(PM_CLOCK_ENABLE, clock_id, 0, 0, 0, NULL);
+}
+
+/**
+ * zynqmp_pm_clock_disable() - Disable the clock for given id
+ * @clock_id: ID of the clock to be disabled
+ *
+ * This function is used by master to disable the clock
+ * including peripherals and PLL clocks.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_disable(u32 clock_id)
+{
+	return zynqmp_pm_invoke_fn(PM_CLOCK_DISABLE, clock_id, 0, 0, 0, NULL);
+}
+
+/**
+ * zynqmp_pm_clock_getstate() - Get the clock state for given id
+ * @clock_id: ID of the clock to be queried
+ * @state: 1/0 (Enabled/Disabled)
+ *
+ * This function is used by master to get the state of clock
+ * including peripherals and PLL clocks.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_getstate(u32 clock_id, u32 *state)
+{
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	ret = zynqmp_pm_invoke_fn(PM_CLOCK_GETSTATE, clock_id, 0,
+				  0, 0, ret_payload);
+	*state = ret_payload[1];
+
+	return ret;
+}
+
+/**
+ * zynqmp_pm_clock_setdivider() - Set the clock divider for given id
+ * @clock_id: ID of the clock
+ * @divider: divider value
+ *
+ * This function is used by master to set divider for any clock
+ * to achieve desired rate.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_setdivider(u32 clock_id, u32 divider)
+{
+	return zynqmp_pm_invoke_fn(PM_CLOCK_SETDIVIDER, clock_id, divider,
+				   0, 0, NULL);
+}
+
+/**
+ * zynqmp_pm_clock_getdivider() - Get the clock divider for given id
+ * @clock_id: ID of the clock
+ * @divider: divider value
+ *
+ * This function is used by master to get divider values
+ * for any clock.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_getdivider(u32 clock_id, u32 *divider)
+{
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	ret = zynqmp_pm_invoke_fn(PM_CLOCK_GETDIVIDER, clock_id, 0,
+				  0, 0, ret_payload);
+	*divider = ret_payload[1];
+
+	return ret;
+}
+
+/**
+ * zynqmp_pm_clock_setrate() - Set the clock rate for given id
+ * @clock_id: ID of the clock
+ * @rate: rate value in Hz
+ *
+ * This function is used by master to set rate for any clock.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_setrate(u32 clock_id, u64 rate)
+{
+	return zynqmp_pm_invoke_fn(PM_CLOCK_SETRATE, clock_id,
+				   lower_32_bits(rate),
+				   upper_32_bits(rate),
+				   0, NULL);
+}
+
+/**
+ * zynqmp_pm_clock_getrate() - Get the clock rate for given id
+ * @clock_id: ID of the clock
+ * @rate: rate value in Hz
+ *
+ * This function is used by master to get rate
+ * for any clock.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_getrate(u32 clock_id, u64 *rate)
+{
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	ret = zynqmp_pm_invoke_fn(PM_CLOCK_GETRATE, clock_id, 0,
+				  0, 0, ret_payload);
+	*rate = ((u64)ret_payload[2] << 32) | ret_payload[1];
+
+	return ret;
+}
+
+/**
+ * zynqmp_pm_clock_setparent() - Set the clock parent for given id
+ * @clock_id: ID of the clock
+ * @parent_id: parent id
+ *
+ * This function is used by master to set parent for any clock.
+ * + * Return: Returns status, either success or error+reason + */ +static int zynqmp_pm_clock_setparent(u32 clock_id, u32 parent_id) +{ + return zynqmp_pm_invoke_fn(PM_CLOCK_SETPARENT, clock_id, + parent_id, 0, 0, NULL); +} + +/** + * zynqmp_pm_clock_getparent() - Get the clock parent for given id + * @clock_id: ID of the clock + * @parent_id: parent id + * + * This function is used by master to get parent index + * for any clock. + * + * Return: Returns status, either success or error+reason + */ +static int zynqmp_pm_clock_getparent(u32 clock_id, u32 *parent_id) +{ + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + ret = zynqmp_pm_invoke_fn(PM_CLOCK_GETPARENT, clock_id, 0, + 0, 0, ret_payload); + *parent_id = ret_payload[1]; + + return ret; } static const struct zynqmp_eemi_ops eemi_ops = { .get_api_version = zynqmp_pm_get_api_version, .query_data = zynqmp_pm_query_data, + .clock_enable = zynqmp_pm_clock_enable, + .clock_disable = zynqmp_pm_clock_disable, + .clock_getstate = zynqmp_pm_clock_getstate, + .clock_setdivider = zynqmp_pm_clock_setdivider, + .clock_getdivider = zynqmp_pm_clock_getdivider, + .clock_setrate = zynqmp_pm_clock_setrate, + .clock_getrate = zynqmp_pm_clock_getrate, + .clock_setparent = zynqmp_pm_clock_setparent, + .clock_getparent = zynqmp_pm_clock_getparent, }; /** diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 287f42caa623..015e130431e6 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -35,6 +35,15 @@ enum pm_api_id { PM_GET_API_VERSION = 1, PM_QUERY_DATA = 35, + PM_CLOCK_ENABLE, + PM_CLOCK_DISABLE, + PM_CLOCK_GETSTATE, + PM_CLOCK_SETDIVIDER, + PM_CLOCK_GETDIVIDER, + PM_CLOCK_SETRATE, + PM_CLOCK_GETRATE, + PM_CLOCK_SETPARENT, + PM_CLOCK_GETPARENT, }; /* PMU-FW return status codes */ @@ -48,8 +57,20 @@ enum pm_ret_status { XST_PM_ABORT_SUSPEND, }; +enum pm_ioctl_id { + IOCTL_SET_PLL_FRAC_MODE = 8, + IOCTL_GET_PLL_FRAC_MODE, + IOCTL_SET_PLL_FRAC_DATA, + IOCTL_GET_PLL_FRAC_DATA, +}; + enum pm_query_id { PM_QID_INVALID, + PM_QID_CLOCK_GET_NAME, + PM_QID_CLOCK_GET_TOPOLOGY, + PM_QID_CLOCK_GET_FIXEDFACTOR_PARAMS, + PM_QID_CLOCK_GET_PARENTS, + PM_QID_CLOCK_GET_ATTRIBUTES, }; /** @@ -69,6 +90,15 @@ struct zynqmp_pm_query_data { struct zynqmp_eemi_ops { int (*get_api_version)(u32 *version); int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out); + int (*clock_enable)(u32 clock_id); + int (*clock_disable)(u32 clock_id); + int (*clock_getstate)(u32 clock_id, u32 *state); + int (*clock_setdivider)(u32 clock_id, u32 divider); + int (*clock_getdivider)(u32 clock_id, u32 *divider); + int (*clock_setrate)(u32 clock_id, u64 rate); + int (*clock_getrate)(u32 clock_id, u64 *rate); + int (*clock_setparent)(u32 clock_id, u32 parent_id); + int (*clock_getparent)(u32 clock_id, u32 *parent_id); }; #if IS_REACHABLE(CONFIG_ARCH_ZYNQMP) -- cgit v1.2.3-71-gd317 From 96a71f21ef1fcc32bea07c612a332a89a213f054 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 21 Sep 2018 21:20:30 +0300 Subject: fanotify: store fanotify_init() flags in group's fanotify_data This averts the need to re-generate flags in fanotify_show_fdinfo() and sets the scene for addition of more upcoming flags without growing new members to the fanotify_data struct. 
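As an illustrative sketch (not part of the diff below), a feature test
against the stored init flags now reduces to a single mask check:

	#include <linux/fanotify.h>
	#include <linux/fsnotify_backend.h>

	/* True iff the group was created with FAN_UNLIMITED_QUEUE. */
	static inline bool example_unlimited_queue(struct fsnotify_group *group)
	{
		return FAN_GROUP_FLAG(group, FAN_UNLIMITED_QUEUE);
	}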
Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 8 ++++---- fs/notify/fdinfo.c | 24 +----------------------- include/linux/fanotify.h | 4 ++++ include/linux/fsnotify_backend.h | 4 ++-- 4 files changed, 11 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 1347c588f778..15719d4aa4b5 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -191,7 +191,7 @@ static int process_access_response(struct fsnotify_group *group, if (fd < 0) return -EINVAL; - if ((response & FAN_AUDIT) && !group->fanotify_data.audit) + if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT)) return -EINVAL; event = dequeue_event(group, fd); @@ -701,8 +701,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) struct user_struct *user; struct fanotify_event_info *oevent; - pr_debug("%s: flags=%d event_f_flags=%d\n", - __func__, flags, event_f_flags); + pr_debug("%s: flags=%x event_f_flags=%x\n", + __func__, flags, event_f_flags); if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -746,6 +746,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } group->fanotify_data.user = user; + group->fanotify_data.flags = flags; atomic_inc(&user->fanotify_listeners); group->memcg = get_mem_cgroup_from_mm(current->mm); @@ -798,7 +799,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) fd = -EPERM; if (!capable(CAP_AUDIT_WRITE)) goto out_destroy_group; - group->fanotify_data.audit = true; } fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 25385e336ac7..348a184bcdda 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -142,31 +142,9 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) void fanotify_show_fdinfo(struct seq_file *m, struct file *f) { struct fsnotify_group *group = f->private_data; - unsigned int flags = 0; - - switch (group->priority) { - case FS_PRIO_0: - flags |= FAN_CLASS_NOTIF; - break; - case FS_PRIO_1: - flags |= FAN_CLASS_CONTENT; - break; - case FS_PRIO_2: - flags |= FAN_CLASS_PRE_CONTENT; - break; - } - - if (group->max_events == UINT_MAX) - flags |= FAN_UNLIMITED_QUEUE; - - if (group->fanotify_data.max_marks == UINT_MAX) - flags |= FAN_UNLIMITED_MARKS; - - if (group->fanotify_data.audit) - flags |= FAN_ENABLE_AUDIT; seq_printf(m, "fanotify flags:%x event-flags:%x\n", - flags, group->fanotify_data.f_flags); + group->fanotify_data.flags, group->fanotify_data.f_flags); show_fdinfo(m, f, fanotify_fdinfo); } diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 096c96f4f16a..9c5ea3bdfaa0 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -6,4 +6,8 @@ /* not valid from userspace, only kernel internal */ #define FAN_MARK_ONDIR 0x00000100 + +#define FAN_GROUP_FLAG(group, flag) \ + ((group)->fanotify_data.flags & (flag)) + #endif /* _LINUX_FANOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 81b88fc9df31..8e91341cbd8a 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -189,10 +189,10 @@ struct fsnotify_group { /* allows a group to block waiting for a userspace response */ struct list_head access_list; wait_queue_head_t access_waitq; - int f_flags; + int flags; /* flags from fanotify_init() */ + int 
f_flags;	/* event_f_flags from fanotify_init() */
 	unsigned int max_marks;
 	struct user_struct *user;
-	bool audit;
 } fanotify_data;
 #endif /* CONFIG_FANOTIFY */
 };
-- cgit v1.2.3-71-gd317

From eb2bccb70b979d996ecb769d692b92ff12eabbb7 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni
Date: Wed, 19 Sep 2018 03:13:21 +0200
Subject: rtc: move rtc_add_group/s definitions

Move the rtc_add_group and rtc_add_groups definitions to rtc.h, which is
available to all RTC drivers.

Signed-off-by: Alexandre Belloni
---
 drivers/rtc/rtc-core.h | 14 --------------
 include/linux/rtc.h    | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/rtc-core.h b/drivers/rtc/rtc-core.h
index ccc17a2e293d..0abf98983e13 100644
--- a/drivers/rtc/rtc-core.h
+++ b/drivers/rtc/rtc-core.h
@@ -40,23 +40,9 @@ static inline void rtc_proc_del_device(struct rtc_device *rtc)

 #ifdef CONFIG_RTC_INTF_SYSFS
 const struct attribute_group **rtc_get_dev_attribute_groups(void);
-int rtc_add_group(struct rtc_device *rtc, const struct attribute_group *grp);
-int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps);
 #else
 static inline const struct attribute_group **rtc_get_dev_attribute_groups(void)
 {
 	return NULL;
 }
-
-static inline
-int rtc_add_group(struct rtc_device *rtc, const struct attribute_group *grp)
-{
-	return 0;
-}
-
-static inline
-int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps)
-{
-	return 0;
-}
 #endif
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index faf00a1472d4..c8bb4a2b48c3 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -272,4 +272,20 @@ static inline int rtc_nvmem_register(struct rtc_device *rtc,
 static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
 #endif

+#ifdef CONFIG_RTC_INTF_SYSFS
+int rtc_add_group(struct rtc_device *rtc, const struct attribute_group *grp);
+int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps);
+#else
+static inline
+int rtc_add_group(struct rtc_device *rtc, const struct attribute_group *grp)
+{
+	return 0;
+}
+
+static inline
+int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps)
+{
+	return 0;
+}
+#endif
 #endif /* _LINUX_RTC_H_ */
-- cgit v1.2.3-71-gd317

From 9e288cefcc551c7b5b04f8abc7099d3451a70f5f Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto
Date: Tue, 25 Sep 2018 09:34:05 +0200
Subject: clk: renesas: Convert to SPDX identifiers

This patch updates the license headers to use SPDX-License-Identifier
tags instead of verbose license text.
Signed-off-by: Kuninori Morimoto [rebased against clk-spdx] Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Stephen Boyd --- drivers/clk/renesas/Kconfig | 2 ++ drivers/clk/renesas/clk-emev2.c | 14 +------------- drivers/clk/renesas/clk-mstp.c | 5 +---- drivers/clk/renesas/clk-r8a73a4.c | 5 +---- drivers/clk/renesas/clk-r8a7740.c | 5 +---- drivers/clk/renesas/clk-r8a7778.c | 5 +---- drivers/clk/renesas/clk-r8a7779.c | 5 +---- drivers/clk/renesas/clk-rcar-gen2.c | 5 +---- drivers/clk/renesas/clk-rz.c | 5 +---- drivers/clk/renesas/clk-sh73a0.c | 5 +---- drivers/clk/renesas/r8a7743-cpg-mssr.c | 5 +---- drivers/clk/renesas/r8a7745-cpg-mssr.c | 5 +---- drivers/clk/renesas/r8a7790-cpg-mssr.c | 5 +---- drivers/clk/renesas/r8a7791-cpg-mssr.c | 5 +---- drivers/clk/renesas/r8a7792-cpg-mssr.c | 5 +---- drivers/clk/renesas/r8a7794-cpg-mssr.c | 5 +---- drivers/clk/renesas/r8a77970-cpg-mssr.c | 5 +---- drivers/clk/renesas/rcar-gen2-cpg.c | 5 +---- drivers/clk/renesas/rcar-gen2-cpg.h | 7 ++----- drivers/clk/renesas/rcar-gen3-cpg.h | 7 ++----- drivers/clk/renesas/renesas-cpg-mssr.h | 4 ++-- include/linux/clk/renesas.h | 8 ++------ 22 files changed, 27 insertions(+), 95 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/renesas/Kconfig b/drivers/clk/renesas/Kconfig index 9022bbe1297e..ab2110f96225 100644 --- a/drivers/clk/renesas/Kconfig +++ b/drivers/clk/renesas/Kconfig @@ -1,3 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + config CLK_RENESAS bool "Renesas SoC clock support" if COMPILE_TEST && !ARCH_RENESAS default y if ARCH_RENESAS diff --git a/drivers/clk/renesas/clk-emev2.c b/drivers/clk/renesas/clk-emev2.c index a91825471c79..7f3b5f9631bd 100644 --- a/drivers/clk/renesas/clk-emev2.c +++ b/drivers/clk/renesas/clk-emev2.c @@ -1,21 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * EMMA Mobile EV2 common clock framework support * * Copyright (C) 2013 Takashi Yoshii * Copyright (C) 2012 Magnus Damm - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c index e82adcb16a52..7df14bb1cbbc 100644 --- a/drivers/clk/renesas/clk-mstp.c +++ b/drivers/clk/renesas/clk-mstp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * R-Car MSTP clocks * @@ -5,10 +6,6 @@ * Copyright (C) 2015 Glider bvba * * Contact: Laurent Pinchart - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. 
*/ #include diff --git a/drivers/clk/renesas/clk-r8a73a4.c b/drivers/clk/renesas/clk-r8a73a4.c index 7b903ce4c901..e1a9e89defdd 100644 --- a/drivers/clk/renesas/clk-r8a73a4.c +++ b/drivers/clk/renesas/clk-r8a73a4.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r8a73a4 Core CPG Clocks * * Copyright (C) 2014 Ulrich Hecht - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include diff --git a/drivers/clk/renesas/clk-r8a7740.c b/drivers/clk/renesas/clk-r8a7740.c index a7a30d2eca41..d68171263137 100644 --- a/drivers/clk/renesas/clk-r8a7740.c +++ b/drivers/clk/renesas/clk-r8a7740.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r8a7740 Core CPG Clocks * * Copyright (C) 2014 Ulrich Hecht - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include diff --git a/drivers/clk/renesas/clk-r8a7778.c b/drivers/clk/renesas/clk-r8a7778.c index 886a8380e912..f986ba34661d 100644 --- a/drivers/clk/renesas/clk-r8a7778.c +++ b/drivers/clk/renesas/clk-r8a7778.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r8a7778 Core CPG Clocks * * Copyright (C) 2014 Ulrich Hecht - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include diff --git a/drivers/clk/renesas/clk-r8a7779.c b/drivers/clk/renesas/clk-r8a7779.c index 5adcca4656c3..437d92cb0630 100644 --- a/drivers/clk/renesas/clk-r8a7779.c +++ b/drivers/clk/renesas/clk-r8a7779.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r8a7779 Core CPG Clocks * * Copyright (C) 2013, 2014 Horms Solutions Ltd. * * Contact: Simon Horman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include diff --git a/drivers/clk/renesas/clk-rcar-gen2.c b/drivers/clk/renesas/clk-rcar-gen2.c index bccd62f2cb09..738b723045ce 100644 --- a/drivers/clk/renesas/clk-rcar-gen2.c +++ b/drivers/clk/renesas/clk-rcar-gen2.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * rcar_gen2 Core CPG Clocks * * Copyright (C) 2013 Ideas On Board SPRL * * Contact: Laurent Pinchart - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include diff --git a/drivers/clk/renesas/clk-rz.c b/drivers/clk/renesas/clk-rz.c index ac2f86d626b6..7fe8729bf66c 100644 --- a/drivers/clk/renesas/clk-rz.c +++ b/drivers/clk/renesas/clk-rz.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * RZ/A1 Core CPG Clocks * * Copyright (C) 2013 Ideas On Board SPRL * Copyright (C) 2014 Wolfram Sang, Sang Engineering - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. 
*/ #include diff --git a/drivers/clk/renesas/clk-sh73a0.c b/drivers/clk/renesas/clk-sh73a0.c index bab33610eb6c..576259237666 100644 --- a/drivers/clk/renesas/clk-sh73a0.c +++ b/drivers/clk/renesas/clk-sh73a0.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * sh73a0 Core CPG Clocks * * Copyright (C) 2014 Ulrich Hecht - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include diff --git a/drivers/clk/renesas/r8a7743-cpg-mssr.c b/drivers/clk/renesas/r8a7743-cpg-mssr.c index 011c170ec3f9..f880b9cb72e0 100644 --- a/drivers/clk/renesas/r8a7743-cpg-mssr.c +++ b/drivers/clk/renesas/r8a7743-cpg-mssr.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r8a7743 Clock Pulse Generator / Module Standby and Software Reset * * Copyright (C) 2016 Cogent Embedded Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation; of the License. */ #include diff --git a/drivers/clk/renesas/r8a7745-cpg-mssr.c b/drivers/clk/renesas/r8a7745-cpg-mssr.c index 4b0a9243b748..493874e5ebee 100644 --- a/drivers/clk/renesas/r8a7745-cpg-mssr.c +++ b/drivers/clk/renesas/r8a7745-cpg-mssr.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r8a7745 Clock Pulse Generator / Module Standby and Software Reset * * Copyright (C) 2016 Cogent Embedded Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation; of the License. */ #include diff --git a/drivers/clk/renesas/r8a7790-cpg-mssr.c b/drivers/clk/renesas/r8a7790-cpg-mssr.c index f936cb74b681..c57cb93f8315 100644 --- a/drivers/clk/renesas/r8a7790-cpg-mssr.c +++ b/drivers/clk/renesas/r8a7790-cpg-mssr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r8a7790 Clock Pulse Generator / Module Standby and Software Reset * @@ -6,10 +7,6 @@ * Based on clk-rcar-gen2.c * * Copyright (C) 2013 Ideas On Board SPRL - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include diff --git a/drivers/clk/renesas/r8a7791-cpg-mssr.c b/drivers/clk/renesas/r8a7791-cpg-mssr.c index 1b91f03b7598..65702debcabb 100644 --- a/drivers/clk/renesas/r8a7791-cpg-mssr.c +++ b/drivers/clk/renesas/r8a7791-cpg-mssr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r8a7791 Clock Pulse Generator / Module Standby and Software Reset * @@ -6,10 +7,6 @@ * Based on clk-rcar-gen2.c * * Copyright (C) 2013 Ideas On Board SPRL - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. 
*/ #include diff --git a/drivers/clk/renesas/r8a7792-cpg-mssr.c b/drivers/clk/renesas/r8a7792-cpg-mssr.c index 493e07859f5f..cf8b84a3a060 100644 --- a/drivers/clk/renesas/r8a7792-cpg-mssr.c +++ b/drivers/clk/renesas/r8a7792-cpg-mssr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r8a7792 Clock Pulse Generator / Module Standby and Software Reset * @@ -6,10 +7,6 @@ * Based on clk-rcar-gen2.c * * Copyright (C) 2013 Ideas On Board SPRL - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include diff --git a/drivers/clk/renesas/r8a7794-cpg-mssr.c b/drivers/clk/renesas/r8a7794-cpg-mssr.c index 088f4b79fdfc..c1948693c5c1 100644 --- a/drivers/clk/renesas/r8a7794-cpg-mssr.c +++ b/drivers/clk/renesas/r8a7794-cpg-mssr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r8a7794 Clock Pulse Generator / Module Standby and Software Reset * @@ -6,10 +7,6 @@ * Based on clk-rcar-gen2.c * * Copyright (C) 2013 Ideas On Board SPRL - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include diff --git a/drivers/clk/renesas/r8a77970-cpg-mssr.c b/drivers/clk/renesas/r8a77970-cpg-mssr.c index f55842917e8d..38509873d06c 100644 --- a/drivers/clk/renesas/r8a77970-cpg-mssr.c +++ b/drivers/clk/renesas/r8a77970-cpg-mssr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * r8a77970 Clock Pulse Generator / Module Standby and Software Reset * @@ -6,10 +7,6 @@ * Based on r8a7795-cpg-mssr.c * * Copyright (C) 2015 Glider bvba - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #include diff --git a/drivers/clk/renesas/rcar-gen2-cpg.c b/drivers/clk/renesas/rcar-gen2-cpg.c index daf88bc2cdae..f596a2dafcf4 100644 --- a/drivers/clk/renesas/rcar-gen2-cpg.c +++ b/drivers/clk/renesas/rcar-gen2-cpg.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * R-Car Gen2 Clock Pulse Generator * * Copyright (C) 2016 Cogent Embedded Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. */ #include diff --git a/drivers/clk/renesas/rcar-gen2-cpg.h b/drivers/clk/renesas/rcar-gen2-cpg.h index 020a3baad015..bff9551c7a38 100644 --- a/drivers/clk/renesas/rcar-gen2-cpg.h +++ b/drivers/clk/renesas/rcar-gen2-cpg.h @@ -1,11 +1,8 @@ -/* +/* SPDX-License-Identifier: GPL-2.0 + * * R-Car Gen2 Clock Pulse Generator * * Copyright (C) 2016 Cogent Embedded Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation; version 2 of the License. 
*/ #ifndef __CLK_RENESAS_RCAR_GEN2_CPG_H__ diff --git a/drivers/clk/renesas/rcar-gen3-cpg.h b/drivers/clk/renesas/rcar-gen3-cpg.h index ea4f8fc3c4c9..6ebc4b42c5a3 100644 --- a/drivers/clk/renesas/rcar-gen3-cpg.h +++ b/drivers/clk/renesas/rcar-gen3-cpg.h @@ -1,11 +1,8 @@ -/* +/* SPDX-License-Identifier: GPL-2.0 + * * R-Car Gen3 Clock Pulse Generator * * Copyright (C) 2015-2016 Glider bvba - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. */ #ifndef __CLK_RENESAS_RCAR_GEN3_CPG_H__ diff --git a/drivers/clk/renesas/renesas-cpg-mssr.h b/drivers/clk/renesas/renesas-cpg-mssr.h index 59b1b30c1dba..d52d1f8da9d7 100644 --- a/drivers/clk/renesas/renesas-cpg-mssr.h +++ b/drivers/clk/renesas/renesas-cpg-mssr.h @@ -1,5 +1,5 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* +/* SPDX-License-Identifier: GPL-2.0 + * * Renesas Clock Pulse Generator / Module Standby and Software Reset * * Copyright (C) 2015 Glider bvba diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index 9ebf1f8243bb..0ebbe2f0b45e 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -1,14 +1,10 @@ -/* +/* SPDX-License-Identifier: GPL-2.0+ + * * Copyright 2013 Ideas On Board SPRL * Copyright 2013, 2014 Horms Solutions Ltd. * * Contact: Laurent Pinchart * Contact: Simon Horman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. */ #ifndef __LINUX_CLK_RENESAS_H_ -- cgit v1.2.3-71-gd317 From 3d0186bb068e6cc6c23dc1d2f0b1cf64894c40ea Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 16 Jun 2018 17:32:07 -0400 Subject: Update email address Redirect some older email addresses that are in the git logs. 
Signed-off-by: Matthew Wilcox --- .mailmap | 7 +++++++ MAINTAINERS | 6 +++--- arch/parisc/kernel/syscall.S | 2 +- drivers/input/keyboard/hilkbd.c | 2 +- drivers/pci/hotplug/acpiphp.h | 2 +- drivers/pci/hotplug/acpiphp_core.c | 4 ++-- drivers/pci/hotplug/acpiphp_glue.c | 2 +- fs/isofs/dir.c | 2 +- include/linux/xarray.h | 2 +- 9 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/.mailmap b/.mailmap index 285e09645b31..89f532caf639 100644 --- a/.mailmap +++ b/.mailmap @@ -119,6 +119,13 @@ Mark Brown Mark Yao Martin Kepplinger Martin Kepplinger +Matthew Wilcox +Matthew Wilcox +Matthew Wilcox +Matthew Wilcox +Matthew Wilcox +Matthew Wilcox +Matthew Wilcox Matthieu CASTET Mauro Carvalho Chehab Mauro Carvalho Chehab diff --git a/MAINTAINERS b/MAINTAINERS index a255240d1452..612e5dbf3431 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -536,7 +536,7 @@ F: Documentation/hwmon/adt7475 F: drivers/hwmon/adt7475.c ADVANSYS SCSI DRIVER -M: Matthew Wilcox +M: Matthew Wilcox M: Hannes Reinecke L: linux-scsi@vger.kernel.org S: Maintained @@ -4364,7 +4364,7 @@ S: Maintained F: drivers/i2c/busses/i2c-diolan-u2c.c FILESYSTEM DIRECT ACCESS (DAX) -M: Matthew Wilcox +M: Matthew Wilcox M: Ross Zwisler M: Jan Kara L: linux-fsdevel@vger.kernel.org @@ -8626,7 +8626,7 @@ F: drivers/message/fusion/ F: drivers/scsi/mpt3sas/ LSILOGIC/SYMBIOS/NCR 53C8XX and 53C1010 PCI-SCSI drivers -M: Matthew Wilcox +M: Matthew Wilcox L: linux-scsi@vger.kernel.org S: Maintained F: drivers/scsi/sym53c8xx_2/ diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index f453997a7b8f..a9bc90dc4ae7 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -2,7 +2,7 @@ * Linux/PA-RISC Project (http://www.parisc-linux.org/) * * System call entry code / Linux gateway page - * Copyright (c) Matthew Wilcox 1999 + * Copyright (c) Matthew Wilcox 1999 * Licensed under the GNU GPL. * thanks to Philipp Rumpf, Mike Shaver and various others * sorry about the wall, puffin.. diff --git a/drivers/input/keyboard/hilkbd.c b/drivers/input/keyboard/hilkbd.c index 5c7afdec192c..f5c5ae8b6c06 100644 --- a/drivers/input/keyboard/hilkbd.c +++ b/drivers/input/keyboard/hilkbd.c @@ -2,7 +2,7 @@ * linux/drivers/hil/hilkbd.c * * Copyright (C) 1998 Philip Blundell - * Copyright (C) 1999 Matthew Wilcox + * Copyright (C) 1999 Matthew Wilcox * Copyright (C) 1999-2007 Helge Deller * * Very basic HP Human Interface Loop (HIL) driver. diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h index e438a2d734f2..14cef4cf592b 100644 --- a/drivers/pci/hotplug/acpiphp.h +++ b/drivers/pci/hotplug/acpiphp.h @@ -8,7 +8,7 @@ * Copyright (C) 2002 Hiroshi Aono (h-aono@ap.jp.nec.com) * Copyright (C) 2002,2003 Takayoshi Kochi (t-kochi@bq.jp.nec.com) * Copyright (C) 2002,2003 NEC Corporation - * Copyright (C) 2003-2005 Matthew Wilcox (matthew.wilcox@hp.com) + * Copyright (C) 2003-2005 Matthew Wilcox (willy@infradead.org) * Copyright (C) 2003-2005 Hewlett Packard * * All rights reserved. 
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c index ad32ffbc4b91..0447b169e7ff 100644 --- a/drivers/pci/hotplug/acpiphp_core.c +++ b/drivers/pci/hotplug/acpiphp_core.c @@ -8,7 +8,7 @@ * Copyright (C) 2002 Hiroshi Aono (h-aono@ap.jp.nec.com) * Copyright (C) 2002,2003 Takayoshi Kochi (t-kochi@bq.jp.nec.com) * Copyright (C) 2002,2003 NEC Corporation - * Copyright (C) 2003-2005 Matthew Wilcox (matthew.wilcox@hp.com) + * Copyright (C) 2003-2005 Matthew Wilcox (willy@infradead.org) * Copyright (C) 2003-2005 Hewlett Packard * * All rights reserved. @@ -40,7 +40,7 @@ bool acpiphp_disabled; static struct acpiphp_attention_info *attention_info; #define DRIVER_VERSION "0.5" -#define DRIVER_AUTHOR "Greg Kroah-Hartman , Takayoshi Kochi , Matthew Wilcox " +#define DRIVER_AUTHOR "Greg Kroah-Hartman , Takayoshi Kochi , Matthew Wilcox " #define DRIVER_DESC "ACPI Hot Plug PCI Controller Driver" MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 12afa7fdf77e..e4c46637f32f 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -5,7 +5,7 @@ * Copyright (C) 2002,2003 Takayoshi Kochi (t-kochi@bq.jp.nec.com) * Copyright (C) 2002 Hiroshi Aono (h-aono@ap.jp.nec.com) * Copyright (C) 2002,2003 NEC Corporation - * Copyright (C) 2003-2005 Matthew Wilcox (matthew.wilcox@hp.com) + * Copyright (C) 2003-2005 Matthew Wilcox (willy@infradead.org) * Copyright (C) 2003-2005 Hewlett Packard * Copyright (C) 2005 Rajesh Shah (rajesh.shah@intel.com) * Copyright (C) 2005 Intel Corporation diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 947ce22f5b3c..f0fe641893a5 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -46,7 +46,7 @@ int isofs_name_translate(struct iso_directory_record *de, char *new, struct inod return i; } -/* Acorn extensions written by Matthew Wilcox 1998 */ +/* Acorn extensions written by Matthew Wilcox 1998 */ int get_acorn_filename(struct iso_directory_record *de, char *retname, struct inode *inode) { diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 2dfc8006fe64..9e4c86853fa4 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -4,7 +4,7 @@ /* * eXtensible Arrays * Copyright (c) 2017 Microsoft Corporation - * Author: Matthew Wilcox + * Author: Matthew Wilcox */ #include -- cgit v1.2.3-71-gd317 From 3159f943aafdbacb2f94c38fdaadabf2bbde2a14 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Nov 2017 13:30:42 -0400 Subject: xarray: Replace exceptional entries Introduce xarray value entries and tagged pointers to replace radix tree exceptional entries. This is a slight change in encoding to allow the use of an extra bit (we can now store BITS_PER_LONG - 1 bits in a value entry). It is also a change in emphasis; exceptional entries are intimidating and different. As the comment explains, you can choose to store values or pointers in the xarray and they are both first-class citizens. 
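As a quick sketch of the new encoding (illustrative only), an integer
round-trips through a value entry and is always distinguishable from a
pointer:

	#include <linux/bug.h>
	#include <linux/xarray.h>

	static void example_value_entry(void)
	{
		void *entry = xa_mk_value(42);

		/* The bottom bit marks a value entry... */
		BUG_ON(!xa_is_value(entry));
		/* ...and the payload shifts back out intact. */
		BUG_ON(xa_to_value(entry) != 42);

		/* Word-aligned kernel pointers never look like value entries. */
		BUG_ON(xa_is_value(&entry));
	}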
Signed-off-by: Matthew Wilcox Reviewed-by: Josef Bacik --- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 +- arch/powerpc/include/asm/nohash/64/pgtable.h | 4 +- drivers/gpu/drm/i915/i915_gem.c | 17 ++-- drivers/staging/erofs/utils.c | 18 ++--- fs/btrfs/compression.c | 2 +- fs/dax.c | 112 +++++++++++++-------------- fs/proc/task_mmu.c | 2 +- include/linux/radix-tree.h | 36 ++------- include/linux/swapops.h | 19 ++--- include/linux/xarray.h | 102 ++++++++++++++++++++++++ lib/idr.c | 60 ++++++-------- lib/radix-tree.c | 21 +++-- mm/filemap.c | 10 +-- mm/khugepaged.c | 2 +- mm/madvise.c | 2 +- mm/memcontrol.c | 2 +- mm/mincore.c | 2 +- mm/readahead.c | 2 +- mm/shmem.c | 10 +-- mm/swap.c | 2 +- mm/truncate.c | 12 +-- mm/workingset.c | 13 ++-- tools/testing/radix-tree/idr-test.c | 6 +- tools/testing/radix-tree/linux/radix-tree.h | 1 - tools/testing/radix-tree/multiorder.c | 47 ++++++----- tools/testing/radix-tree/test.c | 2 +- 26 files changed, 278 insertions(+), 232 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 2fdc865ca374..62039e557ac0 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -723,9 +723,7 @@ static inline bool pte_user(pte_t pte) BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \ BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \ } while (0) -/* - * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT; - */ + #define SWP_TYPE_BITS 5 #define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ & ((1UL << SWP_TYPE_BITS) - 1)) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 7cd6809f4d33..05765c2d2c1f 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -313,9 +313,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define MAX_SWAPFILES_CHECK() do { \ BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \ } while (0) -/* - * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT; - */ + #define SWP_TYPE_BITS 5 #define __swp_type(x) (((x).val >> _PAGE_BIT_SWAP_TYPE) \ & ((1UL << SWP_TYPE_BITS) - 1)) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fcc73a6ab503..316730b45f84 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5996,7 +5996,8 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, count = __sg_page_count(sg); while (idx + count <= n) { - unsigned long exception, i; + void *entry; + unsigned long i; int ret; /* If we cannot allocate and insert this entry, or the @@ -6011,12 +6012,9 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, if (ret && ret != -EEXIST) goto scan; - exception = - RADIX_TREE_EXCEPTIONAL_ENTRY | - idx << RADIX_TREE_EXCEPTIONAL_SHIFT; + entry = xa_mk_value(idx); for (i = 1; i < count; i++) { - ret = radix_tree_insert(&iter->radix, idx + i, - (void *)exception); + ret = radix_tree_insert(&iter->radix, idx + i, entry); if (ret && ret != -EEXIST) goto scan; } @@ -6054,15 +6052,14 @@ lookup: GEM_BUG_ON(!sg); /* If this index is in the middle of multi-page sg entry, - * the radixtree will contain an exceptional entry that points + * the radix tree will contain a value entry that points * to the start of that range. We will return the pointer to * the base page and the offset of this page within the * sg entry's range. 
*/ *offset = 0; - if (unlikely(radix_tree_exception(sg))) { - unsigned long base = - (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT; + if (unlikely(xa_is_value(sg))) { + unsigned long base = xa_to_value(sg); sg = radix_tree_lookup(&iter->radix, base); GEM_BUG_ON(!sg); diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c index 595cf90af9bb..bdee9bd09f11 100644 --- a/drivers/staging/erofs/utils.c +++ b/drivers/staging/erofs/utils.c @@ -35,7 +35,6 @@ static atomic_long_t erofs_global_shrink_cnt; #ifdef CONFIG_EROFS_FS_ZIP -/* radix_tree and the future XArray both don't use tagptr_t yet */ struct erofs_workgroup *erofs_find_workgroup( struct super_block *sb, pgoff_t index, bool *tag) { @@ -47,9 +46,8 @@ repeat: rcu_read_lock(); grp = radix_tree_lookup(&sbi->workstn_tree, index); if (grp != NULL) { - *tag = radix_tree_exceptional_entry(grp); - grp = (void *)((unsigned long)grp & - ~RADIX_TREE_EXCEPTIONAL_ENTRY); + *tag = xa_pointer_tag(grp); + grp = xa_untag_pointer(grp); if (erofs_workgroup_get(grp, &oldcount)) { /* prefer to relax rcu read side */ @@ -83,9 +81,7 @@ int erofs_register_workgroup(struct super_block *sb, sbi = EROFS_SB(sb); erofs_workstn_lock(sbi); - if (tag) - grp = (void *)((unsigned long)grp | - 1UL << RADIX_TREE_EXCEPTIONAL_SHIFT); + grp = xa_tag_pointer(grp, tag); err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp); @@ -131,9 +127,7 @@ repeat: for (i = 0; i < found; ++i) { int cnt; - struct erofs_workgroup *grp = (void *) - ((unsigned long)batch[i] & - ~RADIX_TREE_EXCEPTIONAL_ENTRY); + struct erofs_workgroup *grp = xa_untag_pointer(batch[i]); first_index = grp->index + 1; @@ -150,8 +144,8 @@ repeat: #endif continue; - if (radix_tree_delete(&sbi->workstn_tree, - grp->index) != grp) { + if (xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree, + grp->index)) != grp) { #ifdef EROFS_FS_HAS_MANAGED_CACHE skip: erofs_workgroup_unfreeze(grp, 1); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9bfa66592aa7..fd25e125303c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -440,7 +440,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, rcu_read_lock(); page = radix_tree_lookup(&mapping->i_pages, pg_index); rcu_read_unlock(); - if (page && !radix_tree_exceptional_entry(page)) { + if (page && !xa_is_value(page)) { misses++; if (misses > 4) break; diff --git a/fs/dax.c b/fs/dax.c index b68ce484e1be..ebcec36335eb 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -59,56 +59,57 @@ static int __init init_dax_wait_table(void) fs_initcall(init_dax_wait_table); /* - * We use lowest available bit in exceptional entry for locking, one bit for - * the entry size (PMD) and two more to tell us if the entry is a zero page or - * an empty entry that is just used for locking. In total four special bits. + * DAX pagecache entries use XArray value entries so they can't be mistaken + * for pages. We use one bit for locking, one bit for the entry size (PMD) + * and two more to tell us if the entry is a zero page or an empty entry that + * is just used for locking. In total four special bits. * * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem * block allocation. 
*/ -#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 4) -#define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) -#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1)) -#define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) -#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3)) +#define DAX_SHIFT (4) +#define DAX_LOCKED (1UL << 0) +#define DAX_PMD (1UL << 1) +#define DAX_ZERO_PAGE (1UL << 2) +#define DAX_EMPTY (1UL << 3) static unsigned long dax_radix_pfn(void *entry) { - return (unsigned long)entry >> RADIX_DAX_SHIFT; + return xa_to_value(entry) >> DAX_SHIFT; } static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags) { - return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags | - (pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK); + return xa_mk_value(flags | ((unsigned long)pfn << DAX_SHIFT) | + DAX_LOCKED); } static unsigned int dax_radix_order(void *entry) { - if ((unsigned long)entry & RADIX_DAX_PMD) + if (xa_to_value(entry) & DAX_PMD) return PMD_SHIFT - PAGE_SHIFT; return 0; } static int dax_is_pmd_entry(void *entry) { - return (unsigned long)entry & RADIX_DAX_PMD; + return xa_to_value(entry) & DAX_PMD; } static int dax_is_pte_entry(void *entry) { - return !((unsigned long)entry & RADIX_DAX_PMD); + return !(xa_to_value(entry) & DAX_PMD); } static int dax_is_zero_entry(void *entry) { - return (unsigned long)entry & RADIX_DAX_ZERO_PAGE; + return xa_to_value(entry) & DAX_ZERO_PAGE; } static int dax_is_empty_entry(void *entry) { - return (unsigned long)entry & RADIX_DAX_EMPTY; + return xa_to_value(entry) & DAX_EMPTY; } /* @@ -186,9 +187,9 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping, */ static inline int slot_locked(struct address_space *mapping, void **slot) { - unsigned long entry = (unsigned long) - radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); - return entry & RADIX_DAX_ENTRY_LOCK; + unsigned long entry = xa_to_value( + radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock)); + return entry & DAX_LOCKED; } /* @@ -196,12 +197,11 @@ static inline int slot_locked(struct address_space *mapping, void **slot) */ static inline void *lock_slot(struct address_space *mapping, void **slot) { - unsigned long entry = (unsigned long) - radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); - - entry |= RADIX_DAX_ENTRY_LOCK; - radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry); - return (void *)entry; + unsigned long v = xa_to_value( + radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock)); + void *entry = xa_mk_value(v | DAX_LOCKED); + radix_tree_replace_slot(&mapping->i_pages, slot, entry); + return entry; } /* @@ -209,17 +209,16 @@ static inline void *lock_slot(struct address_space *mapping, void **slot) */ static inline void *unlock_slot(struct address_space *mapping, void **slot) { - unsigned long entry = (unsigned long) - radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); - - entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK; - radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry); - return (void *)entry; + unsigned long v = xa_to_value( + radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock)); + void *entry = xa_mk_value(v & ~DAX_LOCKED); + radix_tree_replace_slot(&mapping->i_pages, slot, entry); + return entry; } /* * Lookup entry in radix tree, wait for it to become unlocked if it is - * exceptional entry and return it. The caller must call + * a DAX entry and return it. 
The caller must call * put_unlocked_mapping_entry() when he decided not to lock the entry or * put_locked_mapping_entry() when he locked the entry and now wants to * unlock it. @@ -242,7 +241,7 @@ static void *__get_unlocked_mapping_entry(struct address_space *mapping, entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot); if (!entry || - WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) || + WARN_ON_ONCE(!xa_is_value(entry)) || !slot_locked(mapping, slot)) { if (slotp) *slotp = slot; @@ -283,7 +282,7 @@ static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index) xa_lock_irq(&mapping->i_pages); entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot); - if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) || + if (WARN_ON_ONCE(!entry || !xa_is_value(entry) || !slot_locked(mapping, slot))) { xa_unlock_irq(&mapping->i_pages); return; @@ -472,12 +471,11 @@ void dax_unlock_mapping_entry(struct page *page) } /* - * Find radix tree entry at given index. If it points to an exceptional entry, - * return it with the radix tree entry locked. If the radix tree doesn't - * contain given index, create an empty exceptional entry for the index and - * return with it locked. + * Find radix tree entry at given index. If it is a DAX entry, return it + * with the radix tree entry locked. If the radix tree doesn't contain the + * given index, create an empty entry for the index and return with it locked. * - * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will + * When requesting an entry with size DAX_PMD, grab_mapping_entry() will * either return that locked entry or will return an error. This error will * happen if there are any 4k entries within the 2MiB range that we are * requesting. @@ -507,13 +505,13 @@ restart: xa_lock_irq(&mapping->i_pages); entry = get_unlocked_mapping_entry(mapping, index, &slot); - if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) { + if (WARN_ON_ONCE(entry && !xa_is_value(entry))) { entry = ERR_PTR(-EIO); goto out_unlock; } if (entry) { - if (size_flag & RADIX_DAX_PMD) { + if (size_flag & DAX_PMD) { if (dax_is_pte_entry(entry)) { put_unlocked_mapping_entry(mapping, index, entry); @@ -584,7 +582,7 @@ restart: true); } - entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY); + entry = dax_radix_locked_entry(0, size_flag | DAX_EMPTY); err = __radix_tree_insert(&mapping->i_pages, index, dax_radix_order(entry), entry); @@ -673,8 +671,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) if (index >= end) break; - if (WARN_ON_ONCE( - !radix_tree_exceptional_entry(pvec_ent))) + if (WARN_ON_ONCE(!xa_is_value(pvec_ent))) continue; xa_lock_irq(&mapping->i_pages); @@ -713,7 +710,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping, xa_lock_irq(pages); entry = get_unlocked_mapping_entry(mapping, index, NULL); - if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry))) + if (!entry || WARN_ON_ONCE(!xa_is_value(entry))) goto out; if (!trunc && (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) || @@ -729,8 +726,8 @@ out: return ret; } /* - * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree - * entry to get unlocked before deleting it. + * Delete DAX entry at @index from @mapping. Wait for it + * to be unlocked before deleting it. 
*/ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) { @@ -740,7 +737,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) * This gets called from truncate / punch_hole path. As such, the caller * must hold locks protecting against concurrent modifications of the * radix tree (usually fs-private i_mmap_sem for writing). Since the - * caller has seen exceptional entry for this index, we better find it + * caller has seen a DAX entry for this index, we better find it * at that index as well... */ WARN_ON_ONCE(!ret); @@ -748,7 +745,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) } /* - * Invalidate exceptional DAX entry if it is clean. + * Invalidate DAX entry if it is clean. */ int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index) @@ -802,7 +799,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, if (dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) { + if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) { /* we are replacing a zero page with block mapping */ if (dax_is_pmd_entry(entry)) unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR, @@ -940,13 +937,13 @@ static int dax_writeback_one(struct dax_device *dax_dev, * A page got tagged dirty in DAX mapping? Something is seriously * wrong. */ - if (WARN_ON(!radix_tree_exceptional_entry(entry))) + if (WARN_ON(!xa_is_value(entry))) return -EIO; xa_lock_irq(pages); entry2 = get_unlocked_mapping_entry(mapping, index, &slot); /* Entry got punched out / reallocated? */ - if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2))) + if (!entry2 || WARN_ON_ONCE(!xa_is_value(entry2))) goto put_unlocked; /* * Entry got reallocated elsewhere? No need to writeback. We have to @@ -1123,8 +1120,9 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry, pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr)); vm_fault_t ret; - dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE, - false); + dax_insert_mapping_entry(mapping, vmf, entry, pfn, + DAX_ZERO_PAGE, false); + ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); trace_dax_load_hole(inode, vmf, ret); return ret; @@ -1514,7 +1512,7 @@ static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, pfn = page_to_pfn_t(zero_page); ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn, - RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false); + DAX_PMD | DAX_ZERO_PAGE, false); ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); if (!pmd_none(*(vmf->pmd))) { @@ -1597,7 +1595,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, * is already in the tree, for instance), it will return -EEXIST and * we just fall back to 4k entries. 
*/ - entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); + entry = grab_mapping_entry(mapping, pgoff, DAX_PMD); if (IS_ERR(entry)) goto fallback; @@ -1635,7 +1633,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, goto finish_iomap; entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn, - RADIX_DAX_PMD, write && !sync); + DAX_PMD, write && !sync); /* * If we are doing synchronous page fault and inode needs fsync, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 5ea1d64cb0b4..669abb617321 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -521,7 +521,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - if (radix_tree_exceptional_entry(page)) + if (xa_is_value(page)) mss->swap += PAGE_SIZE; else put_page(page); diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 34149e8b5f73..e9e76ab4fbbf 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -28,34 +28,26 @@ #include #include #include +#include /* * The bottom two bits of the slot determine how the remaining bits in the * slot are interpreted: * * 00 - data pointer - * 01 - internal entry - * 10 - exceptional entry - * 11 - this bit combination is currently unused/reserved + * 10 - internal entry + * x1 - value entry * * The internal entry may be a pointer to the next level in the tree, a * sibling entry, or an indicator that the entry in this slot has been moved * to another location in the tree and the lookup should be restarted. While * NULL fits the 'data pointer' pattern, it means that there is no entry in * the tree for this index (no matter what level of the tree it is found at). - * This means that you cannot store NULL in the tree as a value for the index. + * This means that storing a NULL entry in the tree is the same as deleting + * the entry from the tree. */ #define RADIX_TREE_ENTRY_MASK 3UL -#define RADIX_TREE_INTERNAL_NODE 1UL - -/* - * Most users of the radix tree store pointers but shmem/tmpfs stores swap - * entries in the same tree. They are marked as exceptional entries to - * distinguish them from pointers to struct page. - * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it. - */ -#define RADIX_TREE_EXCEPTIONAL_ENTRY 2 -#define RADIX_TREE_EXCEPTIONAL_SHIFT 2 +#define RADIX_TREE_INTERNAL_NODE 2UL static inline bool radix_tree_is_internal_node(void *ptr) { @@ -83,11 +75,10 @@ static inline bool radix_tree_is_internal_node(void *ptr) /* * @count is the count of every non-NULL element in the ->slots array - * whether that is an exceptional entry, a retry entry, a user pointer, + * whether that is a value entry, a retry entry, a user pointer, * a sibling entry or a pointer to the next level of the tree. * @exceptional is the count of every element in ->slots which is - * either radix_tree_exceptional_entry() or is a sibling entry for an - * exceptional entry. + * either a value entry or a sibling of a value entry. */ struct radix_tree_node { unsigned char shift; /* Bits remaining in each slot */ @@ -268,17 +259,6 @@ static inline int radix_tree_deref_retry(void *arg) return unlikely(radix_tree_is_internal_node(arg)); } -/** - * radix_tree_exceptional_entry - radix_tree_deref_slot gave exceptional entry? - * @arg: value returned by radix_tree_deref_slot - * Returns: 0 if well-aligned pointer, non-0 if exceptional entry. 
- */ -static inline int radix_tree_exceptional_entry(void *arg) -{ - /* Not unlikely because radix_tree_exception often tested first */ - return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY; -} - /** * radix_tree_exception - radix_tree_deref_slot returned either exception? * @arg: value returned by radix_tree_deref_slot diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 22af9d8a84ae..4d961668e5fc 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -18,9 +18,8 @@ * * swp_entry_t's are *never* stored anywhere in their arch-dependent format. */ -#define SWP_TYPE_SHIFT(e) ((sizeof(e.val) * 8) - \ - (MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT)) -#define SWP_OFFSET_MASK(e) ((1UL << SWP_TYPE_SHIFT(e)) - 1) +#define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT) +#define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1) /* * Store a type+offset into a swp_entry_t in an arch-independent format @@ -29,8 +28,7 @@ static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset) { swp_entry_t ret; - ret.val = (type << SWP_TYPE_SHIFT(ret)) | - (offset & SWP_OFFSET_MASK(ret)); + ret.val = (type << SWP_TYPE_SHIFT) | (offset & SWP_OFFSET_MASK); return ret; } @@ -40,7 +38,7 @@ static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset) */ static inline unsigned swp_type(swp_entry_t entry) { - return (entry.val >> SWP_TYPE_SHIFT(entry)); + return (entry.val >> SWP_TYPE_SHIFT); } /* @@ -49,7 +47,7 @@ static inline unsigned swp_type(swp_entry_t entry) */ static inline pgoff_t swp_offset(swp_entry_t entry) { - return entry.val & SWP_OFFSET_MASK(entry); + return entry.val & SWP_OFFSET_MASK; } #ifdef CONFIG_MMU @@ -90,16 +88,13 @@ static inline swp_entry_t radix_to_swp_entry(void *arg) { swp_entry_t entry; - entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT; + entry.val = xa_to_value(arg); return entry; } static inline void *swp_to_radix_entry(swp_entry_t entry) { - unsigned long value; - - value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT; - return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY); + return xa_mk_value(entry.val); } #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 9e4c86853fa4..5c8acfc4ff55 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -5,9 +5,111 @@ * eXtensible Arrays * Copyright (c) 2017 Microsoft Corporation * Author: Matthew Wilcox + * + * See Documentation/core-api/xarray.rst for how to use the XArray. */ +#include #include +#include + +/* + * The bottom two bits of the entry determine how the XArray interprets + * the contents: + * + * 00: Pointer entry + * 10: Internal entry + * x1: Value entry or tagged pointer + * + * Attempting to store internal entries in the XArray is a bug. + */ + +#define BITS_PER_XA_VALUE (BITS_PER_LONG - 1) + +/** + * xa_mk_value() - Create an XArray entry from an integer. + * @v: Value to store in XArray. + * + * Context: Any context. + * Return: An entry suitable for storing in the XArray. + */ +static inline void *xa_mk_value(unsigned long v) +{ + WARN_ON((long)v < 0); + return (void *)((v << 1) | 1); +} + +/** + * xa_to_value() - Get value stored in an XArray entry. + * @entry: XArray entry. + * + * Context: Any context. + * Return: The value stored in the XArray entry. + */ +static inline unsigned long xa_to_value(const void *entry) +{ + return (unsigned long)entry >> 1; +} + +/** + * xa_is_value() - Determine if an entry is a value. + * @entry: XArray entry. + * + * Context: Any context. 
+ * Return: True if the entry is a value, false if it is a pointer. + */ +static inline bool xa_is_value(const void *entry) +{ + return (unsigned long)entry & 1; +} + +/** + * xa_tag_pointer() - Create an XArray entry for a tagged pointer. + * @p: Plain pointer. + * @tag: Tag value (0, 1 or 3). + * + * If the user of the XArray prefers, they can tag their pointers instead + * of storing value entries. Three tags are available (0, 1 and 3). + * These are distinct from the xa_mark_t as they are not replicated up + * through the array and cannot be searched for. + * + * Context: Any context. + * Return: An XArray entry. + */ +static inline void *xa_tag_pointer(void *p, unsigned long tag) +{ + return (void *)((unsigned long)p | tag); +} + +/** + * xa_untag_pointer() - Turn an XArray entry into a plain pointer. + * @entry: XArray entry. + * + * If you have stored a tagged pointer in the XArray, call this function + * to get the untagged version of the pointer. + * + * Context: Any context. + * Return: A pointer. + */ +static inline void *xa_untag_pointer(void *entry) +{ + return (void *)((unsigned long)entry & ~3UL); +} + +/** + * xa_pointer_tag() - Get the tag stored in an XArray entry. + * @entry: XArray entry. + * + * If you have stored a tagged pointer in the XArray, call this function + * to get the tag of that pointer. + * + * Context: Any context. + * Return: A tag. + */ +static inline unsigned int xa_pointer_tag(void *entry) +{ + return (unsigned long)entry & 3UL; +} #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) diff --git a/lib/idr.c b/lib/idr.c index 729e381e23b4..88419fbc5737 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -338,11 +338,8 @@ EXPORT_SYMBOL(idr_replace); * by the number of bits in the leaf bitmap before doing a radix tree lookup. * * As an optimisation, if there are only a few low bits set in any given - * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional - * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits - * directly in the entry. By being really tricksy, we could store - * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising - * for 0-3 allocated IDs. + * leaf, instead of allocating a 128-byte bitmap, we store the bits + * directly in the entry. * * We allow the radix tree 'exceptional' count to get out of date. Nothing * in the IDA nor the radix tree code checks it. 
If it becomes important @@ -366,12 +363,11 @@ static int ida_get_new_above(struct ida *ida, int start) struct radix_tree_iter iter; struct ida_bitmap *bitmap; unsigned long index; - unsigned bit, ebit; + unsigned bit; int new; index = start / IDA_BITMAP_BITS; bit = start % IDA_BITMAP_BITS; - ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT; slot = radix_tree_iter_init(&iter, index); for (;;) { @@ -386,25 +382,23 @@ static int ida_get_new_above(struct ida *ida, int start) return PTR_ERR(slot); } } - if (iter.index > index) { + if (iter.index > index) bit = 0; - ebit = RADIX_TREE_EXCEPTIONAL_SHIFT; - } new = iter.index * IDA_BITMAP_BITS; bitmap = rcu_dereference_raw(*slot); - if (radix_tree_exception(bitmap)) { - unsigned long tmp = (unsigned long)bitmap; - ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit); - if (ebit < BITS_PER_LONG) { - tmp |= 1UL << ebit; - rcu_assign_pointer(*slot, (void *)tmp); - return new + ebit - - RADIX_TREE_EXCEPTIONAL_SHIFT; + if (xa_is_value(bitmap)) { + unsigned long tmp = xa_to_value(bitmap); + int vbit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, + bit); + if (vbit < BITS_PER_XA_VALUE) { + tmp |= 1UL << vbit; + rcu_assign_pointer(*slot, xa_mk_value(tmp)); + return new + vbit; } bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; + bitmap->bitmap[0] = tmp; rcu_assign_pointer(*slot, bitmap); } @@ -425,17 +419,14 @@ static int ida_get_new_above(struct ida *ida, int start) new += bit; if (new < 0) return -ENOSPC; - if (ebit < BITS_PER_LONG) { - bitmap = (void *)((1UL << ebit) | - RADIX_TREE_EXCEPTIONAL_ENTRY); - radix_tree_iter_replace(root, &iter, slot, - bitmap); - return new; + if (bit < BITS_PER_XA_VALUE) { + bitmap = xa_mk_value(1UL << bit); + } else { + bitmap = this_cpu_xchg(ida_bitmap, NULL); + if (!bitmap) + return -EAGAIN; + __set_bit(bit, bitmap->bitmap); } - bitmap = this_cpu_xchg(ida_bitmap, NULL); - if (!bitmap) - return -EAGAIN; - __set_bit(bit, bitmap->bitmap); radix_tree_iter_replace(root, &iter, slot, bitmap); } @@ -457,9 +448,9 @@ static void ida_remove(struct ida *ida, int id) goto err; bitmap = rcu_dereference_raw(*slot); - if (radix_tree_exception(bitmap)) { + if (xa_is_value(bitmap)) { btmp = (unsigned long *)slot; - offset += RADIX_TREE_EXCEPTIONAL_SHIFT; + offset += 1; /* Intimate knowledge of the value encoding */ if (offset >= BITS_PER_LONG) goto err; } else { @@ -470,9 +461,8 @@ static void ida_remove(struct ida *ida, int id) __clear_bit(offset, btmp); radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); - if (radix_tree_exception(bitmap)) { - if (rcu_dereference_raw(*slot) == - (void *)RADIX_TREE_EXCEPTIONAL_ENTRY) + if (xa_is_value(bitmap)) { + if (xa_to_value(rcu_dereference_raw(*slot)) == 0) radix_tree_iter_delete(&ida->ida_rt, &iter, slot); } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) { kfree(bitmap); @@ -503,7 +493,7 @@ void ida_destroy(struct ida *ida) xa_lock_irqsave(&ida->ida_rt, flags); radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); - if (!radix_tree_exception(bitmap)) + if (!xa_is_value(bitmap)) kfree(bitmap); radix_tree_iter_delete(&ida->ida_rt, &iter, slot); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index a904a8ddd174..b6c0e7f3a894 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -340,14 +340,12 @@ static void dump_ida_node(void *entry, unsigned long index) for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) dump_ida_node(node->slots[i], index | (i << node->shift)); - } 
else if (radix_tree_exceptional_entry(entry)) { + } else if (xa_is_value(entry)) { pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n", entry, (int)(index & RADIX_TREE_MAP_MASK), index * IDA_BITMAP_BITS, - index * IDA_BITMAP_BITS + BITS_PER_LONG - - RADIX_TREE_EXCEPTIONAL_SHIFT, - (unsigned long)entry >> - RADIX_TREE_EXCEPTIONAL_SHIFT); + index * IDA_BITMAP_BITS + BITS_PER_XA_VALUE, + xa_to_value(entry)); } else { struct ida_bitmap *bitmap = entry; @@ -656,7 +654,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, BUG_ON(shift > BITS_PER_LONG); if (radix_tree_is_internal_node(entry)) { entry_to_node(entry)->parent = node; - } else if (radix_tree_exceptional_entry(entry)) { + } else if (xa_is_value(entry)) { /* Moving an exceptional root->rnode to a node */ node->exceptional = 1; } @@ -955,12 +953,12 @@ static inline int insert_entries(struct radix_tree_node *node, !is_sibling_entry(node, old) && (old != RADIX_TREE_RETRY)) radix_tree_free_nodes(old); - if (radix_tree_exceptional_entry(old)) + if (xa_is_value(old)) node->exceptional--; } if (node) { node->count += n; - if (radix_tree_exceptional_entry(item)) + if (xa_is_value(item)) node->exceptional += n; } return n; @@ -974,7 +972,7 @@ static inline int insert_entries(struct radix_tree_node *node, rcu_assign_pointer(*slot, item); if (node) { node->count++; - if (radix_tree_exceptional_entry(item)) + if (xa_is_value(item)) node->exceptional++; } return 1; @@ -1190,8 +1188,7 @@ void __radix_tree_replace(struct radix_tree_root *root, radix_tree_update_node_t update_node) { void *old = rcu_dereference_raw(*slot); - int exceptional = !!radix_tree_exceptional_entry(item) - - !!radix_tree_exceptional_entry(old); + int exceptional = !!xa_is_value(item) - !!xa_is_value(old); int count = calculate_count(root, node, slot, item, old); /* @@ -1992,7 +1989,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot) { void *old = rcu_dereference_raw(*slot); - int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0; + int exceptional = xa_is_value(old) ? 
-1 : 0; unsigned offset = get_slot_offset(node, slot); int tag; diff --git a/mm/filemap.c b/mm/filemap.c index 52517f28e6f4..4de14e75c4ec 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -127,7 +127,7 @@ static int page_cache_tree_insert(struct address_space *mapping, p = radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); - if (!radix_tree_exceptional_entry(p)) + if (!xa_is_value(p)) return -EEXIST; mapping->nrexceptional--; @@ -336,7 +336,7 @@ page_cache_tree_delete_batch(struct address_space *mapping, break; page = radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); - if (radix_tree_exceptional_entry(page)) + if (xa_is_value(page)) continue; if (!tail_pages) { /* @@ -1355,7 +1355,7 @@ pgoff_t page_cache_next_hole(struct address_space *mapping, struct page *page; page = radix_tree_lookup(&mapping->i_pages, index); - if (!page || radix_tree_exceptional_entry(page)) + if (!page || xa_is_value(page)) break; index++; if (index == 0) @@ -1396,7 +1396,7 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping, struct page *page; page = radix_tree_lookup(&mapping->i_pages, index); - if (!page || radix_tree_exceptional_entry(page)) + if (!page || xa_is_value(page)) break; index--; if (index == ULONG_MAX) @@ -1539,7 +1539,7 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, repeat: page = find_get_entry(mapping, offset); - if (radix_tree_exceptional_entry(page)) + if (xa_is_value(page)) page = NULL; if (!page) goto no_page; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index a31d740e6cd1..4820e4adf853 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1369,7 +1369,7 @@ static void collapse_shmem(struct mm_struct *mm, page = radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); - if (radix_tree_exceptional_entry(page) || !PageUptodate(page)) { + if (xa_is_value(page) || !PageUptodate(page)) { xa_unlock_irq(&mapping->i_pages); /* swap in or instantiate fallocated page */ if (shmem_getpage(mapping->host, index, &page, diff --git a/mm/madvise.c b/mm/madvise.c index 972a9eaa898b..9d802566c494 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -251,7 +251,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma, index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; page = find_get_entry(mapping, index); - if (!radix_tree_exceptional_entry(page)) { + if (!xa_is_value(page)) { if (page) put_page(page); continue; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e79cb59552d9..29d9d1a69b36 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4750,7 +4750,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, /* shmem/tmpfs may report page out on swap: account for that too. */ if (shmem_mapping(mapping)) { page = find_get_entry(mapping, pgoff); - if (radix_tree_exceptional_entry(page)) { + if (xa_is_value(page)) { swp_entry_t swp = radix_to_swp_entry(page); if (do_memsw_account()) *entry = swp; diff --git a/mm/mincore.c b/mm/mincore.c index fc37afe226e6..4985965aa20a 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -66,7 +66,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) * shmem/tmpfs may return swap: account for swapcache * page too. 
*/ - if (radix_tree_exceptional_entry(page)) { + if (xa_is_value(page)) { swp_entry_t swp = radix_to_swp_entry(page); page = find_get_page(swap_address_space(swp), swp_offset(swp)); diff --git a/mm/readahead.c b/mm/readahead.c index 4e630143a0ba..de657077d41d 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -179,7 +179,7 @@ unsigned int __do_page_cache_readahead(struct address_space *mapping, rcu_read_lock(); page = radix_tree_lookup(&mapping->i_pages, page_offset); rcu_read_unlock(); - if (page && !radix_tree_exceptional_entry(page)) { + if (page && !xa_is_value(page)) { /* * Page already present? Kick off the current batch of * contiguous pages before continuing with the next diff --git a/mm/shmem.c b/mm/shmem.c index 446942677cd4..c1062760fe41 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -709,7 +709,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping, continue; } - if (radix_tree_exceptional_entry(page)) + if (xa_is_value(page)) swapped++; if (need_resched()) { @@ -824,7 +824,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (index >= end) break; - if (radix_tree_exceptional_entry(page)) { + if (xa_is_value(page)) { if (unfalloc) continue; nr_swaps_freed += !shmem_free_swap(mapping, @@ -921,7 +921,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (index >= end) break; - if (radix_tree_exceptional_entry(page)) { + if (xa_is_value(page)) { if (unfalloc) continue; if (shmem_free_swap(mapping, index, page)) { @@ -1643,7 +1643,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, repeat: swap.val = 0; page = find_lock_entry(mapping, index); - if (radix_tree_exceptional_entry(page)) { + if (xa_is_value(page)) { swap = radix_to_swp_entry(page); page = NULL; } @@ -2578,7 +2578,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping, index = indices[i]; } page = pvec.pages[i]; - if (page && !radix_tree_exceptional_entry(page)) { + if (page && !xa_is_value(page)) { if (!PageUptodate(page)) page = NULL; } diff --git a/mm/swap.c b/mm/swap.c index 26fc9b5f1b6c..4c5c7fcc6e46 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -965,7 +965,7 @@ void pagevec_remove_exceptionals(struct pagevec *pvec) for (i = 0, j = 0; i < pagevec_count(pvec); i++) { struct page *page = pvec->pages[i]; - if (!radix_tree_exceptional_entry(page)) + if (!xa_is_value(page)) pvec->pages[j++] = page; } pvec->nr = j; diff --git a/mm/truncate.c b/mm/truncate.c index 1d2fb2dca96f..ed778555c9f3 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -70,7 +70,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping, return; for (j = 0; j < pagevec_count(pvec); j++) - if (radix_tree_exceptional_entry(pvec->pages[j])) + if (xa_is_value(pvec->pages[j])) break; if (j == pagevec_count(pvec)) @@ -85,7 +85,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping, struct page *page = pvec->pages[i]; pgoff_t index = indices[i]; - if (!radix_tree_exceptional_entry(page)) { + if (!xa_is_value(page)) { pvec->pages[j++] = page; continue; } @@ -347,7 +347,7 @@ void truncate_inode_pages_range(struct address_space *mapping, if (index >= end) break; - if (radix_tree_exceptional_entry(page)) + if (xa_is_value(page)) continue; if (!trylock_page(page)) @@ -442,7 +442,7 @@ void truncate_inode_pages_range(struct address_space *mapping, break; } - if (radix_tree_exceptional_entry(page)) + if (xa_is_value(page)) continue; lock_page(page); @@ -561,7 +561,7 @@ unsigned long 
invalidate_mapping_pages(struct address_space *mapping, if (index > end) break; - if (radix_tree_exceptional_entry(page)) { + if (xa_is_value(page)) { invalidate_exceptional_entry(mapping, index, page); continue; @@ -692,7 +692,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, if (index > end) break; - if (radix_tree_exceptional_entry(page)) { + if (xa_is_value(page)) { if (!invalidate_exceptional_entry2(mapping, index, page)) ret = -EBUSY; diff --git a/mm/workingset.c b/mm/workingset.c index 4516dd790129..bb109b3afac2 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -155,8 +155,8 @@ * refault distance will immediately activate the refaulting page. */ -#define EVICTION_SHIFT (RADIX_TREE_EXCEPTIONAL_ENTRY + \ - NODES_SHIFT + \ +#define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \ + NODES_SHIFT + \ MEM_CGROUP_ID_SHIFT) #define EVICTION_MASK (~0UL >> EVICTION_SHIFT) @@ -173,20 +173,19 @@ static unsigned int bucket_order __read_mostly; static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction) { eviction >>= bucket_order; + eviction &= EVICTION_MASK; eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; eviction = (eviction << NODES_SHIFT) | pgdat->node_id; - eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT); - return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY); + return xa_mk_value(eviction); } static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, unsigned long *evictionp) { - unsigned long entry = (unsigned long)shadow; + unsigned long entry = xa_to_value(shadow); int memcgid, nid; - entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT; nid = entry & ((1UL << NODES_SHIFT) - 1); entry >>= NODES_SHIFT; memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); @@ -453,7 +452,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, goto out_invalid; for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { if (node->slots[i]) { - if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i]))) + if (WARN_ON_ONCE(!xa_is_value(node->slots[i]))) goto out_invalid; if (WARN_ON_ONCE(!node->exceptional)) goto out_invalid; diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index f620c831a4b5..a5a4494922a6 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -19,7 +19,7 @@ #include "test.h" -#define DUMMY_PTR ((void *)0x12) +#define DUMMY_PTR ((void *)0x10) int item_idr_free(int id, void *p, void *data) { @@ -411,11 +411,11 @@ void ida_check_conv_user(void) int id = ida_alloc(&ida, GFP_NOWAIT); if (id == -ENOMEM) { IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) != - BITS_PER_LONG - 2); + BITS_PER_XA_VALUE); id = ida_alloc(&ida, GFP_KERNEL); } else { IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) == - BITS_PER_LONG - 2); + BITS_PER_XA_VALUE); } IDA_BUG_ON(&ida, id != i); } diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h index 24f13d27a8da..d1635a5bef02 100644 --- a/tools/testing/radix-tree/linux/radix-tree.h +++ b/tools/testing/radix-tree/linux/radix-tree.h @@ -2,7 +2,6 @@ #ifndef _TEST_RADIX_TREE_H #define _TEST_RADIX_TREE_H -#include "generated/map-shift.h" #include "../../../../include/linux/radix-tree.h" extern int kmalloc_verbose; diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 7bf405638b0b..2b4f4dba1882 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -39,12 +39,11 @@ static void __multiorder_tag_test(int index, int 
order) /* * Verify we get collisions for covered indices. We try and fail to - * insert an exceptional entry so we don't leak memory via + * insert a value entry so we don't leak memory via * item_insert_order(). */ for_each_index(i, base, order) { - err = __radix_tree_insert(&tree, i, order, - (void *)(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY)); + err = __radix_tree_insert(&tree, i, order, xa_mk_value(0xA0)); assert(err == -EEXIST); } @@ -380,8 +379,8 @@ static void multiorder_join1(unsigned long index, } /* - * Check that the accounting of exceptional entries is handled correctly - * by joining an exceptional entry to a normal pointer. + * Check that the accounting of value entries is handled correctly + * by joining a value entry to a normal pointer. */ static void multiorder_join2(unsigned order1, unsigned order2) { @@ -391,9 +390,9 @@ static void multiorder_join2(unsigned order1, unsigned order2) void *item2; item_insert_order(&tree, 0, order2); - radix_tree_insert(&tree, 1 << order2, (void *)0x12UL); + radix_tree_insert(&tree, 1 << order2, xa_mk_value(5)); item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); - assert(item2 == (void *)0x12UL); + assert(item2 == xa_mk_value(5)); assert(node->exceptional == 1); item2 = radix_tree_lookup(&tree, 0); @@ -407,7 +406,7 @@ static void multiorder_join2(unsigned order1, unsigned order2) } /* - * This test revealed an accounting bug for exceptional entries at one point. + * This test revealed an accounting bug for value entries at one point. * Nodes were being freed back into the pool with an elevated exception count * by radix_tree_join() and then radix_tree_split() was failing to zero the * count of exceptional entries. @@ -421,16 +420,16 @@ static void multiorder_join3(unsigned int order) unsigned long i; for (i = 0; i < (1 << order); i++) { - radix_tree_insert(&tree, i, (void *)0x12UL); + radix_tree_insert(&tree, i, xa_mk_value(5)); } - radix_tree_join(&tree, 0, order, (void *)0x16UL); + radix_tree_join(&tree, 0, order, xa_mk_value(7)); rcu_barrier(); radix_tree_split(&tree, 0, 0); radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_iter_replace(&tree, &iter, slot, (void *)0x12UL); + radix_tree_iter_replace(&tree, &iter, slot, xa_mk_value(5)); } __radix_tree_lookup(&tree, 0, &node, NULL); @@ -517,10 +516,10 @@ static void __multiorder_split2(int old_order, int new_order) struct radix_tree_node *node; void *item; - __radix_tree_insert(&tree, 0, old_order, (void *)0x12); + __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5)); item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == (void *)0x12); + assert(item == xa_mk_value(5)); assert(node->exceptional > 0); radix_tree_split(&tree, 0, new_order); @@ -530,7 +529,7 @@ static void __multiorder_split2(int old_order, int new_order) } item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item != (void *)0x12); + assert(item != xa_mk_value(5)); assert(node->exceptional == 0); item_kill_tree(&tree); @@ -544,40 +543,40 @@ static void __multiorder_split3(int old_order, int new_order) struct radix_tree_node *node; void *item; - __radix_tree_insert(&tree, 0, old_order, (void *)0x12); + __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5)); item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == (void *)0x12); + assert(item == xa_mk_value(5)); assert(node->exceptional > 0); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16); + radix_tree_iter_replace(&tree, 
&iter, slot, xa_mk_value(7)); } item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == (void *)0x16); + assert(item == xa_mk_value(7)); assert(node->exceptional > 0); item_kill_tree(&tree); - __radix_tree_insert(&tree, 0, old_order, (void *)0x12); + __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5)); item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == (void *)0x12); + assert(item == xa_mk_value(5)); assert(node->exceptional > 0); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { if (iter.index == (1 << new_order)) radix_tree_iter_replace(&tree, &iter, slot, - (void *)0x16); + xa_mk_value(7)); else radix_tree_iter_replace(&tree, &iter, slot, NULL); } item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL); - assert(item == (void *)0x16); + assert(item == xa_mk_value(7)); assert(node->count == node->exceptional); do { node = node->parent; @@ -610,13 +609,13 @@ static void multiorder_account(void) item_insert_order(&tree, 0, 5); - __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12); + __radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5)); __radix_tree_lookup(&tree, 0, &node, NULL); assert(node->count == node->exceptional * 2); radix_tree_delete(&tree, 1 << 5); assert(node->exceptional == 0); - __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12); + __radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5)); __radix_tree_lookup(&tree, 1 << 5, &node, &slot); assert(node->count == node->exceptional * 2); __radix_tree_replace(&tree, node, slot, NULL, NULL); diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index def6015570b2..62de66c314b7 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -295,7 +295,7 @@ void item_kill_tree(struct radix_tree_root *root) int nfound; radix_tree_for_each_slot(slot, root, &iter, 0) { - if (radix_tree_exceptional_entry(*slot)) + if (xa_is_value(*slot)) radix_tree_delete(root, iter.index); } -- cgit v1.2.3-71-gd317 From 02c02bf12c5d838603eed44195d3e91f094e2ab2 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Nov 2017 23:09:45 -0400 Subject: xarray: Change definition of sibling entries Instead of storing a pointer to the slot containing the canonical entry, store the offset of the slot. Produces slightly more efficient code (~300 bytes) and simplifies the implementation. Signed-off-by: Matthew Wilcox Reviewed-by: Josef Bacik --- include/linux/radix-tree.h | 5 +- include/linux/xarray.h | 92 +++++++++++++++++++++++++++ lib/Kconfig | 7 ++ lib/radix-tree.c | 64 ++++++------------- tools/testing/radix-tree/Makefile | 2 +- tools/testing/radix-tree/generated/autoconf.h | 1 + 6 files changed, 121 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index e9e76ab4fbbf..60f3d8eb2cb7 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -59,10 +59,7 @@ static inline bool radix_tree_is_internal_node(void *ptr) #define RADIX_TREE_MAX_TAGS 3 -#ifndef RADIX_TREE_MAP_SHIFT -#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 
4 : 6) -#endif - +#define RADIX_TREE_MAP_SHIFT XA_CHUNK_SHIFT #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 5c8acfc4ff55..4d1cd7a083e8 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -22,6 +22,12 @@ * x1: Value entry or tagged pointer * * Attempting to store internal entries in the XArray is a bug. + * + * Most internal entries are pointers to the next node in the tree. + * The following internal entries have a special meaning: + * + * 0-62: Sibling entries + * 256: Retry entry */ #define BITS_PER_XA_VALUE (BITS_PER_LONG - 1) @@ -111,6 +117,42 @@ static inline unsigned int xa_pointer_tag(void *entry) return (unsigned long)entry & 3UL; } +/* + * xa_mk_internal() - Create an internal entry. + * @v: Value to turn into an internal entry. + * + * Context: Any context. + * Return: An XArray internal entry corresponding to this value. + */ +static inline void *xa_mk_internal(unsigned long v) +{ + return (void *)((v << 2) | 2); +} + +/* + * xa_to_internal() - Extract the value from an internal entry. + * @entry: XArray entry. + * + * Context: Any context. + * Return: The value which was stored in the internal entry. + */ +static inline unsigned long xa_to_internal(const void *entry) +{ + return (unsigned long)entry >> 2; +} + +/* + * xa_is_internal() - Is the entry an internal entry? + * @entry: XArray entry. + * + * Context: Any context. + * Return: %true if the entry is an internal entry. + */ +static inline bool xa_is_internal(const void *entry) +{ + return ((unsigned long)entry & 3) == 2; +} + #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) #define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) @@ -123,4 +165,54 @@ static inline unsigned int xa_pointer_tag(void *entry) #define xa_unlock_irqrestore(xa, flags) \ spin_unlock_irqrestore(&(xa)->xa_lock, flags) +/* Everything below here is the Advanced API. Proceed with caution. */ + +/* + * The xarray is constructed out of a set of 'chunks' of pointers. Choosing + * the best chunk size requires some tradeoffs. A power of two recommends + * itself so that we can walk the tree based purely on shifts and masks. + * Generally, the larger the better; as the number of slots per level of the + * tree increases, the less tall the tree needs to be. But that needs to be + * balanced against the memory consumption of each node. On a 64-bit system, + * xa_node is currently 576 bytes, and we get 7 of them per 4kB page. If we + * doubled the number of slots per node, we'd get only 3 nodes per 4kB page. + */ +#ifndef XA_CHUNK_SHIFT +#define XA_CHUNK_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) +#endif +#define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT) +#define XA_CHUNK_MASK (XA_CHUNK_SIZE - 1) + +/* Private */ +static inline bool xa_is_node(const void *entry) +{ + return xa_is_internal(entry) && (unsigned long)entry > 4096; +} + +/* Private */ +static inline void *xa_mk_sibling(unsigned int offset) +{ + return xa_mk_internal(offset); +} + +/* Private */ +static inline unsigned long xa_to_sibling(const void *entry) +{ + return xa_to_internal(entry); +} + +/** + * xa_is_sibling() - Is the entry a sibling entry? + * @entry: Entry retrieved from the XArray + * + * Return: %true if the entry is a sibling entry. 
+ */ +static inline bool xa_is_sibling(const void *entry) +{ + return IS_ENABLED(CONFIG_XARRAY_MULTI) && xa_is_internal(entry) && + (entry < xa_mk_sibling(XA_CHUNK_SIZE - 1)); +} + +#define XA_RETRY_ENTRY xa_mk_internal(256) + #endif /* _LINUX_XARRAY_H */ diff --git a/lib/Kconfig b/lib/Kconfig index a3928d4438b5..40bfa6ccd294 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -399,8 +399,15 @@ config INTERVAL_TREE for more information. +config XARRAY_MULTI + bool + help + Support entries which occupy multiple consecutive indices in the + XArray. + config RADIX_TREE_MULTIORDER bool + select XARRAY_MULTI config ASSOCIATIVE_ARRAY bool diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b6c0e7f3a894..59b28111eabc 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -38,6 +38,7 @@ #include #include #include +#include /* Number of nodes in fully populated tree of given height */ @@ -98,24 +99,7 @@ static inline void *node_to_entry(void *ptr) return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); } -#define RADIX_TREE_RETRY node_to_entry(NULL) - -#ifdef CONFIG_RADIX_TREE_MULTIORDER -/* Sibling slots point directly to another slot in the same node */ -static inline -bool is_sibling_entry(const struct radix_tree_node *parent, void *node) -{ - void __rcu **ptr = node; - return (parent->slots <= ptr) && - (ptr < parent->slots + RADIX_TREE_MAP_SIZE); -} -#else -static inline -bool is_sibling_entry(const struct radix_tree_node *parent, void *node) -{ - return false; -} -#endif +#define RADIX_TREE_RETRY XA_RETRY_ENTRY static inline unsigned long get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) @@ -129,16 +113,10 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent, unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); -#ifdef CONFIG_RADIX_TREE_MULTIORDER - if (radix_tree_is_internal_node(entry)) { - if (is_sibling_entry(parent, entry)) { - void __rcu **sibentry; - sibentry = (void __rcu **) entry_to_node(entry); - offset = get_slot_offset(parent, sibentry); - entry = rcu_dereference_raw(*sibentry); - } + if (xa_is_sibling(entry)) { + offset = xa_to_sibling(entry); + entry = rcu_dereference_raw(parent->slots[offset]); } -#endif *nodep = (void *)entry; return offset; @@ -300,10 +278,10 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) } else if (!radix_tree_is_internal_node(entry)) { pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n", entry, i, first, last, node); - } else if (is_sibling_entry(node, entry)) { + } else if (xa_is_sibling(entry)) { pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n", entry, i, first, last, node, - *(void **)entry_to_node(entry)); + node->slots[xa_to_sibling(entry)]); } else { dump_node(entry_to_node(entry), first); } @@ -881,8 +859,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) for (;;) { void *entry = rcu_dereference_raw(child->slots[offset]); - if (radix_tree_is_internal_node(entry) && child->shift && - !is_sibling_entry(child, entry)) { + if (xa_is_node(entry) && child->shift) { child = entry_to_node(entry); offset = 0; continue; @@ -904,7 +881,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) static inline int insert_entries(struct radix_tree_node *node, void __rcu **slot, void *item, unsigned order, bool replace) { - struct radix_tree_node *child; + void *sibling; unsigned i, n, tag, offset, tags = 0; if (node) { @@ -922,7 +899,7 @@ 
static inline int insert_entries(struct radix_tree_node *node, offset = offset & ~(n - 1); slot = &node->slots[offset]; } - child = node_to_entry(slot); + sibling = xa_mk_sibling(offset); for (i = 0; i < n; i++) { if (slot[i]) { @@ -939,7 +916,7 @@ static inline int insert_entries(struct radix_tree_node *node, for (i = 0; i < n; i++) { struct radix_tree_node *old = rcu_dereference_raw(slot[i]); if (i) { - rcu_assign_pointer(slot[i], child); + rcu_assign_pointer(slot[i], sibling); for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) if (tags & (1 << tag)) tag_clear(node, tag, offset + i); @@ -949,9 +926,7 @@ static inline int insert_entries(struct radix_tree_node *node, if (tags & (1 << tag)) tag_set(node, tag, offset); } - if (radix_tree_is_internal_node(old) && - !is_sibling_entry(node, old) && - (old != RADIX_TREE_RETRY)) + if (xa_is_node(old)) radix_tree_free_nodes(old); if (xa_is_value(old)) node->exceptional--; @@ -1112,10 +1087,10 @@ static inline void replace_sibling_entries(struct radix_tree_node *node, void __rcu **slot, int count, int exceptional) { #ifdef CONFIG_RADIX_TREE_MULTIORDER - void *ptr = node_to_entry(slot); - unsigned offset = get_slot_offset(node, slot) + 1; + unsigned offset = get_slot_offset(node, slot); + void *ptr = xa_mk_sibling(offset); - while (offset < RADIX_TREE_MAP_SIZE) { + while (++offset < RADIX_TREE_MAP_SIZE) { if (rcu_dereference_raw(node->slots[offset]) != ptr) break; if (count < 0) { @@ -1123,7 +1098,6 @@ static inline void replace_sibling_entries(struct radix_tree_node *node, node->count--; } node->exceptional += exceptional; - offset++; } #endif } @@ -1319,8 +1293,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, tags |= 1 << tag; for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { - if (!is_sibling_entry(parent, - rcu_dereference_raw(parent->slots[end]))) + if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end]))) break; for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) if (tags & (1 << tag)) @@ -1618,7 +1591,7 @@ static void __rcu **skip_siblings(struct radix_tree_node **nodep, { while (iter->index < iter->next_index) { *nodep = rcu_dereference_raw(*slot); - if (*nodep && !is_sibling_entry(iter->node, *nodep)) + if (*nodep && !xa_is_sibling(*nodep)) return slot; slot++; iter->index = __radix_tree_iter_add(iter, 1); @@ -1769,7 +1742,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, while (++offset < RADIX_TREE_MAP_SIZE) { void *slot = rcu_dereference_raw( node->slots[offset]); - if (is_sibling_entry(node, slot)) + if (xa_is_sibling(slot)) continue; if (slot) break; @@ -2283,6 +2256,7 @@ void __init radix_tree_init(void) BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); + BUILD_BUG_ON(XA_CHUNK_SIZE > 255); radix_tree_node_cachep = kmem_cache_create("radix_tree_node", sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 37baecc3766f..12adcf9ffe86 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -46,6 +46,6 @@ idr.c: ../../../lib/idr.c generated/map-shift.h: @if ! 
grep -qws $(SHIFT) generated/map-shift.h; then \ - echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ + echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ fi diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h index cf88dc5b8832..ca8e03ad19ac 100644 --- a/tools/testing/radix-tree/generated/autoconf.h +++ b/tools/testing/radix-tree/generated/autoconf.h @@ -1 +1,2 @@ #define CONFIG_RADIX_TREE_MULTIORDER 1 +#define CONFIG_XARRAY_MULTI 1 -- cgit v1.2.3-71-gd317 From c550f01c810f2197c98e6e3103f81797f5e063be Mon Sep 17 00:00:00 2001 From: Steve Sakoman Date: Thu, 20 Sep 2018 09:20:34 -1000 Subject: serial:serial_core: Allow use of CTS for PPS line discipline Add a "pps_4wire" file to serial ports in sysfs in case the kernel is configured with CONFIG_PPS_CLIENT_LDISC. Writing 1 to the file enables the use of CTS instead of DCD for PPS signal input. This is necessary in case a serial port is not completely wired. Though this affects PPS processing the patch is against the serial core as the source of the serial port PPS event dispatching has to be modified. Furthermore it should be possible to modify the source of serial port PPS event dispatching before changing the line discipline. Signed-off-by: Andreas Steinmetz Signed-off-by: Steve Sakoman Tested-by: Steve Sakoman Tested-by: Eric Gallimore Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-tty | 9 +++++ drivers/tty/serial/serial_core.c | 70 ++++++++++++++++++++++++++++++++++++- include/linux/serial_core.h | 3 +- 3 files changed, 80 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty index 9eb3c2b6b040..441105a75d1f 100644 --- a/Documentation/ABI/testing/sysfs-tty +++ b/Documentation/ABI/testing/sysfs-tty @@ -154,3 +154,12 @@ Description: device specification. For example, when user sets 7bytes on 16550A, which has 1/4/8/14 bytes trigger, the RX trigger is automatically changed to 4 bytes. + +What: /sys/class/tty/ttyS0/pps_4wire +Date: September 2018 +Contact: Steve Sakoman +Description: + Shows/sets "4 wire" mode for the PPS input to the serial driver. + For fully implemented serial ports PPS is normally provided + on the DCD line. For partial "4 wire" implementations CTS is + used instead of DCD. diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 80bb56facfb6..ed0133395cc7 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2664,6 +2664,57 @@ static ssize_t uart_get_attr_iomem_reg_shift(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", tmp.iomem_reg_shift); } +static ssize_t pps_4wire_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tty_port *port = dev_get_drvdata(dev); + struct uart_state *state = container_of(port, struct uart_state, port); + struct uart_port *uport; + int mode = 0; + + mutex_lock(&port->mutex); + uport = uart_port_check(state); + if (!uport) + goto out; + + mode = uport->pps_4wire; + +out: + mutex_unlock(&port->mutex); + return sprintf(buf, mode ? 
"yes\n" : "no\n"); +} + +static ssize_t pps_4wire_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct tty_port *port = dev_get_drvdata(dev); + struct uart_state *state = container_of(port, struct uart_state, port); + struct uart_port *uport; + bool mode; + int ret; + + if (!count) + return -EINVAL; + + ret = kstrtobool(buf, &mode); + if (ret < 0) + return ret; + + mutex_lock(&port->mutex); + uport = uart_port_check(state); + if (!uport) + goto out; + + spin_lock_irq(&uport->lock); + uport->pps_4wire = mode; + spin_unlock_irq(&uport->lock); + +out: + mutex_unlock(&port->mutex); + return count; +} +static DEVICE_ATTR_RW(pps_4wire); + static DEVICE_ATTR(type, S_IRUSR | S_IRGRP, uart_get_attr_type, NULL); static DEVICE_ATTR(line, S_IRUSR | S_IRGRP, uart_get_attr_line, NULL); static DEVICE_ATTR(port, S_IRUSR | S_IRGRP, uart_get_attr_port, NULL); @@ -2692,6 +2743,7 @@ static struct attribute *tty_dev_attrs[] = { &dev_attr_io_type.attr, &dev_attr_iomem_base.attr, &dev_attr_iomem_reg_shift.attr, + &dev_attr_pps_4wire.attr, NULL, }; @@ -2748,6 +2800,9 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) goto out; } + /* assert that pps handling is done via DCD as default */ + uport->pps_4wire = 0; + /* * If this port is a console, then the spinlock is already * initialised. @@ -2923,7 +2978,7 @@ void uart_handle_dcd_change(struct uart_port *uport, unsigned int status) lockdep_assert_held_once(&uport->lock); - if (tty) { + if (tty && !uport->pps_4wire) { ld = tty_ldisc_ref(tty); if (ld) { if (ld->ops->dcd_change) @@ -2952,8 +3007,21 @@ EXPORT_SYMBOL_GPL(uart_handle_dcd_change); */ void uart_handle_cts_change(struct uart_port *uport, unsigned int status) { + struct tty_port *port = &uport->state->port; + struct tty_struct *tty = port->tty; + struct tty_ldisc *ld; + lockdep_assert_held_once(&uport->lock); + if (tty && uport->pps_4wire) { + ld = tty_ldisc_ref(tty); + if (ld) { + if (ld->ops->dcd_change) + ld->ops->dcd_change(tty, status); + tty_ldisc_deref(ld); + } + } + uport->icount.cts++; if (uart_softcts_mode(uport)) { diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 406edae44ca3..079793e5d3fa 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -255,7 +255,8 @@ struct uart_port { struct device *dev; /* parent device */ unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; - unsigned char unused[2]; + unsigned char pps_4wire; /* CTS instead of DCD */ + unsigned char unused; const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ -- cgit v1.2.3-71-gd317 From ad8c0eaa0a418ae8ef3f9217638bb86439399eac Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 26 Sep 2018 14:58:47 +0200 Subject: tty/serial_core: add ISO7816 infrastructure Add the ISO7816 ioctl and associated accessors and data structure. Drivers can then use this common implementation to handle ISO7816 (smart cards). 
Signed-off-by: Nicolas Ferre [ludovic.desroches@microchip.com: squash and rebase, removal of gpios, checkpatch fixes] Signed-off-by: Ludovic Desroches Signed-off-by: Greg Kroah-Hartman --- Documentation/serial/serial-iso7816.txt | 83 +++++++++++++++++++++++++++++++++ arch/alpha/include/uapi/asm/ioctls.h | 2 + arch/mips/include/uapi/asm/ioctls.h | 2 + arch/parisc/include/uapi/asm/ioctls.h | 2 + arch/powerpc/include/uapi/asm/ioctls.h | 2 + arch/sh/include/uapi/asm/ioctls.h | 2 + arch/sparc/include/uapi/asm/ioctls.h | 2 + arch/xtensa/include/uapi/asm/ioctls.h | 2 + drivers/tty/serial/serial_core.c | 60 ++++++++++++++++++++++++ include/linux/serial_core.h | 3 ++ include/uapi/asm-generic/ioctls.h | 2 + include/uapi/linux/serial.h | 17 +++++++ 12 files changed, 179 insertions(+) create mode 100644 Documentation/serial/serial-iso7816.txt (limited to 'include/linux') diff --git a/Documentation/serial/serial-iso7816.txt b/Documentation/serial/serial-iso7816.txt new file mode 100644 index 000000000000..3193d24a2b0f --- /dev/null +++ b/Documentation/serial/serial-iso7816.txt @@ -0,0 +1,83 @@ + ISO7816 SERIAL COMMUNICATIONS + +1. INTRODUCTION + + ISO/IEC7816 is a series of standards specifying integrated circuit cards (ICC) + also known as smart cards. + +2. HARDWARE-RELATED CONSIDERATIONS + + Some CPUs/UARTs (e.g., Microchip AT91) contain a built-in mode capable of + handling communication with a smart card. + + For these microcontrollers, the Linux driver should be made capable of + working in both modes, and proper ioctls (see later) should be made + available at user-level to allow switching from one mode to the other, and + vice versa. + +3. DATA STRUCTURES ALREADY AVAILABLE IN THE KERNEL + + The Linux kernel provides the serial_iso7816 structure (see [1]) to handle + ISO7816 communications. This data structure is used to set and configure + ISO7816 parameters in ioctls. + + Any driver for devices capable of working both as RS232 and ISO7816 should + implement the iso7816_config callback in the uart_port structure. The + serial_core calls iso7816_config to do the device specific part in response + to TIOCGISO7816 and TIOCSISO7816 ioctls (see below). The iso7816_config + callback receives a pointer to struct serial_iso7816. + +4. USAGE FROM USER-LEVEL + + From user-level, ISO7816 configuration can be get/set using the previous + ioctls. For instance, to set ISO7816 you can use the following code: + + #include + + /* Include definition for ISO7816 ioctls: TIOCSISO7816 and TIOCGISO7816 */ + #include + + /* Open your specific device (e.g., /dev/mydevice): */ + int fd = open ("/dev/mydevice", O_RDWR); + if (fd < 0) { + /* Error handling. See errno. */ + } + + struct serial_iso7816 iso7816conf; + + /* Reserved fields as to be zeroed */ + memset(&iso7816conf, 0, sizeof(iso7816conf)); + + /* Enable ISO7816 mode: */ + iso7816conf.flags |= SER_ISO7816_ENABLED; + + /* Select the protocol: */ + /* T=0 */ + iso7816conf.flags |= SER_ISO7816_T(0); + /* or T=1 */ + iso7816conf.flags |= SER_ISO7816_T(1); + + /* Set the guard time: */ + iso7816conf.tg = 2; + + /* Set the clock frequency*/ + iso7816conf.clk = 3571200; + + /* Set transmission factors: */ + iso7816conf.sc_fi = 372; + iso7816conf.sc_di = 1; + + if (ioctl(fd_usart, TIOCSISO7816, &iso7816conf) < 0) { + /* Error handling. See errno. */ + } + + /* Use read() and write() syscalls here... */ + + /* Close the device when finished: */ + if (close (fd) < 0) { + /* Error handling. See errno. */ + } + +5. 
REFERENCES + + [1] include/uapi/linux/serial.h diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h index 3729d92d3fa8..1e9121c9b3c7 100644 --- a/arch/alpha/include/uapi/asm/ioctls.h +++ b/arch/alpha/include/uapi/asm/ioctls.h @@ -102,6 +102,8 @@ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ #define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ +#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816) #define TIOCSERCONFIG 0x5453 #define TIOCSERGWILD 0x5454 diff --git a/arch/mips/include/uapi/asm/ioctls.h b/arch/mips/include/uapi/asm/ioctls.h index 890245a9f0c4..16aa8a766aec 100644 --- a/arch/mips/include/uapi/asm/ioctls.h +++ b/arch/mips/include/uapi/asm/ioctls.h @@ -93,6 +93,8 @@ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ #define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ +#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816) /* I hope the range from 0x5480 on is free ... */ #define TIOCSCTTY 0x5480 /* become controlling tty */ diff --git a/arch/parisc/include/uapi/asm/ioctls.h b/arch/parisc/include/uapi/asm/ioctls.h index aafb1c0ca0af..82d1148c6379 100644 --- a/arch/parisc/include/uapi/asm/ioctls.h +++ b/arch/parisc/include/uapi/asm/ioctls.h @@ -62,6 +62,8 @@ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ #define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ +#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816) #define FIONCLEX 0x5450 /* these numbers need to be adjusted. 
*/ #define FIOCLEX 0x5451 diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h index 41b1a5c15734..2c145da3b774 100644 --- a/arch/powerpc/include/uapi/asm/ioctls.h +++ b/arch/powerpc/include/uapi/asm/ioctls.h @@ -102,6 +102,8 @@ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ #define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ +#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816) #define TIOCSERCONFIG 0x5453 #define TIOCSERGWILD 0x5454 diff --git a/arch/sh/include/uapi/asm/ioctls.h b/arch/sh/include/uapi/asm/ioctls.h index cc62f6f98103..11866d4f60e1 100644 --- a/arch/sh/include/uapi/asm/ioctls.h +++ b/arch/sh/include/uapi/asm/ioctls.h @@ -95,6 +95,8 @@ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ #define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ +#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816) #define TIOCSERCONFIG _IO('T', 83) /* 0x5453 */ #define TIOCSERGWILD _IOR('T', 84, int) /* 0x5454 */ diff --git a/arch/sparc/include/uapi/asm/ioctls.h b/arch/sparc/include/uapi/asm/ioctls.h index 2df52711e170..7fd2f5873c9e 100644 --- a/arch/sparc/include/uapi/asm/ioctls.h +++ b/arch/sparc/include/uapi/asm/ioctls.h @@ -27,6 +27,8 @@ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ #define TIOCGRS485 _IOR('T', 0x41, struct serial_rs485) #define TIOCSRS485 _IOWR('T', 0x42, struct serial_rs485) +#define TIOCGISO7816 _IOR('T', 0x43, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x44, struct serial_iso7816) /* Note that all the ioctls that are not available in Linux have a * double underscore on the front to: a) avoid some programs to diff --git a/arch/xtensa/include/uapi/asm/ioctls.h b/arch/xtensa/include/uapi/asm/ioctls.h index ec43609cbfc5..6d4a87296c95 100644 --- a/arch/xtensa/include/uapi/asm/ioctls.h +++ b/arch/xtensa/include/uapi/asm/ioctls.h @@ -107,6 +107,8 @@ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ #define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ +#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816) #define TIOCSERCONFIG _IO('T', 83) #define TIOCSERGWILD _IOR('T', 84, int) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index ed0133395cc7..0a4e6eeb5ff3 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1308,6 +1308,58 @@ static int uart_set_rs485_config(struct uart_port *port, return 0; } +static int uart_get_iso7816_config(struct uart_port *port, + struct serial_iso7816 __user *iso7816) +{ + unsigned long flags; + struct serial_iso7816 aux; + + if (!port->iso7816_config) + return -ENOIOCTLCMD; + + spin_lock_irqsave(&port->lock, flags); + aux = port->iso7816; + spin_unlock_irqrestore(&port->lock, flags); + + if (copy_to_user(iso7816, &aux, sizeof(aux))) + return -EFAULT; + + return 0; +} + +static int uart_set_iso7816_config(struct uart_port *port, + struct serial_iso7816 __user *iso7816_user) +{ + struct serial_iso7816 iso7816; + int i, ret; + unsigned long flags; + + if (!port->iso7816_config) + return -ENOIOCTLCMD; + + if 
(copy_from_user(&iso7816, iso7816_user, sizeof(*iso7816_user))) + return -EFAULT; + + /* + * There are 5 words reserved for future use. Check that userspace + * doesn't put stuff in there to prevent breakages in the future. + */ + for (i = 0; i < 5; i++) + if (iso7816.reserved[i]) + return -EINVAL; + + spin_lock_irqsave(&port->lock, flags); + ret = port->iso7816_config(port, &iso7816); + spin_unlock_irqrestore(&port->lock, flags); + if (ret) + return ret; + + if (copy_to_user(iso7816_user, &port->iso7816, sizeof(port->iso7816))) + return -EFAULT; + + return 0; +} + /* * Called via sys_ioctl. We can use spin_lock_irq() here. */ @@ -1392,6 +1444,14 @@ uart_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) case TIOCSRS485: ret = uart_set_rs485_config(uport, uarg); break; + + case TIOCSISO7816: + ret = uart_set_iso7816_config(state->uart_port, uarg); + break; + + case TIOCGISO7816: + ret = uart_get_iso7816_config(state->uart_port, uarg); + break; default: if (uport->ops->ioctl) ret = uport->ops->ioctl(uport, cmd, arg); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 079793e5d3fa..4e2ba4894dcc 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -144,6 +144,8 @@ struct uart_port { void (*handle_break)(struct uart_port *); int (*rs485_config)(struct uart_port *, struct serial_rs485 *rs485); + int (*iso7816_config)(struct uart_port *, + struct serial_iso7816 *iso7816); unsigned int irq; /* irq number */ unsigned long irqflags; /* irq flags */ unsigned int uartclk; /* base uart clock */ @@ -261,6 +263,7 @@ struct uart_port { struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; + struct serial_iso7816 iso7816; void *private_data; /* generic platform data pointer */ }; diff --git a/include/uapi/asm-generic/ioctls.h b/include/uapi/asm-generic/ioctls.h index 040651735662..cdc9f4ca8c27 100644 --- a/include/uapi/asm-generic/ioctls.h +++ b/include/uapi/asm-generic/ioctls.h @@ -79,6 +79,8 @@ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ #define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ +#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816) #define FIONCLEX 0x5450 #define FIOCLEX 0x5451 diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h index 3fdd0dee8b41..93eb3c496ff1 100644 --- a/include/uapi/linux/serial.h +++ b/include/uapi/linux/serial.h @@ -132,4 +132,21 @@ struct serial_rs485 { are a royal PITA .. */ }; +/* + * Serial interface for controlling ISO7816 settings on chips with suitable + * support. Set with TIOCSISO7816 and get with TIOCGISO7816 if supported by + * your platform. + */ +struct serial_iso7816 { + __u32 flags; /* ISO7816 feature flags */ +#define SER_ISO7816_ENABLED (1 << 0) +#define SER_ISO7816_T_PARAM (0x0f << 4) +#define SER_ISO7816_T(t) (((t) & 0x0f) << 4) + __u32 tg; + __u32 sc_fi; + __u32 sc_di; + __u32 clk; + __u32 reserved[5]; +}; + #endif /* _UAPI_LINUX_SERIAL_H */ -- cgit v1.2.3-71-gd317 From 47b00dcf141172c4c1c583701ec91923672cec39 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 10 Aug 2018 11:29:09 +0300 Subject: clk: ti: clkctrl: support multiple clkctrl nodes under a cm node Currently, only one clkctrl node can be added under a specific CM node due to limitation with the implementation. 
Modify the code to pick up the clockdomain name from the clkctrl node instead of the CM node if provided. Also, add a new flag to the TI clock driver so that both modes can be supported simultaneously. Signed-off-by: Tero Kristo Tested-by: Tony Lindgren --- drivers/clk/ti/clk.c | 7 ++++-- drivers/clk/ti/clkctrl.c | 61 +++++++++++++++++++++++++++++++++++------------- drivers/clk/ti/clock.h | 2 ++ include/linux/clk/ti.h | 1 + 4 files changed, 53 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 27e0979b3158..8b89be18e39e 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -34,7 +34,7 @@ struct ti_clk_ll_ops *ti_clk_ll_ops; static struct device_node *clocks_node_ptr[CLK_MAX_MEMMAPS]; -static struct ti_clk_features ti_clk_features; +struct ti_clk_features ti_clk_features; struct clk_iomap { struct regmap *regmap; @@ -140,6 +140,9 @@ void __init ti_dt_clocks_register(struct ti_dt_clk oclks[]) int ret; static bool clkctrl_nodes_missing; static bool has_clkctrl_data; + static bool compat_mode; + + compat_mode = ti_clk_get_features()->flags & TI_CLK_CLKCTRL_COMPAT; for (c = oclks; c->node_name != NULL; c++) { strcpy(buf, c->node_name); @@ -164,7 +167,7 @@ void __init ti_dt_clocks_register(struct ti_dt_clk oclks[]) continue; node = of_find_node_by_name(NULL, buf); - if (num_args) { + if (num_args && compat_mode) { parent = node; node = of_get_child_by_name(parent, "clk"); of_node_put(parent); diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c index 421b05392220..9bff57f0345d 100644 --- a/drivers/clk/ti/clkctrl.c +++ b/drivers/clk/ti/clkctrl.c @@ -259,8 +259,13 @@ _ti_clkctrl_clk_register(struct omap_clkctrl_provider *provider, struct omap_clkctrl_clk *clkctrl_clk; int ret = 0; - init.name = kasprintf(GFP_KERNEL, "%s:%s:%04x:%d", node->parent->name, - node->name, offset, bit); + if (ti_clk_get_features()->flags & TI_CLK_CLKCTRL_COMPAT) + init.name = kasprintf(GFP_KERNEL, "%s:%s:%04x:%d", + node->parent->name, node->name, offset, + bit); + else + init.name = kasprintf(GFP_KERNEL, "%s:%04x:%d", node->name, + offset, bit); clkctrl_clk = kzalloc(sizeof(*clkctrl_clk), GFP_KERNEL); if (!init.name || !clkctrl_clk) { ret = -ENOMEM; @@ -441,6 +446,10 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node) u32 addr; int ret; + if (!(ti_clk_get_features()->flags & TI_CLK_CLKCTRL_COMPAT) && + !strcmp(node->name, "clk")) + ti_clk_features.flags |= TI_CLK_CLKCTRL_COMPAT; + addrp = of_get_address(node, 0, NULL, NULL); addr = (u32)of_translate_address(node, addrp); @@ -492,19 +501,35 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node) provider->base = of_iomap(node, 0); - provider->clkdm_name = kmalloc(strlen(node->parent->name) + 3, - GFP_KERNEL); - if (!provider->clkdm_name) { - kfree(provider); - return; + if (ti_clk_get_features()->flags & TI_CLK_CLKCTRL_COMPAT) { + provider->clkdm_name = kmalloc(strlen(node->parent->name) + 3, + GFP_KERNEL); + if (!provider->clkdm_name) { + kfree(provider); + return; + } + + /* + * Create default clkdm name, replace _cm from end of parent + * node name with _clkdm + */ + strcpy(provider->clkdm_name, node->parent->name); + provider->clkdm_name[strlen(provider->clkdm_name) - 2] = 0; + } else { + provider->clkdm_name = kmalloc(strlen(node->name), GFP_KERNEL); + if (!provider->clkdm_name) { + kfree(provider); + return; + } + + /* + * Create default clkdm name, replace _clkctrl from end of + * node name with _clkdm + */ + strcpy(provider->clkdm_name, 
node->name); + provider->clkdm_name[strlen(provider->clkdm_name) - 7] = 0; } - /* - * Create default clkdm name, replace _cm from end of parent node - * name with _clkdm - */ - strcpy(provider->clkdm_name, node->parent->name); - provider->clkdm_name[strlen(provider->clkdm_name) - 2] = 0; strcat(provider->clkdm_name, "clkdm"); INIT_LIST_HEAD(&provider->clocks); @@ -539,9 +564,13 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node) init.flags = 0; if (reg_data->flags & CLKF_SET_RATE_PARENT) init.flags |= CLK_SET_RATE_PARENT; - init.name = kasprintf(GFP_KERNEL, "%s:%s:%04x:%d", - node->parent->name, node->name, - reg_data->offset, 0); + if (ti_clk_get_features()->flags & TI_CLK_CLKCTRL_COMPAT) + init.name = kasprintf(GFP_KERNEL, "%s:%s:%04x:%d", + node->parent->name, node->name, + reg_data->offset, 0); + else + init.name = kasprintf(GFP_KERNEL, "%s:%04x:%d", + node->name, reg_data->offset, 0); clkctrl_clk = kzalloc(sizeof(*clkctrl_clk), GFP_KERNEL); if (!init.name || !clkctrl_clk) goto cleanup; diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index b58278077226..ce4aad6c4c7c 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -233,6 +233,8 @@ extern const struct clk_ops ti_clk_divider_ops; extern const struct clk_ops ti_clk_mux_ops; extern const struct clk_ops omap_gate_clk_ops; +extern struct ti_clk_features ti_clk_features; + void omap2_init_clk_clkdm(struct clk_hw *hw); int omap2_clkops_enable_clkdm(struct clk_hw *hw); void omap2_clkops_disable_clkdm(struct clk_hw *hw); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index a8faa38b1ed6..3301bd025a2c 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -290,6 +290,7 @@ struct ti_clk_features { #define TI_CLK_DPLL4_DENY_REPROGRAM BIT(1) #define TI_CLK_DISABLE_CLKDM_CONTROL BIT(2) #define TI_CLK_ERRATA_I810 BIT(3) +#define TI_CLK_CLKCTRL_COMPAT BIT(4) void ti_clk_setup_features(struct ti_clk_features *features); const struct ti_clk_features *ti_clk_get_features(void); -- cgit v1.2.3-71-gd317 From 8b95d1ce3300c411728954473316bd04d0ba9883 Mon Sep 17 00:00:00 2001 From: Russ Dill Date: Tue, 4 Sep 2018 12:19:35 +0530 Subject: clk: Add functions to save/restore clock context en-masse A deep enough power saving mode can also result in losing the context of the clock registers, and they need to be restored once coming back from the power saving mode. Hence add functions to save/restore clock context.
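As a rough usage sketch (not part of this patch: everything named example_* below is hypothetical, and only clk_save_context()/clk_restore_context() come from this change), a platform's suspend path could wrap its context-losing power state with the new calls:

#include <linux/clk.h>
#include <linux/suspend.h>

static int example_suspend_enter(suspend_state_t state)
{
        int ret;

        /* Clock register contents will be lost below: stash them first. */
        ret = clk_save_context();
        if (ret)
                return ret;

        /* Hypothetical helper that enters the RTC-only/deep sleep state. */
        example_enter_deep_sleep();

        /* Power is back: rewrite every saved clock register. */
        clk_restore_context();

        return 0;
}

Note the traversal order of the helpers added below: the save visits children before their parent, while the restore reprograms a parent before its children, so a child is never restored while its parent is still in an undefined state.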
Signed-off-by: Keerthy Signed-off-by: Russ Dill Acked-by: Tony Lindgren Signed-off-by: Tero Kristo --- drivers/clk/clk.c | 74 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/clk-provider.h | 7 +++++ include/linux/clk.h | 25 +++++++++++++++ 3 files changed, 106 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index d31055ae6ec6..8a0254a1c303 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -923,6 +923,80 @@ static int clk_core_enable_lock(struct clk_core *core) return ret; } +static int _clk_save_context(struct clk_core *clk) +{ + struct clk_core *child; + int ret = 0; + + hlist_for_each_entry(child, &clk->children, child_node) { + ret = _clk_save_context(child); + if (ret < 0) + return ret; + } + + if (clk->ops && clk->ops->save_context) + ret = clk->ops->save_context(clk->hw); + + return ret; +} + +static void _clk_restore_context(struct clk_core *clk) +{ + struct clk_core *child; + + if (clk->ops && clk->ops->restore_context) + clk->ops->restore_context(clk->hw); + + hlist_for_each_entry(child, &clk->children, child_node) + _clk_restore_context(child); +} + +/** + * clk_save_context - save clock context for poweroff + * + * Saves the context of the clock register for powerstates in which the + * contents of the registers will be lost. Occurs deep within the suspend + * code. Returns 0 on success. + */ +int clk_save_context(void) +{ + struct clk_core *clk; + int ret; + + hlist_for_each_entry(clk, &clk_root_list, child_node) { + ret = _clk_save_context(clk); + if (ret < 0) + return ret; + } + + hlist_for_each_entry(clk, &clk_orphan_list, child_node) { + ret = _clk_save_context(clk); + if (ret < 0) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(clk_save_context); + +/** + * clk_restore_context - restore clock context after poweroff + * + * Restore the saved clock context upon resume. + * + */ +void clk_restore_context(void) +{ + struct clk_core *clk; + + hlist_for_each_entry(clk, &clk_root_list, child_node) + _clk_restore_context(clk); + + hlist_for_each_entry(clk, &clk_orphan_list, child_node) + _clk_restore_context(clk); +} +EXPORT_SYMBOL_GPL(clk_restore_context); + /** * clk_enable - ungate a clock * @clk: the clk being ungated diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 08b1aa70a38d..df7379da6269 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -119,6 +119,11 @@ struct clk_duty { * Called with enable_lock held. This function must not * sleep. * + * @save_context: Save the context of the clock in preparation for poweroff. + * + * @restore_context: Restore the context of the clock after a restoration + * of power. + * * @recalc_rate Recalculate the rate of this clock, by querying hardware. The * parent rate is an input parameter. It is up to the caller to * ensure that the prepare_mutex is held across this call. 
@@ -223,6 +228,8 @@ struct clk_ops { void (*disable)(struct clk_hw *hw); int (*is_enabled)(struct clk_hw *hw); void (*disable_unused)(struct clk_hw *hw); + int (*save_context)(struct clk_hw *hw); + void (*restore_context)(struct clk_hw *hw); unsigned long (*recalc_rate)(struct clk_hw *hw, unsigned long parent_rate); long (*round_rate)(struct clk_hw *hw, unsigned long rate, diff --git a/include/linux/clk.h b/include/linux/clk.h index 4f750c481b82..7da754d79f9d 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -629,6 +629,23 @@ struct clk *clk_get_parent(struct clk *clk); */ struct clk *clk_get_sys(const char *dev_id, const char *con_id); +/** + * clk_save_context - save clock context for poweroff + * + * Saves the context of the clock register for powerstates in which the + * contents of the registers will be lost. Occurs deep within the suspend + * code so locking is not necessary. + */ +int clk_save_context(void); + +/** + * clk_restore_context - restore clock context after poweroff + * + * This occurs with all clocks enabled. Occurs deep within the resume code + * so locking is not necessary. + */ +void clk_restore_context(void); + #else /* !CONFIG_HAVE_CLK */ static inline struct clk *clk_get(struct device *dev, const char *id) @@ -728,6 +745,14 @@ static inline struct clk *clk_get_sys(const char *dev_id, const char *con_id) { return NULL; } + +static inline int clk_save_context(void) +{ + return 0; +} + +static inline void clk_restore_context(void) {} + #endif /* clk_prepare_enable helps cases using clk_enable in non-atomic context. */ -- cgit v1.2.3-71-gd317 From 435365485f40cf12747d1daa2253a4f4b46b8148 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 4 Sep 2018 12:19:36 +0530 Subject: clk: clk: Add clk_gate_restore_context function The clock gate restore context function enables or disables the gate clocks based on the enable_count. This is done in cases where the clock context is lost and based on the enable_count the clock either needs to be enabled/disabled. Signed-off-by: Keerthy Acked-by: Tony Lindgren Signed-off-by: Tero Kristo --- drivers/clk/clk.c | 19 +++++++++++++++++++ include/linux/clk-provider.h | 2 ++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8a0254a1c303..dd775771a7cc 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -923,6 +923,25 @@ static int clk_core_enable_lock(struct clk_core *core) return ret; } +/** + * clk_gate_restore_context - restore context for poweroff + * @hw: the clk_hw pointer of clock whose state is to be restored + * + * The clock gate restore context function enables or disables + * the gate clocks based on the enable_count. This is done in cases + * where the clock context is lost and based on the enable_count + * the clock either needs to be enabled/disabled. This + * helps restore the state of gate clocks. 
+ */ +void clk_gate_restore_context(struct clk_hw *hw) +{ + if (hw->clk->core->enable_count) + hw->clk->core->ops->enable(hw); + else + hw->clk->core->ops->disable(hw); +} +EXPORT_SYMBOL_GPL(clk_gate_restore_context); + static int _clk_save_context(struct clk_core *clk) { struct clk_core *child; diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index df7379da6269..60c51871b04b 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1018,5 +1018,7 @@ static inline void clk_writel(u32 val, u32 __iomem *reg) #endif /* platform dependent I/O accessors */ +void clk_gate_restore_context(struct clk_hw *hw); + #endif /* CONFIG_COMMON_CLK */ #endif /* CLK_PROVIDER_H */ -- cgit v1.2.3-71-gd317 From d6e7bbc148f9fbec8a0117b0d0f420c9710e6d81 Mon Sep 17 00:00:00 2001 From: Russ Dill Date: Tue, 4 Sep 2018 12:19:37 +0530 Subject: clk: ti: Add functions to save/restore clk context SoCs like AM43XX lose clock registers context during RTC-only suspend. Hence add functions to save/restore the clock registers context. Signed-off-by: Keerthy Signed-off-by: Russ Dill Acked-by: Tony Lindgren Signed-off-by: Tero Kristo --- drivers/clk/ti/clock.h | 2 + drivers/clk/ti/divider.c | 36 ++++++++++++++ drivers/clk/ti/dpll.c | 6 +++ drivers/clk/ti/dpll3xxx.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/clk/ti/gate.c | 3 ++ drivers/clk/ti/mux.c | 29 +++++++++++ include/linux/clk/ti.h | 6 +++ 7 files changed, 206 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 5a781067a0e7..9f312a219510 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -24,6 +24,7 @@ struct clk_omap_divider { u8 flags; s8 latch; const struct clk_div_table *table; + u32 context; }; #define to_clk_omap_divider(_hw) container_of(_hw, struct clk_omap_divider, hw) @@ -36,6 +37,7 @@ struct clk_omap_mux { u8 shift; s8 latch; u8 flags; + u8 saved_parent; }; #define to_clk_omap_mux(_hw) container_of(_hw, struct clk_omap_mux, hw) diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c index ccfb4d9a152a..373f620f49cb 100644 --- a/drivers/clk/ti/divider.c +++ b/drivers/clk/ti/divider.c @@ -268,10 +268,46 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate, return 0; } +/** + * clk_divider_save_context - Save the divider value + * @hw: pointer struct clk_hw + * + * Save the divider value + */ +static int clk_divider_save_context(struct clk_hw *hw) +{ + struct clk_omap_divider *divider = to_clk_omap_divider(hw); + u32 val; + + val = ti_clk_ll_ops->clk_readl(&divider->reg) >> divider->shift; + divider->context = val & div_mask(divider); + + return 0; +} + +/** + * clk_divider_restore_context - restore the saved divider value + * @hw: pointer struct clk_hw + * + * Restore the saved divider value + */ +static void clk_divider_restore_context(struct clk_hw *hw) +{ + struct clk_omap_divider *divider = to_clk_omap_divider(hw); + u32 val; + + val = ti_clk_ll_ops->clk_readl(&divider->reg); + val &= ~(div_mask(divider) << divider->shift); + val |= divider->context << divider->shift; + ti_clk_ll_ops->clk_writel(val, &divider->reg); +} + const struct clk_ops ti_clk_divider_ops = { .recalc_rate = ti_clk_divider_recalc_rate, .round_rate = ti_clk_divider_round_rate, .set_rate = ti_clk_divider_set_rate, + .save_context = clk_divider_save_context, + .restore_context = clk_divider_restore_context, }; static struct clk *_register_divider(struct device *dev, const char *name, diff --git a/drivers/clk/ti/dpll.c 
b/drivers/clk/ti/dpll.c index dc86d07d0921..25d86d5ebb36 100644 --- a/drivers/clk/ti/dpll.c +++ b/drivers/clk/ti/dpll.c @@ -39,6 +39,8 @@ static const struct clk_ops dpll_m4xen_ck_ops = { .set_rate_and_parent = &omap3_noncore_dpll_set_rate_and_parent, .determine_rate = &omap4_dpll_regm4xen_determine_rate, .get_parent = &omap2_init_dpll_parent, + .save_context = &omap3_core_dpll_save_context, + .restore_context = &omap3_core_dpll_restore_context, }; #else static const struct clk_ops dpll_m4xen_ck_ops = {}; @@ -62,6 +64,8 @@ static const struct clk_ops dpll_ck_ops = { .set_rate_and_parent = &omap3_noncore_dpll_set_rate_and_parent, .determine_rate = &omap3_noncore_dpll_determine_rate, .get_parent = &omap2_init_dpll_parent, + .save_context = &omap3_noncore_dpll_save_context, + .restore_context = &omap3_noncore_dpll_restore_context, }; static const struct clk_ops dpll_no_gate_ck_ops = { @@ -72,6 +76,8 @@ static const struct clk_ops dpll_no_gate_ck_ops = { .set_parent = &omap3_noncore_dpll_set_parent, .set_rate_and_parent = &omap3_noncore_dpll_set_rate_and_parent, .determine_rate = &omap3_noncore_dpll_determine_rate, + .save_context = &omap3_noncore_dpll_save_context, + .restore_context = &omap3_noncore_dpll_restore_context }; #else static const struct clk_ops dpll_core_ck_ops = {}; diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c index 4534de2ef455..44b6b6403753 100644 --- a/drivers/clk/ti/dpll3xxx.c +++ b/drivers/clk/ti/dpll3xxx.c @@ -782,6 +782,130 @@ unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, return rate; } +/** + * omap3_core_dpll_save_context - Save the m and n values of the divider + * @hw: pointer struct clk_hw + * + * Before the dpll registers are lost, save the last rounded rate m and n + * and the enable mask. + */ +int omap3_core_dpll_save_context(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *dd; + u32 v; + + dd = clk->dpll_data; + + v = ti_clk_ll_ops->clk_readl(&dd->control_reg); + clk->context = (v & dd->enable_mask) >> __ffs(dd->enable_mask); + + if (clk->context == DPLL_LOCKED) { + v = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg); + dd->last_rounded_m = (v & dd->mult_mask) >> + __ffs(dd->mult_mask); + dd->last_rounded_n = ((v & dd->div1_mask) >> + __ffs(dd->div1_mask)) + 1; + } + + return 0; +} + +/** + * omap3_core_dpll_restore_context - restore the m and n values of the divider + * @hw: pointer struct clk_hw + * + * Restore the last rounded rate m and n + * and the enable mask. + */ +void omap3_core_dpll_restore_context(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + const struct dpll_data *dd; + u32 v; + + dd = clk->dpll_data; + + if (clk->context == DPLL_LOCKED) { + _omap3_dpll_write_clken(clk, 0x4); + _omap3_wait_dpll_status(clk, 0); + + v = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg); + v &= ~(dd->mult_mask | dd->div1_mask); + v |= dd->last_rounded_m << __ffs(dd->mult_mask); + v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask); + ti_clk_ll_ops->clk_writel(v, &dd->mult_div1_reg); + + _omap3_dpll_write_clken(clk, DPLL_LOCKED); + _omap3_wait_dpll_status(clk, 1); + } else { + _omap3_dpll_write_clken(clk, clk->context); + } } + +/** + * omap3_noncore_dpll_save_context - Save the m and n values of the divider + * @hw: pointer struct clk_hw + * + * Before the dpll registers are lost, save the last rounded rate m and n + * and the enable mask. 
+ */ +int omap3_noncore_dpll_save_context(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *dd; + u32 v; + + dd = clk->dpll_data; + + v = ti_clk_ll_ops->clk_readl(&dd->control_reg); + clk->context = (v & dd->enable_mask) >> __ffs(dd->enable_mask); + + if (clk->context == DPLL_LOCKED) { + v = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg); + dd->last_rounded_m = (v & dd->mult_mask) >> + __ffs(dd->mult_mask); + dd->last_rounded_n = ((v & dd->div1_mask) >> + __ffs(dd->div1_mask)) + 1; + } + + return 0; +} + +/** + * omap3_noncore_dpll_restore_context - restore the m and n values of the divider + * @hw: pointer struct clk_hw + * + * Restore the last rounded rate m and n + * and the enable mask. + */ +void omap3_noncore_dpll_restore_context(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + const struct dpll_data *dd; + u32 ctrl, mult_div1; + + dd = clk->dpll_data; + + ctrl = ti_clk_ll_ops->clk_readl(&dd->control_reg); + mult_div1 = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg); + + if (clk->context == ((ctrl & dd->enable_mask) >> + __ffs(dd->enable_mask)) && + dd->last_rounded_m == ((mult_div1 & dd->mult_mask) >> + __ffs(dd->mult_mask)) && + dd->last_rounded_n == ((mult_div1 & dd->div1_mask) >> + __ffs(dd->div1_mask)) + 1) { + /* nothing to be done */ + return; + } + + if (clk->context == DPLL_LOCKED) + omap3_noncore_dpll_program(clk, 0); + else + _omap3_dpll_write_clken(clk, clk->context); +} + /* OMAP3/4 non-CORE DPLL clkops */ const struct clk_hw_omap_ops clkhwops_omap3_dpll = { .allow_idle = omap3_dpll_allow_idle, diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c index 935b2de5fb88..cf9e9f5fc007 100644 --- a/drivers/clk/ti/gate.c +++ b/drivers/clk/ti/gate.c @@ -33,6 +33,7 @@ static const struct clk_ops omap_gate_clkdm_clk_ops = { .init = &omap2_init_clk_clkdm, .enable = &omap2_clkops_enable_clkdm, .disable = &omap2_clkops_disable_clkdm, + .restore_context = clk_gate_restore_context, }; const struct clk_ops omap_gate_clk_ops = { @@ -40,6 +41,7 @@ const struct clk_ops omap_gate_clk_ops = { .enable = &omap2_dflt_clk_enable, .disable = &omap2_dflt_clk_disable, .is_enabled = &omap2_dflt_clk_is_enabled, + .restore_context = clk_gate_restore_context, }; static const struct clk_ops omap_gate_clk_hsdiv_restore_ops = { @@ -47,6 +49,7 @@ static const struct clk_ops omap_gate_clk_hsdiv_restore_ops = { .enable = &omap36xx_gate_clk_enable_with_hsdiv_restore, .disable = &omap2_dflt_clk_disable, .is_enabled = &omap2_dflt_clk_is_enabled, + .restore_context = clk_gate_restore_context, }; /** diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c index 69a4308a5a98..5749b2b4fa61 100644 --- a/drivers/clk/ti/mux.c +++ b/drivers/clk/ti/mux.c @@ -91,10 +91,39 @@ static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index) return 0; } +/** + * clk_mux_save_context - Save the parent selected in the mux + * @hw: pointer struct clk_hw + * + * Save the parent mux value. + */ +static int clk_mux_save_context(struct clk_hw *hw) +{ + struct clk_omap_mux *mux = to_clk_omap_mux(hw); + + mux->saved_parent = ti_clk_mux_get_parent(hw); + return 0; +} + +/** + * clk_mux_restore_context - Restore the parent in the mux + * @hw: pointer struct clk_hw + * + * Restore the saved parent mux value. 
+ */ +static void clk_mux_restore_context(struct clk_hw *hw) +{ + struct clk_omap_mux *mux = to_clk_omap_mux(hw); + + ti_clk_mux_set_parent(hw, mux->saved_parent); +} + const struct clk_ops ti_clk_mux_ops = { .get_parent = ti_clk_mux_get_parent, .set_parent = ti_clk_mux_set_parent, .determine_rate = __clk_mux_determine_rate, + .save_context = clk_mux_save_context, + .restore_context = clk_mux_restore_context, }; static struct clk *_register_mux(struct device *dev, const char *name, diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 3301bd025a2c..eacc5df57b99 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -159,6 +159,7 @@ struct clk_hw_omap { const char *clkdm_name; struct clockdomain *clkdm; const struct clk_hw_omap_ops *ops; + u32 context; }; /* @@ -294,6 +295,11 @@ struct ti_clk_features { void ti_clk_setup_features(struct ti_clk_features *features); const struct ti_clk_features *ti_clk_get_features(void); +int omap3_noncore_dpll_save_context(struct clk_hw *hw); +void omap3_noncore_dpll_restore_context(struct clk_hw *hw); + +int omap3_core_dpll_save_context(struct clk_hw *hw); +void omap3_core_dpll_restore_context(struct clk_hw *hw); extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; -- cgit v1.2.3-71-gd317 From 608a0ab2f54ab0e301ad76a41aad979ea0d02670 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:44 -0400 Subject: SUNRPC: Add lockless lookup of the server's auth domain Avoid taking the global auth_domain_lock in most lookups of the auth domain by adding an RCU protected lookup. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svcauth.h | 1 + net/sunrpc/auth_gss/svcauth_gss.c | 9 ++++++++- net/sunrpc/svcauth.c | 22 +++++++++++++++++++--- net/sunrpc/svcauth_unix.c | 10 ++++++++-- 4 files changed, 36 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 04e404a07882..3e53a6e2ada7 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -82,6 +82,7 @@ struct auth_domain { struct hlist_node hash; char *name; struct auth_ops *flavour; + struct rcu_head rcu_head; }; /* diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 860f2a1bbb67..87c71fb0f0ea 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1764,14 +1764,21 @@ out_err: } static void -svcauth_gss_domain_release(struct auth_domain *dom) +svcauth_gss_domain_release_rcu(struct rcu_head *head) { + struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head); struct gss_domain *gd = container_of(dom, struct gss_domain, h); kfree(dom->name); kfree(gd); } +static void +svcauth_gss_domain_release(struct auth_domain *dom) +{ + call_rcu(&dom->rcu_head, svcauth_gss_domain_release_rcu); +} + static struct auth_ops svcauthops_gss = { .name = "rpcsec_gss", .owner = THIS_MODULE, diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index f83443856cd1..775b8c94265b 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -143,10 +143,11 @@ static struct hlist_head auth_domain_table[DN_HASHMAX]; static DEFINE_SPINLOCK(auth_domain_lock); static void auth_domain_release(struct kref *kref) + __releases(&auth_domain_lock) { struct auth_domain *dom = container_of(kref, struct auth_domain, ref); - hlist_del(&dom->hash); + hlist_del_rcu(&dom->hash); dom->flavour->domain_release(dom); spin_unlock(&auth_domain_lock); } @@ -175,7 +176,7 @@ 
auth_domain_lookup(char *name, struct auth_domain *new) } } if (new) - hlist_add_head(&new->hash, head); + hlist_add_head_rcu(&new->hash, head); spin_unlock(&auth_domain_lock); return new; } @@ -183,6 +184,21 @@ EXPORT_SYMBOL_GPL(auth_domain_lookup); struct auth_domain *auth_domain_find(char *name) { - return auth_domain_lookup(name, NULL); + struct auth_domain *hp; + struct hlist_head *head; + + head = &auth_domain_table[hash_str(name, DN_HASHBITS)]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(hp, head, hash) { + if (strcmp(hp->name, name)==0) { + if (!kref_get_unless_zero(&hp->ref)) + hp = NULL; + rcu_read_unlock(); + return hp; + } + } + rcu_read_unlock(); + return NULL; } EXPORT_SYMBOL_GPL(auth_domain_find); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index af7f28fb8102..84cf39021a03 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -37,20 +37,26 @@ struct unix_domain { extern struct auth_ops svcauth_null; extern struct auth_ops svcauth_unix; -static void svcauth_unix_domain_release(struct auth_domain *dom) +static void svcauth_unix_domain_release_rcu(struct rcu_head *head) { + struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head); struct unix_domain *ud = container_of(dom, struct unix_domain, h); kfree(dom->name); kfree(ud); } +static void svcauth_unix_domain_release(struct auth_domain *dom) +{ + call_rcu(&dom->rcu_head, svcauth_unix_domain_release_rcu); +} + struct auth_domain *unix_domain_find(char *name) { struct auth_domain *rv; struct unix_domain *new = NULL; - rv = auth_domain_lookup(name, NULL); + rv = auth_domain_find(name); while(1) { if (rv) { if (new && rv != &new->h) -- cgit v1.2.3-71-gd317 From 34845c939082093354a6ffbb1ebff599e30b9b22 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 26 Sep 2018 15:20:03 +0200 Subject: reset: Grammar s/more then once/more than once/ Fix grammar in reset_control_get_exclusive() documentation comment. Signed-off-by: Geert Uytterhoeven Signed-off-by: Philipp Zabel --- include/linux/reset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index 09732c36f351..29af6d6b2f4b 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -116,7 +116,7 @@ static inline int device_reset_optional(struct device *dev) * @id: reset line name * * Returns a struct reset_control or IS_ERR() condition containing errno. - * If this function is called more then once for the same reset_control it will + * If this function is called more than once for the same reset_control it will * return -EBUSY. * * See reset_control_get_shared for details on shared references to -- cgit v1.2.3-71-gd317 From b723a7911d028fb55f67a63c4c4d895f72e059d4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:32 +0300 Subject: fanotify: fix collision of internal and uapi mark flags The new mark flag FAN_MARK_FILESYSTEM collides with the existing internal flag FAN_MARK_ONDIR. Change the internal flag value to avoid the collision. 
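To make the collision concrete, these are the two values side by side as they stood before this fix (reconstructed from the diffs in this series, shown only for illustration):

/* include/uapi/linux/fanotify.h - uapi flag added by d54f4fba889b */
#define FAN_MARK_FILESYSTEM	0x00000100

/* include/linux/fanotify.h - kernel-internal flag, old value */
#define FAN_MARK_ONDIR		0x00000100	/* same bit as the uapi flag */

With both flags claiming bit 8, a userspace caller passing FAN_MARK_FILESYSTEM was indistinguishable from kernel-internal use of FAN_MARK_ONDIR; moving the internal flag to 0x80000000 keeps it clear of the uapi flag space.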
Fixes: d54f4fba889b ("fanotify: add API to attach/detach super block mark") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 9c5ea3bdfaa0..e70fccc3757e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -5,7 +5,7 @@ #include /* not valid from userspace, only kernel internal */ -#define FAN_MARK_ONDIR 0x00000100 +#define FAN_MARK_ONDIR 0x80000000 #define FAN_GROUP_FLAG(group, flag) \ ((group)->fanotify_data.flags & (flag)) -- cgit v1.2.3-71-gd317 From 007d1e8395eaa59b0e7ad9eb2b53a40859446a88 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:33 +0300 Subject: fsnotify: generalize handling of extra event flags FS_EVENT_ON_CHILD gets a special treatment in fsnotify() because it is not a flag specifying an event type, but rather an extra flag that may be reported along with another event and that controls the handling of the event by the backend. FS_ISDIR is also an "extra flag" and not an "event type" and therefore deserves the same treatment. With inotify/dnotify backends it was never possible to set FS_ISDIR in mark masks, so it did not matter. With the fanotify backend, the mark adding code jumps through hoops to avoid setting the FS_ISDIR in the cumulative object mask. Separate the constant ALL_FSNOTIFY_EVENTS into ALL_FSNOTIFY_FLAGS and ALL_FSNOTIFY_EVENTS, so the latter can be used to test for specific event types. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 7 +++---- include/linux/fsnotify_backend.h | 9 +++++++-- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 422fbc6dffde..7391a02bf723 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -196,7 +196,7 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; - __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); __u32 marks_mask = 0; __u32 marks_ignored_mask = 0; struct fsnotify_mark *mark; @@ -329,8 +329,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, struct mount *mnt = NULL; __u32 mnt_or_sb_mask = 0; int ret = 0; - /* global tests shouldn't care about events on child only the specific event */ - __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS); if (data_is == FSNOTIFY_EVENT_PATH) { mnt = real_mount(((const struct path *)data)->mnt); @@ -396,7 +395,7 @@ static __init int fsnotify_init(void) { int ret; - BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23); + BUG_ON(hweight32(ALL_FSNOTIFY_BITS) != 23); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 8e91341cbd8a..135b973e44d1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -68,15 +68,20 @@ #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM) +/* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \ FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ - FS_OPEN_PERM | FS_ACCESS_PERM | FS_EXCL_UNLINK | \ - FS_ISDIR | 
FS_IN_ONESHOT | FS_DN_RENAME | \ + FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME) + +/* Extra flags that may be reported with event or control handling of events */ +#define ALL_FSNOTIFY_FLAGS (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \ FS_DN_MULTISHOT | FS_EVENT_ON_CHILD) + +#define ALL_FSNOTIFY_BITS (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS) + struct fsnotify_group; struct fsnotify_event; struct fsnotify_mark; -- cgit v1.2.3-71-gd317 From a72fd224e37bf6a0630bce302deebbccbc236ba2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:34 +0300 Subject: fanotify: simplify handling of FAN_ONDIR fanotify mark add/remove code jumps through hoops to avoid setting the FS_ISDIR in the cumulative object mask. That was just papering over a bug in fsnotify() handling of the FS_ISDIR extra flag. This bug is now fixed, so all the hoops can be removed along with the unneeded internal flag FAN_MARK_ONDIR. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 32 +++++--------------------------- include/linux/fanotify.h | 3 --- 2 files changed, 5 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 15719d4aa4b5..34b511407035 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -506,18 +506,10 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, spin_lock(&fsn_mark->lock); if (!(flags & FAN_MARK_IGNORED_MASK)) { - __u32 tmask = fsn_mark->mask & ~mask; - - if (flags & FAN_MARK_ONDIR) - tmask &= ~FAN_ONDIR; - oldmask = fsn_mark->mask; - fsn_mark->mask = tmask; + fsn_mark->mask &= ~mask; } else { - __u32 tmask = fsn_mark->ignored_mask & ~mask; - if (flags & FAN_MARK_ONDIR) - tmask &= ~FAN_ONDIR; - fsn_mark->ignored_mask = tmask; + fsn_mark->ignored_mask &= ~mask; } *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask); spin_unlock(&fsn_mark->lock); @@ -586,19 +578,10 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, spin_lock(&fsn_mark->lock); if (!(flags & FAN_MARK_IGNORED_MASK)) { - __u32 tmask = fsn_mark->mask | mask; - - if (flags & FAN_MARK_ONDIR) - tmask |= FAN_ONDIR; - oldmask = fsn_mark->mask; - fsn_mark->mask = tmask; + fsn_mark->mask |= mask; } else { - __u32 tmask = fsn_mark->ignored_mask | mask; - if (flags & FAN_MARK_ONDIR) - tmask |= FAN_ONDIR; - - fsn_mark->ignored_mask = tmask; + fsn_mark->ignored_mask |= mask; if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } @@ -820,7 +803,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct fd f; struct path path; - u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD; + u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR; unsigned int mark_type = flags & FAN_MARK_TYPE_MASK; int ret; @@ -857,11 +840,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; } - if (mask & FAN_ONDIR) { - flags |= FAN_MARK_ONDIR; - mask &= ~FAN_ONDIR; - } - if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) valid_mask |= FAN_ALL_PERM_EVENTS; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index e70fccc3757e..a8c3fc54276d 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -4,9 +4,6 @@ #include -/* not valid from userspace, only kernel internal */ -#define FAN_MARK_ONDIR 0x80000000 - #define FAN_GROUP_FLAG(group, flag) \ 
((group)->fanotify_data.flags & (flag)) -- cgit v1.2.3-71-gd317 From 23c9deeb3285d34fd243abb3d6b9f07db60c3cf4 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:35 +0300 Subject: fanotify: deprecate uapi FAN_ALL_* constants We do not want to add new bits to the FAN_ALL_* uapi constants because they have been exposed to userspace. If there are programs out there using these constants, those programs could break if re-compiled with modified FAN_ALL_* constants and run on an old kernel. We deprecate the uapi constants FAN_ALL_* and define new FANOTIFY_* constants for internal use to replace them. New feature bits will be added only to the new constants. Cc: Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 6 ++--- fs/notify/fanotify/fanotify.h | 2 +- fs/notify/fanotify/fanotify_user.c | 22 +++++++++--------- include/linux/fanotify.h | 47 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/fanotify.h | 18 +++++++-------- 5 files changed, 71 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 94b52157bf8d..03498eb995be 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -131,8 +131,8 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) return false; - if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask & - ~marks_ignored_mask) + if (event_mask & FANOTIFY_OUTGOING_EVENTS & + marks_mask & ~marks_ignored_mask) return true; return false; @@ -236,7 +236,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, ret = fsnotify_add_event(group, fsn_event, fanotify_merge); if (ret) { /* Permission events shouldn't be merged */ - BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); + BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS); /* Our event wasn't used in the end. Free it. 
*/ fsnotify_destroy_event(group, fsn_event); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 8609ba06f474..88a8290a61cb 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -44,7 +44,7 @@ FANOTIFY_PE(struct fsnotify_event *fse) static inline bool fanotify_is_perm_event(u32 mask) { return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) && - mask & FAN_ALL_PERM_EVENTS; + mask & FANOTIFY_PERM_EVENTS; } static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 34b511407035..530e5e486105 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -131,7 +131,7 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; metadata->reserved = 0; - metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS; + metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->tgid); if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) metadata->fd = FAN_NOFD; @@ -395,7 +395,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) */ while (!fsnotify_notify_queue_is_empty(group)) { fsn_event = fsnotify_remove_first_event(group); - if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) { + if (!(fsn_event->mask & FANOTIFY_PERM_EVENTS)) { spin_unlock(&group->notification_lock); fsnotify_destroy_event(group, fsn_event); spin_lock(&group->notification_lock); @@ -691,9 +691,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) return -EPERM; #ifdef CONFIG_AUDITSYSCALL - if (flags & ~(FAN_ALL_INIT_FLAGS | FAN_ENABLE_AUDIT)) + if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT)) #else - if (flags & ~FAN_ALL_INIT_FLAGS) + if (flags & ~FANOTIFY_INIT_FLAGS) #endif return -EINVAL; @@ -745,7 +745,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.f_flags = event_f_flags; init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); - switch (flags & FAN_ALL_CLASS_BITS) { + switch (flags & FANOTIFY_CLASS_BITS) { case FAN_CLASS_NOTIF: group->priority = FS_PRIO_0; break; @@ -803,8 +803,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct fd f; struct path path; - u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR; - unsigned int mark_type = flags & FAN_MARK_TYPE_MASK; + u32 valid_mask = FANOTIFY_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR; + unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; int ret; pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n", @@ -814,7 +814,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (mask & ((__u64)0xffffffff << 32)) return -EINVAL; - if (flags & ~FAN_ALL_MARK_FLAGS) + if (flags & ~FANOTIFY_MARK_FLAGS) return -EINVAL; switch (mark_type) { @@ -833,7 +833,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; break; case FAN_MARK_FLUSH: - if (flags & ~(FAN_MARK_TYPE_MASK | FAN_MARK_FLUSH)) + if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH)) return -EINVAL; break; default: @@ -841,7 +841,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) - valid_mask |= 
FAN_ALL_PERM_EVENTS; + valid_mask |= FANOTIFY_PERM_EVENTS; if (mask & ~valid_mask) return -EINVAL; @@ -861,7 +861,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * allowed to set permissions events. */ ret = -EINVAL; - if (mask & FAN_ALL_PERM_EVENTS && + if (mask & FANOTIFY_PERM_EVENTS && group->priority == FS_PRIO_0) goto fput_and_out; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index a8c3fc54276d..4519b0988afe 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -7,4 +7,51 @@ #define FAN_GROUP_FLAG(group, flag) \ ((group)->fanotify_data.flags & (flag)) +/* + * Flags allowed to be passed from/to userspace. + * + * We intentionally do not add new bits to the old FAN_ALL_* constants, because + * they are uapi exposed constants. If there are programs out there using + * these constants, the programs may break if re-compiled with new uapi headers + * and then run on an old kernel. + */ +#define FANOTIFY_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | \ + FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) + +#define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ + FAN_MARK_FILESYSTEM) + +#define FANOTIFY_MARK_FLAGS (FANOTIFY_MARK_TYPE_BITS | \ + FAN_MARK_ADD | \ + FAN_MARK_REMOVE | \ + FAN_MARK_DONT_FOLLOW | \ + FAN_MARK_ONLYDIR | \ + FAN_MARK_IGNORED_MASK | \ + FAN_MARK_IGNORED_SURV_MODIFY | \ + FAN_MARK_FLUSH) + +/* Events that user can request to be notified on */ +#define FANOTIFY_EVENTS (FAN_ACCESS | FAN_MODIFY | \ + FAN_CLOSE | FAN_OPEN) + +/* Events that require a permission response from user */ +#define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM) + +/* Events that may be reported to user */ +#define FANOTIFY_OUTGOING_EVENTS (FANOTIFY_EVENTS | \ + FANOTIFY_PERM_EVENTS | \ + FAN_Q_OVERFLOW) + +/* Do not use these old uapi constants internally */ +#undef FAN_ALL_CLASS_BITS +#undef FAN_ALL_INIT_FLAGS +#undef FAN_ALL_MARK_FLAGS +#undef FAN_ALL_EVENTS +#undef FAN_ALL_PERM_EVENTS +#undef FAN_ALL_OUTGOING_EVENTS + #endif /* _LINUX_FANOTIFY_H */ diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index ad81234d1919..d0c05de670ef 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -31,6 +31,8 @@ #define FAN_CLASS_NOTIF 0x00000000 #define FAN_CLASS_CONTENT 0x00000004 #define FAN_CLASS_PRE_CONTENT 0x00000008 + +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ FAN_CLASS_PRE_CONTENT) @@ -38,6 +40,7 @@ #define FAN_UNLIMITED_MARKS 0x00000020 #define FAN_ENABLE_AUDIT 0x00000040 +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ FAN_UNLIMITED_MARKS) @@ -57,23 +60,18 @@ #define FAN_MARK_INODE 0x00000000 #define FAN_MARK_MOUNT 0x00000010 #define FAN_MARK_FILESYSTEM 0x00000100 -#define FAN_MARK_TYPE_MASK (FAN_MARK_INODE | FAN_MARK_MOUNT | \ - FAN_MARK_FILESYSTEM) +/* Deprecated - do not use this in programs and do not add new flags here! 
*/ #define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ FAN_MARK_REMOVE |\ FAN_MARK_DONT_FOLLOW |\ FAN_MARK_ONLYDIR |\ + FAN_MARK_MOUNT |\ FAN_MARK_IGNORED_MASK |\ FAN_MARK_IGNORED_SURV_MODIFY |\ - FAN_MARK_FLUSH|\ - FAN_MARK_TYPE_MASK) + FAN_MARK_FLUSH) -/* - * All of the events - we build the list by hand so that we can add flags in - * the future and not break backward compatibility. Apps will get only the - * events that they originally wanted. Be sure to add new events here! - */ +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_EVENTS (FAN_ACCESS |\ FAN_MODIFY |\ FAN_CLOSE |\ @@ -82,9 +80,11 @@ /* * All events which require a permission response from userspace */ +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ FAN_ACCESS_PERM) +/* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ FAN_ALL_PERM_EVENTS |\ FAN_Q_OVERFLOW) -- cgit v1.2.3-71-gd317 From bdd5a46fe30653cb4d26c7c787a22159bf79eed9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:37 +0300 Subject: fanotify: add BUILD_BUG_ON() to count the bits of fanotify constants Also define the FANOTIFY_EVENT_FLAGS consisting of the extra flags FAN_ONDIR and FAN_EVENT_ON_CHILD. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 2 ++ fs/notify/fanotify/fanotify_user.c | 5 ++++- include/linux/fanotify.h | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 03498eb995be..361e3a0a445c 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -205,6 +205,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM); BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 10); + if (!fanotify_should_send_event(iter_info, mask, data, data_type)) return 0; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 530e5e486105..14594e491d2b 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -803,7 +803,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, struct fsnotify_group *group; struct fd f; struct path path; - u32 valid_mask = FANOTIFY_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR; + u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS; unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; int ret; @@ -944,6 +944,9 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, */ static int __init fanotify_user_setup(void) { + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 6); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); + fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC|SLAB_ACCOUNT); fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 4519b0988afe..caf55c67fc6c 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -41,11 +41,17 @@ /* Events that require a permission response from user */ #define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM) +/* Extra flags that may be reported with event or control handling of events */ +#define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR) + /* Events that may be reported to user */ #define FANOTIFY_OUTGOING_EVENTS 
(FANOTIFY_EVENTS | \ FANOTIFY_PERM_EVENTS | \ FAN_Q_OVERFLOW) +#define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \ + FANOTIFY_EVENT_FLAGS) + /* Do not use these old uapi constants internally */ #undef FAN_ALL_CLASS_BITS #undef FAN_ALL_INIT_FLAGS -- cgit v1.2.3-71-gd317 From 817e9bc8cc04e5c70592525b96812c46c1aa1c46 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 4 Oct 2018 09:57:23 -0700 Subject: Revert "serial:serial_core: Allow use of CTS for PPS line discipline" This reverts commit c550f01c810f2197c98e6e3103f81797f5e063be. Turns out the samsung tty driver is mucking around in the "unused" port fields and this patch breaks that code :( So we need to fix that driver up before this can be accepted. Reported-by: Stephen Rothwell Cc: Steve Sakoman Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-tty | 9 ----- drivers/tty/serial/serial_core.c | 70 +------------------------------------ include/linux/serial_core.h | 3 +- 3 files changed, 2 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty index 441105a75d1f..9eb3c2b6b040 100644 --- a/Documentation/ABI/testing/sysfs-tty +++ b/Documentation/ABI/testing/sysfs-tty @@ -154,12 +154,3 @@ Description: device specification. For example, when user sets 7bytes on 16550A, which has 1/4/8/14 bytes trigger, the RX trigger is automatically changed to 4 bytes. - -What: /sys/class/tty/ttyS0/pps_4wire -Date: September 2018 -Contact: Steve Sakoman -Description: - Shows/sets "4 wire" mode for the PPS input to the serial driver. - For fully implemented serial ports PPS is normally provided - on the DCD line. For partial "4 wire" implementations CTS is - used instead of DCD. diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 0a4e6eeb5ff3..70402cdb4d8c 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2724,57 +2724,6 @@ static ssize_t uart_get_attr_iomem_reg_shift(struct device *dev, return snprintf(buf, PAGE_SIZE, "%d\n", tmp.iomem_reg_shift); } -static ssize_t pps_4wire_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct tty_port *port = dev_get_drvdata(dev); - struct uart_state *state = container_of(port, struct uart_state, port); - struct uart_port *uport; - int mode = 0; - - mutex_lock(&port->mutex); - uport = uart_port_check(state); - if (!uport) - goto out; - - mode = uport->pps_4wire; - -out: - mutex_unlock(&port->mutex); - return sprintf(buf, mode ? 
"yes\n" : "no\n"); -} - -static ssize_t pps_4wire_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct tty_port *port = dev_get_drvdata(dev); - struct uart_state *state = container_of(port, struct uart_state, port); - struct uart_port *uport; - bool mode; - int ret; - - if (!count) - return -EINVAL; - - ret = kstrtobool(buf, &mode); - if (ret < 0) - return ret; - - mutex_lock(&port->mutex); - uport = uart_port_check(state); - if (!uport) - goto out; - - spin_lock_irq(&uport->lock); - uport->pps_4wire = mode; - spin_unlock_irq(&uport->lock); - -out: - mutex_unlock(&port->mutex); - return count; -} -static DEVICE_ATTR_RW(pps_4wire); - static DEVICE_ATTR(type, S_IRUSR | S_IRGRP, uart_get_attr_type, NULL); static DEVICE_ATTR(line, S_IRUSR | S_IRGRP, uart_get_attr_line, NULL); static DEVICE_ATTR(port, S_IRUSR | S_IRGRP, uart_get_attr_port, NULL); @@ -2803,7 +2752,6 @@ static struct attribute *tty_dev_attrs[] = { &dev_attr_io_type.attr, &dev_attr_iomem_base.attr, &dev_attr_iomem_reg_shift.attr, - &dev_attr_pps_4wire.attr, NULL, }; @@ -2860,9 +2808,6 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) goto out; } - /* assert that pps handling is done via DCD as default */ - uport->pps_4wire = 0; - /* * If this port is a console, then the spinlock is already * initialised. @@ -3038,7 +2983,7 @@ void uart_handle_dcd_change(struct uart_port *uport, unsigned int status) lockdep_assert_held_once(&uport->lock); - if (tty && !uport->pps_4wire) { + if (tty) { ld = tty_ldisc_ref(tty); if (ld) { if (ld->ops->dcd_change) @@ -3067,21 +3012,8 @@ EXPORT_SYMBOL_GPL(uart_handle_dcd_change); */ void uart_handle_cts_change(struct uart_port *uport, unsigned int status) { - struct tty_port *port = &uport->state->port; - struct tty_struct *tty = port->tty; - struct tty_ldisc *ld; - lockdep_assert_held_once(&uport->lock); - if (tty && uport->pps_4wire) { - ld = tty_ldisc_ref(tty); - if (ld) { - if (ld->ops->dcd_change) - ld->ops->dcd_change(tty, status); - tty_ldisc_deref(ld); - } - } - uport->icount.cts++; if (uart_softcts_mode(uport)) { diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 4e2ba4894dcc..047fa67d039b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -257,8 +257,7 @@ struct uart_port { struct device *dev; /* parent device */ unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; - unsigned char pps_4wire; /* CTS instead of DCD */ - unsigned char unused; + unsigned char unused[2]; const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ -- cgit v1.2.3-71-gd317 From 1bb89893d4fa8275333b3ed74fb0379d63025f9a Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 14 Sep 2018 19:37:23 -0500 Subject: remoteproc: Add missing kernel-doc comment for auto-boot The commit ddf711872c9d ("remoteproc: Introduce auto-boot flag") introduced the auto-boot flag but missed adding the corresponding kernel-doc comment. Add the same. 
Signed-off-by: Suman Anna Signed-off-by: Bjorn Andersson --- include/linux/remoteproc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index e3c5d856b6da..75f9ca05b865 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -439,6 +439,7 @@ struct rproc_dump_segment { * @cached_table: copy of the resource table * @table_sz: size of @cached_table * @has_iommu: flag to indicate if remote processor is behind an MMU + * @auto_boot: flag to indicate if remote processor should be auto-started * @dump_segments: list of segments in the firmware */ struct rproc { -- cgit v1.2.3-71-gd317 From d0a6a87e40da49cfc7954c491d3065a25a641b29 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 Oct 2018 00:25:38 +0300 Subject: fanotify: support reporting thread id instead of process id In order to identify which thread triggered the event in a multi-threaded program, add the FAN_REPORT_TID flag in fanotify_init to opt in to reporting the event creator's thread id information. Signed-off-by: nixiaoming Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 9 ++++++--- fs/notify/fanotify/fanotify.h | 2 +- fs/notify/fanotify/fanotify_user.c | 4 ++-- include/linux/fanotify.h | 1 + include/uapi/linux/fanotify.h | 3 +++ 5 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 361e3a0a445c..5769cf3ff035 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -25,7 +25,7 @@ static bool should_merge(struct fsnotify_event *old_fsn, old = FANOTIFY_E(old_fsn); new = FANOTIFY_E(new_fsn); - if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid && + if (old_fsn->inode == new_fsn->inode && old->pid == new->pid && old->path.mnt == new->path.mnt && old->path.dentry == new->path.dentry) return true; @@ -171,7 +171,10 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group, goto out; init: __maybe_unused fsnotify_init_event(&event->fse, inode, mask); + if (FAN_GROUP_FLAG(group, FAN_REPORT_TID)) + event->pid = get_pid(task_pid(current)); + else + event->pid = get_pid(task_tgid(current)); - event->tgid = get_pid(task_tgid(current)); if (path) { event->path = *path; path_get(&event->path); @@ -270,7 +273,7 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) event = FANOTIFY_E(fsn_event); path_put(&event->path); - put_pid(event->tgid); + put_pid(event->pid); if (fanotify_is_perm_event(fsn_event->mask)) { kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PE(fsn_event)); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 88a8290a61cb..ea05b8a401e7 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -19,7 +19,7 @@ struct fanotify_event_info { * during this object's lifetime */ struct path path; - struct pid *tgid; + struct pid *pid; }; /* diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 14594e491d2b..e03be5071362 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -132,7 +132,7 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->vers = FANOTIFY_METADATA_VERSION; metadata->reserved = 0; metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS; - metadata->pid = pid_vnr(event->tgid); + metadata->pid = pid_vnr(event->pid); if 
(unlikely(fsn_event->mask & FAN_Q_OVERFLOW)) metadata->fd = FAN_NOFD; else { @@ -944,7 +944,7 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, */ static int __init fanotify_user_setup(void) { - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 6); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 7); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9); fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index caf55c67fc6c..a5a60691e48b 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -19,6 +19,7 @@ FAN_CLASS_PRE_CONTENT) #define FANOTIFY_INIT_FLAGS (FANOTIFY_CLASS_BITS | \ + FAN_REPORT_TID | \ FAN_CLOEXEC | FAN_NONBLOCK | \ FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS) diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index d0c05de670ef..b86740d1c50a 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -40,6 +40,9 @@ #define FAN_UNLIMITED_MARKS 0x00000020 #define FAN_ENABLE_AUDIT 0x00000040 +/* Flags to determine fanotify event format */ +#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ + /* Deprecated - do not use this in programs and do not add new flags here! */ #define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ -- cgit v1.2.3-71-gd317 From edbee095fafb4b727b51032bdc41e345f95bbc20 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Sun, 7 Oct 2018 21:04:42 +0800 Subject: firmware: imx: add SCU firmware driver support The System Controller Firmware (SCFW) is a low-level system function which runs on a dedicated Cortex-M core to provide power, clock, and resource management. It exists on some i.MX8 processors, e.g. i.MX8QM (QM, QP) and i.MX8QX (QXP, DX). This patch implements the SCU firmware IPC function and the common message-sending API imx_scu_call_rpc.
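As an illustration of the API introduced below, here is a minimal client-side sketch (not part of the patch; the function ID 0 is purely illustrative) that obtains the IPC handle and sends a header-only RPC message with a response:

#include <linux/firmware/imx/ipc.h>

static int my_scu_rpc_example(void)
{
	struct imx_sc_ipc *ipc;
	struct imx_sc_rpc_msg msg;
	int ret;

	/* Returns -EPROBE_DEFER until the SCU driver has probed */
	ret = imx_scu_get_handle(&ipc);
	if (ret)
		return ret;

	msg.ver = IMX_SC_RPC_VERSION;
	msg.svc = IMX_SC_RPC_SVC_MISC;	/* service to invoke */
	msg.func = 0;			/* illustrative function ID */
	msg.size = 1;			/* length in words, header included */

	/* Waits for the response; SCU status is mapped to a Linux errno */
	return imx_scu_call_rpc(ipc, &msg, true);
}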
Cc: Shawn Guo Cc: Fabio Estevam Cc: Jassi Brar Reviewed-by: Sascha Hauer Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/firmware/Kconfig | 1 + drivers/firmware/Makefile | 1 + drivers/firmware/imx/Kconfig | 11 + drivers/firmware/imx/Makefile | 2 + drivers/firmware/imx/imx-scu.c | 270 ++++++++++++++++ include/linux/firmware/imx/ipc.h | 59 ++++ include/linux/firmware/imx/sci.h | 16 + include/linux/firmware/imx/types.h | 617 +++++++++++++++++++++++++++++++++++++ 8 files changed, 977 insertions(+) create mode 100644 drivers/firmware/imx/Kconfig create mode 100644 drivers/firmware/imx/Makefile create mode 100644 drivers/firmware/imx/imx-scu.c create mode 100644 include/linux/firmware/imx/ipc.h create mode 100644 include/linux/firmware/imx/sci.h create mode 100644 include/linux/firmware/imx/types.h (limited to 'include/linux') diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 6e83880046d7..bd9e4e2c41db 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -289,6 +289,7 @@ config HAVE_ARM_SMCCC source "drivers/firmware/broadcom/Kconfig" source "drivers/firmware/google/Kconfig" source "drivers/firmware/efi/Kconfig" +source "drivers/firmware/imx/Kconfig" source "drivers/firmware/meson/Kconfig" source "drivers/firmware/tegra/Kconfig" diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index e18a041cfc53..e1281d6a03d4 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -31,4 +31,5 @@ obj-y += meson/ obj-$(CONFIG_GOOGLE_FIRMWARE) += google/ obj-$(CONFIG_EFI) += efi/ obj-$(CONFIG_UEFI_CPER) += efi/ +obj-y += imx/ obj-y += tegra/ diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig new file mode 100644 index 000000000000..b170c2851e48 --- /dev/null +++ b/drivers/firmware/imx/Kconfig @@ -0,0 +1,11 @@ +config IMX_SCU + bool "IMX SCU Protocol driver" + depends on IMX_MBOX + help + The System Controller Firmware (SCFW) is a low-level system function + which runs on a dedicated Cortex-M core to provide power, clock, and + resource management. It exists on some i.MX8 processors. e.g. i.MX8QM + (QM, QP), and i.MX8QX (QXP, DX). + + This driver manages the IPC interface between host CPU and the + SCU firmware running on M4. diff --git a/drivers/firmware/imx/Makefile b/drivers/firmware/imx/Makefile new file mode 100644 index 000000000000..9b1e2febb1aa --- /dev/null +++ b/drivers/firmware/imx/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_IMX_SCU) += imx-scu.o diff --git a/drivers/firmware/imx/imx-scu.c b/drivers/firmware/imx/imx-scu.c new file mode 100644 index 000000000000..2bb1a19c413f --- /dev/null +++ b/drivers/firmware/imx/imx-scu.c @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2018 NXP + * Author: Dong Aisheng + * + * Implementation of the SCU IPC functions using MUs (client side). + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SCU_MU_CHAN_NUM 8 +#define MAX_RX_TIMEOUT (msecs_to_jiffies(30)) + +struct imx_sc_chan { + struct imx_sc_ipc *sc_ipc; + + struct mbox_client cl; + struct mbox_chan *ch; + int idx; +}; + +struct imx_sc_ipc { + /* SCU uses 4 Tx and 4 Rx channels */ + struct imx_sc_chan chans[SCU_MU_CHAN_NUM]; + struct device *dev; + struct mutex lock; + struct completion done; + + /* temporarily store the SCU msg */ + u32 *msg; + u8 rx_size; + u8 count; +}; + +/* + * This type is used to indicate error response for most functions. 
+ */ +enum imx_sc_error_codes { + IMX_SC_ERR_NONE = 0, /* Success */ + IMX_SC_ERR_VERSION = 1, /* Incompatible API version */ + IMX_SC_ERR_CONFIG = 2, /* Configuration error */ + IMX_SC_ERR_PARM = 3, /* Bad parameter */ + IMX_SC_ERR_NOACCESS = 4, /* Permission error (no access) */ + IMX_SC_ERR_LOCKED = 5, /* Permission error (locked) */ + IMX_SC_ERR_UNAVAILABLE = 6, /* Unavailable (out of resources) */ + IMX_SC_ERR_NOTFOUND = 7, /* Not found */ + IMX_SC_ERR_NOPOWER = 8, /* No power */ + IMX_SC_ERR_IPC = 9, /* Generic IPC error */ + IMX_SC_ERR_BUSY = 10, /* Resource is currently busy/active */ + IMX_SC_ERR_FAIL = 11, /* General I/O failure */ + IMX_SC_ERR_LAST +}; + +static int imx_sc_linux_errmap[IMX_SC_ERR_LAST] = { + 0, /* IMX_SC_ERR_NONE */ + -EINVAL, /* IMX_SC_ERR_VERSION */ + -EINVAL, /* IMX_SC_ERR_CONFIG */ + -EINVAL, /* IMX_SC_ERR_PARM */ + -EACCES, /* IMX_SC_ERR_NOACCESS */ + -EACCES, /* IMX_SC_ERR_LOCKED */ + -ERANGE, /* IMX_SC_ERR_UNAVAILABLE */ + -EEXIST, /* IMX_SC_ERR_NOTFOUND */ + -EPERM, /* IMX_SC_ERR_NOPOWER */ + -EPIPE, /* IMX_SC_ERR_IPC */ + -EBUSY, /* IMX_SC_ERR_BUSY */ + -EIO, /* IMX_SC_ERR_FAIL */ +}; + +static struct imx_sc_ipc *imx_sc_ipc_handle; + +static inline int imx_sc_to_linux_errno(int errno) +{ + if (errno >= IMX_SC_ERR_NONE && errno < IMX_SC_ERR_LAST) + return imx_sc_linux_errmap[errno]; + return -EIO; +} + +/* + * Get the default handle used by SCU + */ +int imx_scu_get_handle(struct imx_sc_ipc **ipc) +{ + if (!imx_sc_ipc_handle) + return -EPROBE_DEFER; + + *ipc = imx_sc_ipc_handle; + return 0; +} +EXPORT_SYMBOL(imx_scu_get_handle); + +static void imx_scu_rx_callback(struct mbox_client *c, void *msg) +{ + struct imx_sc_chan *sc_chan = container_of(c, struct imx_sc_chan, cl); + struct imx_sc_ipc *sc_ipc = sc_chan->sc_ipc; + struct imx_sc_rpc_msg *hdr; + u32 *data = msg; + + if (sc_chan->idx == 0) { + hdr = msg; + sc_ipc->rx_size = hdr->size; + dev_dbg(sc_ipc->dev, "msg rx size %u\n", sc_ipc->rx_size); + if (sc_ipc->rx_size > 4) + dev_warn(sc_ipc->dev, "RPC does not support receiving over 4 words: %u\n", + sc_ipc->rx_size); + } + + sc_ipc->msg[sc_chan->idx] = *data; + sc_ipc->count++; + + dev_dbg(sc_ipc->dev, "mu %u msg %u 0x%x\n", sc_chan->idx, + sc_ipc->count, *data); + + if ((sc_ipc->rx_size != 0) && (sc_ipc->count == sc_ipc->rx_size)) + complete(&sc_ipc->done); +} + +static int imx_scu_ipc_write(struct imx_sc_ipc *sc_ipc, void *msg) +{ + struct imx_sc_rpc_msg *hdr = msg; + struct imx_sc_chan *sc_chan; + u32 *data = msg; + int ret; + int i; + + /* Check size */ + if (hdr->size > IMX_SC_RPC_MAX_MSG) + return -EINVAL; + + dev_dbg(sc_ipc->dev, "RPC SVC %u FUNC %u SIZE %u\n", hdr->svc, + hdr->func, hdr->size); + + for (i = 0; i < hdr->size; i++) { + sc_chan = &sc_ipc->chans[i % 4]; + ret = mbox_send_message(sc_chan->ch, &data[i]); + if (ret < 0) + return ret; + } + + return 0; +} + +/* + * RPC command/response + */ +int imx_scu_call_rpc(struct imx_sc_ipc *sc_ipc, void *msg, bool have_resp) +{ + struct imx_sc_rpc_msg *hdr; + int ret; + + if (WARN_ON(!sc_ipc || !msg)) + return -EINVAL; + + mutex_lock(&sc_ipc->lock); + reinit_completion(&sc_ipc->done); + + sc_ipc->msg = msg; + sc_ipc->count = 0; + ret = imx_scu_ipc_write(sc_ipc, msg); + if (ret < 0) { + dev_err(sc_ipc->dev, "RPC send msg failed: %d\n", ret); + goto out; + } + + if (have_resp) { + if (!wait_for_completion_timeout(&sc_ipc->done, + MAX_RX_TIMEOUT)) { + dev_err(sc_ipc->dev, "RPC send msg timeout\n"); + mutex_unlock(&sc_ipc->lock); + return -ETIMEDOUT; + } + + /* response status is stored in hdr->func 
field */ + hdr = msg; + ret = hdr->func; + } + +out: + mutex_unlock(&sc_ipc->lock); + + dev_dbg(sc_ipc->dev, "RPC SVC done\n"); + + return imx_sc_to_linux_errno(ret); +} +EXPORT_SYMBOL(imx_scu_call_rpc); + +static int imx_scu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct imx_sc_ipc *sc_ipc; + struct imx_sc_chan *sc_chan; + struct mbox_client *cl; + char *chan_name; + int ret; + int i; + + sc_ipc = devm_kzalloc(dev, sizeof(*sc_ipc), GFP_KERNEL); + if (!sc_ipc) + return -ENOMEM; + + for (i = 0; i < SCU_MU_CHAN_NUM; i++) { + if (i < 4) + chan_name = kasprintf(GFP_KERNEL, "tx%d", i); + else + chan_name = kasprintf(GFP_KERNEL, "rx%d", i - 4); + + if (!chan_name) + return -ENOMEM; + + sc_chan = &sc_ipc->chans[i]; + cl = &sc_chan->cl; + cl->dev = dev; + cl->tx_block = false; + cl->knows_txdone = true; + cl->rx_callback = imx_scu_rx_callback; + + sc_chan->sc_ipc = sc_ipc; + sc_chan->idx = i % 4; + sc_chan->ch = mbox_request_channel_byname(cl, chan_name); + if (IS_ERR(sc_chan->ch)) { + ret = PTR_ERR(sc_chan->ch); + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to request mbox chan %s ret %d\n", + chan_name, ret); + return ret; + } + + dev_dbg(dev, "request mbox chan %s\n", chan_name); + /* chan_name is not used anymore by framework */ + kfree(chan_name); + } + + sc_ipc->dev = dev; + mutex_init(&sc_ipc->lock); + init_completion(&sc_ipc->done); + + imx_sc_ipc_handle = sc_ipc; + + dev_info(dev, "NXP i.MX SCU Initialized\n"); + + return devm_of_platform_populate(dev); +} + +static const struct of_device_id imx_scu_match[] = { + { .compatible = "fsl,imx-scu", }, + { /* Sentinel */ } +}; + +static struct platform_driver imx_scu_driver = { + .driver = { + .name = "imx-scu", + .of_match_table = imx_scu_match, + }, + .probe = imx_scu_probe, +}; +builtin_platform_driver(imx_scu_driver); + +MODULE_AUTHOR("Dong Aisheng "); +MODULE_DESCRIPTION("IMX SCU firmware protocol driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/firmware/imx/ipc.h b/include/linux/firmware/imx/ipc.h new file mode 100644 index 000000000000..6312c8cb084a --- /dev/null +++ b/include/linux/firmware/imx/ipc.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2018 NXP + * + * Header file for the IPC implementation. + */ + +#ifndef _SC_IPC_H +#define _SC_IPC_H + +#include +#include + +#define IMX_SC_RPC_VERSION 1 +#define IMX_SC_RPC_MAX_MSG 8 + +struct imx_sc_ipc; + +enum imx_sc_rpc_svc { + IMX_SC_RPC_SVC_UNKNOWN = 0, + IMX_SC_RPC_SVC_RETURN = 1, + IMX_SC_RPC_SVC_PM = 2, + IMX_SC_RPC_SVC_RM = 3, + IMX_SC_RPC_SVC_TIMER = 5, + IMX_SC_RPC_SVC_PAD = 6, + IMX_SC_RPC_SVC_MISC = 7, + IMX_SC_RPC_SVC_IRQ = 8, + IMX_SC_RPC_SVC_ABORT = 9 +}; + +struct imx_sc_rpc_msg { + uint8_t ver; + uint8_t size; + uint8_t svc; + uint8_t func; +}; + +/* + * This is an function to send an RPC message over an IPC channel. + * It is called by client-side SCFW API function shims. + * + * @param[in] ipc IPC handle + * @param[in,out] msg handle to a message + * @param[in] have_resp response flag + * + * If have_resp is true then this function waits for a response + * and returns the result in msg. 
+ */ +int imx_scu_call_rpc(struct imx_sc_ipc *ipc, void *msg, bool have_resp); + +/* + * This function gets the default ipc handle used by SCU + * + * @param[out] ipc sc ipc handle + * + * @return Returns an error code (0 = success, failed if < 0) + */ +int imx_scu_get_handle(struct imx_sc_ipc **ipc); +#endif /* _SC_IPC_H */ diff --git a/include/linux/firmware/imx/sci.h b/include/linux/firmware/imx/sci.h new file mode 100644 index 000000000000..ff3227ad8d7c --- /dev/null +++ b/include/linux/firmware/imx/sci.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2016 Freescale Semiconductor, Inc. + * Copyright 2017~2018 NXP + * + * Header file containing the public System Controller Interface (SCI) + * definitions. + */ + +#ifndef _SC_SCI_H +#define _SC_SCI_H + +#include +#include + +#endif /* _SC_SCI_H */ diff --git a/include/linux/firmware/imx/types.h b/include/linux/firmware/imx/types.h new file mode 100644 index 000000000000..9cbf0c4a6069 --- /dev/null +++ b/include/linux/firmware/imx/types.h @@ -0,0 +1,617 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2016 Freescale Semiconductor, Inc. + * Copyright 2017~2018 NXP + * + * Header file containing types used across multiple service APIs. + */ + +#ifndef _SC_TYPES_H +#define _SC_TYPES_H + +/* + * This type is used to indicate a resource. Resources include peripherals + * and bus masters (but not memory regions). Note items from list should + * never be changed or removed (only added to at the end of the list). + */ +enum imx_sc_rsrc { + IMX_SC_R_A53 = 0, + IMX_SC_R_A53_0 = 1, + IMX_SC_R_A53_1 = 2, + IMX_SC_R_A53_2 = 3, + IMX_SC_R_A53_3 = 4, + IMX_SC_R_A72 = 5, + IMX_SC_R_A72_0 = 6, + IMX_SC_R_A72_1 = 7, + IMX_SC_R_A72_2 = 8, + IMX_SC_R_A72_3 = 9, + IMX_SC_R_CCI = 10, + IMX_SC_R_DB = 11, + IMX_SC_R_DRC_0 = 12, + IMX_SC_R_DRC_1 = 13, + IMX_SC_R_GIC_SMMU = 14, + IMX_SC_R_IRQSTR_M4_0 = 15, + IMX_SC_R_IRQSTR_M4_1 = 16, + IMX_SC_R_SMMU = 17, + IMX_SC_R_GIC = 18, + IMX_SC_R_DC_0_BLIT0 = 19, + IMX_SC_R_DC_0_BLIT1 = 20, + IMX_SC_R_DC_0_BLIT2 = 21, + IMX_SC_R_DC_0_BLIT_OUT = 22, + IMX_SC_R_DC_0_CAPTURE0 = 23, + IMX_SC_R_DC_0_CAPTURE1 = 24, + IMX_SC_R_DC_0_WARP = 25, + IMX_SC_R_DC_0_INTEGRAL0 = 26, + IMX_SC_R_DC_0_INTEGRAL1 = 27, + IMX_SC_R_DC_0_VIDEO0 = 28, + IMX_SC_R_DC_0_VIDEO1 = 29, + IMX_SC_R_DC_0_FRAC0 = 30, + IMX_SC_R_DC_0_FRAC1 = 31, + IMX_SC_R_DC_0 = 32, + IMX_SC_R_GPU_2_PID0 = 33, + IMX_SC_R_DC_0_PLL_0 = 34, + IMX_SC_R_DC_0_PLL_1 = 35, + IMX_SC_R_DC_1_BLIT0 = 36, + IMX_SC_R_DC_1_BLIT1 = 37, + IMX_SC_R_DC_1_BLIT2 = 38, + IMX_SC_R_DC_1_BLIT_OUT = 39, + IMX_SC_R_DC_1_CAPTURE0 = 40, + IMX_SC_R_DC_1_CAPTURE1 = 41, + IMX_SC_R_DC_1_WARP = 42, + IMX_SC_R_DC_1_INTEGRAL0 = 43, + IMX_SC_R_DC_1_INTEGRAL1 = 44, + IMX_SC_R_DC_1_VIDEO0 = 45, + IMX_SC_R_DC_1_VIDEO1 = 46, + IMX_SC_R_DC_1_FRAC0 = 47, + IMX_SC_R_DC_1_FRAC1 = 48, + IMX_SC_R_DC_1 = 49, + IMX_SC_R_GPU_3_PID0 = 50, + IMX_SC_R_DC_1_PLL_0 = 51, + IMX_SC_R_DC_1_PLL_1 = 52, + IMX_SC_R_SPI_0 = 53, + IMX_SC_R_SPI_1 = 54, + IMX_SC_R_SPI_2 = 55, + IMX_SC_R_SPI_3 = 56, + IMX_SC_R_UART_0 = 57, + IMX_SC_R_UART_1 = 58, + IMX_SC_R_UART_2 = 59, + IMX_SC_R_UART_3 = 60, + IMX_SC_R_UART_4 = 61, + IMX_SC_R_EMVSIM_0 = 62, + IMX_SC_R_EMVSIM_1 = 63, + IMX_SC_R_DMA_0_CH0 = 64, + IMX_SC_R_DMA_0_CH1 = 65, + IMX_SC_R_DMA_0_CH2 = 66, + IMX_SC_R_DMA_0_CH3 = 67, + IMX_SC_R_DMA_0_CH4 = 68, + IMX_SC_R_DMA_0_CH5 = 69, + IMX_SC_R_DMA_0_CH6 = 70, + IMX_SC_R_DMA_0_CH7 = 71, + IMX_SC_R_DMA_0_CH8 = 72, + IMX_SC_R_DMA_0_CH9 = 73, + IMX_SC_R_DMA_0_CH10 = 74, + IMX_SC_R_DMA_0_CH11 = 75, + 
IMX_SC_R_DMA_0_CH12 = 76, + IMX_SC_R_DMA_0_CH13 = 77, + IMX_SC_R_DMA_0_CH14 = 78, + IMX_SC_R_DMA_0_CH15 = 79, + IMX_SC_R_DMA_0_CH16 = 80, + IMX_SC_R_DMA_0_CH17 = 81, + IMX_SC_R_DMA_0_CH18 = 82, + IMX_SC_R_DMA_0_CH19 = 83, + IMX_SC_R_DMA_0_CH20 = 84, + IMX_SC_R_DMA_0_CH21 = 85, + IMX_SC_R_DMA_0_CH22 = 86, + IMX_SC_R_DMA_0_CH23 = 87, + IMX_SC_R_DMA_0_CH24 = 88, + IMX_SC_R_DMA_0_CH25 = 89, + IMX_SC_R_DMA_0_CH26 = 90, + IMX_SC_R_DMA_0_CH27 = 91, + IMX_SC_R_DMA_0_CH28 = 92, + IMX_SC_R_DMA_0_CH29 = 93, + IMX_SC_R_DMA_0_CH30 = 94, + IMX_SC_R_DMA_0_CH31 = 95, + IMX_SC_R_I2C_0 = 96, + IMX_SC_R_I2C_1 = 97, + IMX_SC_R_I2C_2 = 98, + IMX_SC_R_I2C_3 = 99, + IMX_SC_R_I2C_4 = 100, + IMX_SC_R_ADC_0 = 101, + IMX_SC_R_ADC_1 = 102, + IMX_SC_R_FTM_0 = 103, + IMX_SC_R_FTM_1 = 104, + IMX_SC_R_CAN_0 = 105, + IMX_SC_R_CAN_1 = 106, + IMX_SC_R_CAN_2 = 107, + IMX_SC_R_DMA_1_CH0 = 108, + IMX_SC_R_DMA_1_CH1 = 109, + IMX_SC_R_DMA_1_CH2 = 110, + IMX_SC_R_DMA_1_CH3 = 111, + IMX_SC_R_DMA_1_CH4 = 112, + IMX_SC_R_DMA_1_CH5 = 113, + IMX_SC_R_DMA_1_CH6 = 114, + IMX_SC_R_DMA_1_CH7 = 115, + IMX_SC_R_DMA_1_CH8 = 116, + IMX_SC_R_DMA_1_CH9 = 117, + IMX_SC_R_DMA_1_CH10 = 118, + IMX_SC_R_DMA_1_CH11 = 119, + IMX_SC_R_DMA_1_CH12 = 120, + IMX_SC_R_DMA_1_CH13 = 121, + IMX_SC_R_DMA_1_CH14 = 122, + IMX_SC_R_DMA_1_CH15 = 123, + IMX_SC_R_DMA_1_CH16 = 124, + IMX_SC_R_DMA_1_CH17 = 125, + IMX_SC_R_DMA_1_CH18 = 126, + IMX_SC_R_DMA_1_CH19 = 127, + IMX_SC_R_DMA_1_CH20 = 128, + IMX_SC_R_DMA_1_CH21 = 129, + IMX_SC_R_DMA_1_CH22 = 130, + IMX_SC_R_DMA_1_CH23 = 131, + IMX_SC_R_DMA_1_CH24 = 132, + IMX_SC_R_DMA_1_CH25 = 133, + IMX_SC_R_DMA_1_CH26 = 134, + IMX_SC_R_DMA_1_CH27 = 135, + IMX_SC_R_DMA_1_CH28 = 136, + IMX_SC_R_DMA_1_CH29 = 137, + IMX_SC_R_DMA_1_CH30 = 138, + IMX_SC_R_DMA_1_CH31 = 139, + IMX_SC_R_UNUSED1 = 140, + IMX_SC_R_UNUSED2 = 141, + IMX_SC_R_UNUSED3 = 142, + IMX_SC_R_UNUSED4 = 143, + IMX_SC_R_GPU_0_PID0 = 144, + IMX_SC_R_GPU_0_PID1 = 145, + IMX_SC_R_GPU_0_PID2 = 146, + IMX_SC_R_GPU_0_PID3 = 147, + IMX_SC_R_GPU_1_PID0 = 148, + IMX_SC_R_GPU_1_PID1 = 149, + IMX_SC_R_GPU_1_PID2 = 150, + IMX_SC_R_GPU_1_PID3 = 151, + IMX_SC_R_PCIE_A = 152, + IMX_SC_R_SERDES_0 = 153, + IMX_SC_R_MATCH_0 = 154, + IMX_SC_R_MATCH_1 = 155, + IMX_SC_R_MATCH_2 = 156, + IMX_SC_R_MATCH_3 = 157, + IMX_SC_R_MATCH_4 = 158, + IMX_SC_R_MATCH_5 = 159, + IMX_SC_R_MATCH_6 = 160, + IMX_SC_R_MATCH_7 = 161, + IMX_SC_R_MATCH_8 = 162, + IMX_SC_R_MATCH_9 = 163, + IMX_SC_R_MATCH_10 = 164, + IMX_SC_R_MATCH_11 = 165, + IMX_SC_R_MATCH_12 = 166, + IMX_SC_R_MATCH_13 = 167, + IMX_SC_R_MATCH_14 = 168, + IMX_SC_R_PCIE_B = 169, + IMX_SC_R_SATA_0 = 170, + IMX_SC_R_SERDES_1 = 171, + IMX_SC_R_HSIO_GPIO = 172, + IMX_SC_R_MATCH_15 = 173, + IMX_SC_R_MATCH_16 = 174, + IMX_SC_R_MATCH_17 = 175, + IMX_SC_R_MATCH_18 = 176, + IMX_SC_R_MATCH_19 = 177, + IMX_SC_R_MATCH_20 = 178, + IMX_SC_R_MATCH_21 = 179, + IMX_SC_R_MATCH_22 = 180, + IMX_SC_R_MATCH_23 = 181, + IMX_SC_R_MATCH_24 = 182, + IMX_SC_R_MATCH_25 = 183, + IMX_SC_R_MATCH_26 = 184, + IMX_SC_R_MATCH_27 = 185, + IMX_SC_R_MATCH_28 = 186, + IMX_SC_R_LCD_0 = 187, + IMX_SC_R_LCD_0_PWM_0 = 188, + IMX_SC_R_LCD_0_I2C_0 = 189, + IMX_SC_R_LCD_0_I2C_1 = 190, + IMX_SC_R_PWM_0 = 191, + IMX_SC_R_PWM_1 = 192, + IMX_SC_R_PWM_2 = 193, + IMX_SC_R_PWM_3 = 194, + IMX_SC_R_PWM_4 = 195, + IMX_SC_R_PWM_5 = 196, + IMX_SC_R_PWM_6 = 197, + IMX_SC_R_PWM_7 = 198, + IMX_SC_R_GPIO_0 = 199, + IMX_SC_R_GPIO_1 = 200, + IMX_SC_R_GPIO_2 = 201, + IMX_SC_R_GPIO_3 = 202, + IMX_SC_R_GPIO_4 = 203, + IMX_SC_R_GPIO_5 = 204, + IMX_SC_R_GPIO_6 = 205, + IMX_SC_R_GPIO_7 = 206, + IMX_SC_R_GPT_0 = 
207, + IMX_SC_R_GPT_1 = 208, + IMX_SC_R_GPT_2 = 209, + IMX_SC_R_GPT_3 = 210, + IMX_SC_R_GPT_4 = 211, + IMX_SC_R_KPP = 212, + IMX_SC_R_MU_0A = 213, + IMX_SC_R_MU_1A = 214, + IMX_SC_R_MU_2A = 215, + IMX_SC_R_MU_3A = 216, + IMX_SC_R_MU_4A = 217, + IMX_SC_R_MU_5A = 218, + IMX_SC_R_MU_6A = 219, + IMX_SC_R_MU_7A = 220, + IMX_SC_R_MU_8A = 221, + IMX_SC_R_MU_9A = 222, + IMX_SC_R_MU_10A = 223, + IMX_SC_R_MU_11A = 224, + IMX_SC_R_MU_12A = 225, + IMX_SC_R_MU_13A = 226, + IMX_SC_R_MU_5B = 227, + IMX_SC_R_MU_6B = 228, + IMX_SC_R_MU_7B = 229, + IMX_SC_R_MU_8B = 230, + IMX_SC_R_MU_9B = 231, + IMX_SC_R_MU_10B = 232, + IMX_SC_R_MU_11B = 233, + IMX_SC_R_MU_12B = 234, + IMX_SC_R_MU_13B = 235, + IMX_SC_R_ROM_0 = 236, + IMX_SC_R_FSPI_0 = 237, + IMX_SC_R_FSPI_1 = 238, + IMX_SC_R_IEE = 239, + IMX_SC_R_IEE_R0 = 240, + IMX_SC_R_IEE_R1 = 241, + IMX_SC_R_IEE_R2 = 242, + IMX_SC_R_IEE_R3 = 243, + IMX_SC_R_IEE_R4 = 244, + IMX_SC_R_IEE_R5 = 245, + IMX_SC_R_IEE_R6 = 246, + IMX_SC_R_IEE_R7 = 247, + IMX_SC_R_SDHC_0 = 248, + IMX_SC_R_SDHC_1 = 249, + IMX_SC_R_SDHC_2 = 250, + IMX_SC_R_ENET_0 = 251, + IMX_SC_R_ENET_1 = 252, + IMX_SC_R_MLB_0 = 253, + IMX_SC_R_DMA_2_CH0 = 254, + IMX_SC_R_DMA_2_CH1 = 255, + IMX_SC_R_DMA_2_CH2 = 256, + IMX_SC_R_DMA_2_CH3 = 257, + IMX_SC_R_DMA_2_CH4 = 258, + IMX_SC_R_USB_0 = 259, + IMX_SC_R_USB_1 = 260, + IMX_SC_R_USB_0_PHY = 261, + IMX_SC_R_USB_2 = 262, + IMX_SC_R_USB_2_PHY = 263, + IMX_SC_R_DTCP = 264, + IMX_SC_R_NAND = 265, + IMX_SC_R_LVDS_0 = 266, + IMX_SC_R_LVDS_0_PWM_0 = 267, + IMX_SC_R_LVDS_0_I2C_0 = 268, + IMX_SC_R_LVDS_0_I2C_1 = 269, + IMX_SC_R_LVDS_1 = 270, + IMX_SC_R_LVDS_1_PWM_0 = 271, + IMX_SC_R_LVDS_1_I2C_0 = 272, + IMX_SC_R_LVDS_1_I2C_1 = 273, + IMX_SC_R_LVDS_2 = 274, + IMX_SC_R_LVDS_2_PWM_0 = 275, + IMX_SC_R_LVDS_2_I2C_0 = 276, + IMX_SC_R_LVDS_2_I2C_1 = 277, + IMX_SC_R_M4_0_PID0 = 278, + IMX_SC_R_M4_0_PID1 = 279, + IMX_SC_R_M4_0_PID2 = 280, + IMX_SC_R_M4_0_PID3 = 281, + IMX_SC_R_M4_0_PID4 = 282, + IMX_SC_R_M4_0_RGPIO = 283, + IMX_SC_R_M4_0_SEMA42 = 284, + IMX_SC_R_M4_0_TPM = 285, + IMX_SC_R_M4_0_PIT = 286, + IMX_SC_R_M4_0_UART = 287, + IMX_SC_R_M4_0_I2C = 288, + IMX_SC_R_M4_0_INTMUX = 289, + IMX_SC_R_M4_0_SIM = 290, + IMX_SC_R_M4_0_WDOG = 291, + IMX_SC_R_M4_0_MU_0B = 292, + IMX_SC_R_M4_0_MU_0A0 = 293, + IMX_SC_R_M4_0_MU_0A1 = 294, + IMX_SC_R_M4_0_MU_0A2 = 295, + IMX_SC_R_M4_0_MU_0A3 = 296, + IMX_SC_R_M4_0_MU_1A = 297, + IMX_SC_R_M4_1_PID0 = 298, + IMX_SC_R_M4_1_PID1 = 299, + IMX_SC_R_M4_1_PID2 = 300, + IMX_SC_R_M4_1_PID3 = 301, + IMX_SC_R_M4_1_PID4 = 302, + IMX_SC_R_M4_1_RGPIO = 303, + IMX_SC_R_M4_1_SEMA42 = 304, + IMX_SC_R_M4_1_TPM = 305, + IMX_SC_R_M4_1_PIT = 306, + IMX_SC_R_M4_1_UART = 307, + IMX_SC_R_M4_1_I2C = 308, + IMX_SC_R_M4_1_INTMUX = 309, + IMX_SC_R_M4_1_SIM = 310, + IMX_SC_R_M4_1_WDOG = 311, + IMX_SC_R_M4_1_MU_0B = 312, + IMX_SC_R_M4_1_MU_0A0 = 313, + IMX_SC_R_M4_1_MU_0A1 = 314, + IMX_SC_R_M4_1_MU_0A2 = 315, + IMX_SC_R_M4_1_MU_0A3 = 316, + IMX_SC_R_M4_1_MU_1A = 317, + IMX_SC_R_SAI_0 = 318, + IMX_SC_R_SAI_1 = 319, + IMX_SC_R_SAI_2 = 320, + IMX_SC_R_IRQSTR_SCU2 = 321, + IMX_SC_R_IRQSTR_DSP = 322, + IMX_SC_R_UNUSED5 = 323, + IMX_SC_R_UNUSED6 = 324, + IMX_SC_R_AUDIO_PLL_0 = 325, + IMX_SC_R_PI_0 = 326, + IMX_SC_R_PI_0_PWM_0 = 327, + IMX_SC_R_PI_0_PWM_1 = 328, + IMX_SC_R_PI_0_I2C_0 = 329, + IMX_SC_R_PI_0_PLL = 330, + IMX_SC_R_PI_1 = 331, + IMX_SC_R_PI_1_PWM_0 = 332, + IMX_SC_R_PI_1_PWM_1 = 333, + IMX_SC_R_PI_1_I2C_0 = 334, + IMX_SC_R_PI_1_PLL = 335, + IMX_SC_R_SC_PID0 = 336, + IMX_SC_R_SC_PID1 = 337, + IMX_SC_R_SC_PID2 = 338, + IMX_SC_R_SC_PID3 = 339, + IMX_SC_R_SC_PID4 = 340, 
+ IMX_SC_R_SC_SEMA42 = 341, + IMX_SC_R_SC_TPM = 342, + IMX_SC_R_SC_PIT = 343, + IMX_SC_R_SC_UART = 344, + IMX_SC_R_SC_I2C = 345, + IMX_SC_R_SC_MU_0B = 346, + IMX_SC_R_SC_MU_0A0 = 347, + IMX_SC_R_SC_MU_0A1 = 348, + IMX_SC_R_SC_MU_0A2 = 349, + IMX_SC_R_SC_MU_0A3 = 350, + IMX_SC_R_SC_MU_1A = 351, + IMX_SC_R_SYSCNT_RD = 352, + IMX_SC_R_SYSCNT_CMP = 353, + IMX_SC_R_DEBUG = 354, + IMX_SC_R_SYSTEM = 355, + IMX_SC_R_SNVS = 356, + IMX_SC_R_OTP = 357, + IMX_SC_R_VPU_PID0 = 358, + IMX_SC_R_VPU_PID1 = 359, + IMX_SC_R_VPU_PID2 = 360, + IMX_SC_R_VPU_PID3 = 361, + IMX_SC_R_VPU_PID4 = 362, + IMX_SC_R_VPU_PID5 = 363, + IMX_SC_R_VPU_PID6 = 364, + IMX_SC_R_VPU_PID7 = 365, + IMX_SC_R_VPU_UART = 366, + IMX_SC_R_VPUCORE = 367, + IMX_SC_R_VPUCORE_0 = 368, + IMX_SC_R_VPUCORE_1 = 369, + IMX_SC_R_VPUCORE_2 = 370, + IMX_SC_R_VPUCORE_3 = 371, + IMX_SC_R_DMA_4_CH0 = 372, + IMX_SC_R_DMA_4_CH1 = 373, + IMX_SC_R_DMA_4_CH2 = 374, + IMX_SC_R_DMA_4_CH3 = 375, + IMX_SC_R_DMA_4_CH4 = 376, + IMX_SC_R_ISI_CH0 = 377, + IMX_SC_R_ISI_CH1 = 378, + IMX_SC_R_ISI_CH2 = 379, + IMX_SC_R_ISI_CH3 = 380, + IMX_SC_R_ISI_CH4 = 381, + IMX_SC_R_ISI_CH5 = 382, + IMX_SC_R_ISI_CH6 = 383, + IMX_SC_R_ISI_CH7 = 384, + IMX_SC_R_MJPEG_DEC_S0 = 385, + IMX_SC_R_MJPEG_DEC_S1 = 386, + IMX_SC_R_MJPEG_DEC_S2 = 387, + IMX_SC_R_MJPEG_DEC_S3 = 388, + IMX_SC_R_MJPEG_ENC_S0 = 389, + IMX_SC_R_MJPEG_ENC_S1 = 390, + IMX_SC_R_MJPEG_ENC_S2 = 391, + IMX_SC_R_MJPEG_ENC_S3 = 392, + IMX_SC_R_MIPI_0 = 393, + IMX_SC_R_MIPI_0_PWM_0 = 394, + IMX_SC_R_MIPI_0_I2C_0 = 395, + IMX_SC_R_MIPI_0_I2C_1 = 396, + IMX_SC_R_MIPI_1 = 397, + IMX_SC_R_MIPI_1_PWM_0 = 398, + IMX_SC_R_MIPI_1_I2C_0 = 399, + IMX_SC_R_MIPI_1_I2C_1 = 400, + IMX_SC_R_CSI_0 = 401, + IMX_SC_R_CSI_0_PWM_0 = 402, + IMX_SC_R_CSI_0_I2C_0 = 403, + IMX_SC_R_CSI_1 = 404, + IMX_SC_R_CSI_1_PWM_0 = 405, + IMX_SC_R_CSI_1_I2C_0 = 406, + IMX_SC_R_HDMI = 407, + IMX_SC_R_HDMI_I2S = 408, + IMX_SC_R_HDMI_I2C_0 = 409, + IMX_SC_R_HDMI_PLL_0 = 410, + IMX_SC_R_HDMI_RX = 411, + IMX_SC_R_HDMI_RX_BYPASS = 412, + IMX_SC_R_HDMI_RX_I2C_0 = 413, + IMX_SC_R_ASRC_0 = 414, + IMX_SC_R_ESAI_0 = 415, + IMX_SC_R_SPDIF_0 = 416, + IMX_SC_R_SPDIF_1 = 417, + IMX_SC_R_SAI_3 = 418, + IMX_SC_R_SAI_4 = 419, + IMX_SC_R_SAI_5 = 420, + IMX_SC_R_GPT_5 = 421, + IMX_SC_R_GPT_6 = 422, + IMX_SC_R_GPT_7 = 423, + IMX_SC_R_GPT_8 = 424, + IMX_SC_R_GPT_9 = 425, + IMX_SC_R_GPT_10 = 426, + IMX_SC_R_DMA_2_CH5 = 427, + IMX_SC_R_DMA_2_CH6 = 428, + IMX_SC_R_DMA_2_CH7 = 429, + IMX_SC_R_DMA_2_CH8 = 430, + IMX_SC_R_DMA_2_CH9 = 431, + IMX_SC_R_DMA_2_CH10 = 432, + IMX_SC_R_DMA_2_CH11 = 433, + IMX_SC_R_DMA_2_CH12 = 434, + IMX_SC_R_DMA_2_CH13 = 435, + IMX_SC_R_DMA_2_CH14 = 436, + IMX_SC_R_DMA_2_CH15 = 437, + IMX_SC_R_DMA_2_CH16 = 438, + IMX_SC_R_DMA_2_CH17 = 439, + IMX_SC_R_DMA_2_CH18 = 440, + IMX_SC_R_DMA_2_CH19 = 441, + IMX_SC_R_DMA_2_CH20 = 442, + IMX_SC_R_DMA_2_CH21 = 443, + IMX_SC_R_DMA_2_CH22 = 444, + IMX_SC_R_DMA_2_CH23 = 445, + IMX_SC_R_DMA_2_CH24 = 446, + IMX_SC_R_DMA_2_CH25 = 447, + IMX_SC_R_DMA_2_CH26 = 448, + IMX_SC_R_DMA_2_CH27 = 449, + IMX_SC_R_DMA_2_CH28 = 450, + IMX_SC_R_DMA_2_CH29 = 451, + IMX_SC_R_DMA_2_CH30 = 452, + IMX_SC_R_DMA_2_CH31 = 453, + IMX_SC_R_ASRC_1 = 454, + IMX_SC_R_ESAI_1 = 455, + IMX_SC_R_SAI_6 = 456, + IMX_SC_R_SAI_7 = 457, + IMX_SC_R_AMIX = 458, + IMX_SC_R_MQS_0 = 459, + IMX_SC_R_DMA_3_CH0 = 460, + IMX_SC_R_DMA_3_CH1 = 461, + IMX_SC_R_DMA_3_CH2 = 462, + IMX_SC_R_DMA_3_CH3 = 463, + IMX_SC_R_DMA_3_CH4 = 464, + IMX_SC_R_DMA_3_CH5 = 465, + IMX_SC_R_DMA_3_CH6 = 466, + IMX_SC_R_DMA_3_CH7 = 467, + IMX_SC_R_DMA_3_CH8 = 468, + IMX_SC_R_DMA_3_CH9 = 469, + 
IMX_SC_R_DMA_3_CH10 = 470, + IMX_SC_R_DMA_3_CH11 = 471, + IMX_SC_R_DMA_3_CH12 = 472, + IMX_SC_R_DMA_3_CH13 = 473, + IMX_SC_R_DMA_3_CH14 = 474, + IMX_SC_R_DMA_3_CH15 = 475, + IMX_SC_R_DMA_3_CH16 = 476, + IMX_SC_R_DMA_3_CH17 = 477, + IMX_SC_R_DMA_3_CH18 = 478, + IMX_SC_R_DMA_3_CH19 = 479, + IMX_SC_R_DMA_3_CH20 = 480, + IMX_SC_R_DMA_3_CH21 = 481, + IMX_SC_R_DMA_3_CH22 = 482, + IMX_SC_R_DMA_3_CH23 = 483, + IMX_SC_R_DMA_3_CH24 = 484, + IMX_SC_R_DMA_3_CH25 = 485, + IMX_SC_R_DMA_3_CH26 = 486, + IMX_SC_R_DMA_3_CH27 = 487, + IMX_SC_R_DMA_3_CH28 = 488, + IMX_SC_R_DMA_3_CH29 = 489, + IMX_SC_R_DMA_3_CH30 = 490, + IMX_SC_R_DMA_3_CH31 = 491, + IMX_SC_R_AUDIO_PLL_1 = 492, + IMX_SC_R_AUDIO_CLK_0 = 493, + IMX_SC_R_AUDIO_CLK_1 = 494, + IMX_SC_R_MCLK_OUT_0 = 495, + IMX_SC_R_MCLK_OUT_1 = 496, + IMX_SC_R_PMIC_0 = 497, + IMX_SC_R_PMIC_1 = 498, + IMX_SC_R_SECO = 499, + IMX_SC_R_CAAM_JR1 = 500, + IMX_SC_R_CAAM_JR2 = 501, + IMX_SC_R_CAAM_JR3 = 502, + IMX_SC_R_SECO_MU_2 = 503, + IMX_SC_R_SECO_MU_3 = 504, + IMX_SC_R_SECO_MU_4 = 505, + IMX_SC_R_HDMI_RX_PWM_0 = 506, + IMX_SC_R_A35 = 507, + IMX_SC_R_A35_0 = 508, + IMX_SC_R_A35_1 = 509, + IMX_SC_R_A35_2 = 510, + IMX_SC_R_A35_3 = 511, + IMX_SC_R_DSP = 512, + IMX_SC_R_DSP_RAM = 513, + IMX_SC_R_CAAM_JR1_OUT = 514, + IMX_SC_R_CAAM_JR2_OUT = 515, + IMX_SC_R_CAAM_JR3_OUT = 516, + IMX_SC_R_VPU_DEC_0 = 517, + IMX_SC_R_VPU_ENC_0 = 518, + IMX_SC_R_CAAM_JR0 = 519, + IMX_SC_R_CAAM_JR0_OUT = 520, + IMX_SC_R_PMIC_2 = 521, + IMX_SC_R_DBLOGIC = 522, + IMX_SC_R_HDMI_PLL_1 = 523, + IMX_SC_R_BOARD_R0 = 524, + IMX_SC_R_BOARD_R1 = 525, + IMX_SC_R_BOARD_R2 = 526, + IMX_SC_R_BOARD_R3 = 527, + IMX_SC_R_BOARD_R4 = 528, + IMX_SC_R_BOARD_R5 = 529, + IMX_SC_R_BOARD_R6 = 530, + IMX_SC_R_BOARD_R7 = 531, + IMX_SC_R_MJPEG_DEC_MP = 532, + IMX_SC_R_MJPEG_ENC_MP = 533, + IMX_SC_R_VPU_TS_0 = 534, + IMX_SC_R_VPU_MU_0 = 535, + IMX_SC_R_VPU_MU_1 = 536, + IMX_SC_R_VPU_MU_2 = 537, + IMX_SC_R_VPU_MU_3 = 538, + IMX_SC_R_VPU_ENC_1 = 539, + IMX_SC_R_VPU = 540, + IMX_SC_R_LAST +}; + +/* NOTE - please add by replacing some of the UNUSED from above! */ + +/* + * This type is used to indicate a control. 
+ */ +enum imx_sc_ctrl { + IMX_SC_C_TEMP = 0, + IMX_SC_C_TEMP_HI = 1, + IMX_SC_C_TEMP_LOW = 2, + IMX_SC_C_PXL_LINK_MST1_ADDR = 3, + IMX_SC_C_PXL_LINK_MST2_ADDR = 4, + IMX_SC_C_PXL_LINK_MST_ENB = 5, + IMX_SC_C_PXL_LINK_MST1_ENB = 6, + IMX_SC_C_PXL_LINK_MST2_ENB = 7, + IMX_SC_C_PXL_LINK_SLV1_ADDR = 8, + IMX_SC_C_PXL_LINK_SLV2_ADDR = 9, + IMX_SC_C_PXL_LINK_MST_VLD = 10, + IMX_SC_C_PXL_LINK_MST1_VLD = 11, + IMX_SC_C_PXL_LINK_MST2_VLD = 12, + IMX_SC_C_SINGLE_MODE = 13, + IMX_SC_C_ID = 14, + IMX_SC_C_PXL_CLK_POLARITY = 15, + IMX_SC_C_LINESTATE = 16, + IMX_SC_C_PCIE_G_RST = 17, + IMX_SC_C_PCIE_BUTTON_RST = 18, + IMX_SC_C_PCIE_PERST = 19, + IMX_SC_C_PHY_RESET = 20, + IMX_SC_C_PXL_LINK_RATE_CORRECTION = 21, + IMX_SC_C_PANIC = 22, + IMX_SC_C_PRIORITY_GROUP = 23, + IMX_SC_C_TXCLK = 24, + IMX_SC_C_CLKDIV = 25, + IMX_SC_C_DISABLE_50 = 26, + IMX_SC_C_DISABLE_125 = 27, + IMX_SC_C_SEL_125 = 28, + IMX_SC_C_MODE = 29, + IMX_SC_C_SYNC_CTRL0 = 30, + IMX_SC_C_KACHUNK_CNT = 31, + IMX_SC_C_KACHUNK_SEL = 32, + IMX_SC_C_SYNC_CTRL1 = 33, + IMX_SC_C_DPI_RESET = 34, + IMX_SC_C_MIPI_RESET = 35, + IMX_SC_C_DUAL_MODE = 36, + IMX_SC_C_VOLTAGE = 37, + IMX_SC_C_PXL_LINK_SEL = 38, + IMX_SC_C_OFS_SEL = 39, + IMX_SC_C_OFS_AUDIO = 40, + IMX_SC_C_OFS_PERIPH = 41, + IMX_SC_C_OFS_IRQ = 42, + IMX_SC_C_RST0 = 43, + IMX_SC_C_RST1 = 44, + IMX_SC_C_SEL0 = 45, + IMX_SC_C_LAST +}; + +#endif /* _SC_TYPES_H */ -- cgit v1.2.3-71-gd317 From 15e1f2bc8b3b2d238b9e06b128d4a09d28f11733 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Sun, 7 Oct 2018 21:04:43 +0800 Subject: firmware: imx: add misc svc support Add SCU MISC SVC support which provides misc control get/set functions. Cc: Shawn Guo Reviewed-by: Sascha Hauer Signed-off-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/firmware/imx/Makefile | 2 +- drivers/firmware/imx/misc.c | 99 +++++++++++++++++++++++++++++++++++ include/linux/firmware/imx/sci.h | 1 + include/linux/firmware/imx/svc/misc.h | 55 +++++++++++++++++++ 4 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 drivers/firmware/imx/misc.c create mode 100644 include/linux/firmware/imx/svc/misc.h (limited to 'include/linux') diff --git a/drivers/firmware/imx/Makefile b/drivers/firmware/imx/Makefile index 9b1e2febb1aa..0ac04dfda8d4 100644 --- a/drivers/firmware/imx/Makefile +++ b/drivers/firmware/imx/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_IMX_SCU) += imx-scu.o +obj-$(CONFIG_IMX_SCU) += imx-scu.o misc.o diff --git a/drivers/firmware/imx/misc.c b/drivers/firmware/imx/misc.c new file mode 100644 index 000000000000..97f5424dbac9 --- /dev/null +++ b/drivers/firmware/imx/misc.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2016 Freescale Semiconductor, Inc. + * Copyright 2017~2018 NXP + * Author: Dong Aisheng + * + * File containing client-side RPC functions for the MISC service. These + * function are ported to clients that communicate to the SC. + * + */ + +#include + +struct imx_sc_msg_req_misc_set_ctrl { + struct imx_sc_rpc_msg hdr; + u32 ctrl; + u32 val; + u16 resource; +} __packed; + +struct imx_sc_msg_req_misc_get_ctrl { + struct imx_sc_rpc_msg hdr; + u32 ctrl; + u16 resource; +} __packed; + +struct imx_sc_msg_resp_misc_get_ctrl { + struct imx_sc_rpc_msg hdr; + u32 val; +} __packed; + +/* + * This function sets a miscellaneous control value. 
+ * + * @param[in] ipc IPC handle + * @param[in] resource resource the control is associated with + * @param[in] ctrl control to change + * @param[in] val value to apply to the control + * + * @return Returns 0 for success and < 0 for errors. + */ + +int imx_sc_misc_set_control(struct imx_sc_ipc *ipc, u32 resource, + u8 ctrl, u32 val) +{ + struct imx_sc_msg_req_misc_set_ctrl msg; + struct imx_sc_rpc_msg *hdr = &msg.hdr; + + hdr->ver = IMX_SC_RPC_VERSION; + hdr->svc = (uint8_t)IMX_SC_RPC_SVC_MISC; + hdr->func = (uint8_t)IMX_SC_MISC_FUNC_SET_CONTROL; + hdr->size = 4; + + msg.ctrl = ctrl; + msg.val = val; + msg.resource = resource; + + return imx_scu_call_rpc(ipc, &msg, true); +} +EXPORT_SYMBOL(imx_sc_misc_set_control); + +/* + * This function gets a miscellaneous control value. + * + * @param[in] ipc IPC handle + * @param[in] resource resource the control is associated with + * @param[in] ctrl control to get + * @param[out] val pointer to return the control value + * + * @return Returns 0 for success and < 0 for errors. + */ + +int imx_sc_misc_get_control(struct imx_sc_ipc *ipc, u32 resource, + u8 ctrl, u32 *val) +{ + struct imx_sc_msg_req_misc_get_ctrl msg; + struct imx_sc_msg_resp_misc_get_ctrl *resp; + struct imx_sc_rpc_msg *hdr = &msg.hdr; + int ret; + + hdr->ver = IMX_SC_RPC_VERSION; + hdr->svc = (uint8_t)IMX_SC_RPC_SVC_MISC; + hdr->func = (uint8_t)IMX_SC_MISC_FUNC_GET_CONTROL; + hdr->size = 3; + + msg.ctrl = ctrl; + msg.resource = resource; + + ret = imx_scu_call_rpc(ipc, &msg, true); + if (ret) + return ret; + + resp = (struct imx_sc_msg_resp_misc_get_ctrl *)&msg; + if (val != NULL) + *val = resp->val; + + return 0; +} +EXPORT_SYMBOL(imx_sc_misc_get_control); diff --git a/include/linux/firmware/imx/sci.h b/include/linux/firmware/imx/sci.h index ff3227ad8d7c..29ada609de03 100644 --- a/include/linux/firmware/imx/sci.h +++ b/include/linux/firmware/imx/sci.h @@ -13,4 +13,5 @@ #include #include +#include #endif /* _SC_SCI_H */ diff --git a/include/linux/firmware/imx/svc/misc.h b/include/linux/firmware/imx/svc/misc.h new file mode 100644 index 000000000000..e21c49aba92f --- /dev/null +++ b/include/linux/firmware/imx/svc/misc.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (C) 2016 Freescale Semiconductor, Inc. + * Copyright 2017~2018 NXP + * + * Header file containing the public API for the System Controller (SC) + * Miscellaneous (MISC) function. + * + * MISC_SVC (SVC) Miscellaneous Service + * + * Module for the Miscellaneous (MISC) service. + */ + +#ifndef _SC_MISC_API_H +#define _SC_MISC_API_H + +#include + +/* + * This type is used to indicate RPC MISC function calls. 
+ */ +enum imx_misc_func { + IMX_SC_MISC_FUNC_UNKNOWN = 0, + IMX_SC_MISC_FUNC_SET_CONTROL = 1, + IMX_SC_MISC_FUNC_GET_CONTROL = 2, + IMX_SC_MISC_FUNC_SET_MAX_DMA_GROUP = 4, + IMX_SC_MISC_FUNC_SET_DMA_GROUP = 5, + IMX_SC_MISC_FUNC_SECO_IMAGE_LOAD = 8, + IMX_SC_MISC_FUNC_SECO_AUTHENTICATE = 9, + IMX_SC_MISC_FUNC_DEBUG_OUT = 10, + IMX_SC_MISC_FUNC_WAVEFORM_CAPTURE = 6, + IMX_SC_MISC_FUNC_BUILD_INFO = 15, + IMX_SC_MISC_FUNC_UNIQUE_ID = 19, + IMX_SC_MISC_FUNC_SET_ARI = 3, + IMX_SC_MISC_FUNC_BOOT_STATUS = 7, + IMX_SC_MISC_FUNC_BOOT_DONE = 14, + IMX_SC_MISC_FUNC_OTP_FUSE_READ = 11, + IMX_SC_MISC_FUNC_OTP_FUSE_WRITE = 17, + IMX_SC_MISC_FUNC_SET_TEMP = 12, + IMX_SC_MISC_FUNC_GET_TEMP = 13, + IMX_SC_MISC_FUNC_GET_BOOT_DEV = 16, + IMX_SC_MISC_FUNC_GET_BUTTON_STATUS = 18, +}; + +/* + * Control Functions + */ + +int imx_sc_misc_set_control(struct imx_sc_ipc *ipc, u32 resource, + u8 ctrl, u32 val); + +int imx_sc_misc_get_control(struct imx_sc_ipc *ipc, u32 resource, + u8 ctrl, u32 *val); + +#endif /* _SC_MISC_API_H */ -- cgit v1.2.3-71-gd317 From 3b0296b8c893adb17b422179b9e779e4c32aa347 Mon Sep 17 00:00:00 2001 From: Rajan Vaja Date: Mon, 8 Oct 2018 11:21:44 -0700 Subject: firmware: xilinx: Add zynqmp IOCTL API for device control Add ZynqMP firmware IOCTL API to control and configure devices like PLLs, SD, Gem, etc. Signed-off-by: Rajan Vaja Signed-off-by: Jolly Shah Acked-by: Olof Johansson Signed-off-by: Michal Simek --- drivers/firmware/xilinx/zynqmp.c | 42 ++++++++++++++++++++++++++++++++++++ include/linux/firmware/xlnx-zynqmp.h | 4 +++- 2 files changed, 45 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index 84b3fd2eca8b..9a1c72a9280f 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -428,6 +428,47 @@ static int zynqmp_pm_clock_getparent(u32 clock_id, u32 *parent_id) return ret; } +/** + * zynqmp_is_valid_ioctl() - Check whether IOCTL ID is valid or not + * @ioctl_id: IOCTL ID + * + * Return: 1 if IOCTL is valid else 0 + */ +static inline int zynqmp_is_valid_ioctl(u32 ioctl_id) +{ + switch (ioctl_id) { + case IOCTL_SET_PLL_FRAC_MODE: + case IOCTL_GET_PLL_FRAC_MODE: + case IOCTL_SET_PLL_FRAC_DATA: + case IOCTL_GET_PLL_FRAC_DATA: + return 1; + default: + return 0; + } +} + +/** + * zynqmp_pm_ioctl() - PM IOCTL API for device control and configs + * @node_id: Node ID of the device + * @ioctl_id: ID of the requested IOCTL + * @arg1: Argument 1 to requested IOCTL call + * @arg2: Argument 2 to requested IOCTL call + * @out: Returned output value + * + * This function calls IOCTL to firmware for device control and configuration. 
+ * + * Return: Returns status, either success or error+reason + */ +static int zynqmp_pm_ioctl(u32 node_id, u32 ioctl_id, u32 arg1, u32 arg2, + u32 *out) +{ + if (!zynqmp_is_valid_ioctl(ioctl_id)) + return -EINVAL; + + return zynqmp_pm_invoke_fn(PM_IOCTL, node_id, ioctl_id, + arg1, arg2, out); +} + static const struct zynqmp_eemi_ops eemi_ops = { .get_api_version = zynqmp_pm_get_api_version, .query_data = zynqmp_pm_query_data, @@ -440,6 +481,7 @@ static const struct zynqmp_eemi_ops eemi_ops = { .clock_getrate = zynqmp_pm_clock_getrate, .clock_setparent = zynqmp_pm_clock_setparent, .clock_getparent = zynqmp_pm_clock_getparent, + .ioctl = zynqmp_pm_ioctl, }; /** diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 015e130431e6..7a9db0861803 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -34,7 +34,8 @@ enum pm_api_id { PM_GET_API_VERSION = 1, - PM_QUERY_DATA = 35, + PM_IOCTL = 34, + PM_QUERY_DATA, PM_CLOCK_ENABLE, PM_CLOCK_DISABLE, PM_CLOCK_GETSTATE, @@ -99,6 +100,7 @@ struct zynqmp_eemi_ops { int (*clock_getrate)(u32 clock_id, u64 *rate); int (*clock_setparent)(u32 clock_id, u32 parent_id); int (*clock_getparent)(u32 clock_id, u32 *parent_id); + int (*ioctl)(u32 node_id, u32 ioctl_id, u32 arg1, u32 arg2, u32 *out); }; #if IS_REACHABLE(CONFIG_ARCH_ZYNQMP) -- cgit v1.2.3-71-gd317 From 3fde0e16d016ecb273f0fa404b5d56b947fc0576 Mon Sep 17 00:00:00 2001 From: Jolly Shah Date: Mon, 8 Oct 2018 11:21:46 -0700 Subject: drivers: clk: Add ZynqMP clock driver This patch adds a CCF-compliant clock driver for ZynqMP. The clock driver queries supported clock information from firmware and registers PLL and output clocks with the CCF. Signed-off-by: Rajan Vaja Signed-off-by: Tejas Patel Signed-off-by: Shubhrajyoti Datta Signed-off-by: Jolly Shah Acked-by: Olof Johansson Reviewed-by: Stephen Boyd Signed-off-by: Michal Simek --- drivers/clk/Kconfig | 1 + drivers/clk/Makefile | 1 + drivers/clk/zynqmp/Kconfig | 10 + drivers/clk/zynqmp/Makefile | 4 + drivers/clk/zynqmp/clk-gate-zynqmp.c | 144 +++++++ drivers/clk/zynqmp/clk-mux-zynqmp.c | 141 +++++++ drivers/clk/zynqmp/clk-zynqmp.h | 68 ++++ drivers/clk/zynqmp/clkc.c | 716 +++++++++++++++++++++++++++++++++++ drivers/clk/zynqmp/divider.c | 217 +++++++++++ drivers/clk/zynqmp/pll.c | 335 ++++++++++++++++ include/linux/firmware/xlnx-zynqmp.h | 1 + 11 files changed, 1638 insertions(+) create mode 100644 drivers/clk/zynqmp/Kconfig create mode 100644 drivers/clk/zynqmp/Makefile create mode 100644 drivers/clk/zynqmp/clk-gate-zynqmp.c create mode 100644 drivers/clk/zynqmp/clk-mux-zynqmp.c create mode 100644 drivers/clk/zynqmp/clk-zynqmp.h create mode 100644 drivers/clk/zynqmp/clkc.c create mode 100644 drivers/clk/zynqmp/divider.c create mode 100644 drivers/clk/zynqmp/pll.c (limited to 'include/linux') diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 292056bbb30e..1deafb4db60c 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -299,5 +299,6 @@ source "drivers/clk/sunxi-ng/Kconfig" source "drivers/clk/tegra/Kconfig" source "drivers/clk/ti/Kconfig" source "drivers/clk/uniphier/Kconfig" +source "drivers/clk/zynqmp/Kconfig" endmenu diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index a84c5573cabe..ad11421bdacd 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -108,3 +108,4 @@ obj-$(CONFIG_X86) += x86/ endif obj-$(CONFIG_ARCH_ZX) += zte/ obj-$(CONFIG_ARCH_ZYNQ) += zynq/ +obj-$(CONFIG_COMMON_CLK_ZYNQMP) += zynqmp/ diff --git a/drivers/clk/zynqmp/Kconfig
b/drivers/clk/zynqmp/Kconfig new file mode 100644 index 000000000000..17086059be8b --- /dev/null +++ b/drivers/clk/zynqmp/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +config COMMON_CLK_ZYNQMP + bool "Support for Xilinx ZynqMP Ultrascale+ clock controllers" + depends on ARCH_ZYNQMP || COMPILE_TEST + depends on ZYNQMP_FIRMWARE + help + Support for the Zynqmp Ultrascale clock controller. + It has a dependency on the PMU firmware. + Say Y if you want to include clock support. diff --git a/drivers/clk/zynqmp/Makefile b/drivers/clk/zynqmp/Makefile new file mode 100644 index 000000000000..0ec24bfe0f18 --- /dev/null +++ b/drivers/clk/zynqmp/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +# Zynq Ultrascale+ MPSoC clock specific Makefile + +obj-$(CONFIG_ARCH_ZYNQMP) += pll.o clk-gate-zynqmp.o divider.o clk-mux-zynqmp.o clkc.o diff --git a/drivers/clk/zynqmp/clk-gate-zynqmp.c b/drivers/clk/zynqmp/clk-gate-zynqmp.c new file mode 100644 index 000000000000..83b236f20fff --- /dev/null +++ b/drivers/clk/zynqmp/clk-gate-zynqmp.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Zynq UltraScale+ MPSoC clock controller + * + * Copyright (C) 2016-2018 Xilinx + * + * Gated clock implementation + */ + +#include +#include +#include "clk-zynqmp.h" + +/** + * struct clk_gate - gating clock + * @hw: handle between common and hardware-specific interfaces + * @flags: hardware-specific flags + * @clk_id: Id of clock + */ +struct zynqmp_clk_gate { + struct clk_hw hw; + u8 flags; + u32 clk_id; +}; + +#define to_zynqmp_clk_gate(_hw) container_of(_hw, struct zynqmp_clk_gate, hw) + +/** + * zynqmp_clk_gate_enable() - Enable clock + * @hw: handle between common and hardware-specific interfaces + * + * Return: 0 on success else error code + */ +static int zynqmp_clk_gate_enable(struct clk_hw *hw) +{ + struct zynqmp_clk_gate *gate = to_zynqmp_clk_gate(hw); + const char *clk_name = clk_hw_get_name(hw); + u32 clk_id = gate->clk_id; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + ret = eemi_ops->clock_enable(clk_id); + + if (ret) + pr_warn_once("%s() clock enabled failed for %s, ret = %d\n", + __func__, clk_name, ret); + + return ret; +} + +/* + * zynqmp_clk_gate_disable() - Disable clock + * @hw: handle between common and hardware-specific interfaces + */ +static void zynqmp_clk_gate_disable(struct clk_hw *hw) +{ + struct zynqmp_clk_gate *gate = to_zynqmp_clk_gate(hw); + const char *clk_name = clk_hw_get_name(hw); + u32 clk_id = gate->clk_id; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + ret = eemi_ops->clock_disable(clk_id); + + if (ret) + pr_warn_once("%s() clock disable failed for %s, ret = %d\n", + __func__, clk_name, ret); +} + +/** + * zynqmp_clk_gate_is_enable() - Check clock state + * @hw: handle between common and hardware-specific interfaces + * + * Return: 1 if enabled, 0 if disabled else error code + */ +static int zynqmp_clk_gate_is_enabled(struct clk_hw *hw) +{ + struct zynqmp_clk_gate *gate = to_zynqmp_clk_gate(hw); + const char *clk_name = clk_hw_get_name(hw); + u32 clk_id = gate->clk_id; + int state, ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + ret = eemi_ops->clock_getstate(clk_id, &state); + if (ret) { + pr_warn_once("%s() clock get state failed for %s, ret = %d\n", + __func__, clk_name, ret); + return -EIO; + } + + return state ? 
1 : 0; +} + +static const struct clk_ops zynqmp_clk_gate_ops = { + .enable = zynqmp_clk_gate_enable, + .disable = zynqmp_clk_gate_disable, + .is_enabled = zynqmp_clk_gate_is_enabled, +}; + +/** + * zynqmp_clk_register_gate() - Register a gate clock with the clock framework + * @name: Name of this clock + * @clk_id: Id of this clock + * @parents: Name of this clock's parents + * @num_parents: Number of parents + * @nodes: Clock topology node + * + * Return: clock hardware of the registered clock gate + */ +struct clk_hw *zynqmp_clk_register_gate(const char *name, u32 clk_id, + const char * const *parents, + u8 num_parents, + const struct clock_topology *nodes) +{ + struct zynqmp_clk_gate *gate; + struct clk_hw *hw; + int ret; + struct clk_init_data init; + + /* allocate the gate */ + gate = kzalloc(sizeof(*gate), GFP_KERNEL); + if (!gate) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.ops = &zynqmp_clk_gate_ops; + init.flags = nodes->flag; + init.parent_names = parents; + init.num_parents = 1; + + /* struct clk_gate assignments */ + gate->flags = nodes->type_flag; + gate->hw.init = &init; + gate->clk_id = clk_id; + + hw = &gate->hw; + ret = clk_hw_register(NULL, hw); + if (ret) { + kfree(gate); + hw = ERR_PTR(ret); + } + + return hw; +} diff --git a/drivers/clk/zynqmp/clk-mux-zynqmp.c b/drivers/clk/zynqmp/clk-mux-zynqmp.c new file mode 100644 index 000000000000..4143f560c28d --- /dev/null +++ b/drivers/clk/zynqmp/clk-mux-zynqmp.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Zynq UltraScale+ MPSoC mux + * + * Copyright (C) 2016-2018 Xilinx + */ + +#include +#include +#include "clk-zynqmp.h" + +/* + * DOC: basic adjustable multiplexer clock that cannot gate + * + * Traits of this clock: + * prepare - clk_prepare only ensures that parents are prepared + * enable - clk_enable only ensures that parents are enabled + * rate - rate is only affected by parent switching. 
No clk_set_rate support + * parent - parent is adjustable through clk_set_parent + */ + +/** + * struct zynqmp_clk_mux - multiplexer clock + * + * @hw: handle between common and hardware-specific interfaces + * @flags: hardware-specific flags + * @clk_id: Id of clock + */ +struct zynqmp_clk_mux { + struct clk_hw hw; + u8 flags; + u32 clk_id; +}; + +#define to_zynqmp_clk_mux(_hw) container_of(_hw, struct zynqmp_clk_mux, hw) + +/** + * zynqmp_clk_mux_get_parent() - Get parent of clock + * @hw: handle between common and hardware-specific interfaces + * + * Return: Parent index + */ +static u8 zynqmp_clk_mux_get_parent(struct clk_hw *hw) +{ + struct zynqmp_clk_mux *mux = to_zynqmp_clk_mux(hw); + const char *clk_name = clk_hw_get_name(hw); + u32 clk_id = mux->clk_id; + u32 val; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + ret = eemi_ops->clock_getparent(clk_id, &val); + + if (ret) + pr_warn_once("%s() getparent failed for clock: %s, ret = %d\n", + __func__, clk_name, ret); + + return val; +} + +/** + * zynqmp_clk_mux_set_parent() - Set parent of clock + * @hw: handle between common and hardware-specific interfaces + * @index: Parent index + * + * Return: 0 on success else error+reason + */ +static int zynqmp_clk_mux_set_parent(struct clk_hw *hw, u8 index) +{ + struct zynqmp_clk_mux *mux = to_zynqmp_clk_mux(hw); + const char *clk_name = clk_hw_get_name(hw); + u32 clk_id = mux->clk_id; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + ret = eemi_ops->clock_setparent(clk_id, index); + + if (ret) + pr_warn_once("%s() set parent failed for clock: %s, ret = %d\n", + __func__, clk_name, ret); + + return ret; +} + +static const struct clk_ops zynqmp_clk_mux_ops = { + .get_parent = zynqmp_clk_mux_get_parent, + .set_parent = zynqmp_clk_mux_set_parent, + .determine_rate = __clk_mux_determine_rate, +}; + +static const struct clk_ops zynqmp_clk_mux_ro_ops = { + .get_parent = zynqmp_clk_mux_get_parent, +}; + +/** + * zynqmp_clk_register_mux() - Register a mux table with the clock + * framework + * @name: Name of this clock + * @clk_id: Id of this clock + * @parents: Name of this clock's parents + * @num_parents: Number of parents + * @nodes: Clock topology node + * + * Return: clock hardware of the registered clock mux + */ +struct clk_hw *zynqmp_clk_register_mux(const char *name, u32 clk_id, + const char * const *parents, + u8 num_parents, + const struct clock_topology *nodes) +{ + struct zynqmp_clk_mux *mux; + struct clk_hw *hw; + struct clk_init_data init; + int ret; + + mux = kzalloc(sizeof(*mux), GFP_KERNEL); + if (!mux) + return ERR_PTR(-ENOMEM); + + init.name = name; + if (nodes->type_flag & CLK_MUX_READ_ONLY) + init.ops = &zynqmp_clk_mux_ro_ops; + else + init.ops = &zynqmp_clk_mux_ops; + init.flags = nodes->flag; + init.parent_names = parents; + init.num_parents = num_parents; + mux->flags = nodes->type_flag; + mux->hw.init = &init; + mux->clk_id = clk_id; + + hw = &mux->hw; + ret = clk_hw_register(NULL, hw); + if (ret) { + kfree(hw); + hw = ERR_PTR(ret); + } + + return hw; +} +EXPORT_SYMBOL_GPL(zynqmp_clk_register_mux); diff --git a/drivers/clk/zynqmp/clk-zynqmp.h b/drivers/clk/zynqmp/clk-zynqmp.h new file mode 100644 index 000000000000..7ab163b67249 --- /dev/null +++ b/drivers/clk/zynqmp/clk-zynqmp.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2016-2018 Xilinx + */ + +#ifndef __LINUX_CLK_ZYNQMP_H_ +#define __LINUX_CLK_ZYNQMP_H_ + +#include + +#include + +/* Clock APIs payload parameters */ 
+#define CLK_GET_NAME_RESP_LEN 16 +#define CLK_GET_TOPOLOGY_RESP_WORDS 3 +#define CLK_GET_PARENTS_RESP_WORDS 3 +#define CLK_GET_ATTR_RESP_WORDS 1 + +enum topology_type { + TYPE_INVALID, + TYPE_MUX, + TYPE_PLL, + TYPE_FIXEDFACTOR, + TYPE_DIV1, + TYPE_DIV2, + TYPE_GATE, +}; + +/** + * struct clock_topology - Clock topology + * @type: Type of topology + * @flag: Topology flags + * @type_flag: Topology type specific flag + */ +struct clock_topology { + u32 type; + u32 flag; + u32 type_flag; +}; + +struct clk_hw *zynqmp_clk_register_pll(const char *name, u32 clk_id, + const char * const *parents, + u8 num_parents, + const struct clock_topology *nodes); + +struct clk_hw *zynqmp_clk_register_gate(const char *name, u32 clk_id, + const char * const *parents, + u8 num_parents, + const struct clock_topology *nodes); + +struct clk_hw *zynqmp_clk_register_divider(const char *name, + u32 clk_id, + const char * const *parents, + u8 num_parents, + const struct clock_topology *nodes); + +struct clk_hw *zynqmp_clk_register_mux(const char *name, u32 clk_id, + const char * const *parents, + u8 num_parents, + const struct clock_topology *nodes); + +struct clk_hw *zynqmp_clk_register_fixed_factor(const char *name, + u32 clk_id, + const char * const *parents, + u8 num_parents, + const struct clock_topology *nodes); + +#endif diff --git a/drivers/clk/zynqmp/clkc.c b/drivers/clk/zynqmp/clkc.c new file mode 100644 index 000000000000..9d7d297f0ea8 --- /dev/null +++ b/drivers/clk/zynqmp/clkc.c @@ -0,0 +1,716 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Zynq UltraScale+ MPSoC clock controller + * + * Copyright (C) 2016-2018 Xilinx + * + * Based on drivers/clk/zynq/clkc.c + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "clk-zynqmp.h" + +#define MAX_PARENT 100 +#define MAX_NODES 6 +#define MAX_NAME_LEN 50 + +#define CLK_TYPE_SHIFT 2 + +#define PM_API_PAYLOAD_LEN 3 + +#define NA_PARENT 0xFFFFFFFF +#define DUMMY_PARENT 0xFFFFFFFE + +#define CLK_TYPE_FIELD_LEN 4 +#define CLK_TOPOLOGY_NODE_OFFSET 16 +#define NODES_PER_RESP 3 + +#define CLK_TYPE_FIELD_MASK 0xF +#define CLK_FLAG_FIELD_MASK GENMASK(21, 8) +#define CLK_TYPE_FLAG_FIELD_MASK GENMASK(31, 24) + +#define CLK_PARENTS_ID_LEN 16 +#define CLK_PARENTS_ID_MASK 0xFFFF + +/* Flags for parents */ +#define PARENT_CLK_SELF 0 +#define PARENT_CLK_NODE1 1 +#define PARENT_CLK_NODE2 2 +#define PARENT_CLK_NODE3 3 +#define PARENT_CLK_NODE4 4 +#define PARENT_CLK_EXTERNAL 5 + +#define END_OF_CLK_NAME "END_OF_CLK" +#define END_OF_TOPOLOGY_NODE 1 +#define END_OF_PARENTS 1 +#define RESERVED_CLK_NAME "" + +#define CLK_VALID_MASK 0x1 + +enum clk_type { + CLK_TYPE_OUTPUT, + CLK_TYPE_EXTERNAL, +}; + +/** + * struct clock_parent - Clock parent + * @name: Parent name + * @id: Parent clock ID + * @flag: Parent flags + */ +struct clock_parent { + char name[MAX_NAME_LEN]; + int id; + u32 flag; +}; + +/** + * struct zynqmp_clock - Clock + * @clk_name: Clock name + * @valid: Validity flag of clock + * @type: Clock type (Output/External) + * @node: Clock topology nodes + * @num_nodes: Number of nodes present in topology + * @parent: Parent of clock + * @num_parents: Number of parents of clock + */ +struct zynqmp_clock { + char clk_name[MAX_NAME_LEN]; + u32 valid; + enum clk_type type; + struct clock_topology node[MAX_NODES]; + u32 num_nodes; + struct clock_parent parent[MAX_PARENT]; + u32 num_parents; +}; + +static const char clk_type_postfix[][10] = { + [TYPE_INVALID] = "", + [TYPE_MUX] = "_mux", + [TYPE_GATE] = "", + [TYPE_DIV1] = "_div1", + [TYPE_DIV2] = 
"_div2", + [TYPE_FIXEDFACTOR] = "_ff", + [TYPE_PLL] = "" +}; + +static struct clk_hw *(* const clk_topology[]) (const char *name, u32 clk_id, + const char * const *parents, + u8 num_parents, + const struct clock_topology *nodes) + = { + [TYPE_INVALID] = NULL, + [TYPE_MUX] = zynqmp_clk_register_mux, + [TYPE_PLL] = zynqmp_clk_register_pll, + [TYPE_FIXEDFACTOR] = zynqmp_clk_register_fixed_factor, + [TYPE_DIV1] = zynqmp_clk_register_divider, + [TYPE_DIV2] = zynqmp_clk_register_divider, + [TYPE_GATE] = zynqmp_clk_register_gate +}; + +static struct zynqmp_clock *clock; +static struct clk_hw_onecell_data *zynqmp_data; +static unsigned int clock_max_idx; +static const struct zynqmp_eemi_ops *eemi_ops; + +/** + * zynqmp_is_valid_clock() - Check whether clock is valid or not + * @clk_id: Clock index + * + * Return: 1 if clock is valid, 0 if clock is invalid else error code + */ +static inline int zynqmp_is_valid_clock(u32 clk_id) +{ + if (clk_id > clock_max_idx) + return -ENODEV; + + return clock[clk_id].valid; +} + +/** + * zynqmp_get_clock_name() - Get name of clock from Clock index + * @clk_id: Clock index + * @clk_name: Name of clock + * + * Return: 0 on success else error code + */ +static int zynqmp_get_clock_name(u32 clk_id, char *clk_name) +{ + int ret; + + ret = zynqmp_is_valid_clock(clk_id); + if (ret == 1) { + strncpy(clk_name, clock[clk_id].clk_name, MAX_NAME_LEN); + return 0; + } + + return ret == 0 ? -EINVAL : ret; +} + +/** + * zynqmp_get_clock_type() - Get type of clock + * @clk_id: Clock index + * @type: Clock type: CLK_TYPE_OUTPUT or CLK_TYPE_EXTERNAL + * + * Return: 0 on success else error code + */ +static int zynqmp_get_clock_type(u32 clk_id, u32 *type) +{ + int ret; + + ret = zynqmp_is_valid_clock(clk_id); + if (ret == 1) { + *type = clock[clk_id].type; + return 0; + } + + return ret == 0 ? -EINVAL : ret; +} + +/** + * zynqmp_pm_clock_get_num_clocks() - Get number of clocks in system + * @nclocks: Number of clocks in system/board. + * + * Call firmware API to get number of clocks. + * + * Return: 0 on success else error code. + */ +static int zynqmp_pm_clock_get_num_clocks(u32 *nclocks) +{ + struct zynqmp_pm_query_data qdata = {0}; + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + qdata.qid = PM_QID_CLOCK_GET_NUM_CLOCKS; + + ret = eemi_ops->query_data(qdata, ret_payload); + *nclocks = ret_payload[1]; + + return ret; +} + +/** + * zynqmp_pm_clock_get_name() - Get the name of clock for given id + * @clock_id: ID of the clock to be queried + * @name: Name of given clock + * + * This function is used to get name of clock specified by given + * clock ID. + * + * Return: Returns 0, in case of error name would be 0 + */ +static int zynqmp_pm_clock_get_name(u32 clock_id, char *name) +{ + struct zynqmp_pm_query_data qdata = {0}; + u32 ret_payload[PAYLOAD_ARG_CNT]; + + qdata.qid = PM_QID_CLOCK_GET_NAME; + qdata.arg1 = clock_id; + + eemi_ops->query_data(qdata, ret_payload); + memcpy(name, ret_payload, CLK_GET_NAME_RESP_LEN); + + return 0; +} + +/** + * zynqmp_pm_clock_get_topology() - Get the topology of clock for given id + * @clock_id: ID of the clock to be queried + * @index: Node index of clock topology + * @topology: Buffer to store nodes in topology and flags + * + * This function is used to get topology information for the clock + * specified by given clock ID. + * + * This API will return 3 node of topology with a single response. To get + * other nodes, master should call same API in loop with new + * index till error is returned. 
E.g First call should have + * index 0 which will return nodes 0,1 and 2. Next call, index + * should be 3 which will return nodes 3,4 and 5 and so on. + * + * Return: 0 on success else error+reason + */ +static int zynqmp_pm_clock_get_topology(u32 clock_id, u32 index, u32 *topology) +{ + struct zynqmp_pm_query_data qdata = {0}; + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + qdata.qid = PM_QID_CLOCK_GET_TOPOLOGY; + qdata.arg1 = clock_id; + qdata.arg2 = index; + + ret = eemi_ops->query_data(qdata, ret_payload); + memcpy(topology, &ret_payload[1], CLK_GET_TOPOLOGY_RESP_WORDS * 4); + + return ret; +} + +/** + * zynqmp_clk_register_fixed_factor() - Register fixed factor with the + * clock framework + * @name: Name of this clock + * @clk_id: Clock ID + * @parents: Name of this clock's parents + * @num_parents: Number of parents + * @nodes: Clock topology node + * + * Return: clock hardware to the registered clock + */ +struct clk_hw *zynqmp_clk_register_fixed_factor(const char *name, u32 clk_id, + const char * const *parents, + u8 num_parents, + const struct clock_topology *nodes) +{ + u32 mult, div; + struct clk_hw *hw; + struct zynqmp_pm_query_data qdata = {0}; + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + qdata.qid = PM_QID_CLOCK_GET_FIXEDFACTOR_PARAMS; + qdata.arg1 = clk_id; + + ret = eemi_ops->query_data(qdata, ret_payload); + mult = ret_payload[1]; + div = ret_payload[2]; + + hw = clk_hw_register_fixed_factor(NULL, name, + parents[0], + nodes->flag, mult, + div); + + return hw; +} + +/** + * zynqmp_pm_clock_get_parents() - Get the first 3 parents of clock for given id + * @clock_id: Clock ID + * @index: Parent index + * @parents: 3 parents of the given clock + * + * This function is used to get 3 parents for the clock specified by + * given clock ID. + * + * This API will return 3 parents with a single response. To get + * other parents, master should call same API in loop with new + * parent index till error is returned. E.g First call should have + * index 0 which will return parents 0,1 and 2. Next call, index + * should be 3 which will return parent 3,4 and 5 and so on. + * + * Return: 0 on success else error+reason + */ +static int zynqmp_pm_clock_get_parents(u32 clock_id, u32 index, u32 *parents) +{ + struct zynqmp_pm_query_data qdata = {0}; + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + qdata.qid = PM_QID_CLOCK_GET_PARENTS; + qdata.arg1 = clock_id; + qdata.arg2 = index; + + ret = eemi_ops->query_data(qdata, ret_payload); + memcpy(parents, &ret_payload[1], CLK_GET_PARENTS_RESP_WORDS * 4); + + return ret; +} + +/** + * zynqmp_pm_clock_get_attributes() - Get the attributes of clock for given id + * @clock_id: Clock ID + * @attr: Clock attributes + * + * This function is used to get clock's attributes(e.g. valid, clock type, etc). 
+ * + * Return: 0 on success else error+reason + */ +static int zynqmp_pm_clock_get_attributes(u32 clock_id, u32 *attr) +{ + struct zynqmp_pm_query_data qdata = {0}; + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + + qdata.qid = PM_QID_CLOCK_GET_ATTRIBUTES; + qdata.arg1 = clock_id; + + ret = eemi_ops->query_data(qdata, ret_payload); + memcpy(attr, &ret_payload[1], CLK_GET_ATTR_RESP_WORDS * 4); + + return ret; +} + +/** + * __zynqmp_clock_get_topology() - Get topology data of clock from firmware + * response data + * @topology: Clock topology + * @data: Clock topology data received from firmware + * @nnodes: Number of nodes + * + * Return: 0 on success else error+reason + */ +static int __zynqmp_clock_get_topology(struct clock_topology *topology, + u32 *data, u32 *nnodes) +{ + int i; + + for (i = 0; i < PM_API_PAYLOAD_LEN; i++) { + if (!(data[i] & CLK_TYPE_FIELD_MASK)) + return END_OF_TOPOLOGY_NODE; + topology[*nnodes].type = data[i] & CLK_TYPE_FIELD_MASK; + topology[*nnodes].flag = FIELD_GET(CLK_FLAG_FIELD_MASK, + data[i]); + topology[*nnodes].type_flag = + FIELD_GET(CLK_TYPE_FLAG_FIELD_MASK, data[i]); + (*nnodes)++; + } + + return 0; +} + +/** + * zynqmp_clock_get_topology() - Get topology of clock from firmware using + * PM_API + * @clk_id: Clock index + * @topology: Clock topology + * @num_nodes: Number of nodes + * + * Return: 0 on success else error+reason + */ +static int zynqmp_clock_get_topology(u32 clk_id, + struct clock_topology *topology, + u32 *num_nodes) +{ + int j, ret; + u32 pm_resp[PM_API_PAYLOAD_LEN] = {0}; + + *num_nodes = 0; + for (j = 0; j <= MAX_NODES; j += 3) { + ret = zynqmp_pm_clock_get_topology(clk_id, j, pm_resp); + if (ret) + return ret; + ret = __zynqmp_clock_get_topology(topology, pm_resp, num_nodes); + if (ret == END_OF_TOPOLOGY_NODE) + return 0; + } + + return 0; +} + +/** + * __zynqmp_clock_get_parents() - Get parents info of clock from firmware + * response data + * @parents: Clock parents + * @data: Clock parents data received from firmware + * @nparent: Number of parents + * + * Return: 0 on success else error+reason + */ +static int __zynqmp_clock_get_parents(struct clock_parent *parents, u32 *data, + u32 *nparent) +{ + int i; + struct clock_parent *parent; + + for (i = 0; i < PM_API_PAYLOAD_LEN; i++) { + if (data[i] == NA_PARENT) + return END_OF_PARENTS; + + parent = &parents[i]; + parent->id = data[i] & CLK_PARENTS_ID_MASK; + if (data[i] == DUMMY_PARENT) { + strcpy(parent->name, "dummy_name"); + parent->flag = 0; + } else { + parent->flag = data[i] >> CLK_PARENTS_ID_LEN; + if (zynqmp_get_clock_name(parent->id, parent->name)) + continue; + } + *nparent += 1; + } + + return 0; +} + +/** + * zynqmp_clock_get_parents() - Get parents info from firmware using PM_API + * @clk_id: Clock index + * @parents: Clock parents + * @num_parents: Total number of parents + * + * Return: 0 on success else error+reason + */ +static int zynqmp_clock_get_parents(u32 clk_id, struct clock_parent *parents, + u32 *num_parents) +{ + int j = 0, ret; + u32 pm_resp[PM_API_PAYLOAD_LEN] = {0}; + + *num_parents = 0; + do { + /* Get parents from firmware */ + ret = zynqmp_pm_clock_get_parents(clk_id, j, pm_resp); + if (ret) + return ret; + + ret = __zynqmp_clock_get_parents(&parents[j], pm_resp, + num_parents); + if (ret == END_OF_PARENTS) + return 0; + j += PM_API_PAYLOAD_LEN; + } while (*num_parents <= MAX_PARENT); + + return 0; +} + +/** + * zynqmp_get_parent_list() - Create list of parent names + * @np: Device node + * @clk_id: Clock index + * @parent_list: List of parent names +
* @num_parents: Total number of parents + * + * Return: 0 on success else error+reason + */ +static int zynqmp_get_parent_list(struct device_node *np, u32 clk_id, + const char **parent_list, u32 *num_parents) +{ + int i = 0, ret; + u32 total_parents = clock[clk_id].num_parents; + struct clock_topology *clk_nodes; + struct clock_parent *parents; + + clk_nodes = clock[clk_id].node; + parents = clock[clk_id].parent; + + for (i = 0; i < total_parents; i++) { + if (!parents[i].flag) { + parent_list[i] = parents[i].name; + } else if (parents[i].flag == PARENT_CLK_EXTERNAL) { + ret = of_property_match_string(np, "clock-names", + parents[i].name); + if (ret < 0) + strcpy(parents[i].name, "dummy_name"); + parent_list[i] = parents[i].name; + } else { + strcat(parents[i].name, + clk_type_postfix[clk_nodes[parents[i].flag - 1]. + type]); + parent_list[i] = parents[i].name; + } + } + + *num_parents = total_parents; + return 0; +} + +/** + * zynqmp_register_clk_topology() - Register clock topology + * @clk_id: Clock index + * @clk_name: Clock Name + * @num_parents: Total number of parents + * @parent_names: List of parents name + * + * Return: Returns either clock hardware or error+reason + */ +static struct clk_hw *zynqmp_register_clk_topology(int clk_id, char *clk_name, + int num_parents, + const char **parent_names) +{ + int j; + u32 num_nodes; + char *clk_out = NULL; + struct clock_topology *nodes; + struct clk_hw *hw = NULL; + + nodes = clock[clk_id].node; + num_nodes = clock[clk_id].num_nodes; + + for (j = 0; j < num_nodes; j++) { + /* + * Clock name received from firmware is output clock name. + * Intermediate clock names are postfixed with type of clock. + */ + if (j != (num_nodes - 1)) { + clk_out = kasprintf(GFP_KERNEL, "%s%s", clk_name, + clk_type_postfix[nodes[j].type]); + } else { + clk_out = kasprintf(GFP_KERNEL, "%s", clk_name); + } + + if (!clk_topology[nodes[j].type]) + continue; + + hw = (*clk_topology[nodes[j].type])(clk_out, clk_id, + parent_names, + num_parents, + &nodes[j]); + if (IS_ERR(hw)) + pr_warn_once("%s() %s register fail with %ld\n", + __func__, clk_name, PTR_ERR(hw)); + + parent_names[0] = clk_out; + } + kfree(clk_out); + return hw; +} + +/** + * zynqmp_register_clocks() - Register clocks + * @np: Device node + * + * Return: 0 on success else error code + */ +static int zynqmp_register_clocks(struct device_node *np) +{ + int ret; + u32 i, total_parents = 0, type = 0; + const char *parent_names[MAX_PARENT]; + + for (i = 0; i < clock_max_idx; i++) { + char clk_name[MAX_NAME_LEN]; + + /* get clock name, continue to next clock if name not found */ + if (zynqmp_get_clock_name(i, clk_name)) + continue; + + /* Check if clock is valid and output clock. + * Do not register invalid or external clock. 
+ */ + ret = zynqmp_get_clock_type(i, &type); + if (ret || type != CLK_TYPE_OUTPUT) + continue; + + /* Get parents of clock*/ + if (zynqmp_get_parent_list(np, i, parent_names, + &total_parents)) { + WARN_ONCE(1, "No parents found for %s\n", + clock[i].clk_name); + continue; + } + + zynqmp_data->hws[i] = + zynqmp_register_clk_topology(i, clk_name, + total_parents, + parent_names); + } + + for (i = 0; i < clock_max_idx; i++) { + if (IS_ERR(zynqmp_data->hws[i])) { + pr_err("Zynq Ultrascale+ MPSoC clk %s: register failed with %ld\n", + clock[i].clk_name, PTR_ERR(zynqmp_data->hws[i])); + WARN_ON(1); + } + } + return 0; +} + +/** + * zynqmp_get_clock_info() - Get clock information from firmware using PM_API + */ +static void zynqmp_get_clock_info(void) +{ + int i, ret; + u32 attr, type = 0; + + for (i = 0; i < clock_max_idx; i++) { + zynqmp_pm_clock_get_name(i, clock[i].clk_name); + if (!strcmp(clock[i].clk_name, RESERVED_CLK_NAME)) + continue; + + ret = zynqmp_pm_clock_get_attributes(i, &attr); + if (ret) + continue; + + clock[i].valid = attr & CLK_VALID_MASK; + clock[i].type = attr >> CLK_TYPE_SHIFT ? CLK_TYPE_EXTERNAL : + CLK_TYPE_OUTPUT; + } + + /* Get topology of all clock */ + for (i = 0; i < clock_max_idx; i++) { + ret = zynqmp_get_clock_type(i, &type); + if (ret || type != CLK_TYPE_OUTPUT) + continue; + + ret = zynqmp_clock_get_topology(i, clock[i].node, + &clock[i].num_nodes); + if (ret) + continue; + + ret = zynqmp_clock_get_parents(i, clock[i].parent, + &clock[i].num_parents); + if (ret) + continue; + } +} + +/** + * zynqmp_clk_setup() - Setup the clock framework and register clocks + * @np: Device node + * + * Return: 0 on success else error code + */ +static int zynqmp_clk_setup(struct device_node *np) +{ + int ret; + + ret = zynqmp_pm_clock_get_num_clocks(&clock_max_idx); + if (ret) + return ret; + + zynqmp_data = kzalloc(sizeof(*zynqmp_data) + sizeof(*zynqmp_data) * + clock_max_idx, GFP_KERNEL); + if (!zynqmp_data) + return -ENOMEM; + + clock = kcalloc(clock_max_idx, sizeof(*clock), GFP_KERNEL); + if (!clock) { + kfree(zynqmp_data); + return -ENOMEM; + } + + zynqmp_get_clock_info(); + zynqmp_register_clocks(np); + + zynqmp_data->num = clock_max_idx; + of_clk_add_hw_provider(np, of_clk_hw_onecell_get, zynqmp_data); + + return 0; +} + +static int zynqmp_clock_probe(struct platform_device *pdev) +{ + int ret; + struct device *dev = &pdev->dev; + + eemi_ops = zynqmp_pm_get_eemi_ops(); + if (!eemi_ops) + return -ENXIO; + + ret = zynqmp_clk_setup(dev->of_node); + + return ret; +} + +static const struct of_device_id zynqmp_clock_of_match[] = { + {.compatible = "xlnx,zynqmp-clk"}, + {}, +}; +MODULE_DEVICE_TABLE(of, zynqmp_clock_of_match); + +static struct platform_driver zynqmp_clock_driver = { + .driver = { + .name = "zynqmp_clock", + .of_match_table = zynqmp_clock_of_match, + }, + .probe = zynqmp_clock_probe, +}; +module_platform_driver(zynqmp_clock_driver); diff --git a/drivers/clk/zynqmp/divider.c b/drivers/clk/zynqmp/divider.c new file mode 100644 index 000000000000..a371c66e72ef --- /dev/null +++ b/drivers/clk/zynqmp/divider.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Zynq UltraScale+ MPSoC Divider support + * + * Copyright (C) 2016-2018 Xilinx + * + * Adjustable divider clock implementation + */ + +#include +#include +#include +#include "clk-zynqmp.h" + +/* + * DOC: basic adjustable divider clock that cannot gate + * + * Traits of this clock: + * prepare - clk_prepare only ensures that parents are prepared + * enable - clk_enable only ensures that parents are 
enabled + * rate - rate is adjustable. clk->rate = ceiling(parent->rate / divisor) + * parent - fixed parent. No clk_set_parent support + */ + +#define to_zynqmp_clk_divider(_hw) \ + container_of(_hw, struct zynqmp_clk_divider, hw) + +#define CLK_FRAC BIT(13) /* has a fractional parent */ + +/** + * struct zynqmp_clk_divider - adjustable divider clock + * @hw: handle between common and hardware-specific interfaces + * @flags: Hardware specific flags + * @clk_id: Id of clock + * @div_type: divisor type (TYPE_DIV1 or TYPE_DIV2) + */ +struct zynqmp_clk_divider { + struct clk_hw hw; + u8 flags; + u32 clk_id; + u32 div_type; +}; + +static inline int zynqmp_divider_get_val(unsigned long parent_rate, + unsigned long rate) +{ + return DIV_ROUND_CLOSEST(parent_rate, rate); +} + +/** + * zynqmp_clk_divider_recalc_rate() - Recalc rate of divider clock + * @hw: handle between common and hardware-specific interfaces + * @parent_rate: rate of parent clock + * + * Return: 0 on success else error+reason + */ +static unsigned long zynqmp_clk_divider_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct zynqmp_clk_divider *divider = to_zynqmp_clk_divider(hw); + const char *clk_name = clk_hw_get_name(hw); + u32 clk_id = divider->clk_id; + u32 div_type = divider->div_type; + u32 div, value; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + ret = eemi_ops->clock_getdivider(clk_id, &div); + + if (ret) + pr_warn_once("%s() get divider failed for %s, ret = %d\n", + __func__, clk_name, ret); + + if (div_type == TYPE_DIV1) + value = div & 0xFFFF; + else + value = div >> 16; + + return DIV_ROUND_UP_ULL(parent_rate, value); +} + +/** + * zynqmp_clk_divider_round_rate() - Round rate of divider clock + * @hw: handle between common and hardware-specific interfaces + * @rate: rate of clock to be set + * @prate: rate of parent clock + * + * Return: 0 on success else error+reason + */ +static long zynqmp_clk_divider_round_rate(struct clk_hw *hw, + unsigned long rate, + unsigned long *prate) +{ + struct zynqmp_clk_divider *divider = to_zynqmp_clk_divider(hw); + const char *clk_name = clk_hw_get_name(hw); + u32 clk_id = divider->clk_id; + u32 div_type = divider->div_type; + u32 bestdiv; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + /* if read only, just return current value */ + if (divider->flags & CLK_DIVIDER_READ_ONLY) { + ret = eemi_ops->clock_getdivider(clk_id, &bestdiv); + + if (ret) + pr_warn_once("%s() get divider failed for %s, ret = %d\n", + __func__, clk_name, ret); + if (div_type == TYPE_DIV1) + bestdiv = bestdiv & 0xFFFF; + else + bestdiv = bestdiv >> 16; + + return DIV_ROUND_UP_ULL((u64)*prate, bestdiv); + } + + bestdiv = zynqmp_divider_get_val(*prate, rate); + + if ((clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT) && + (divider->flags & CLK_FRAC)) + bestdiv = rate % *prate ? 
1 : bestdiv; + *prate = rate * bestdiv; + + return rate; +} + +/** + * zynqmp_clk_divider_set_rate() - Set rate of divider clock + * @hw: handle between common and hardware-specific interfaces + * @rate: rate of clock to be set + * @parent_rate: rate of parent clock + * + * Return: 0 on success else error+reason + */ +static int zynqmp_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct zynqmp_clk_divider *divider = to_zynqmp_clk_divider(hw); + const char *clk_name = clk_hw_get_name(hw); + u32 clk_id = divider->clk_id; + u32 div_type = divider->div_type; + u32 value, div; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + value = zynqmp_divider_get_val(parent_rate, rate); + if (div_type == TYPE_DIV1) { + div = value & 0xFFFF; + div |= 0xffff << 16; + } else { + div = 0xffff; + div |= value << 16; + } + + ret = eemi_ops->clock_setdivider(clk_id, div); + + if (ret) + pr_warn_once("%s() set divider failed for %s, ret = %d\n", + __func__, clk_name, ret); + + return ret; +} + +static const struct clk_ops zynqmp_clk_divider_ops = { + .recalc_rate = zynqmp_clk_divider_recalc_rate, + .round_rate = zynqmp_clk_divider_round_rate, + .set_rate = zynqmp_clk_divider_set_rate, +}; + +/** + * zynqmp_clk_register_divider() - Register a divider clock + * @name: Name of this clock + * @clk_id: Id of clock + * @parents: Name of this clock's parents + * @num_parents: Number of parents + * @nodes: Clock topology node + * + * Return: clock hardware to registered clock divider + */ +struct clk_hw *zynqmp_clk_register_divider(const char *name, + u32 clk_id, + const char * const *parents, + u8 num_parents, + const struct clock_topology *nodes) +{ + struct zynqmp_clk_divider *div; + struct clk_hw *hw; + struct clk_init_data init; + int ret; + + /* allocate the divider */ + div = kzalloc(sizeof(*div), GFP_KERNEL); + if (!div) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.ops = &zynqmp_clk_divider_ops; + init.flags = nodes->flag; + init.parent_names = parents; + init.num_parents = 1; + + /* struct clk_divider assignments */ + div->flags = nodes->type_flag; + div->hw.init = &init; + div->clk_id = clk_id; + div->div_type = nodes->type; + + hw = &div->hw; + ret = clk_hw_register(NULL, hw); + if (ret) { + kfree(div); + hw = ERR_PTR(ret); + } + + return hw; +} +EXPORT_SYMBOL_GPL(zynqmp_clk_register_divider); diff --git a/drivers/clk/zynqmp/pll.c b/drivers/clk/zynqmp/pll.c new file mode 100644 index 000000000000..a541397a172c --- /dev/null +++ b/drivers/clk/zynqmp/pll.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Zynq UltraScale+ MPSoC PLL driver + * + * Copyright (C) 2016-2018 Xilinx + */ + +#include +#include +#include +#include "clk-zynqmp.h" + +/** + * struct zynqmp_pll - PLL clock + * @hw: Handle between common and hardware-specific interfaces + * @clk_id: PLL clock ID + */ +struct zynqmp_pll { + struct clk_hw hw; + u32 clk_id; +}; + +#define to_zynqmp_pll(_hw) container_of(_hw, struct zynqmp_pll, hw) + +#define PLL_FBDIV_MIN 25 +#define PLL_FBDIV_MAX 125 + +#define PS_PLL_VCO_MIN 1500000000 +#define PS_PLL_VCO_MAX 3000000000UL + +enum pll_mode { + PLL_MODE_INT, + PLL_MODE_FRAC, +}; + +#define FRAC_OFFSET 0x8 +#define PLLFCFG_FRAC_EN BIT(31) +#define FRAC_DIV BIT(16) /* 2^16 */ + +/** + * zynqmp_pll_get_mode() - Get mode of PLL + * @hw: Handle between common and hardware-specific interfaces + * + * Return: Mode of PLL + */ +static inline enum pll_mode zynqmp_pll_get_mode(struct clk_hw *hw) +{ + struct 
zynqmp_pll *clk = to_zynqmp_pll(hw); + u32 clk_id = clk->clk_id; + const char *clk_name = clk_hw_get_name(hw); + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + ret = eemi_ops->ioctl(0, IOCTL_GET_PLL_FRAC_MODE, clk_id, 0, + ret_payload); + if (ret) + pr_warn_once("%s() PLL get frac mode failed for %s, ret = %d\n", + __func__, clk_name, ret); + + return ret_payload[1]; +} + +/** + * zynqmp_pll_set_mode() - Set the PLL mode + * @hw: Handle between common and hardware-specific interfaces + * @on: Flag to determine the mode + */ +static inline void zynqmp_pll_set_mode(struct clk_hw *hw, bool on) +{ + struct zynqmp_pll *clk = to_zynqmp_pll(hw); + u32 clk_id = clk->clk_id; + const char *clk_name = clk_hw_get_name(hw); + int ret; + u32 mode; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + if (on) + mode = PLL_MODE_FRAC; + else + mode = PLL_MODE_INT; + + ret = eemi_ops->ioctl(0, IOCTL_SET_PLL_FRAC_MODE, clk_id, mode, NULL); + if (ret) + pr_warn_once("%s() PLL set frac mode failed for %s, ret = %d\n", + __func__, clk_name, ret); +} + +/** + * zynqmp_pll_round_rate() - Round a clock frequency + * @hw: Handle between common and hardware-specific interfaces + * @rate: Desired clock frequency + * @prate: Clock frequency of parent clock + * + * Return: Frequency closest to @rate the hardware can generate + */ +static long zynqmp_pll_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + u32 fbdiv; + long rate_div, f; + + /* Enable the fractional mode if needed */ + rate_div = (rate * FRAC_DIV) / *prate; + f = rate_div % FRAC_DIV; + zynqmp_pll_set_mode(hw, !!f); + + if (zynqmp_pll_get_mode(hw) == PLL_MODE_FRAC) { + if (rate > PS_PLL_VCO_MAX) { + fbdiv = rate / PS_PLL_VCO_MAX; + rate = rate / (fbdiv + 1); + } + if (rate < PS_PLL_VCO_MIN) { + fbdiv = DIV_ROUND_UP(PS_PLL_VCO_MIN, rate); + rate = rate * fbdiv; + } + return rate; + } + + fbdiv = DIV_ROUND_CLOSEST(rate, *prate); + fbdiv = clamp_t(u32, fbdiv, PLL_FBDIV_MIN, PLL_FBDIV_MAX); + return *prate * fbdiv; +} + +/** + * zynqmp_pll_recalc_rate() - Recalculate clock frequency + * @hw: Handle between common and hardware-specific interfaces + * @parent_rate: Clock frequency of parent clock + * + * Return: Current clock frequency + */ +static unsigned long zynqmp_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct zynqmp_pll *clk = to_zynqmp_pll(hw); + u32 clk_id = clk->clk_id; + const char *clk_name = clk_hw_get_name(hw); + u32 fbdiv, data; + unsigned long rate, frac; + u32 ret_payload[PAYLOAD_ARG_CNT]; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + ret = eemi_ops->clock_getdivider(clk_id, &fbdiv); + if (ret) + pr_warn_once("%s() get divider failed for %s, ret = %d\n", + __func__, clk_name, ret); + + rate = parent_rate * fbdiv; + if (zynqmp_pll_get_mode(hw) == PLL_MODE_FRAC) { + eemi_ops->ioctl(0, IOCTL_GET_PLL_FRAC_DATA, clk_id, 0, + ret_payload); + data = ret_payload[1]; + frac = (parent_rate * data) / FRAC_DIV; + rate = rate + frac; + } + + return rate; +} + +/** + * zynqmp_pll_set_rate() - Set rate of PLL + * @hw: Handle between common and hardware-specific interfaces + * @rate: Frequency of clock to be set + * @parent_rate: Clock frequency of parent clock + * + * Set PLL divider to set desired rate. 
+ * + * Returns: rate which is set on success else error code + */ +static int zynqmp_pll_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct zynqmp_pll *clk = to_zynqmp_pll(hw); + u32 clk_id = clk->clk_id; + const char *clk_name = clk_hw_get_name(hw); + u32 fbdiv; + long rate_div, frac, m, f; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + if (zynqmp_pll_get_mode(hw) == PLL_MODE_FRAC) { + rate_div = (rate * FRAC_DIV) / parent_rate; + m = rate_div / FRAC_DIV; + f = rate_div % FRAC_DIV; + m = clamp_t(u32, m, (PLL_FBDIV_MIN), (PLL_FBDIV_MAX)); + rate = parent_rate * m; + frac = (parent_rate * f) / FRAC_DIV; + + ret = eemi_ops->clock_setdivider(clk_id, m); + if (ret) + pr_warn_once("%s() set divider failed for %s, ret = %d\n", + __func__, clk_name, ret); + + eemi_ops->ioctl(0, IOCTL_SET_PLL_FRAC_DATA, clk_id, f, NULL); + + return rate + frac; + } + + fbdiv = DIV_ROUND_CLOSEST(rate, parent_rate); + fbdiv = clamp_t(u32, fbdiv, PLL_FBDIV_MIN, PLL_FBDIV_MAX); + ret = eemi_ops->clock_setdivider(clk_id, fbdiv); + if (ret) + pr_warn_once("%s() set divider failed for %s, ret = %d\n", + __func__, clk_name, ret); + + return parent_rate * fbdiv; +} + +/** + * zynqmp_pll_is_enabled() - Check if a clock is enabled + * @hw: Handle between common and hardware-specific interfaces + * + * Return: 1 if the clock is enabled, 0 otherwise + */ +static int zynqmp_pll_is_enabled(struct clk_hw *hw) +{ + struct zynqmp_pll *clk = to_zynqmp_pll(hw); + const char *clk_name = clk_hw_get_name(hw); + u32 clk_id = clk->clk_id; + unsigned int state; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + ret = eemi_ops->clock_getstate(clk_id, &state); + if (ret) { + pr_warn_once("%s() clock get state failed for %s, ret = %d\n", + __func__, clk_name, ret); + return -EIO; + } + + return state ? 
1 : 0; +} + +/** + * zynqmp_pll_enable() - Enable clock + * @hw: Handle between common and hardware-specific interfaces + * + * Return: 0 on success else error code + */ +static int zynqmp_pll_enable(struct clk_hw *hw) +{ + struct zynqmp_pll *clk = to_zynqmp_pll(hw); + const char *clk_name = clk_hw_get_name(hw); + u32 clk_id = clk->clk_id; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + if (zynqmp_pll_is_enabled(hw)) + return 0; + + ret = eemi_ops->clock_enable(clk_id); + if (ret) + pr_warn_once("%s() clock enable failed for %s, ret = %d\n", + __func__, clk_name, ret); + + return ret; +} + +/** + * zynqmp_pll_disable() - Disable clock + * @hw: Handle between common and hardware-specific interfaces + */ +static void zynqmp_pll_disable(struct clk_hw *hw) +{ + struct zynqmp_pll *clk = to_zynqmp_pll(hw); + const char *clk_name = clk_hw_get_name(hw); + u32 clk_id = clk->clk_id; + int ret; + const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops(); + + if (!zynqmp_pll_is_enabled(hw)) + return; + + ret = eemi_ops->clock_disable(clk_id); + if (ret) + pr_warn_once("%s() clock disable failed for %s, ret = %d\n", + __func__, clk_name, ret); +} + +static const struct clk_ops zynqmp_pll_ops = { + .enable = zynqmp_pll_enable, + .disable = zynqmp_pll_disable, + .is_enabled = zynqmp_pll_is_enabled, + .round_rate = zynqmp_pll_round_rate, + .recalc_rate = zynqmp_pll_recalc_rate, + .set_rate = zynqmp_pll_set_rate, +}; + +/** + * zynqmp_clk_register_pll() - Register PLL with the clock framework + * @name: PLL name + * @clk_id: Clock ID + * @parents: Name of this clock's parents + * @num_parents: Number of parents + * @nodes: Clock topology node + * + * Return: clock hardware to the registered clock + */ +struct clk_hw *zynqmp_clk_register_pll(const char *name, u32 clk_id, + const char * const *parents, + u8 num_parents, + const struct clock_topology *nodes) +{ + struct zynqmp_pll *pll; + struct clk_hw *hw; + struct clk_init_data init; + int ret; + + init.name = name; + init.ops = &zynqmp_pll_ops; + init.flags = nodes->flag; + init.parent_names = parents; + init.num_parents = 1; + + pll = kzalloc(sizeof(*pll), GFP_KERNEL); + if (!pll) + return ERR_PTR(-ENOMEM); + + pll->hw.init = &init; + pll->clk_id = clk_id; + + hw = &pll->hw; + ret = clk_hw_register(NULL, hw); + if (ret) { + kfree(pll); + return ERR_PTR(ret); + } + + clk_hw_set_rate_range(hw, PS_PLL_VCO_MIN, PS_PLL_VCO_MAX); + if (ret < 0) + pr_err("%s:ERROR clk_set_rate_range failed %d\n", name, ret); + + return hw; +} diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 7a9db0861803..3c3c28eff56a 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -72,6 +72,7 @@ enum pm_query_id { PM_QID_CLOCK_GET_FIXEDFACTOR_PARAMS, PM_QID_CLOCK_GET_PARENTS, PM_QID_CLOCK_GET_ATTRIBUTES, + PM_QID_CLOCK_GET_NUM_CLOCKS = 12, }; /** -- cgit v1.2.3-71-gd317 From f2e74abfaad446765ce0350aed1d9c5eed5b1b36 Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Fri, 27 Jul 2018 15:14:38 +0200 Subject: remoteproc: add release ops in rproc_mem_entry struct Memory entry could be allocated in different ways (ioremap, dma_alloc_coherent, internal RAM allocator...). This patch introduces a release ops in rproc_mem_entry structure to associate dedicated release mechanism to each memory entry descriptor in order to keep remoteproc core generic. 
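As an illustration (not part of this patch; the callback name is invented), a platform driver that mapped a region itself with ioremap() can now pair the entry with a matching release, which rproc_resource_cleanup() invokes in place of the previous unconditional dma_free_coherent():

#include <linux/io.h>
#include <linux/remoteproc.h>

/* Hypothetical release for a carveout the driver ioremap()'d itself. */
static int my_release_ioremapped(struct rproc *rproc,
				 struct rproc_mem_entry *mem)
{
	iounmap(mem->va);	/* no DMA API allocation to free here */
	return 0;
}

A driver stores such a callback in mem->release when it builds the entry; entries without a release op are simply unlinked and freed by the core.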
Signed-off-by: Loic Pallardy Acked-by: Bjorn Andersson Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 23 +++++++++++++++++++++-- include/linux/remoteproc.h | 5 ++++- 2 files changed, 25 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index ebadaad070a5..674f88d237b8 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -599,6 +599,24 @@ out: return ret; } +/** + * rproc_release_carveout() - release acquired carveout + * @rproc: rproc handle + * @mem: the memory entry to release + * + * This function releases specified memory entry @mem allocated via + * dma_alloc_coherent() function by @rproc. + */ +static int rproc_release_carveout(struct rproc *rproc, + struct rproc_mem_entry *mem) +{ + struct device *dev = &rproc->dev; + + /* clean up carveout allocations */ + dma_free_coherent(dev->parent, mem->len, mem->va, mem->dma); + return 0; +} + /** * rproc_handle_carveout() - handle phys contig memory allocation requests * @rproc: rproc handle @@ -733,6 +751,7 @@ static int rproc_handle_carveout(struct rproc *rproc, carveout->len = rsc->len; carveout->dma = dma; carveout->da = rsc->da; + carveout->release = rproc_release_carveout; list_add_tail(&carveout->node, &rproc->carveouts); @@ -920,8 +939,8 @@ static void rproc_resource_cleanup(struct rproc *rproc) /* clean up carveout allocations */ list_for_each_entry_safe(entry, tmp, &rproc->carveouts, node) { - dma_free_coherent(dev->parent, entry->len, entry->va, - entry->dma); + if (entry->release) + entry->release(rproc, entry); list_del(&entry->node); kfree(entry); } diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 75f9ca05b865..4116e92d4b3d 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -305,12 +305,15 @@ struct fw_rsc_vdev { struct fw_rsc_vdev_vring vring[0]; } __packed; +struct rproc; + /** * struct rproc_mem_entry - memory entry descriptor * @va: virtual address * @dma: dma address * @len: length, in bytes * @da: device address + * @release: release associated memory * @priv: associated data * @node: list node */ @@ -321,9 +324,9 @@ struct rproc_mem_entry { u32 da; void *priv; struct list_head node; + int (*release)(struct rproc *rproc, struct rproc_mem_entry *mem); }; -struct rproc; struct firmware; /** -- cgit v1.2.3-71-gd317 From 3265230c5b05fe919291d09e266a8aedc85ebad0 Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Fri, 27 Jul 2018 15:14:39 +0200 Subject: remoteproc: add name in rproc_mem_entry struct Add name field in struct rproc_mem_entry. This new field will be used to match memory area requested in resource table with pre-registered carveout. 
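A hedged sketch of the intended matching (the helper below is hypothetical; this patch only adds the field and copies rsc->name into it):

#include <linux/list.h>
#include <linux/remoteproc.h>
#include <linux/string.h>

/* Return the pre-registered carveout named @name, or NULL if none. */
static struct rproc_mem_entry *
my_find_carveout_by_name(struct rproc *rproc, const char *name)
{
	struct rproc_mem_entry *mem;

	list_for_each_entry(mem, &rproc->carveouts, node)
		if (!strcmp(mem->name, name))
			return mem;

	return NULL;
}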
Signed-off-by: Loic Pallardy Acked-by: Bjorn Andersson Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 1 + drivers/remoteproc/remoteproc_debugfs.c | 1 + include/linux/remoteproc.h | 2 ++ 3 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 674f88d237b8..a2d338fc8ddd 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -752,6 +752,7 @@ static int rproc_handle_carveout(struct rproc *rproc, carveout->dma = dma; carveout->da = rsc->da; carveout->release = rproc_release_carveout; + strlcpy(carveout->name, rsc->name, sizeof(carveout->name)); list_add_tail(&carveout->node, &rproc->carveouts); diff --git a/drivers/remoteproc/remoteproc_debugfs.c b/drivers/remoteproc/remoteproc_debugfs.c index a5c29f2764a3..e90135c64af0 100644 --- a/drivers/remoteproc/remoteproc_debugfs.c +++ b/drivers/remoteproc/remoteproc_debugfs.c @@ -260,6 +260,7 @@ static int rproc_carveouts_show(struct seq_file *seq, void *p) list_for_each_entry(carveout, &rproc->carveouts, node) { seq_puts(seq, "Carveout memory entry:\n"); + seq_printf(seq, "\tName: %s\n", carveout->name); seq_printf(seq, "\tVirtual address: %pK\n", carveout->va); seq_printf(seq, "\tDMA address: %pad\n", &carveout->dma); seq_printf(seq, "\tDevice address: 0x%x\n", carveout->da); diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 4116e92d4b3d..19908c930f47 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -315,6 +315,7 @@ struct rproc; * @da: device address * @release: release associated memory * @priv: associated data + * @name: associated memory region name (optional) * @node: list node */ struct rproc_mem_entry { @@ -323,6 +324,7 @@ struct rproc_mem_entry { int len; u32 da; void *priv; + char name[32]; struct list_head node; int (*release)(struct rproc *rproc, struct rproc_mem_entry *mem); }; -- cgit v1.2.3-71-gd317 From 72029c901a0244ca2e1eb09e1c453413a17f5787 Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Fri, 27 Jul 2018 15:14:40 +0200 Subject: remoteproc: add helper function to allocate and init rproc_mem_entry struct This patch introduces rproc_mem_entry_init helper function to simplify rproc_mem_entry structure allocation and filling by client. 
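For example (a sketch; the format string, id and release callback are placeholders), the printf-style trailing arguments make it easy to derive unique names for per-instance regions:

struct rproc_mem_entry *mem;

mem = rproc_mem_entry_init(dev, va, dma, len, da,
			   my_release, "vdev%dbuffer", id);
if (!mem)
	return -ENOMEM;

list_add_tail(&mem->node, &rproc->carveouts);

Internally the helper forwards the trailing arguments to vsnprintf(), so callers can pass either a plain string (the core passes rsc->name directly) or a format plus arguments.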
Signed-off-by: Loic Pallardy Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 65 +++++++++++++++++++++++++++--------- include/linux/remoteproc.h | 6 ++++ 2 files changed, 55 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index a2d338fc8ddd..9decc598944d 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -639,7 +639,7 @@ static int rproc_handle_carveout(struct rproc *rproc, struct fw_rsc_carveout *rsc, int offset, int avail) { - struct rproc_mem_entry *carveout, *mapping; + struct rproc_mem_entry *carveout, *mapping = NULL; struct device *dev = &rproc->dev; dma_addr_t dma; void *va; @@ -659,16 +659,11 @@ static int rproc_handle_carveout(struct rproc *rproc, dev_dbg(dev, "carveout rsc: name: %s, da 0x%x, pa 0x%x, len 0x%x, flags 0x%x\n", rsc->name, rsc->da, rsc->pa, rsc->len, rsc->flags); - carveout = kzalloc(sizeof(*carveout), GFP_KERNEL); - if (!carveout) - return -ENOMEM; - va = dma_alloc_coherent(dev->parent, rsc->len, &dma, GFP_KERNEL); if (!va) { dev_err(dev->parent, "failed to allocate dma memory: len 0x%x\n", rsc->len); - ret = -ENOMEM; - goto free_carv; + return -ENOMEM; } dev_dbg(dev, "carveout va %pK, dma %pad, len 0x%x\n", @@ -747,27 +742,65 @@ static int rproc_handle_carveout(struct rproc *rproc, */ rsc->pa = (u32)rproc_va_to_pa(va); - carveout->va = va; - carveout->len = rsc->len; - carveout->dma = dma; - carveout->da = rsc->da; - carveout->release = rproc_release_carveout; - strlcpy(carveout->name, rsc->name, sizeof(carveout->name)); + carveout = rproc_mem_entry_init(dev, va, dma, rsc->len, rsc->da, + rproc_release_carveout, rsc->name); + if (!carveout) + goto free_carv; list_add_tail(&carveout->node, &rproc->carveouts); return 0; +free_carv: + kfree(carveout); free_mapping: kfree(mapping); dma_free: dma_free_coherent(dev->parent, rsc->len, va, dma); -free_carv: - kfree(carveout); return ret; } -/* +/** + * rproc_mem_entry_init() - allocate and initialize rproc_mem_entry struct + * @dev: pointer on device struct + * @va: virtual address + * @dma: dma address + * @len: memory carveout length + * @da: device address + * @release: memory carveout function + * @name: carveout name + * + * This function allocates a rproc_mem_entry struct and fill it with parameters + * provided by client. + */ +struct rproc_mem_entry * +rproc_mem_entry_init(struct device *dev, + void *va, dma_addr_t dma, int len, u32 da, + int (*release)(struct rproc *, struct rproc_mem_entry *), + const char *name, ...) +{ + struct rproc_mem_entry *mem; + va_list args; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return mem; + + mem->va = va; + mem->dma = dma; + mem->da = da; + mem->len = len; + mem->release = release; + + va_start(args, name); + vsnprintf(mem->name, sizeof(mem->name), name, args); + va_end(args); + + return mem; +} +EXPORT_SYMBOL(rproc_mem_entry_init); + +/** * A lookup table for resource handlers. The indices are defined in * enum fw_resource_type. 
*/ diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 19908c930f47..9e2b84fa2efa 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -559,6 +559,12 @@ int rproc_add(struct rproc *rproc); int rproc_del(struct rproc *rproc); void rproc_free(struct rproc *rproc); +struct rproc_mem_entry * +rproc_mem_entry_init(struct device *dev, + void *va, dma_addr_t dma, int len, u32 da, + int (*release)(struct rproc *, struct rproc_mem_entry *), + const char *name, ...); + int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); -- cgit v1.2.3-71-gd317 From 15c0b0258e4f1c3c817f34d092d2cc6ff5178bdd Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Fri, 27 Jul 2018 15:14:41 +0200 Subject: remoteproc: introduce rproc_add_carveout function This patch introduces a new API to allow platform driver to register platform specific carveout regions. Signed-off-by: Loic Pallardy Acked-by: Bjorn Andersson Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 16 +++++++++++++++- include/linux/remoteproc.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 9decc598944d..db771e53f097 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -747,7 +747,7 @@ static int rproc_handle_carveout(struct rproc *rproc, if (!carveout) goto free_carv; - list_add_tail(&carveout->node, &rproc->carveouts); + rproc_add_carveout(rproc, carveout); return 0; @@ -760,6 +760,20 @@ dma_free: return ret; } +/** + * rproc_add_carveout() - register an allocated carveout region + * @rproc: rproc handle + * @mem: memory entry to register + * + * This function registers specified memory entry in @rproc carveouts list. + * Specified carveout should have been allocated before registering. + */ +void rproc_add_carveout(struct rproc *rproc, struct rproc_mem_entry *mem) +{ + list_add_tail(&mem->node, &rproc->carveouts); +} +EXPORT_SYMBOL(rproc_add_carveout); + /** * rproc_mem_entry_init() - allocate and initialize rproc_mem_entry struct * @dev: pointer on device struct diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 9e2b84fa2efa..8a350265d883 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -559,6 +559,8 @@ int rproc_add(struct rproc *rproc); int rproc_del(struct rproc *rproc); void rproc_free(struct rproc *rproc); +void rproc_add_carveout(struct rproc *rproc, struct rproc_mem_entry *mem); + struct rproc_mem_entry * rproc_mem_entry_init(struct device *dev, void *va, dma_addr_t dma, int len, u32 da, -- cgit v1.2.3-71-gd317 From d7c51706d0956472b7c0530b1bf8fba32d82ee6b Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Fri, 27 Jul 2018 15:14:43 +0200 Subject: remoteproc: add alloc ops in rproc_mem_entry struct Memory entry could be allocated in different ways (ioremap, dma_alloc_coherent, internal RAM allocator...). This patch introduces an alloc ops in rproc_mem_entry structure to associate dedicated allocation mechanism to each memory entry descriptor in order to do remote core agnostic from memory allocators. The introduction of this ops allows to perform allocation of all registered carveout at the same time, just before calling rproc_start(). It simplifies and makes uniform carveout management whatever origin. 
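A hedged sketch of a driver-specific pair (addresses, sizes and names are placeholders, not from this patch), registering an internal-RAM carveout whose allocation the core will trigger:

#include <linux/io.h>
#include <linux/remoteproc.h>
#include <linux/sizes.h>

#define MY_RAM_PA	0x10000000	/* placeholder physical address */
#define MY_RAM_DA	0x00000000	/* placeholder device address */

static int my_ram_alloc(struct rproc *rproc, struct rproc_mem_entry *mem)
{
	mem->va = ioremap_wc(MY_RAM_PA, mem->len);
	if (!mem->va)
		return -ENOMEM;
	mem->dma = MY_RAM_PA;
	return 0;
}

static int my_ram_release(struct rproc *rproc, struct rproc_mem_entry *mem)
{
	iounmap(mem->va);
	return 0;
}

/* In the platform driver, before rproc_boot(): */
mem = rproc_mem_entry_init(dev, NULL, 0, SZ_64K, MY_RAM_DA,
			   my_ram_alloc, my_ram_release, "myram");
if (!mem)
	return -ENOMEM;
rproc_add_carveout(rproc, mem);

The core then walks the carveout list in rproc_alloc_registered_carveouts() just before rproc_start(), so firmware-requested and driver-registered regions go through the same allocation path.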
Signed-off-by: Loic Pallardy Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 261 ++++++++++++++++++++++------------- include/linux/remoteproc.h | 7 + 2 files changed, 175 insertions(+), 93 deletions(-) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 800320d06cb8..9d17b3079506 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -642,74 +642,31 @@ out: } /** - * rproc_release_carveout() - release acquired carveout + * rproc_alloc_carveout() - allocated specified carveout * @rproc: rproc handle - * @mem: the memory entry to release - * - * This function releases specified memory entry @mem allocated via - * dma_alloc_coherent() function by @rproc. - */ -static int rproc_release_carveout(struct rproc *rproc, - struct rproc_mem_entry *mem) -{ - struct device *dev = &rproc->dev; - - /* clean up carveout allocations */ - dma_free_coherent(dev->parent, mem->len, mem->va, mem->dma); - return 0; -} - -/** - * rproc_handle_carveout() - handle phys contig memory allocation requests - * @rproc: rproc handle - * @rsc: the resource entry - * @avail: size of available data (for image validation) - * - * This function will handle firmware requests for allocation of physically - * contiguous memory regions. - * - * These request entries should come first in the firmware's resource table, - * as other firmware entries might request placing other data objects inside - * these memory regions (e.g. data/code segments, trace resource entries, ...). + * @mem: the memory entry to allocate * - * Allocating memory this way helps utilizing the reserved physical memory - * (e.g. CMA) more efficiently, and also minimizes the number of TLB entries - * needed to map it (in case @rproc is using an IOMMU). Reducing the TLB - * pressure is important; it may have a substantial impact on performance. + * This function allocate specified memory entry @mem using + * dma_alloc_coherent() as default allocator */ -static int rproc_handle_carveout(struct rproc *rproc, - struct fw_rsc_carveout *rsc, - int offset, int avail) +static int rproc_alloc_carveout(struct rproc *rproc, + struct rproc_mem_entry *mem) { - struct rproc_mem_entry *carveout, *mapping = NULL; + struct rproc_mem_entry *mapping = NULL; struct device *dev = &rproc->dev; dma_addr_t dma; void *va; int ret; - if (sizeof(*rsc) > avail) { - dev_err(dev, "carveout rsc is truncated\n"); - return -EINVAL; - } - - /* make sure reserved bytes are zeroes */ - if (rsc->reserved) { - dev_err(dev, "carveout rsc has non zero reserved bytes\n"); - return -EINVAL; - } - - dev_dbg(dev, "carveout rsc: name: %s, da 0x%x, pa 0x%x, len 0x%x, flags 0x%x\n", - rsc->name, rsc->da, rsc->pa, rsc->len, rsc->flags); - - va = dma_alloc_coherent(dev->parent, rsc->len, &dma, GFP_KERNEL); + va = dma_alloc_coherent(dev->parent, mem->len, &dma, GFP_KERNEL); if (!va) { dev_err(dev->parent, - "failed to allocate dma memory: len 0x%x\n", rsc->len); + "failed to allocate dma memory: len 0x%x\n", mem->len); return -ENOMEM; } dev_dbg(dev, "carveout va %pK, dma %pad, len 0x%x\n", - va, &dma, rsc->len); + va, &dma, mem->len); /* * Ok, this is non-standard. @@ -729,22 +686,22 @@ static int rproc_handle_carveout(struct rproc *rproc, * physical address in this case. 
*/ - if (rsc->da != FW_RSC_ADDR_ANY && !rproc->domain) { - dev_err(dev->parent, - "Bad carveout rsc configuration\n"); - ret = -ENOMEM; - goto dma_free; - } + if (mem->da != FW_RSC_ADDR_ANY) { + if (!rproc->domain) { + dev_err(dev->parent, + "Bad carveout rsc configuration\n"); + ret = -ENOMEM; + goto dma_free; + } - if (rsc->da != FW_RSC_ADDR_ANY && rproc->domain) { mapping = kzalloc(sizeof(*mapping), GFP_KERNEL); if (!mapping) { ret = -ENOMEM; goto dma_free; } - ret = iommu_map(rproc->domain, rsc->da, dma, rsc->len, - rsc->flags); + ret = iommu_map(rproc->domain, mem->da, dma, mem->len, + mem->flags); if (ret) { dev_err(dev, "iommu_map failed: %d\n", ret); goto free_mapping; @@ -757,51 +714,101 @@ static int rproc_handle_carveout(struct rproc *rproc, * We can't trust the remote processor not to change the * resource table, so we must maintain this info independently. */ - mapping->da = rsc->da; - mapping->len = rsc->len; + mapping->da = mem->da; + mapping->len = mem->len; list_add_tail(&mapping->node, &rproc->mappings); dev_dbg(dev, "carveout mapped 0x%x to %pad\n", - rsc->da, &dma); + mem->da, &dma); + } else { + mem->da = (u32)dma; } - /* - * Some remote processors might need to know the pa - * even though they are behind an IOMMU. E.g., OMAP4's - * remote M3 processor needs this so it can control - * on-chip hardware accelerators that are not behind - * the IOMMU, and therefor must know the pa. - * - * Generally we don't want to expose physical addresses - * if we don't have to (remote processors are generally - * _not_ trusted), so we might want to do this only for - * remote processor that _must_ have this (e.g. OMAP4's - * dual M3 subsystem). - * - * Non-IOMMU processors might also want to have this info. - * In this case, the device address and the physical address - * are the same. - */ - rsc->pa = (u32)rproc_va_to_pa(va); - - carveout = rproc_mem_entry_init(dev, va, dma, rsc->len, rsc->da, - rproc_release_carveout, rsc->name); - if (!carveout) - goto free_carv; - - rproc_add_carveout(rproc, carveout); + mem->dma = (u32)dma; + mem->va = va; return 0; -free_carv: - kfree(carveout); free_mapping: kfree(mapping); dma_free: - dma_free_coherent(dev->parent, rsc->len, va, dma); + dma_free_coherent(dev->parent, mem->len, va, dma); return ret; } +/** + * rproc_release_carveout() - release acquired carveout + * @rproc: rproc handle + * @mem: the memory entry to release + * + * This function releases specified memory entry @mem allocated via + * rproc_alloc_carveout() function by @rproc. + */ +static int rproc_release_carveout(struct rproc *rproc, + struct rproc_mem_entry *mem) +{ + struct device *dev = &rproc->dev; + + /* clean up carveout allocations */ + dma_free_coherent(dev->parent, mem->len, mem->va, mem->dma); + return 0; +} + +/** + * rproc_handle_carveout() - handle phys contig memory allocation requests + * @rproc: rproc handle + * @rsc: the resource entry + * @avail: size of available data (for image validation) + * + * This function will handle firmware requests for allocation of physically + * contiguous memory regions. + * + * These request entries should come first in the firmware's resource table, + * as other firmware entries might request placing other data objects inside + * these memory regions (e.g. data/code segments, trace resource entries, ...). + * + * Allocating memory this way helps utilizing the reserved physical memory + * (e.g. CMA) more efficiently, and also minimizes the number of TLB entries + * needed to map it (in case @rproc is using an IOMMU). 
Reducing the TLB + * pressure is important; it may have a substantial impact on performance. + */ +static int rproc_handle_carveout(struct rproc *rproc, + struct fw_rsc_carveout *rsc, + int offset, int avail) +{ + struct rproc_mem_entry *carveout; + struct device *dev = &rproc->dev; + + if (sizeof(*rsc) > avail) { + dev_err(dev, "carveout rsc is truncated\n"); + return -EINVAL; + } + + /* make sure reserved bytes are zeroes */ + if (rsc->reserved) { + dev_err(dev, "carveout rsc has non zero reserved bytes\n"); + return -EINVAL; + } + + dev_dbg(dev, "carveout rsc: name: %s, da 0x%x, pa 0x%x, len 0x%x, flags 0x%x\n", + rsc->name, rsc->da, rsc->pa, rsc->len, rsc->flags); + + /* Register carveout in in list */ + carveout = rproc_mem_entry_init(dev, 0, 0, rsc->len, rsc->da, + rproc_alloc_carveout, + rproc_release_carveout, rsc->name); + if (!carveout) { + dev_err(dev, "Can't allocate memory entry structure\n"); + return -ENOMEM; + } + + carveout->flags = rsc->flags; + carveout->rsc_offset = offset; + rproc_add_carveout(rproc, carveout); + + return 0; +} + /** * rproc_add_carveout() - register an allocated carveout region * @rproc: rproc handle @@ -832,6 +839,7 @@ EXPORT_SYMBOL(rproc_add_carveout); struct rproc_mem_entry * rproc_mem_entry_init(struct device *dev, void *va, dma_addr_t dma, int len, u32 da, + int (*alloc)(struct rproc *, struct rproc_mem_entry *), int (*release)(struct rproc *, struct rproc_mem_entry *), const char *name, ...) { @@ -846,7 +854,9 @@ rproc_mem_entry_init(struct device *dev, mem->dma = dma; mem->da = da; mem->len = len; + mem->alloc = alloc; mem->release = release; + mem->rsc_offset = FW_RSC_ADDR_ANY; va_start(args, name); vsnprintf(mem->name, sizeof(mem->name), name, args); @@ -977,6 +987,63 @@ static void rproc_unprepare_subdevices(struct rproc *rproc) } } +/** + * rproc_alloc_registered_carveouts() - allocate all carveouts registered + * in the list + * @rproc: the remote processor handle + * + * This function parses registered carveout list, performs allocation + * if alloc() ops registered and updates resource table information + * if rsc_offset set. + * + * Return: 0 on success + */ +static int rproc_alloc_registered_carveouts(struct rproc *rproc) +{ + struct rproc_mem_entry *entry, *tmp; + struct fw_rsc_carveout *rsc; + struct device *dev = &rproc->dev; + int ret; + + list_for_each_entry_safe(entry, tmp, &rproc->carveouts, node) { + if (entry->alloc) { + ret = entry->alloc(rproc, entry); + if (ret) { + dev_err(dev, "Unable to allocate carveout %s: %d\n", + entry->name, ret); + return -ENOMEM; + } + } + + if (entry->rsc_offset != FW_RSC_ADDR_ANY) { + /* update resource table */ + rsc = (void *)rproc->table_ptr + entry->rsc_offset; + + /* + * Some remote processors might need to know the pa + * even though they are behind an IOMMU. E.g., OMAP4's + * remote M3 processor needs this so it can control + * on-chip hardware accelerators that are not behind + * the IOMMU, and therefor must know the pa. + * + * Generally we don't want to expose physical addresses + * if we don't have to (remote processors are generally + * _not_ trusted), so we might want to do this only for + * remote processor that _must_ have this (e.g. OMAP4's + * dual M3 subsystem). + * + * Non-IOMMU processors might also want to have this info. + * In this case, the device address and the physical address + * are the same. 
+ */ + if (entry->va) + rsc->pa = (u32)rproc_va_to_pa(entry->va); + } + } + + return 0; +} + /** * rproc_coredump_cleanup() - clean up dump_segments list * @rproc: the remote processor handle @@ -1149,6 +1216,14 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) goto clean_up_resources; } + /* Allocate carveout resources associated to rproc */ + ret = rproc_alloc_registered_carveouts(rproc); + if (ret) { + dev_err(dev, "Failed to allocate associated carveouts: %d\n", + ret); + goto clean_up_resources; + } + ret = rproc_start(rproc, fw); if (ret) goto clean_up_resources; diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 8a350265d883..d251c091303c 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -317,6 +317,9 @@ struct rproc; * @priv: associated data * @name: associated memory region name (optional) * @node: list node + * @rsc_offset: offset in resource table + * @flags: iommu protection flags + * @alloc: specific memory allocator function */ struct rproc_mem_entry { void *va; @@ -326,6 +329,9 @@ struct rproc_mem_entry { void *priv; char name[32]; struct list_head node; + u32 rsc_offset; + u32 flags; + int (*alloc)(struct rproc *rproc, struct rproc_mem_entry *mem); int (*release)(struct rproc *rproc, struct rproc_mem_entry *mem); }; @@ -564,6 +570,7 @@ void rproc_add_carveout(struct rproc *rproc, struct rproc_mem_entry *mem); struct rproc_mem_entry * rproc_mem_entry_init(struct device *dev, void *va, dma_addr_t dma, int len, u32 da, + int (*alloc)(struct rproc *, struct rproc_mem_entry *), int (*release)(struct rproc *, struct rproc_mem_entry *), const char *name, ...); -- cgit v1.2.3-71-gd317 From 1429cca1175f4cb64dd5d61ffd6037895a41d672 Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Fri, 27 Jul 2018 15:14:44 +0200 Subject: remoteproc: add helper function to allocate rproc_mem_entry from reserved memory This patch introduces rproc_res_mem_entry_init() helper function to allocate a rproc_mem_entry structure from a reserved memory region. In that case, rproc_mem_entry structure has no alloc and release ops. It will be used to assigned the specified reserved memory to any rproc sub device. Relation between rproc_mem_entry and rproc sub device will be done by name. Signed-off-by: Loic Pallardy Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 37 ++++++++++++++++++++++++++++++++++++ include/linux/remoteproc.h | 6 ++++++ 2 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 9d17b3079506..d7a623b8801c 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -857,6 +857,7 @@ rproc_mem_entry_init(struct device *dev, mem->alloc = alloc; mem->release = release; mem->rsc_offset = FW_RSC_ADDR_ANY; + mem->of_resm_idx = -1; va_start(args, name); vsnprintf(mem->name, sizeof(mem->name), name, args); @@ -866,6 +867,42 @@ rproc_mem_entry_init(struct device *dev, } EXPORT_SYMBOL(rproc_mem_entry_init); +/** + * rproc_of_resm_mem_entry_init() - allocate and initialize rproc_mem_entry struct + * from a reserved memory phandle + * @dev: pointer on device struct + * @of_resm_idx: reserved memory phandle index in "memory-region" + * @len: memory carveout length + * @da: device address + * @name: carveout name + * + * This function allocates a rproc_mem_entry struct and fill it with parameters + * provided by client. 
+ */ +struct rproc_mem_entry * +rproc_of_resm_mem_entry_init(struct device *dev, u32 of_resm_idx, int len, + u32 da, const char *name, ...) +{ + struct rproc_mem_entry *mem; + va_list args; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return mem; + + mem->da = da; + mem->len = len; + mem->rsc_offset = FW_RSC_ADDR_ANY; + mem->of_resm_idx = of_resm_idx; + + va_start(args, name); + vsnprintf(mem->name, sizeof(mem->name), name, args); + va_end(args); + + return mem; +} +EXPORT_SYMBOL(rproc_of_resm_mem_entry_init); + /** * A lookup table for resource handlers. The indices are defined in * enum fw_resource_type. diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index d251c091303c..d4cabe8da507 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -319,6 +319,7 @@ struct rproc; * @node: list node * @rsc_offset: offset in resource table * @flags: iommu protection flags + * @of_resm_idx: reserved memory phandle index * @alloc: specific memory allocator function */ struct rproc_mem_entry { @@ -331,6 +332,7 @@ struct rproc_mem_entry { struct list_head node; u32 rsc_offset; u32 flags; + u32 of_resm_idx; int (*alloc)(struct rproc *rproc, struct rproc_mem_entry *mem); int (*release)(struct rproc *rproc, struct rproc_mem_entry *mem); }; @@ -574,6 +576,10 @@ rproc_mem_entry_init(struct device *dev, int (*release)(struct rproc *, struct rproc_mem_entry *), const char *name, ...); +struct rproc_mem_entry * +rproc_of_resm_mem_entry_init(struct device *dev, u32 of_resm_idx, int len, + u32 da, const char *name, ...); + int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); -- cgit v1.2.3-71-gd317 From df2fc43d09d3ee5ede82cab9299df5e78aa427b5 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 13 Sep 2018 11:17:23 +0200 Subject: list: introduce list_bulk_move_tail helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move all entries between @first and including @last before @head. This is useful for LRU lists where a whole block of entries should be moved to the end of the list. Used as a band aid in TTM, but better placed in the common list headers. 
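A minimal usage sketch (struct and function names invented for illustration):

#include <linux/list.h>

struct buf {
	struct list_head lru;
};

/*
 * Requeue the contiguous run first..last (inclusive, already linked on
 * @lru in that order) to the tail of @lru in O(1), rather than calling
 * list_move_tail() once per entry.
 */
static void demo_bulk_requeue(struct list_head *lru,
			      struct buf *first, struct buf *last)
{
	list_bulk_move_tail(lru, &first->lru, &last->lru);
}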
Acked-by: Dave Airlie Signed-off-by: Christian König Reviewed-by: Huang Rui Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_bo.c | 25 +++++-------------------- include/linux/list.h | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index b2a33bf1ef10..26b889f86670 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -247,20 +247,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); -static void ttm_list_move_bulk_tail(struct list_head *list, - struct list_head *first, - struct list_head *last) -{ - first->prev->next = last->next; - last->next->prev = first->prev; - - list->prev->next = first; - first->prev = list->prev; - - last->next = list; - list->prev = last; -} - void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk) { unsigned i; @@ -276,8 +262,8 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk) reservation_object_assert_held(pos->last->resv); man = &pos->first->bdev->man[TTM_PL_TT]; - ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru, - &pos->last->lru); + list_bulk_move_tail(&man->lru[i], &pos->first->lru, + &pos->last->lru); } for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { @@ -291,8 +277,8 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk) reservation_object_assert_held(pos->last->resv); man = &pos->first->bdev->man[TTM_PL_VRAM]; - ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru, - &pos->last->lru); + list_bulk_move_tail(&man->lru[i], &pos->first->lru, + &pos->last->lru); } for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { @@ -306,8 +292,7 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk) reservation_object_assert_held(pos->last->resv); lru = &pos->first->bdev->glob->swap_lru[i]; - ttm_list_move_bulk_tail(lru, &pos->first->swap, - &pos->last->swap); + list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap); } } EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail); diff --git a/include/linux/list.h b/include/linux/list.h index de04cc5ed536..edb7628e46ed 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -183,6 +183,29 @@ static inline void list_move_tail(struct list_head *list, list_add_tail(list, head); } +/** + * list_bulk_move_tail - move a subsection of a list to its tail + * @head: the head that will follow our entry + * @first: first entry to move + * @last: last entry to move, can be the same as first + * + * Move all entries between @first and including @last before @head. + * All three entries must belong to the same linked list. + */ +static inline void list_bulk_move_tail(struct list_head *head, + struct list_head *first, + struct list_head *last) +{ + first->prev->next = last->next; + last->next->prev = first->prev; + + head->prev->next = first; + first->prev = head->prev; + + last->next = head; + head->prev = last; +} + /** * list_is_last - tests whether @list is the last entry in list @head * @list: the entry to test -- cgit v1.2.3-71-gd317 From a6ca88b241d5e929e6e60b12ad8cd288f0ffa256 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 1 Oct 2018 22:36:36 -0700 Subject: trace_uprobe: support reference counter in fd-based uprobe This patch enables uprobes with reference counter in fd-based uprobe. Highest 32 bits of perf_event_attr.config is used to stored offset of the reference count (semaphore). 
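For illustration, a userspace caller might encode the offset as follows (a sketch; the function name and argument choices are not from this patch, and the PMU type must be read from sysfs):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* @type: value of /sys/bus/event_source/devices/uprobe/type */
static int open_usdt_probe(int type, const char *binary,
			   __u64 probe_off, __u64 ref_ctr_off)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = ref_ctr_off << 32;	/* format: config:32-63 */
	attr.uprobe_path = (__u64)(unsigned long)binary;
	attr.probe_offset = probe_off;

	return syscall(__NR_perf_event_open, &attr,
		       -1 /* pid: all */, 0 /* cpu */, -1, 0);
}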
Format information in /sys/bus/event_source/devices/uprobe/format/ is updated to reflect this new feature. Link: http://lkml.kernel.org/r/20181002053636.1896903-1-songliubraving@fb.com Cc: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Reviewed-and-tested-by: Ravi Bangoria Signed-off-by: Song Liu Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 3 ++- kernel/events/core.c | 49 ++++++++++++++++++++++++++++++++--------- kernel/trace/trace_event_perf.c | 7 +++--- kernel/trace/trace_probe.h | 3 ++- kernel/trace/trace_uprobe.c | 4 +++- 5 files changed, 50 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 78a010e19ed4..4130a5497d40 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -575,7 +575,8 @@ extern int bpf_get_kprobe_info(const struct perf_event *event, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS -extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe); +extern int perf_uprobe_init(struct perf_event *event, + unsigned long ref_ctr_offset, bool is_retprobe); extern void perf_uprobe_destroy(struct perf_event *event); extern int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, const char **filename, diff --git a/kernel/events/core.c b/kernel/events/core.c index c80549bf82c6..65b30773af3e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8368,30 +8368,39 @@ static struct pmu perf_tracepoint = { * * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe + * if not set, create kprobe/uprobe + * + * The following values specify a reference counter (or semaphore in the + * terminology of tools like dtrace, systemtap, etc.) used by Userspace + * Statically Defined Tracepoints (USDT). Currently, we use 32 bits for the offset. + * + * PERF_UPROBE_REF_CTR_OFFSET_BITS # of bits in config as the offset + * PERF_UPROBE_REF_CTR_OFFSET_SHIFT # of bits to shift left */ enum perf_probe_config { PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0, /* [k,u]retprobe */ + PERF_UPROBE_REF_CTR_OFFSET_BITS = 32, + PERF_UPROBE_REF_CTR_OFFSET_SHIFT = 64 - PERF_UPROBE_REF_CTR_OFFSET_BITS, }; PMU_FORMAT_ATTR(retprobe, "config:0"); +#endif -static struct attribute *probe_attrs[] = { +#ifdef CONFIG_KPROBE_EVENTS +static struct attribute *kprobe_attrs[] = { &format_attr_retprobe.attr, NULL, }; -static struct attribute_group probe_format_group = { +static struct attribute_group kprobe_format_group = { .name = "format", - .attrs = probe_attrs, + .attrs = kprobe_attrs, }; -static const struct attribute_group *probe_attr_groups[] = { - &probe_format_group, +static const struct attribute_group *kprobe_attr_groups[] = { + &kprobe_format_group, NULL, }; -#endif -#ifdef CONFIG_KPROBE_EVENTS static int perf_kprobe_event_init(struct perf_event *event); static struct pmu perf_kprobe = { .task_ctx_nr = perf_sw_context, @@ -8401,7 +8410,7 @@ static struct pmu perf_kprobe = { .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, - .attr_groups = probe_attr_groups, + .attr_groups = kprobe_attr_groups, }; static int perf_kprobe_event_init(struct perf_event *event) @@ -8433,6 +8442,24 @@ static int perf_kprobe_event_init(struct perf_event *event) #endif /* CONFIG_KPROBE_EVENTS */ #ifdef CONFIG_UPROBE_EVENTS +PMU_FORMAT_ATTR(ref_ctr_offset, "config:32-63"); + +static struct attribute *uprobe_attrs[] = { + &format_attr_retprobe.attr, + &format_attr_ref_ctr_offset.attr, + NULL, +}; + +static struct attribute_group uprobe_format_group = { + .name = "format", + .attrs = uprobe_attrs, +}; + +static const struct attribute_group *uprobe_attr_groups[] = { + &uprobe_format_group, + NULL, +}; + static int perf_uprobe_event_init(struct perf_event *event); static struct pmu perf_uprobe = { .task_ctx_nr = perf_sw_context, @@ -8442,12 +8469,13 @@ static struct pmu perf_uprobe = { .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, - .attr_groups = probe_attr_groups, + .attr_groups = uprobe_attr_groups, }; static int perf_uprobe_event_init(struct perf_event *event) { int err; + unsigned long ref_ctr_offset; bool is_retprobe; if (event->attr.type != perf_uprobe.type) @@ -8463,7 +8491,8 @@ static int perf_uprobe_event_init(struct perf_event *event) return -EOPNOTSUPP; is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE; - err = perf_uprobe_init(event, is_retprobe); + ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT; + err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe); if (err) return err; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 69a3fe926e8c..76217bbef815 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -290,7 +290,8 @@ void perf_kprobe_destroy(struct perf_event *p_event) #endif /* CONFIG_KPROBE_EVENTS */ #ifdef CONFIG_UPROBE_EVENTS -int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe) +int perf_uprobe_init(struct perf_event *p_event, + unsigned long ref_ctr_offset, bool is_retprobe) { int ret; char *path = NULL; @@ -312,8 +313,8 @@ int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe) goto out; } - tp_event = create_local_trace_uprobe( - path, p_event->attr.probe_offset, is_retprobe); + tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset, +
ref_ctr_offset, is_retprobe); if (IS_ERR(tp_event)) { ret = PTR_ERR(tp_event); goto out; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 5f52668e165d..03b10f3201a5 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -412,6 +412,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs, extern void destroy_local_trace_kprobe(struct trace_event_call *event_call); extern struct trace_event_call * -create_local_trace_uprobe(char *name, unsigned long offs, bool is_return); +create_local_trace_uprobe(char *name, unsigned long offs, + unsigned long ref_ctr_offset, bool is_return); extern void destroy_local_trace_uprobe(struct trace_event_call *event_call); #endif diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3a7c73c40007..d09638706fe0 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1405,7 +1405,8 @@ static int unregister_uprobe_event(struct trace_uprobe *tu) #ifdef CONFIG_PERF_EVENTS struct trace_event_call * -create_local_trace_uprobe(char *name, unsigned long offs, bool is_return) +create_local_trace_uprobe(char *name, unsigned long offs, + unsigned long ref_ctr_offset, bool is_return) { struct trace_uprobe *tu; struct path path; @@ -1437,6 +1438,7 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return) tu->offset = offs; tu->path = path; + tu->ref_ctr_offset = ref_ctr_offset; tu->filename = kstrdup(name, GFP_KERNEL); init_trace_event_call(tu, &tu->tp.call); -- cgit v1.2.3-71-gd317 From 81b45683487a51b0f4d3b29d37f20d6d078544e4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 26 Aug 2018 03:16:29 +0900 Subject: compiler.h: give up __compiletime_assert_fallback() __compiletime_assert_fallback() is supposed to stop the build earlier by using the negative-array-size method in case the compiler does not support the "error" attribute, but it has never worked that way. You can simply try: BUILD_BUG_ON(1); GCC immediately terminates the build, but Clang does not report anything because Clang does not yet support the "error" attribute. The build will still fail, but only later at link time, so __compiletime_assert_fallback() is clearly not doing its job. The root cause is commit 1d6a0d19c855 ("bug.h: prevent double evaluation of `condition' in BUILD_BUG_ON"). Prior to that commit, BUILD_BUG_ON() was checked by the negative-array-size method *and* the link-time trick. Since that commit, the negative-array-size method is no longer effective because '__cond' is no longer constant. As the comment in <linux/build_bug.h> says, GCC (and Clang as well) only emits the error for obvious cases. When '__cond' is a variable, ((void)sizeof(char[1 - 2 * __cond])) ... is not obvious enough for the compiler to know that the array size is negative. Reverting that commit would break BUILD_BUG() because the negative-size array is evaluated before the code is optimized out. Let's give up __compiletime_assert_fallback(). This commit does not change the current behavior since it just rips off the useless code.
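To see why only the "error"-attribute path ever fires, here is a standalone sketch of the mechanism, outside the kernel; all names are illustrative, and it assumes gcc or clang with -O2 (the kernel guards this with __OPTIMIZE__ for the same reason):

	/* Declared but never defined; a surviving call is a compile-time
	 * error on compilers that honour the attribute, and a link-time
	 * error on compilers (like Clang at the time) that ignore it. */
	extern void assert_failed(void) __attribute__((error("assertion failed")));

	#define MY_BUILD_BUG_ON(cond)			\
		do {					\
			if (cond)			\
				assert_failed();	\
		} while (0)

	int main(void)
	{
		int x = 1;

		MY_BUILD_BUG_ON(sizeof(int) > sizeof(long));	/* fine: folded away */
		/* MY_BUILD_BUG_ON(1); would stop the build under gcc -O2 */

		/* The old fallback, (void)sizeof(char[1 - 2 * cond]), only
		 * breaks the build for an integer constant expression; with
		 * a variable it is just a (valid) variable-length array: */
		(void)sizeof(char[1 - 2 * (x - 1)]);	/* accepted, no error */
		return 0;
	}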
Signed-off-by: Masahiro Yamada Reviewed-by: Kees Cook Reviewed-by: Nick Desaulniers Signed-off-by: Kees Cook --- include/linux/compiler.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 681d866efb1e..87c776c3ce73 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -314,29 +314,14 @@ static inline void *offset_to_ptr(const int *off) #endif #ifndef __compiletime_error # define __compiletime_error(message) -/* - * Sparse complains of variable sized arrays due to the temporary variable in - * __compiletime_assert. Unfortunately we can't just expand it out to make - * sparse see a constant array size without breaking compiletime_assert on old - * versions of GCC (e.g. 4.2.4), so hide the array from sparse altogether. - */ -# ifndef __CHECKER__ -# define __compiletime_error_fallback(condition) \ - do { ((void)sizeof(char[1 - 2 * condition])); } while (0) -# endif -#endif -#ifndef __compiletime_error_fallback -# define __compiletime_error_fallback(condition) do { } while (0) #endif #ifdef __OPTIMIZE__ # define __compiletime_assert(condition, msg, prefix, suffix) \ do { \ - int __cond = !(condition); \ extern void prefix ## suffix(void) __compiletime_error(msg); \ - if (__cond) \ + if (!(condition)) \ prefix ## suffix(); \ - __compiletime_error_fallback(__cond); \ } while (0) #else # define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0) -- cgit v1.2.3-71-gd317 From c6aed238b7a9b15a5c90a0c31f1d36577b5d2cbe Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Fri, 27 Jul 2018 15:14:47 +0200 Subject: remoteproc: modify vring allocation to rely on centralized carveout allocator The current version of the rproc_alloc_vring() function supports only dynamic vring allocation. This patch allows vrings to be allocated based on a memory region declaration. Vrings are now managed like memory carveouts, to share the memory management code in rproc_alloc_registered_carveouts(). The allocated buffer is retrieved in rp_find_vq() via the rproc_find_carveout_by_name() function. This patch sets vring names to vdev"x"vring"y", where x is the vdev index in the resource table and y is the vring index within the vdev. This will be updated once a name can be associated with the vdev in the firmware resource table.
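For context, a hedged sketch of how a platform driver might pre-register a fixed carveout under the "vdev%dvring%d" naming scheme so that rproc_alloc_vring() below picks it up instead of allocating one. The rproc_mem_entry_init() argument order is inferred from its use in this patch; the callbacks and the device address are made up.

	#include <linux/remoteproc.h>
	#include <linux/sizes.h>

	static int my_vring_alloc(struct rproc *rproc, struct rproc_mem_entry *mem);
	static int my_vring_release(struct rproc *rproc, struct rproc_mem_entry *mem);

	static int my_register_vring_region(struct rproc *rproc)
	{
		struct rproc_mem_entry *mem;

		/* Fixed device address for vdev0's vring0, e.g. taken from a
		 * reserved-memory node; 0x10000000 is a placeholder. */
		mem = rproc_mem_entry_init(&rproc->dev, NULL, 0, SZ_4K, 0x10000000,
					   my_vring_alloc, my_vring_release,
					   "vdev%dvring%d", 0, 0);
		if (!mem)
			return -ENOMEM;

		rproc_add_carveout(rproc, mem);
		return 0;
	}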
Signed-off-by: Loic Pallardy Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 61 +++++++++++++++++--------------- drivers/remoteproc/remoteproc_internal.h | 2 ++ drivers/remoteproc/remoteproc_virtio.c | 14 +++++++- include/linux/remoteproc.h | 6 ++-- 4 files changed, 51 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 5abcd27a29f3..f77a42f6a8aa 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -53,6 +53,11 @@ typedef int (*rproc_handle_resources_t)(struct rproc *rproc, typedef int (*rproc_handle_resource_t)(struct rproc *rproc, void *, int offset, int avail); +static int rproc_alloc_carveout(struct rproc *rproc, + struct rproc_mem_entry *mem); +static int rproc_release_carveout(struct rproc *rproc, + struct rproc_mem_entry *mem); + /* Unique indices for remoteproc devices */ static DEFINE_IDA(rproc_dev_index); @@ -312,21 +317,33 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i) struct device *dev = &rproc->dev; struct rproc_vring *rvring = &rvdev->vring[i]; struct fw_rsc_vdev *rsc; - dma_addr_t dma; - void *va; int ret, size, notifyid; + struct rproc_mem_entry *mem; /* actual size of vring (in bytes) */ size = PAGE_ALIGN(vring_size(rvring->len, rvring->align)); - /* - * Allocate non-cacheable memory for the vring. In the future - * this call will also configure the IOMMU for us - */ - va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL); - if (!va) { - dev_err(dev->parent, "dma_alloc_coherent failed\n"); - return -EINVAL; + rsc = (void *)rproc->table_ptr + rvdev->rsc_offset; + + /* Search for pre-registered carveout */ + mem = rproc_find_carveout_by_name(rproc, "vdev%dvring%d", rvdev->index, + i); + if (mem) { + if (rproc_check_carveout_da(rproc, mem, rsc->vring[i].da, size)) + return -ENOMEM; + } else { + /* Register carveout in list */ + mem = rproc_mem_entry_init(dev, 0, 0, size, rsc->vring[i].da, + rproc_alloc_carveout, + rproc_release_carveout, + "vdev%dvring%d", + rvdev->index, i); + if (!mem) { + dev_err(dev, "Can't allocate memory entry structure\n"); + return -ENOMEM; + } + + rproc_add_carveout(rproc, mem); } /* @@ -337,7 +354,6 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i) ret = idr_alloc(&rproc->notifyids, rvring, 0, 0, GFP_KERNEL); if (ret < 0) { dev_err(dev, "idr_alloc failed: %d\n", ret); - dma_free_coherent(dev->parent, size, va, dma); return ret; } notifyid = ret; @@ -346,21 +362,9 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i) if (notifyid > rproc->max_notifyid) rproc->max_notifyid = notifyid; - dev_dbg(dev, "vring%d: va %pK dma %pad size 0x%x idr %d\n", - i, va, &dma, size, notifyid); - - rvring->va = va; - rvring->dma = dma; rvring->notifyid = notifyid; - /* - * Let the rproc know the notifyid and da of this vring. - * Not all platforms use dma_alloc_coherent to automatically - * set up the iommu. In this case the device address (da) will - * hold the physical address and not the device address.
- */ - rsc = (void *)rproc->table_ptr + rvdev->rsc_offset; - rsc->vring[i].da = dma; + /* Let the rproc know the notifyid of this vring.*/ rsc->vring[i].notifyid = notifyid; return 0; } @@ -392,12 +396,10 @@ rproc_parse_vring(struct rproc_vdev *rvdev, struct fw_rsc_vdev *rsc, int i) void rproc_free_vring(struct rproc_vring *rvring) { - int size = PAGE_ALIGN(vring_size(rvring->len, rvring->align)); struct rproc *rproc = rvring->rvdev->rproc; int idx = rvring->rvdev->vring - rvring; struct fw_rsc_vdev *rsc; - dma_free_coherent(rproc->dev.parent, size, rvring->va, rvring->dma); idr_remove(&rproc->notifyids, rvring->notifyid); /* reset resource entry info */ @@ -484,6 +486,7 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc, rvdev->id = rsc->id; rvdev->rproc = rproc; + rvdev->index = rproc->nb_vdev++; /* parse the vrings */ for (i = 0; i < rsc->num_of_vrings; i++) { @@ -528,9 +531,6 @@ void rproc_vdev_release(struct kref *ref) for (id = 0; id < ARRAY_SIZE(rvdev->vring); id++) { rvring = &rvdev->vring[id]; - if (!rvring->va) - continue; - rproc_free_vring(rvring); } @@ -1323,6 +1323,9 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) /* reset max_notifyid */ rproc->max_notifyid = -1; + /* reset handled vdev */ + rproc->nb_vdev = 0; + /* handle fw resources which are required to boot rproc */ ret = rproc_handle_resources(rproc, rproc_loading_handlers); if (ret) { diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h index 7570beb035b5..f6cad243d7ca 100644 --- a/drivers/remoteproc/remoteproc_internal.h +++ b/drivers/remoteproc/remoteproc_internal.h @@ -60,6 +60,8 @@ int rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw); int rproc_elf_load_rsc_table(struct rproc *rproc, const struct firmware *fw); struct resource_table *rproc_elf_find_loaded_rsc_table(struct rproc *rproc, const struct firmware *fw); +struct rproc_mem_entry * +rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...); static inline int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw) diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index bbecd44df7e8..de21f620b882 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -76,7 +76,9 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, struct rproc_vdev *rvdev = vdev_to_rvdev(vdev); struct rproc *rproc = vdev_to_rproc(vdev); struct device *dev = &rproc->dev; + struct rproc_mem_entry *mem; struct rproc_vring *rvring; + struct fw_rsc_vdev *rsc; struct virtqueue *vq; void *addr; int len, size; @@ -88,8 +90,14 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, if (!name) return NULL; + /* Search allocated memory region by name */ + mem = rproc_find_carveout_by_name(rproc, "vdev%dvring%d", rvdev->index, + id); + if (!mem || !mem->va) + return ERR_PTR(-ENOMEM); + rvring = &rvdev->vring[id]; - addr = rvring->va; + addr = mem->va; len = rvring->len; /* zero vring */ @@ -114,6 +122,10 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev, rvring->vq = vq; vq->priv = rvring; + /* Update vring in resource table */ + rsc = (void *)rproc->table_ptr + rvdev->rsc_offset; + rsc->vring[id].da = mem->da; + return vq; } diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index d4cabe8da507..8bb0cf0416f1 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -454,6 +454,7 @@ struct 
rproc_dump_segment { * @has_iommu: flag to indicate if remote processor is behind an MMU * @auto_boot: flag to indicate if remote processor should be auto-started * @dump_segments: list of segments in the firmware + * @nb_vdev: number of vdevs currently handled by rproc */ struct rproc { struct list_head node; @@ -486,6 +487,7 @@ struct rproc { bool has_iommu; bool auto_boot; struct list_head dump_segments; + int nb_vdev; }; /** @@ -513,7 +515,6 @@ struct rproc_subdev { /** * struct rproc_vring - remoteproc vring state * @va: virtual address - @dma: dma address * @len: length, in bytes * @da: device address * @align: vring alignment @@ -523,7 +524,6 @@ struct rproc_subdev { */ struct rproc_vring { void *va; - dma_addr_t dma; int len; u32 da; u32 align; @@ -542,6 +542,7 @@ struct rproc_vring { * @vdev: the virtio device * @vring: the vrings for this vdev * @rsc_offset: offset of the vdev's resource entry + * @index: vdev position versus other vdev declared in resource table */ struct rproc_vdev { struct kref refcount; @@ -554,6 +555,7 @@ struct rproc_vdev { struct virtio_device vdev; struct rproc_vring vring[RVDEV_NUM_VRINGS]; u32 rsc_offset; + u32 index; }; struct rproc *rproc_get_by_phandle(phandle phandle); -- cgit v1.2.3-71-gd317 From 18127429a854e7607b859484880b8e26cee9ddab Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 15 Oct 2018 15:43:06 +0200 Subject: bitops: protect variables in set_mask_bits() macro Unprotected naming of local variables within the set_mask_bits() can easily lead to using the wrong scope. Noticed this when "set_mask_bits(&foo->bar, 0, mask)" behaved as a no-op. Signed-off-by: Miklos Szeredi Fixes: 00a1a053ebe5 ("ext4: atomically set inode->i_flags in ext4_set_inode_flags()") Cc: Theodore Ts'o --- include/linux/bitops.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 7ddb1349394d..d18ee0e63c32 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -236,17 +236,17 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, #ifdef __KERNEL__ #ifndef set_mask_bits -#define set_mask_bits(ptr, _mask, _bits) \ +#define set_mask_bits(ptr, mask, bits) \ ({ \ - const typeof(*ptr) mask = (_mask), bits = (_bits); \ - typeof(*ptr) old, new; \ + const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \ + typeof(*(ptr)) old__, new__; \ \ do { \ - old = READ_ONCE(*ptr); \ - new = (old & ~mask) | bits; \ - } while (cmpxchg(ptr, old, new) != old); \ + old__ = READ_ONCE(*(ptr)); \ + new__ = (old__ & ~mask__) | bits__; \ + } while (cmpxchg(ptr, old__, new__) != old__); \ \ - new; \ + new__; \ }) #endif -- cgit v1.2.3-71-gd317 From edfa87281f4fa1b78a21f6db999935a2faa2f6b8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 15 Oct 2018 15:43:06 +0200 Subject: bitops: protect variables in bit_clear_unless() macro Unprotected naming of local variables within bit_clear_unless() can easily lead to using the wrong scope.
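Both macros share the same failure mode. As a quick illustration, a userspace sketch with a simplified, non-atomic model of the old set_mask_bits() (all names here are made up):

	#include <stdio.h>

	/* Old style: locals named 'mask'/'bits' shadow caller variables. */
	#define bad_set_mask_bits(ptr, _mask, _bits)				\
	({									\
		const __typeof__(*(ptr)) mask = (_mask), bits = (_bits);	\
		*(ptr) = (*(ptr) & ~mask) | bits;				\
	})

	int main(void)
	{
		unsigned long flags = 0;
		unsigned long mask = 0x0f;	/* caller variable named 'mask' */

		/* 'bits = (_bits)' expands to 'bits = (mask)', but by then
		 * 'mask' names the macro-local just initialised to 0, so
		 * the whole call is a no-op: */
		bad_set_mask_bits(&flags, 0, mask);
		printf("flags = %#lx\n", flags);	/* prints 0, not 0xf */
		return 0;
	}

With the suffixed names (mask__, bits__) the caller's mask is captured before the locals come into scope, so the call behaves as intended.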
Noticed this by code review after having hit this issue in set_mask_bits(). Signed-off-by: Miklos Szeredi Fixes: 85ad1d13ee9b ("md: set MD_CHANGE_PENDING in a atomic region") Cc: Guoqing Jiang --- include/linux/bitops.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index d18ee0e63c32..705f7c442691 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -251,18 +251,18 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, #endif #ifndef bit_clear_unless -#define bit_clear_unless(ptr, _clear, _test) \ +#define bit_clear_unless(ptr, clear, test) \ ({ \ - const typeof(*ptr) clear = (_clear), test = (_test); \ - typeof(*ptr) old, new; \ + const typeof(*(ptr)) clear__ = (clear), test__ = (test);\ + typeof(*(ptr)) old__, new__; \ \ do { \ - old = READ_ONCE(*ptr); \ - new = old & ~clear; \ - } while (!(old & test) && \ - cmpxchg(ptr, old, new) != old); \ + old__ = READ_ONCE(*(ptr)); \ + new__ = old__ & ~clear__; \ + } while (!(old__ & test__) && \ + cmpxchg(ptr, old__, new__) != old__); \ \ - !(old & test); \ + !(old__ & test__); \ }) #endif -- cgit v1.2.3-71-gd317 From 616e45df7c4aa71279c07cc803a1d51f43f89f37 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 31 Aug 2018 12:45:54 +0800 Subject: clk: add new APIs to operate on all available clocks This patch introduces the of_clk_bulk_get_all and clk_bulk_x_all APIs for users who just want to handle all available clocks from the device tree without needing to know detailed clock information like clock numbers and names. This is useful when writing generic drivers that handle the clock part. Cc: Stephen Boyd Cc: Masahiro Yamada Tested-by: Thor Thayer Signed-off-by: Dong Aisheng Signed-off-by: Stephen Boyd --- drivers/clk/clk-bulk.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/clk.h | 42 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-bulk.c b/drivers/clk/clk-bulk.c index 4460ac52199f..6a7118d4250a 100644 --- a/drivers/clk/clk-bulk.c +++ b/drivers/clk/clk-bulk.c @@ -17,9 +17,11 @@ */ #include <linux/clk.h> +#include <linux/clk-provider.h> #include <linux/device.h> #include <linux/export.h> #include <linux/mutex.h> +#include <linux/of.h> static int __must_check of_clk_bulk_get(struct device_node *np, int num_clks, struct clk_bulk_data *clks) @@ -49,6 +51,32 @@ err: return ret; } +static int __must_check of_clk_bulk_get_all(struct device_node *np, + struct clk_bulk_data **clks) +{ + struct clk_bulk_data *clk_bulk; + int num_clks; + int ret; + + num_clks = of_clk_get_parent_count(np); + if (!num_clks) + return 0; + + clk_bulk = kmalloc_array(num_clks, sizeof(*clk_bulk), GFP_KERNEL); + if (!clk_bulk) + return -ENOMEM; + + ret = of_clk_bulk_get(np, num_clks, clk_bulk); + if (ret) { + kfree(clk_bulk); + return ret; + } + + *clks = clk_bulk; + + return num_clks; +} + void clk_bulk_put(int num_clks, struct clk_bulk_data *clks) { while (--num_clks >= 0) { @@ -88,6 +116,29 @@ err: } EXPORT_SYMBOL(clk_bulk_get); +void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) +{ + if (IS_ERR_OR_NULL(clks)) + return; + + clk_bulk_put(num_clks, clks); + + kfree(clks); +} +EXPORT_SYMBOL(clk_bulk_put_all); + +int __must_check clk_bulk_get_all(struct device *dev, + struct clk_bulk_data **clks) +{ + struct device_node *np = dev_of_node(dev); + + if (!np) + return 0; + + return of_clk_bulk_get_all(np, clks); +} +EXPORT_SYMBOL(clk_bulk_get_all); + #ifdef CONFIG_HAVE_CLK_PREPARE /** diff --git
a/include/linux/clk.h b/include/linux/clk.h index 4f750c481b82..e9433c76757b 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -312,7 +312,26 @@ struct clk *clk_get(struct device *dev, const char *id); */ int __must_check clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks); - +/** + * clk_bulk_get_all - lookup and obtain all available references to clock + * producer. + * @dev: device for clock "consumer" + * @clks: pointer to the clk_bulk_data table of consumer + * + * This helper function allows drivers to get all clk consumers in one + * operation. If any of the clk cannot be acquired then any clks + * that were obtained will be freed before returning to the caller. + * + * Returns a positive value for the number of clocks obtained while the + * clock references are stored in the clk_bulk_data table in @clks field. + * Returns 0 if there are none and a negative value if something failed. + * + * Drivers must assume that the clock source is not enabled. + * + * clk_bulk_get_all should not be called from within interrupt context. + */ +int __must_check clk_bulk_get_all(struct device *dev, + struct clk_bulk_data **clks); /** * devm_clk_bulk_get - managed get multiple clk consumers * @dev: device for clock "consumer" @@ -487,6 +506,19 @@ void clk_put(struct clk *clk); */ void clk_bulk_put(int num_clks, struct clk_bulk_data *clks); +/** + * clk_bulk_put_all - "free" all the clock sources + * @num_clks: the number of clk_bulk_data + * @clks: the clk_bulk_data table of consumer + * + * Note: drivers must ensure that all clk_bulk_enable calls made on this + * clock source are balanced by clk_bulk_disable calls prior to calling + * this function. + * + * clk_bulk_put_all should not be called from within interrupt context. + */ +void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks); + /** * devm_clk_put - "free" a managed clock source * @dev: device used to acquire the clock @@ -642,6 +674,12 @@ static inline int __must_check clk_bulk_get(struct device *dev, int num_clks, return 0; } +static inline int __must_check clk_bulk_get_all(struct device *dev, + struct clk_bulk_data **clks) +{ + return 0; +} + static inline struct clk *devm_clk_get(struct device *dev, const char *id) { return NULL; @@ -663,6 +701,8 @@ static inline void clk_put(struct clk *clk) {} static inline void clk_bulk_put(int num_clks, struct clk_bulk_data *clks) {} +static inline void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) {} + static inline void devm_clk_put(struct device *dev, struct clk *clk) {} -- cgit v1.2.3-71-gd317 From f08c2e2865f6f9b172f37d7bbf24716f9ebad553 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 31 Aug 2018 12:45:55 +0800 Subject: clk: add managed version of clk_bulk_get_all This patch introduces the managed version of clk_bulk_get_all.
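A short sketch of the intended use in a driver probe (the driver itself is hypothetical; error handling is trimmed):

	#include <linux/clk.h>
	#include <linux/platform_device.h>

	static int my_probe(struct platform_device *pdev)
	{
		struct clk_bulk_data *clks;
		int num_clks, ret;

		/* Grab every clock listed in the device's DT node; the
		 * references are released automatically on unbind. */
		num_clks = devm_clk_bulk_get_all(&pdev->dev, &clks);
		if (num_clks < 0)
			return num_clks;
		/* num_clks may be 0 if the node lists no clocks */

		ret = clk_bulk_prepare(num_clks, clks);
		if (ret)
			return ret;

		ret = clk_bulk_enable(num_clks, clks);
		if (ret) {
			clk_bulk_unprepare(num_clks, clks);
			return ret;
		}

		/* ... device setup ... */
		return 0;
	}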
Cc: Michael Turquette Cc: Stephen Boyd Tested-by: Thor Thayer Signed-off-by: Dong Aisheng Signed-off-by: Stephen Boyd --- drivers/clk/clk-devres.c | 24 ++++++++++++++++++++++ include/linux/clk.h | 23 +++++++++++++++++++++ 2 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index d854e26a8ddb..12c87457eca1 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -70,6 +70,30 @@ int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, } EXPORT_SYMBOL_GPL(devm_clk_bulk_get); +int __must_check devm_clk_bulk_get_all(struct device *dev, + struct clk_bulk_data **clks) +{ + struct clk_bulk_devres *devres; + int ret; + + devres = devres_alloc(devm_clk_bulk_release, + sizeof(*devres), GFP_KERNEL); + if (!devres) + return -ENOMEM; + + ret = clk_bulk_get_all(dev, &devres->clks); + if (ret > 0) { + *clks = devres->clks; + devres->num_clks = ret; + devres_add(dev, devres); + } else { + devres_free(devres); + } + + return ret; +} +EXPORT_SYMBOL_GPL(devm_clk_bulk_get_all); + static int devm_clk_match(struct device *dev, void *res, void *data) { struct clk **c = res; diff --git a/include/linux/clk.h b/include/linux/clk.h index e9433c76757b..c705271dc1f2 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -346,6 +346,22 @@ int __must_check clk_bulk_get_all(struct device *dev, */ int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks); +/** + * devm_clk_bulk_get_all - managed get multiple clk consumers + * @dev: device for clock "consumer" + * @clks: pointer to the clk_bulk_data table of consumer + * + * Returns a positive value for the number of clocks obtained while the + * clock references are stored in the clk_bulk_data table in @clks field. + * Returns 0 if there are none and a negative value if something failed. + * + * This helper function allows drivers to get several clk + * consumers in one operation with management; the clks will + * automatically be freed when the device is unbound. + */ + +int __must_check devm_clk_bulk_get_all(struct device *dev, + struct clk_bulk_data **clks); /** * devm_clk_get - lookup and obtain a managed reference to a clock producer. @@ -691,6 +707,13 @@ static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clk return 0; } +static inline int __must_check devm_clk_bulk_get_all(struct device *dev, + struct clk_bulk_data **clks) +{ + + return 0; +} + static inline struct clk *devm_get_clk_from_child(struct device *dev, struct device_node *np, const char *con_id) { -- cgit v1.2.3-71-gd317 From 99c228a994ec8b1580c43631866fd2c5440f5bfd Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 18 Oct 2018 14:22:55 +0300 Subject: fs: group frequently accessed fields of struct super_block together Kernel test robot reported [1] a 6% performance regression in a concurrent unlink(2) workload on commit 60f7ed8c7c4d ("fsnotify: send path type events to group with super block marks"). The performance test was run with no fsnotify marks at all on the data set, so the only extra instructions added by the offending commit are tests of the super_block fields s_fsnotify_{marks,mask}, and these tests happen on almost every single inode access. When adding those fields to the super_block struct, we did not give much thought to placing them on a hot cache line (we just placed them at the end of the struct). Re-organize struct super_block to try to keep some frequently accessed fields on the same cache line.
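The layout idea, shown on a toy structure (illustrative only, not the real super_block; assumes 64-byte cache lines and a 64-bit build):

	#include <stddef.h>

	/* Group fields that are read together so they share a cache line. */
	struct hot_cold_example {
		/* hot: read on almost every inode access */
		void *fs_info;
		unsigned int time_gran;
		unsigned int fsnotify_mask;
		void *fsnotify_marks;
		/* cold: rarely touched after mount */
		char id[32];
		unsigned char uuid[16];
	};

	/* Sanity check that the hot group fits in the first 64-byte line. */
	_Static_assert(offsetof(struct hot_cold_example, fsnotify_marks) < 64,
		       "hot fields should stay within the first cache line");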
Move the frequently accessed fields s_fsnotify_{marks,mask} near the frequently accessed fields s_fs_info,s_time_gran, while filling a 64bit alignment hole after s_time_gran. Move the seldom accessed fields s_id,s_uuid,s_max_links,s_mode near the seldom accessed fields s_vfs_rename_mutex,s_subtype. Rong Chen confirmed that this patch solved the reported problem. [1] https://lkml.org/lkml/2018/9/30/206 Reported-by: kernel test robot Tested-by: kernel test robot Fixes: 1e6cb72399 ("fsnotify: add super block object type") Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fs.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c14801d0aa3..6da94deb957f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1393,17 +1393,26 @@ struct super_block { struct sb_writers s_writers; + /* + * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and + * s_fsnotify_marks together for cache efficiency. They are frequently + * accessed and rarely modified. + */ + void *s_fs_info; /* Filesystem private info */ + + /* Granularity of c/m/atime in ns (cannot be worse than a second) */ + u32 s_time_gran; +#ifdef CONFIG_FSNOTIFY + __u32 s_fsnotify_mask; + struct fsnotify_mark_connector __rcu *s_fsnotify_marks; +#endif + char s_id[32]; /* Informational name */ uuid_t s_uuid; /* UUID */ - void *s_fs_info; /* Filesystem private info */ unsigned int s_max_links; fmode_t s_mode; - /* Granularity of c/m/atime in ns. - Cannot be worse than a second */ - u32 s_time_gran; - /* * The next field is for VFS *only*. No filesystems have any business * even looking at it. You had been warned. @@ -1464,11 +1473,6 @@ struct super_block { spinlock_t s_inode_wblist_lock; struct list_head s_inodes_wb; /* writeback inodes */ - -#ifdef CONFIG_FSNOTIFY - __u32 s_fsnotify_mask; - struct fsnotify_mark_connector __rcu *s_fsnotify_marks; -#endif } __randomize_layout; /* Helper functions so that in most cases filesystems will -- cgit v1.2.3-71-gd317 From 3952105df4723abbd36b57e88c8dad42cf6c8b59 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Wed, 17 Oct 2018 19:25:23 +0530 Subject: remoteproc: Introduce custom dump function for each remoteproc segment Introduce custom dump function and private data per remoteproc dump segment. 
The dump function is responsible for filling the device memory segment associated with the coredump. Signed-off-by: Sibi Sankar Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 18 +++++++++++------- include/linux/remoteproc.h | 6 ++++++ 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index f77a42f6a8aa..6bed40d01dbf 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -1508,14 +1508,18 @@ static void rproc_coredump(struct rproc *rproc) phdr->p_flags = PF_R | PF_W | PF_X; phdr->p_align = 0; - ptr = rproc_da_to_va(rproc, segment->da, segment->size); - if (!ptr) { - dev_err(&rproc->dev, - "invalid coredump segment (%pad, %zu)\n", - &segment->da, segment->size); - memset(data + offset, 0xff, segment->size); + if (segment->dump) { + segment->dump(rproc, segment, data + offset); } else { - memcpy(data + offset, ptr, segment->size); + ptr = rproc_da_to_va(rproc, segment->da, segment->size); + if (!ptr) { + dev_err(&rproc->dev, + "invalid coredump segment (%pad, %zu)\n", + &segment->da, segment->size); + memset(data + offset, 0xff, segment->size); + } else { + memcpy(data + offset, ptr, segment->size); + } } offset += phdr->p_filesz; diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 8bb0cf0416f1..2d036adab7ff 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -412,6 +412,9 @@ enum rproc_crash_type { * @node: list node related to the rproc segment list * @da: device address of the segment * @size: size of the segment + * @priv: private data associated with the dump_segment + * @dump: custom dump function to fill device memory segment associated + * with coredump */ struct rproc_dump_segment { struct list_head node; dma_addr_t da; size_t size; + void *priv; + void (*dump)(struct rproc *rproc, struct rproc_dump_segment *segment, + void *dest); loff_t offset; }; -- cgit v1.2.3-71-gd317 From ab8f873bb90da7bbe40e2f41c92a4971c4f0dc76 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Wed, 17 Oct 2018 19:25:24 +0530 Subject: remoteproc: Add mechanism for custom dump function assignment This patch adds a mechanism for assigning each rproc dump segment a custom dump function and private data. The dump function is to be called for each rproc segment during coredump, if assigned.
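A hedged sketch of a driver using the new hook; the accessor my_read_device_memory() and struct my_priv are hypothetical, while the registration call matches the prototype added below:

	#include <linux/remoteproc.h>

	struct my_priv;			/* hypothetical driver state */
	void my_read_device_memory(struct my_priv *priv, dma_addr_t da,
				   void *dest, size_t size);

	/* Called once per registered segment while the core dump is built;
	 * fills 'dest' instead of the default rproc_da_to_va() copy. */
	static void my_segment_dump(struct rproc *rproc,
				    struct rproc_dump_segment *segment,
				    void *dest)
	{
		my_read_device_memory(segment->priv, segment->da, dest,
				      segment->size);
	}

	static int my_register_segment(struct rproc *rproc, dma_addr_t da,
				       size_t size, struct my_priv *priv)
	{
		return rproc_coredump_add_custom_segment(rproc, da, size,
							 my_segment_dump, priv);
	}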
Signed-off-by: Sibi Sankar [bjorn: reordered arguments to rproc_coredump_add_custom_segment()] Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 38 ++++++++++++++++++++++++++++++++++++ include/linux/remoteproc.h | 6 ++++++ 2 files changed, 44 insertions(+) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 6bed40d01dbf..54ec38fc5dca 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -1446,6 +1446,44 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size) } EXPORT_SYMBOL(rproc_coredump_add_segment); +/** + * rproc_coredump_add_custom_segment() - add custom coredump segment + * @rproc: handle of a remote processor + * @da: device address + * @size: size of segment + * @dumpfn: custom dump function called for each segment during coredump + * @priv: private data + * + * Add device memory to the list of segments to be included in the coredump + * and associate the segment with the given custom dump function and private + * data. + * + * Return: 0 on success, negative errno on error. + */ +int rproc_coredump_add_custom_segment(struct rproc *rproc, + dma_addr_t da, size_t size, + void (*dumpfn)(struct rproc *rproc, + struct rproc_dump_segment *segment, + void *dest), + void *priv) +{ + struct rproc_dump_segment *segment; + + segment = kzalloc(sizeof(*segment), GFP_KERNEL); + if (!segment) + return -ENOMEM; + + segment->da = da; + segment->size = size; + segment->priv = priv; + segment->dump = dumpfn; + + list_add_tail(&segment->node, &rproc->dump_segments); + + return 0; +} +EXPORT_SYMBOL(rproc_coredump_add_custom_segment); + /** * rproc_coredump() - perform coredump * @rproc: rproc handle diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 2d036adab7ff..507a2b524208 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -592,6 +592,12 @@ int rproc_boot(struct rproc *rproc); void rproc_shutdown(struct rproc *rproc); void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type); int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size); +int rproc_coredump_add_custom_segment(struct rproc *rproc, + dma_addr_t da, size_t size, + void (*dumpfn)(struct rproc *rproc, + struct rproc_dump_segment *segment, + void *dest), + void *priv); static inline struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev) { -- cgit v1.2.3-71-gd317 From f8d5d0cc145cc21bfc56ef807dc28102aebbf228 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 7 Nov 2017 16:30:10 -0500 Subject: xarray: Add definition of struct xarray This is a direct replacement for struct radix_tree_root. Some of the struct members have changed name; convert those, and use a #define so that radix_tree users continue to work without change.
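As a quick orientation for the new type, a usage sketch built only from what this patch adds (static definition, runtime init, and the pre-existing xa_lock helpers):

	#include <linux/xarray.h>

	/* File-scope array, initialised at compile time: */
	static DEFINE_XARRAY(my_cache);

	/* Embedded in a structure, initialised at runtime: */
	struct my_object {
		struct xarray pages;
	};

	static void my_object_init(struct my_object *obj)
	{
		xa_init(&obj->pages);

		/* The embedded xa_lock may also guard adjacent driver state: */
		xa_lock(&obj->pages);
		/* ... */
		xa_unlock(&obj->pages);
	}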
Signed-off-by: Matthew Wilcox Reviewed-by: Josef Bacik --- include/linux/radix-tree.h | 28 ++++-------- include/linux/xarray.h | 70 +++++++++++++++++++++++++++++ lib/Makefile | 2 +- lib/idr.c | 4 +- lib/radix-tree.c | 75 ++++++++++++++++---------------- lib/xarray.c | 44 +++++++++++++++++++ tools/testing/radix-tree/Makefile | 5 ++- tools/testing/radix-tree/linux/bug.h | 1 + tools/testing/radix-tree/linux/kconfig.h | 1 + tools/testing/radix-tree/multiorder.c | 6 +-- tools/testing/radix-tree/test.c | 6 +-- tools/testing/radix-tree/xarray.c | 7 +++ 12 files changed, 181 insertions(+), 68 deletions(-) create mode 100644 lib/xarray.c create mode 100644 tools/testing/radix-tree/linux/kconfig.h create mode 100644 tools/testing/radix-tree/xarray.c (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 60f3d8eb2cb7..0b080bd88ab7 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -30,6 +30,9 @@ #include #include +/* Keep unconverted code working */ +#define radix_tree_root xarray + /* * The bottom two bits of the slot determine how the remaining bits in the * slot are interpreted: @@ -92,36 +95,21 @@ struct radix_tree_node { unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; -/* The IDR tag is stored in the low bits of the GFP flags */ +/* The IDR tag is stored in the low bits of xa_flags */ #define ROOT_IS_IDR ((__force gfp_t)4) -/* The top bits of gfp_mask are used to store the root tags */ +/* The top bits of xa_flags are used to store the root tags */ #define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT) -struct radix_tree_root { - spinlock_t xa_lock; - gfp_t gfp_mask; - struct radix_tree_node __rcu *rnode; -}; - -#define RADIX_TREE_INIT(name, mask) { \ - .xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock), \ - .gfp_mask = (mask), \ - .rnode = NULL, \ -} +#define RADIX_TREE_INIT(name, mask) XARRAY_INIT(name, mask) #define RADIX_TREE(name, mask) \ struct radix_tree_root name = RADIX_TREE_INIT(name, mask) -#define INIT_RADIX_TREE(root, mask) \ -do { \ - spin_lock_init(&(root)->xa_lock); \ - (root)->gfp_mask = (mask); \ - (root)->rnode = NULL; \ -} while (0) +#define INIT_RADIX_TREE(root, mask) xa_init_flags(root, mask) static inline bool radix_tree_empty(const struct radix_tree_root *root) { - return root->rnode == NULL; + return root->xa_head == NULL; } /** diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 4d1cd7a083e8..9122cf8bf52a 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -10,6 +10,8 @@ */ #include +#include +#include #include #include @@ -153,6 +155,74 @@ static inline bool xa_is_internal(const void *entry) return ((unsigned long)entry & 3) == 2; } +/** + * struct xarray - The anchor of the XArray. + * @xa_lock: Lock that protects the contents of the XArray. + * + * To use the xarray, define it statically or embed it in your data structure. + * It is a very small data structure, so it does not usually make sense to + * allocate it separately and keep a pointer to it in your data structure. + * + * You may use the xa_lock to protect your own data structures as well. + */ +/* + * If all of the entries in the array are NULL, @xa_head is a NULL pointer. + * If the only non-NULL entry in the array is at index 0, @xa_head is that + * entry. If any other entry in the array is non-NULL, @xa_head points + * to an @xa_node. + */ +struct xarray { + spinlock_t xa_lock; +/* private: The rest of the data structure is not to be used directly. 
*/ + gfp_t xa_flags; + void __rcu * xa_head; +}; + +#define XARRAY_INIT(name, flags) { \ + .xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock), \ + .xa_flags = flags, \ + .xa_head = NULL, \ +} + +/** + * DEFINE_XARRAY_FLAGS() - Define an XArray with custom flags. + * @name: A string that names your XArray. + * @flags: XA_FLAG values. + * + * This is intended for file scope definitions of XArrays. It declares + * and initialises an empty XArray with the chosen name and flags. It is + * equivalent to calling xa_init_flags() on the array, but it does the + * initialisation at compiletime instead of runtime. + */ +#define DEFINE_XARRAY_FLAGS(name, flags) \ + struct xarray name = XARRAY_INIT(name, flags) + +/** + * DEFINE_XARRAY() - Define an XArray. + * @name: A string that names your XArray. + * + * This is intended for file scope definitions of XArrays. It declares + * and initialises an empty XArray with the chosen name. It is equivalent + * to calling xa_init() on the array, but it does the initialisation at + * compiletime instead of runtime. + */ +#define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0) + +void xa_init_flags(struct xarray *, gfp_t flags); + +/** + * xa_init() - Initialise an empty XArray. + * @xa: XArray. + * + * An empty XArray is full of NULL entries. + * + * Context: Any context. + */ +static inline void xa_init(struct xarray *xa) +{ + xa_init_flags(xa, 0); +} + #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) #define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) diff --git a/lib/Makefile b/lib/Makefile index ca3f7ebb900d..057385f1f145 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -18,7 +18,7 @@ KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ - rbtree.o radix-tree.o timerqueue.o\ + rbtree.o radix-tree.o timerqueue.o xarray.o \ idr.o int_sqrt.o extable.o \ sha1.o chacha20.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ diff --git a/lib/idr.c b/lib/idr.c index 88419fbc5737..9a366b5be2c2 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -39,8 +39,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, unsigned int base = idr->idr_base; unsigned int id = *nextid; - if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR))) - idr->idr_rt.gfp_mask |= IDR_RT_MARKER; + if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR))) + idr->idr_rt.xa_flags |= IDR_RT_MARKER; id = (id < base) ? 
0 : id - base; radix_tree_iter_init(&iter, id); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 59b28111eabc..299d4bdba109 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -124,7 +124,7 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent, static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) { - return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK); + return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK); } static inline void tag_set(struct radix_tree_node *node, unsigned int tag, @@ -147,32 +147,32 @@ static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) { - root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); + root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); } static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) { - root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); + root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); } static inline void root_tag_clear_all(struct radix_tree_root *root) { - root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1; + root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1); } static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) { - return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT)); + return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT)); } static inline unsigned root_tags_get(const struct radix_tree_root *root) { - return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT; + return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT; } static inline bool is_idr(const struct radix_tree_root *root) { - return !!(root->gfp_mask & ROOT_IS_IDR); + return !!(root->xa_flags & ROOT_IS_IDR); } /* @@ -291,12 +291,12 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) /* For debug */ static void radix_tree_dump(struct radix_tree_root *root) { - pr_debug("radix root: %p rnode %p tags %x\n", - root, root->rnode, - root->gfp_mask >> ROOT_TAG_SHIFT); - if (!radix_tree_is_internal_node(root->rnode)) + pr_debug("radix root: %p xa_head %p tags %x\n", + root, root->xa_head, + root->xa_flags >> ROOT_TAG_SHIFT); + if (!radix_tree_is_internal_node(root->xa_head)) return; - dump_node(entry_to_node(root->rnode), 0); + dump_node(entry_to_node(root->xa_head), 0); } static void dump_ida_node(void *entry, unsigned long index) @@ -340,9 +340,9 @@ static void dump_ida_node(void *entry, unsigned long index) static void ida_dump(struct ida *ida) { struct radix_tree_root *root = &ida->ida_rt; - pr_debug("ida: %p node %p free %d\n", ida, root->rnode, - root->gfp_mask >> ROOT_TAG_SHIFT); - dump_ida_node(root->rnode, 0); + pr_debug("ida: %p node %p free %d\n", ida, root->xa_head, + root->xa_flags >> ROOT_TAG_SHIFT); + dump_ida_node(root->xa_head, 0); } #endif @@ -576,7 +576,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) static unsigned radix_tree_load_root(const struct radix_tree_root *root, struct radix_tree_node **nodep, unsigned long *maxindex) { - struct radix_tree_node *node = rcu_dereference_raw(root->rnode); + struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); *nodep = node; @@ -605,7 +605,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, while (index > shift_maxindex(maxshift)) maxshift += RADIX_TREE_MAP_SHIFT; - entry = rcu_dereference_raw(root->rnode); + entry = rcu_dereference_raw(root->xa_head); if (!entry 
&& (!is_idr(root) || root_tag_get(root, IDR_FREE))) goto out; @@ -633,7 +633,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, if (radix_tree_is_internal_node(entry)) { entry_to_node(entry)->parent = node; } else if (xa_is_value(entry)) { - /* Moving an exceptional root->rnode to a node */ + /* Moving an exceptional root->xa_head to a node */ node->exceptional = 1; } /* @@ -642,7 +642,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, */ node->slots[0] = (void __rcu *)entry; entry = node_to_entry(node); - rcu_assign_pointer(root->rnode, entry); + rcu_assign_pointer(root->xa_head, entry); shift += RADIX_TREE_MAP_SHIFT; } while (shift <= maxshift); out: @@ -659,7 +659,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, bool shrunk = false; for (;;) { - struct radix_tree_node *node = rcu_dereference_raw(root->rnode); + struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); struct radix_tree_node *child; if (!radix_tree_is_internal_node(node)) @@ -695,9 +695,9 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, * moving the node from one part of the tree to another: if it * was safe to dereference the old pointer to it * (node->slots[0]), it will be safe to dereference the new - * one (root->rnode) as far as dependent read barriers go. + * one (root->xa_head) as far as dependent read barriers go. */ - root->rnode = (void __rcu *)child; + root->xa_head = (void __rcu *)child; if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) root_tag_clear(root, IDR_FREE); @@ -745,9 +745,8 @@ static bool delete_node(struct radix_tree_root *root, if (node->count) { if (node_to_entry(node) == - rcu_dereference_raw(root->rnode)) - deleted |= radix_tree_shrink(root, - update_node); + rcu_dereference_raw(root->xa_head)) + deleted |= radix_tree_shrink(root, update_node); return deleted; } @@ -762,7 +761,7 @@ static bool delete_node(struct radix_tree_root *root, */ if (!is_idr(root)) root_tag_clear_all(root); - root->rnode = NULL; + root->xa_head = NULL; } WARN_ON_ONCE(!list_empty(&node->private_list)); @@ -787,7 +786,7 @@ static bool delete_node(struct radix_tree_root *root, * at position @index in the radix tree @root. * * Until there is more than one item in the tree, no nodes are - * allocated and @root->rnode is used as a direct slot instead of + * allocated and @root->xa_head is used as a direct slot instead of * pointing to a node, in which case *@nodep will be NULL. * * Returns -ENOMEM, or 0 for success. @@ -797,7 +796,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, void __rcu ***slotp) { struct radix_tree_node *node = NULL, *child; - void __rcu **slot = (void __rcu **)&root->rnode; + void __rcu **slot = (void __rcu **)&root->xa_head; unsigned long maxindex; unsigned int shift, offset = 0; unsigned long max = index | ((1UL << order) - 1); @@ -813,7 +812,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, if (error < 0) return error; shift = error; - child = rcu_dereference_raw(root->rnode); + child = rcu_dereference_raw(root->xa_head); } while (shift > order) { @@ -1004,7 +1003,7 @@ EXPORT_SYMBOL(__radix_tree_insert); * tree @root. * * Until there is more than one item in the tree, no nodes are - * allocated and @root->rnode is used as a direct slot instead of + * allocated and @root->xa_head is used as a direct slot instead of * pointing to a node, in which case *@nodep will be NULL. 
*/ void *__radix_tree_lookup(const struct radix_tree_root *root, @@ -1017,7 +1016,7 @@ void *__radix_tree_lookup(const struct radix_tree_root *root, restart: parent = NULL; - slot = (void __rcu **)&root->rnode; + slot = (void __rcu **)&root->xa_head; radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return NULL; @@ -1168,9 +1167,9 @@ void __radix_tree_replace(struct radix_tree_root *root, /* * This function supports replacing exceptional entries and * deleting entries, but that needs accounting against the - * node unless the slot is root->rnode. + * node unless the slot is root->xa_head. */ - WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) && + WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) && (count || exceptional)); replace_slot(slot, item, node, count, exceptional); @@ -1722,7 +1721,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, iter->tags = 1; iter->node = NULL; __set_iter_shift(iter, 0); - return (void __rcu **)&root->rnode; + return (void __rcu **)&root->xa_head; } do { @@ -2109,7 +2108,7 @@ void __rcu **idr_get_free(struct radix_tree_root *root, unsigned long max) { struct radix_tree_node *node = NULL, *child; - void __rcu **slot = (void __rcu **)&root->rnode; + void __rcu **slot = (void __rcu **)&root->xa_head; unsigned long maxindex, start = iter->next_index; unsigned int shift, offset = 0; @@ -2125,7 +2124,7 @@ void __rcu **idr_get_free(struct radix_tree_root *root, if (error < 0) return ERR_PTR(error); shift = error; - child = rcu_dereference_raw(root->rnode); + child = rcu_dereference_raw(root->xa_head); } if (start == 0 && shift == 0) shift = RADIX_TREE_MAP_SHIFT; @@ -2190,10 +2189,10 @@ void __rcu **idr_get_free(struct radix_tree_root *root, */ void idr_destroy(struct idr *idr) { - struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode); + struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head); if (radix_tree_is_internal_node(node)) radix_tree_free_nodes(node); - idr->idr_rt.rnode = NULL; + idr->idr_rt.xa_head = NULL; root_tag_set(&idr->idr_rt, IDR_FREE); } EXPORT_SYMBOL(idr_destroy); diff --git a/lib/xarray.c b/lib/xarray.c new file mode 100644 index 000000000000..862f4c64c754 --- /dev/null +++ b/lib/xarray.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * XArray implementation + * Copyright (c) 2017 Microsoft Corporation + * Author: Matthew Wilcox + */ + +#include <linux/export.h> +#include <linux/xarray.h> + +/* + * Coding conventions in this file: + * + * @xa is used to refer to the entire xarray. + * @xas is the 'xarray operation state'. It may be either a pointer to + * an xa_state, or an xa_state stored on the stack. This is an unfortunate + * ambiguity. + * @index is the index of the entry being operated on + * @mark is an xa_mark_t; a small number indicating one of the mark bits. + * @node refers to an xa_node; usually the primary one being operated on by + * this function. + * @offset is the index into the slots array inside an xa_node. + * @parent refers to the @xa_node closer to the head than @node. + * @entry refers to something stored in a slot in the xarray + */ + +/** + * xa_init_flags() - Initialise an empty XArray with flags. + * @xa: XArray. + * @flags: XA_FLAG values. + * + * If you need to initialise an XArray with special flags (eg you need + * to take the lock from interrupt context), use this function instead + * of xa_init(). + * + * Context: Any context.
+ */ +void xa_init_flags(struct xarray *xa, gfp_t flags) +{ + spin_lock_init(&xa->xa_lock); + xa->xa_flags = flags; + xa->xa_head = NULL; +} +EXPORT_SYMBOL(xa_init_flags); diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 12adcf9ffe86..c0cf1c79efd5 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -5,7 +5,7 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \ LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder -CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o +CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o @@ -35,6 +35,7 @@ vpath %.c ../../lib $(OFILES): Makefile *.h */*.h generated/map-shift.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ + ../../../include/linux/xarray.h \ ../../../include/linux/radix-tree.h \ ../../../include/linux/idr.h @@ -44,6 +45,8 @@ radix-tree.c: ../../../lib/radix-tree.c idr.c: ../../../lib/idr.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ +xarray.o: ../../../lib/xarray.c + generated/map-shift.h: @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \ diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h index 23b8ed52f8c8..03dc8a57eb99 100644 --- a/tools/testing/radix-tree/linux/bug.h +++ b/tools/testing/radix-tree/linux/bug.h @@ -1 +1,2 @@ +#include #include "asm/bug.h" diff --git a/tools/testing/radix-tree/linux/kconfig.h b/tools/testing/radix-tree/linux/kconfig.h new file mode 100644 index 000000000000..6c8675859913 --- /dev/null +++ b/tools/testing/radix-tree/linux/kconfig.h @@ -0,0 +1 @@ +#include "../../../../include/linux/kconfig.h" diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 2b4f4dba1882..080aea450430 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -192,13 +192,13 @@ static void multiorder_shrink(unsigned long index, int order) assert(item_insert_order(&tree, 0, order) == 0); - node = tree.rnode; + node = tree.xa_head; assert(item_insert(&tree, index) == 0); - assert(node != tree.rnode); + assert(node != tree.xa_head); assert(item_delete(&tree, index) != 0); - assert(node == tree.rnode); + assert(node == tree.xa_head); for (i = 0; i < max; i++) { struct item *item = item_lookup(&tree, i); diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 62de66c314b7..70ddf964d51c 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -281,7 +281,7 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag) { - struct radix_tree_node *node = root->rnode; + struct radix_tree_node *node = root->xa_head; if (!radix_tree_is_internal_node(node)) return; verify_node(node, tag, !!root_tag_get(root, tag)); @@ -311,13 +311,13 @@ void item_kill_tree(struct radix_tree_root *root) } } assert(radix_tree_gang_lookup(root, (void **)items, 0, 32) == 0); - assert(root->rnode == NULL); + assert(root->xa_head == NULL); } void tree_verify_min_height(struct radix_tree_root *root, int maxindex) { unsigned shift; - struct radix_tree_node *node = root->rnode; + struct radix_tree_node *node = 
root->xa_head; if (!radix_tree_is_internal_node(node)) { assert(maxindex == 0); return; diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c new file mode 100644 index 000000000000..9bbd667172a7 --- /dev/null +++ b/tools/testing/radix-tree/xarray.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * xarray.c: Userspace shim for XArray test-suite + * Copyright (c) 2018 Matthew Wilcox + */ + +#include "../../../lib/xarray.c" -- cgit v1.2.3-71-gd317 From 01959dfe771c6893365482ec78dc1d9cbbbe6de8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 9 Nov 2017 09:23:56 -0500 Subject: xarray: Define struct xa_node This is a direct replacement for struct radix_tree_node. A couple of struct members have changed name, so convert those. Use a #define so that radix tree users continue to work without change. Signed-off-by: Matthew Wilcox Reviewed-by: Josef Bacik --- include/linux/radix-tree.h | 29 +++------------------ include/linux/xarray.h | 27 ++++++++++++++++++++ lib/radix-tree.c | 48 +++++++++++++++++------------------ mm/workingset.c | 16 ++++++------ tools/testing/radix-tree/multiorder.c | 30 +++++++++++----------- 5 files changed, 77 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0b080bd88ab7..15388b7e38b9 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -32,6 +32,7 @@ /* Keep unconverted code working */ #define radix_tree_root xarray +#define radix_tree_node xa_node /* * The bottom two bits of the slot determine how the remaining bits in the @@ -60,41 +61,17 @@ static inline bool radix_tree_is_internal_node(void *ptr) /*** radix-tree API starts here ***/ -#define RADIX_TREE_MAX_TAGS 3 - #define RADIX_TREE_MAP_SHIFT XA_CHUNK_SHIFT #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) #define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) -#define RADIX_TREE_TAG_LONGS \ - ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) +#define RADIX_TREE_MAX_TAGS XA_MAX_MARKS +#define RADIX_TREE_TAG_LONGS XA_MARK_LONGS #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ RADIX_TREE_MAP_SHIFT)) -/* - * @count is the count of every non-NULL element in the ->slots array - * whether that is a value entry, a retry entry, a user pointer, - * a sibling entry or a pointer to the next level of the tree. - * @exceptional is the count of every element in ->slots which is - * either a value entry or a sibling of a value entry. 
- */ -struct radix_tree_node { - unsigned char shift; /* Bits remaining in each slot */ - unsigned char offset; /* Slot offset in parent */ - unsigned char count; /* Total entry count */ - unsigned char exceptional; /* Exceptional entry count */ - struct radix_tree_node *parent; /* Used when ascending tree */ - struct radix_tree_root *root; /* The tree we belong to */ - union { - struct list_head private_list; /* For tree user */ - struct rcu_head rcu_head; /* Used when freeing node */ - }; - void __rcu *slots[RADIX_TREE_MAP_SIZE]; - unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; -}; - /* The IDR tag is stored in the low bits of xa_flags */ #define ROOT_IS_IDR ((__force gfp_t)4) /* The top bits of xa_flags are used to store the root tags */ diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 9122cf8bf52a..52141dfc5a90 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -252,6 +252,33 @@ static inline void xa_init(struct xarray *xa) #endif #define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT) #define XA_CHUNK_MASK (XA_CHUNK_SIZE - 1) +#define XA_MAX_MARKS 3 +#define XA_MARK_LONGS DIV_ROUND_UP(XA_CHUNK_SIZE, BITS_PER_LONG) + +/* + * @count is the count of every non-NULL element in the ->slots array + * whether that is a value entry, a retry entry, a user pointer, + * a sibling entry or a pointer to the next level of the tree. + * @nr_values is the count of every element in ->slots which is + * either a value entry or a sibling of a value entry. + */ +struct xa_node { + unsigned char shift; /* Bits remaining in each slot */ + unsigned char offset; /* Slot offset in parent */ + unsigned char count; /* Total entry count */ + unsigned char nr_values; /* Value entry count */ + struct xa_node __rcu *parent; /* NULL at top of tree */ + struct xarray *array; /* The array we belong to */ + union { + struct list_head private_list; /* For tree user */ + struct rcu_head rcu_head; /* Used when freeing node */ + }; + void __rcu *slots[XA_CHUNK_SIZE]; + union { + unsigned long tags[XA_MAX_MARKS][XA_MARK_LONGS]; + unsigned long marks[XA_MAX_MARKS][XA_MARK_LONGS]; + }; +}; /* Private */ static inline bool xa_is_node(const void *entry) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 299d4bdba109..8a568cea1237 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -260,11 +260,11 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) { unsigned long i; - pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n", + pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d nr_values %d\n", node, node->offset, index, index | node_maxindex(node), node->parent, node->tags[0][0], node->tags[1][0], node->tags[2][0], - node->shift, node->count, node->exceptional); + node->shift, node->count, node->nr_values); for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { unsigned long first = index | (i << node->shift); @@ -354,7 +354,7 @@ static struct radix_tree_node * radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, struct radix_tree_root *root, unsigned int shift, unsigned int offset, - unsigned int count, unsigned int exceptional) + unsigned int count, unsigned int nr_values) { struct radix_tree_node *ret = NULL; @@ -401,9 +401,9 @@ out: ret->shift = shift; ret->offset = offset; ret->count = count; - ret->exceptional = exceptional; + ret->nr_values = nr_values; ret->parent = parent; - ret->root = root; + ret->array = root; } return ret; } @@ -633,8 
+633,8 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, if (radix_tree_is_internal_node(entry)) { entry_to_node(entry)->parent = node; } else if (xa_is_value(entry)) { - /* Moving an exceptional root->xa_head to a node */ - node->exceptional = 1; + /* Moving a value entry root->xa_head to a node */ + node->nr_values = 1; } /* * entry was already in the radix tree, so we do not need @@ -928,12 +928,12 @@ static inline int insert_entries(struct radix_tree_node *node, if (xa_is_node(old)) radix_tree_free_nodes(old); if (xa_is_value(old)) - node->exceptional--; + node->nr_values--; } if (node) { node->count += n; if (xa_is_value(item)) - node->exceptional += n; + node->nr_values += n; } return n; } @@ -947,7 +947,7 @@ static inline int insert_entries(struct radix_tree_node *node, if (node) { node->count++; if (xa_is_value(item)) - node->exceptional++; + node->nr_values++; } return 1; } @@ -1083,7 +1083,7 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) EXPORT_SYMBOL(radix_tree_lookup); static inline void replace_sibling_entries(struct radix_tree_node *node, - void __rcu **slot, int count, int exceptional) + void __rcu **slot, int count, int values) { #ifdef CONFIG_RADIX_TREE_MULTIORDER unsigned offset = get_slot_offset(node, slot); @@ -1096,18 +1096,18 @@ static inline void replace_sibling_entries(struct radix_tree_node *node, node->slots[offset] = NULL; node->count--; } - node->exceptional += exceptional; + node->nr_values += values; } #endif } static void replace_slot(void __rcu **slot, void *item, - struct radix_tree_node *node, int count, int exceptional) + struct radix_tree_node *node, int count, int values) { - if (node && (count || exceptional)) { + if (node && (count || values)) { node->count += count; - node->exceptional += exceptional; - replace_sibling_entries(node, slot, count, exceptional); + node->nr_values += values; + replace_sibling_entries(node, slot, count, values); } rcu_assign_pointer(*slot, item); @@ -1161,17 +1161,17 @@ void __radix_tree_replace(struct radix_tree_root *root, radix_tree_update_node_t update_node) { void *old = rcu_dereference_raw(*slot); - int exceptional = !!xa_is_value(item) - !!xa_is_value(old); + int values = !!xa_is_value(item) - !!xa_is_value(old); int count = calculate_count(root, node, slot, item, old); /* - * This function supports replacing exceptional entries and + * This function supports replacing value entries and * deleting entries, but that needs accounting against the * node unless the slot is root->xa_head. */ WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) && - (count || exceptional)); - replace_slot(slot, item, node, count, exceptional); + (count || values)); + replace_slot(slot, item, node, count, values); if (!node) return; @@ -1193,7 +1193,7 @@ void __radix_tree_replace(struct radix_tree_root *root, * across slot lookup and replacement. * * NOTE: This cannot be used to switch between non-entries (empty slots), - * regular entries, and exceptional entries, as that requires accounting + * regular entries, and value entries, as that requires accounting * inside the radix tree node. When switching from one type of entry or * deleting, use __radix_tree_lookup() and __radix_tree_replace() or * radix_tree_iter_replace(). 
@@ -1301,7 +1301,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY); } rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY); - parent->exceptional -= (end - offset); + parent->nr_values -= (end - offset); if (order == parent->shift) return 0; @@ -1961,7 +1961,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot) { void *old = rcu_dereference_raw(*slot); - int exceptional = xa_is_value(old) ? -1 : 0; + int values = xa_is_value(old) ? -1 : 0; unsigned offset = get_slot_offset(node, slot); int tag; @@ -1971,7 +1971,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) node_tag_clear(root, node, tag, offset); - replace_slot(slot, NULL, node, -1, exceptional); + replace_slot(slot, NULL, node, -1, values); return node && delete_node(root, node, NULL); } diff --git a/mm/workingset.c b/mm/workingset.c index bb109b3afac2..c201cbb8c00f 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -349,7 +349,7 @@ void workingset_update_node(struct radix_tree_node *node) * already where they should be. The list_empty() test is safe * as node->private_list is protected by the i_pages lock. */ - if (node->count && node->count == node->exceptional) { + if (node->count && node->count == node->nr_values) { if (list_empty(&node->private_list)) list_lru_add(&shadow_nodes, &node->private_list); } else { @@ -428,8 +428,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, * to reclaim, take the node off-LRU, and drop the lru_lock. */ - node = container_of(item, struct radix_tree_node, private_list); - mapping = container_of(node->root, struct address_space, i_pages); + node = container_of(item, struct xa_node, private_list); + mapping = container_of(node->array, struct address_space, i_pages); /* Coming from the list, invert the lock order */ if (!xa_trylock(&mapping->i_pages)) { @@ -446,25 +446,25 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, * no pages, so we expect to be able to remove them all and * delete and free the empty node afterwards. 
*/ - if (WARN_ON_ONCE(!node->exceptional)) + if (WARN_ON_ONCE(!node->nr_values)) goto out_invalid; - if (WARN_ON_ONCE(node->count != node->exceptional)) + if (WARN_ON_ONCE(node->count != node->nr_values)) goto out_invalid; for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { if (node->slots[i]) { if (WARN_ON_ONCE(!xa_is_value(node->slots[i]))) goto out_invalid; - if (WARN_ON_ONCE(!node->exceptional)) + if (WARN_ON_ONCE(!node->nr_values)) goto out_invalid; if (WARN_ON_ONCE(!mapping->nrexceptional)) goto out_invalid; node->slots[i] = NULL; - node->exceptional--; + node->nr_values--; node->count--; mapping->nrexceptional--; } } - if (WARN_ON_ONCE(node->exceptional)) + if (WARN_ON_ONCE(node->nr_values)) goto out_invalid; inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); __radix_tree_delete_node(&mapping->i_pages, node, diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 080aea450430..60786fa55302 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -393,7 +393,7 @@ static void multiorder_join2(unsigned order1, unsigned order2) radix_tree_insert(&tree, 1 << order2, xa_mk_value(5)); item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); assert(item2 == xa_mk_value(5)); - assert(node->exceptional == 1); + assert(node->nr_values == 1); item2 = radix_tree_lookup(&tree, 0); free(item2); @@ -401,7 +401,7 @@ static void multiorder_join2(unsigned order1, unsigned order2) radix_tree_join(&tree, 0, order1, item1); item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); assert(item2 == item1); - assert(node->exceptional == 0); + assert(node->nr_values == 0); item_kill_tree(&tree); } @@ -409,7 +409,7 @@ static void multiorder_join2(unsigned order1, unsigned order2) * This test revealed an accounting bug for value entries at one point. * Nodes were being freed back into the pool with an elevated exception count * by radix_tree_join() and then radix_tree_split() was failing to zero the - * count of exceptional entries. + * count of value entries. 
*/ static void multiorder_join3(unsigned int order) { @@ -433,7 +433,7 @@ static void multiorder_join3(unsigned int order) } __radix_tree_lookup(&tree, 0, &node, NULL); - assert(node->exceptional == node->count); + assert(node->nr_values == node->count); item_kill_tree(&tree); } @@ -520,7 +520,7 @@ static void __multiorder_split2(int old_order, int new_order) item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == xa_mk_value(5)); - assert(node->exceptional > 0); + assert(node->nr_values > 0); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { @@ -530,7 +530,7 @@ static void __multiorder_split2(int old_order, int new_order) item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item != xa_mk_value(5)); - assert(node->exceptional == 0); + assert(node->nr_values == 0); item_kill_tree(&tree); } @@ -547,7 +547,7 @@ static void __multiorder_split3(int old_order, int new_order) item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == xa_mk_value(5)); - assert(node->exceptional > 0); + assert(node->nr_values > 0); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { @@ -556,7 +556,7 @@ static void __multiorder_split3(int old_order, int new_order) item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == xa_mk_value(7)); - assert(node->exceptional > 0); + assert(node->nr_values > 0); item_kill_tree(&tree); @@ -564,7 +564,7 @@ static void __multiorder_split3(int old_order, int new_order) item = __radix_tree_lookup(&tree, 0, &node, NULL); assert(item == xa_mk_value(5)); - assert(node->exceptional > 0); + assert(node->nr_values > 0); radix_tree_split(&tree, 0, new_order); radix_tree_for_each_slot(slot, &tree, &iter, 0) { @@ -577,13 +577,13 @@ static void __multiorder_split3(int old_order, int new_order) item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL); assert(item == xa_mk_value(7)); - assert(node->count == node->exceptional); + assert(node->count == node->nr_values); do { node = node->parent; if (!node) break; assert(node->count == 1); - assert(node->exceptional == 0); + assert(node->nr_values == 0); } while (1); item_kill_tree(&tree); @@ -611,15 +611,15 @@ static void multiorder_account(void) __radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5)); __radix_tree_lookup(&tree, 0, &node, NULL); - assert(node->count == node->exceptional * 2); + assert(node->count == node->nr_values * 2); radix_tree_delete(&tree, 1 << 5); - assert(node->exceptional == 0); + assert(node->nr_values == 0); __radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5)); __radix_tree_lookup(&tree, 1 << 5, &node, &slot); - assert(node->count == node->exceptional * 2); + assert(node->count == node->nr_values * 2); __radix_tree_replace(&tree, node, slot, NULL, NULL); - assert(node->exceptional == 0); + assert(node->nr_values == 0); item_kill_tree(&tree); } -- cgit v1.2.3-71-gd317 From ad3d6c7263e368abdc151e1cc13dc78aa39cc7a7 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 7 Nov 2017 14:57:46 -0500 Subject: xarray: Add XArray load operation The xa_load function brings with it a lot of infrastructure; xa_empty(), xa_is_err(), and large chunks of the XArray advanced API that are used to implement xa_load. As the test-suite demonstrates, it is possible to use the XArray functions on a radix tree. The radix tree functions depend on the GFP flags being stored in the root of the tree, so it's not possible to use the radix tree functions on an XArray. 
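As a minimal usage sketch (not part of the diff below; the array name and
index are invented for illustration), a caller of the new function might
look like this:

    #include <linux/xarray.h>

    static DEFINE_XARRAY(example_array);

    /*
     * xa_load() takes and releases the RCU lock internally, so no
     * locking is needed here; NULL means no entry is stored at index.
     */
    static bool example_present(unsigned long index)
    {
            return xa_load(&example_array, index) != NULL;
    }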
Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 336 ++++++++++++++++++++++++++++++ lib/Kconfig.debug | 3 + lib/Makefile | 1 + lib/radix-tree.c | 43 ---- lib/test_xarray.c | 87 ++++++++ lib/xarray.c | 195 +++++++++++++++++ tools/include/linux/kernel.h | 1 + tools/testing/radix-tree/.gitignore | 1 + tools/testing/radix-tree/Makefile | 6 +- tools/testing/radix-tree/linux/kernel.h | 1 + tools/testing/radix-tree/linux/rcupdate.h | 2 + tools/testing/radix-tree/main.c | 1 + tools/testing/radix-tree/test.h | 1 + tools/testing/radix-tree/xarray.c | 28 +++ 14 files changed, 661 insertions(+), 45 deletions(-) create mode 100644 lib/test_xarray.c (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 52141dfc5a90..a0df8217068c 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -30,6 +32,10 @@ * * 0-62: Sibling entries * 256: Retry entry + * + * Errors are also represented as internal entries, but use the negative + * space (-4094 to -2). They're never stored in the slots array; only + * returned by the normal API. */ #define BITS_PER_XA_VALUE (BITS_PER_LONG - 1) @@ -155,6 +161,42 @@ static inline bool xa_is_internal(const void *entry) return ((unsigned long)entry & 3) == 2; } +/** + * xa_is_err() - Report whether an XArray operation returned an error + * @entry: Result from calling an XArray function + * + * If an XArray operation cannot complete an operation, it will return + * a special value indicating an error. This function tells you + * whether an error occurred; xa_err() tells you which error occurred. + * + * Context: Any context. + * Return: %true if the entry indicates an error. + */ +static inline bool xa_is_err(const void *entry) +{ + return unlikely(xa_is_internal(entry)); +} + +/** + * xa_err() - Turn an XArray result into an errno. + * @entry: Result from calling an XArray function. + * + * If an XArray operation cannot complete an operation, it will return + * a special pointer value which encodes an errno. This function extracts + * the errno from the pointer value, or returns 0 if the pointer does not + * represent an errno. + * + * Context: Any context. + * Return: A negative errno or 0. + */ +static inline int xa_err(void *entry) +{ + /* xa_to_internal() would not do sign extension. */ + if (xa_is_err(entry)) + return (long)entry >> 2; + return 0; +} + /** * struct xarray - The anchor of the XArray. * @xa_lock: Lock that protects the contents of the XArray. @@ -209,6 +251,7 @@ struct xarray { #define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0) void xa_init_flags(struct xarray *, gfp_t flags); +void *xa_load(struct xarray *, unsigned long index); /** * xa_init() - Initialise an empty XArray. @@ -223,6 +266,18 @@ static inline void xa_init(struct xarray *xa) xa_init_flags(xa, 0); } +/** + * xa_empty() - Determine if an array has any present entries. + * @xa: XArray. + * + * Context: Any context. + * Return: %true if the array contains only NULL pointers. 
+ */ +static inline bool xa_empty(const struct xarray *xa) +{ + return xa->xa_head == NULL; +} + #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) #define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) @@ -280,6 +335,65 @@ struct xa_node { }; }; +void xa_dump(const struct xarray *); +void xa_dump_node(const struct xa_node *); + +#ifdef XA_DEBUG +#define XA_BUG_ON(xa, x) do { \ + if (x) { \ + xa_dump(xa); \ + BUG(); \ + } \ + } while (0) +#define XA_NODE_BUG_ON(node, x) do { \ + if (x) { \ + if (node) xa_dump_node(node); \ + BUG(); \ + } \ + } while (0) +#else +#define XA_BUG_ON(xa, x) do { } while (0) +#define XA_NODE_BUG_ON(node, x) do { } while (0) +#endif + +/* Private */ +static inline void *xa_head(const struct xarray *xa) +{ + return rcu_dereference_check(xa->xa_head, + lockdep_is_held(&xa->xa_lock)); +} + +/* Private */ +static inline void *xa_head_locked(const struct xarray *xa) +{ + return rcu_dereference_protected(xa->xa_head, + lockdep_is_held(&xa->xa_lock)); +} + +/* Private */ +static inline void *xa_entry(const struct xarray *xa, + const struct xa_node *node, unsigned int offset) +{ + XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE); + return rcu_dereference_check(node->slots[offset], + lockdep_is_held(&xa->xa_lock)); +} + +/* Private */ +static inline void *xa_entry_locked(const struct xarray *xa, + const struct xa_node *node, unsigned int offset) +{ + XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE); + return rcu_dereference_protected(node->slots[offset], + lockdep_is_held(&xa->xa_lock)); +} + +/* Private */ +static inline struct xa_node *xa_to_node(const void *entry) +{ + return (struct xa_node *)((unsigned long)entry - 2); +} + /* Private */ static inline bool xa_is_node(const void *entry) { @@ -312,4 +426,226 @@ static inline bool xa_is_sibling(const void *entry) #define XA_RETRY_ENTRY xa_mk_internal(256) +/** + * xa_is_retry() - Is the entry a retry entry? + * @entry: Entry retrieved from the XArray + * + * Return: %true if the entry is a retry entry. + */ +static inline bool xa_is_retry(const void *entry) +{ + return unlikely(entry == XA_RETRY_ENTRY); +} + +/** + * typedef xa_update_node_t - A callback function from the XArray. + * @node: The node which is being processed + * + * This function is called every time the XArray updates the count of + * present and value entries in a node. It allows advanced users to + * maintain the private_list in the node. + * + * Context: The xa_lock is held and interrupts may be disabled. + * Implementations should not drop the xa_lock, nor re-enable + * interrupts. + */ +typedef void (*xa_update_node_t)(struct xa_node *node); + +/* + * The xa_state is opaque to its users. It contains various different pieces + * of state involved in the current operation on the XArray. It should be + * declared on the stack and passed between the various internal routines. + * The various elements in it should not be accessed directly, but only + * through the provided accessor functions. The below documentation is for + * the benefit of those working on the code, not for users of the XArray. + * + * @xa_node usually points to the xa_node containing the slot we're operating + * on (and @xa_offset is the offset in the slots array). If there is a + * single entry in the array at index 0, there are no allocated xa_nodes to + * point to, and so we store %NULL in @xa_node. @xa_node is set to + * the value %XAS_RESTART if the xa_state is not walked to the correct + * position in the tree of nodes for this operation. 
If an error occurs + * during an operation, it is set to an %XAS_ERROR value. If we run off the + * end of the allocated nodes, it is set to %XAS_BOUNDS. + */ +struct xa_state { + struct xarray *xa; + unsigned long xa_index; + unsigned char xa_shift; + unsigned char xa_sibs; + unsigned char xa_offset; + unsigned char xa_pad; /* Helps gcc generate better code */ + struct xa_node *xa_node; + struct xa_node *xa_alloc; + xa_update_node_t xa_update; +}; + +/* + * We encode errnos in the xas->xa_node. If an error has happened, we need to + * drop the lock to fix it, and once we've done so the xa_state is invalid. + */ +#define XA_ERROR(errno) ((struct xa_node *)(((unsigned long)errno << 2) | 2UL)) +#define XAS_BOUNDS ((struct xa_node *)1UL) +#define XAS_RESTART ((struct xa_node *)3UL) + +#define __XA_STATE(array, index, shift, sibs) { \ + .xa = array, \ + .xa_index = index, \ + .xa_shift = shift, \ + .xa_sibs = sibs, \ + .xa_offset = 0, \ + .xa_pad = 0, \ + .xa_node = XAS_RESTART, \ + .xa_alloc = NULL, \ + .xa_update = NULL \ +} + +/** + * XA_STATE() - Declare an XArray operation state. + * @name: Name of this operation state (usually xas). + * @array: Array to operate on. + * @index: Initial index of interest. + * + * Declare and initialise an xa_state on the stack. + */ +#define XA_STATE(name, array, index) \ + struct xa_state name = __XA_STATE(array, index, 0, 0) + +/** + * XA_STATE_ORDER() - Declare an XArray operation state. + * @name: Name of this operation state (usually xas). + * @array: Array to operate on. + * @index: Initial index of interest. + * @order: Order of entry. + * + * Declare and initialise an xa_state on the stack. This variant of + * XA_STATE() allows you to specify the 'order' of the element you + * want to operate on.` + */ +#define XA_STATE_ORDER(name, array, index, order) \ + struct xa_state name = __XA_STATE(array, \ + (index >> order) << order, \ + order - (order % XA_CHUNK_SHIFT), \ + (1U << (order % XA_CHUNK_SHIFT)) - 1) + +#define xas_marked(xas, mark) xa_marked((xas)->xa, (mark)) +#define xas_trylock(xas) xa_trylock((xas)->xa) +#define xas_lock(xas) xa_lock((xas)->xa) +#define xas_unlock(xas) xa_unlock((xas)->xa) +#define xas_lock_bh(xas) xa_lock_bh((xas)->xa) +#define xas_unlock_bh(xas) xa_unlock_bh((xas)->xa) +#define xas_lock_irq(xas) xa_lock_irq((xas)->xa) +#define xas_unlock_irq(xas) xa_unlock_irq((xas)->xa) +#define xas_lock_irqsave(xas, flags) \ + xa_lock_irqsave((xas)->xa, flags) +#define xas_unlock_irqrestore(xas, flags) \ + xa_unlock_irqrestore((xas)->xa, flags) + +/** + * xas_error() - Return an errno stored in the xa_state. + * @xas: XArray operation state. + * + * Return: 0 if no error has been noted. A negative errno if one has. + */ +static inline int xas_error(const struct xa_state *xas) +{ + return xa_err(xas->xa_node); +} + +/** + * xas_set_err() - Note an error in the xa_state. + * @xas: XArray operation state. + * @err: Negative error number. + * + * Only call this function with a negative @err; zero or positive errors + * will probably not behave the way you think they should. If you want + * to clear the error from an xa_state, use xas_reset(). + */ +static inline void xas_set_err(struct xa_state *xas, long err) +{ + xas->xa_node = XA_ERROR(err); +} + +/** + * xas_invalid() - Is the xas in a retry or error state? + * @xas: XArray operation state. + * + * Return: %true if the xas cannot be used for operations. 
+ */ +static inline bool xas_invalid(const struct xa_state *xas) +{ + return (unsigned long)xas->xa_node & 3; +} + +/** + * xas_valid() - Is the xas a valid cursor into the array? + * @xas: XArray operation state. + * + * Return: %true if the xas can be used for operations. + */ +static inline bool xas_valid(const struct xa_state *xas) +{ + return !xas_invalid(xas); +} + +/** + * xas_reset() - Reset an XArray operation state. + * @xas: XArray operation state. + * + * Resets the error or walk state of the @xas so future walks of the + * array will start from the root. Use this if you have dropped the + * xarray lock and want to reuse the xa_state. + * + * Context: Any context. + */ +static inline void xas_reset(struct xa_state *xas) +{ + xas->xa_node = XAS_RESTART; +} + +/** + * xas_retry() - Retry the operation if appropriate. + * @xas: XArray operation state. + * @entry: Entry from xarray. + * + * The advanced functions may sometimes return an internal entry, such as + * a retry entry or a zero entry. This function sets up the @xas to restart + * the walk from the head of the array if needed. + * + * Context: Any context. + * Return: true if the operation needs to be retried. + */ +static inline bool xas_retry(struct xa_state *xas, const void *entry) +{ + if (!xa_is_retry(entry)) + return false; + xas_reset(xas); + return true; +} + +void *xas_load(struct xa_state *); + +/** + * xas_reload() - Refetch an entry from the xarray. + * @xas: XArray operation state. + * + * Use this function to check that a previously loaded entry still has + * the same value. This is useful for the lockless pagecache lookup where + * we walk the array with only the RCU lock to protect us, lock the page, + * then check that the page hasn't moved since we looked it up. + * + * The caller guarantees that @xas is still valid. If it may be in an + * error or restart state, call xas_load() instead. + * + * Return: The entry at this location in the xarray. 
+ */ +static inline void *xas_reload(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + if (node) + return xa_entry(xas->xa, node, xas->xa_offset); + return xa_head(xas->xa); +} + #endif /* _LINUX_XARRAY_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4966c4fbe7f7..091155e12422 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1813,6 +1813,9 @@ config TEST_BITFIELD config TEST_UUID tristate "Test functions located in the uuid module at runtime" +config TEST_XARRAY + tristate "Test the XArray code at runtime" + config TEST_OVERFLOW tristate "Test check_*_overflow() functions at runtime" diff --git a/lib/Makefile b/lib/Makefile index 057385f1f145..e6f809556af5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o obj-$(CONFIG_TEST_UUID) += test_uuid.o +obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8a568cea1237..b8e961428484 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -256,49 +256,6 @@ static unsigned long next_index(unsigned long index, } #ifndef __KERNEL__ -static void dump_node(struct radix_tree_node *node, unsigned long index) -{ - unsigned long i; - - pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d nr_values %d\n", - node, node->offset, index, index | node_maxindex(node), - node->parent, - node->tags[0][0], node->tags[1][0], node->tags[2][0], - node->shift, node->count, node->nr_values); - - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { - unsigned long first = index | (i << node->shift); - unsigned long last = first | ((1UL << node->shift) - 1); - void *entry = node->slots[i]; - if (!entry) - continue; - if (entry == RADIX_TREE_RETRY) { - pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n", - i, first, last, node); - } else if (!radix_tree_is_internal_node(entry)) { - pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n", - entry, i, first, last, node); - } else if (xa_is_sibling(entry)) { - pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n", - entry, i, first, last, node, - node->slots[xa_to_sibling(entry)]); - } else { - dump_node(entry_to_node(entry), first); - } - } -} - -/* For debug */ -static void radix_tree_dump(struct radix_tree_root *root) -{ - pr_debug("radix root: %p xa_head %p tags %x\n", - root, root->xa_head, - root->xa_flags >> ROOT_TAG_SHIFT); - if (!radix_tree_is_internal_node(root->xa_head)) - return; - dump_node(entry_to_node(root->xa_head), 0); -} - static void dump_ida_node(void *entry, unsigned long index) { unsigned long i; diff --git a/lib/test_xarray.c b/lib/test_xarray.c new file mode 100644 index 000000000000..a7248b87617f --- /dev/null +++ b/lib/test_xarray.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * test_xarray.c: Test the XArray API + * Copyright (c) 2017-2018 Microsoft Corporation + * Author: Matthew Wilcox + */ + +#include +#include + +static unsigned int tests_run; +static unsigned int tests_passed; + +#ifndef XA_DEBUG +# ifdef __KERNEL__ +void xa_dump(const struct xarray *xa) { } +# endif +#undef XA_BUG_ON +#define XA_BUG_ON(xa, x) do { \ + tests_run++; \ + if (x) { \ + printk("BUG at %s:%d\n", __func__, __LINE__); \ + xa_dump(xa); \ + dump_stack(); \ + } else { \ + 
tests_passed++; \ + } \ +} while (0) +#endif + +static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + radix_tree_insert(xa, index, xa_mk_value(index)); + return NULL; +} + +static void xa_erase_index(struct xarray *xa, unsigned long index) +{ + radix_tree_delete(xa, index); +} + +static noinline void check_xa_load(struct xarray *xa) +{ + unsigned long i, j; + + for (i = 0; i < 1024; i++) { + for (j = 0; j < 1024; j++) { + void *entry = xa_load(xa, j); + if (j < i) + XA_BUG_ON(xa, xa_to_value(entry) != j); + else + XA_BUG_ON(xa, entry); + } + XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL); + } + + for (i = 0; i < 1024; i++) { + for (j = 0; j < 1024; j++) { + void *entry = xa_load(xa, j); + if (j >= i) + XA_BUG_ON(xa, xa_to_value(entry) != j); + else + XA_BUG_ON(xa, entry); + } + xa_erase_index(xa, i); + } + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static RADIX_TREE(array, GFP_KERNEL); + +static int xarray_checks(void) +{ + check_xa_load(&array); + + printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); + return (tests_run == tests_passed) ? 0 : -EINVAL; +} + +static void xarray_exit(void) +{ +} + +module_init(xarray_checks); +module_exit(xarray_exit); +MODULE_AUTHOR("Matthew Wilcox "); +MODULE_LICENSE("GPL"); diff --git a/lib/xarray.c b/lib/xarray.c index 862f4c64c754..19cfcbc69a68 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -24,6 +24,100 @@ * @entry refers to something stored in a slot in the xarray */ +/* extracts the offset within this node from the index */ +static unsigned int get_offset(unsigned long index, struct xa_node *node) +{ + return (index >> node->shift) & XA_CHUNK_MASK; +} + +/* move the index either forwards (find) or backwards (sibling slot) */ +static void xas_move_index(struct xa_state *xas, unsigned long offset) +{ + unsigned int shift = xas->xa_node->shift; + xas->xa_index &= ~XA_CHUNK_MASK << shift; + xas->xa_index += offset << shift; +} + +static void *set_bounds(struct xa_state *xas) +{ + xas->xa_node = XAS_BOUNDS; + return NULL; +} + +/* + * Starts a walk. If the @xas is already valid, we assume that it's on + * the right path and just return where we've got to. If we're in an + * error state, return NULL. If the index is outside the current scope + * of the xarray, return NULL without changing @xas->xa_node. Otherwise + * set @xas->xa_node to NULL and return the current head of the array. + */ +static void *xas_start(struct xa_state *xas) +{ + void *entry; + + if (xas_valid(xas)) + return xas_reload(xas); + if (xas_error(xas)) + return NULL; + + entry = xa_head(xas->xa); + if (!xa_is_node(entry)) { + if (xas->xa_index) + return set_bounds(xas); + } else { + if ((xas->xa_index >> xa_to_node(entry)->shift) > XA_CHUNK_MASK) + return set_bounds(xas); + } + + xas->xa_node = NULL; + return entry; +} + +static void *xas_descend(struct xa_state *xas, struct xa_node *node) +{ + unsigned int offset = get_offset(xas->xa_index, node); + void *entry = xa_entry(xas->xa, node, offset); + + xas->xa_node = node; + if (xa_is_sibling(entry)) { + offset = xa_to_sibling(entry); + entry = xa_entry(xas->xa, node, offset); + } + + xas->xa_offset = offset; + return entry; +} + +/** + * xas_load() - Load an entry from the XArray (advanced). + * @xas: XArray operation state. + * + * Usually walks the @xas to the appropriate state to load the entry + * stored at xa_index. However, it will do nothing and return %NULL if + * @xas is in an error state. xas_load() will never expand the tree. 
+ * + * If the xa_state is set up to operate on a multi-index entry, xas_load() + * may return %NULL or an internal entry, even if there are entries + * present within the range specified by @xas. + * + * Context: Any context. The caller should hold the xa_lock or the RCU lock. + * Return: Usually an entry in the XArray, but see description for exceptions. + */ +void *xas_load(struct xa_state *xas) +{ + void *entry = xas_start(xas); + + while (xa_is_node(entry)) { + struct xa_node *node = xa_to_node(entry); + + if (xas->xa_shift > node->shift) + break; + entry = xas_descend(xas, node); + } + return entry; +} +EXPORT_SYMBOL_GPL(xas_load); + /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. @@ -42,3 +136,104 @@ void xa_init_flags(struct xarray *xa, gfp_t flags) xa->xa_head = NULL; } EXPORT_SYMBOL(xa_init_flags); + +/** + * xa_load() - Load an entry from an XArray. + * @xa: XArray. + * @index: index into array. + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The entry at @index in @xa. + */ +void *xa_load(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + void *entry; + + rcu_read_lock(); + do { + entry = xas_load(&xas); + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + return entry; +} +EXPORT_SYMBOL(xa_load); + +#ifdef XA_DEBUG +void xa_dump_node(const struct xa_node *node) +{ + unsigned i, j; + + if (!node) + return; + if ((unsigned long)node & 3) { + pr_cont("node %px\n", node); + return; + } + + pr_cont("node %px %s %d parent %px shift %d count %d values %d " + "array %px list %px %px marks", + node, node->parent ? "offset" : "max", node->offset, + node->parent, node->shift, node->count, node->nr_values, + node->array, node->private_list.prev, node->private_list.next); + for (i = 0; i < XA_MAX_MARKS; i++) + for (j = 0; j < XA_MARK_LONGS; j++) + pr_cont(" %lx", node->marks[i][j]); + pr_cont("\n"); +} + +void xa_dump_index(unsigned long index, unsigned int shift) +{ + if (!shift) + pr_info("%lu: ", index); + else if (shift >= BITS_PER_LONG) + pr_info("0-%lu: ", ~0UL); + else + pr_info("%lu-%lu: ", index, index | ((1UL << shift) - 1)); +} + +void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift) +{ + if (!entry) + return; + + xa_dump_index(index, shift); + + if (xa_is_node(entry)) { + if (shift == 0) { + pr_cont("%px\n", entry); + } else { + unsigned long i; + struct xa_node *node = xa_to_node(entry); + xa_dump_node(node); + for (i = 0; i < XA_CHUNK_SIZE; i++) + xa_dump_entry(node->slots[i], + index + (i << node->shift), node->shift); + } + } else if (xa_is_value(entry)) + pr_cont("value %ld (0x%lx) [%px]\n", xa_to_value(entry), + xa_to_value(entry), entry); + else if (!xa_is_internal(entry)) + pr_cont("%px\n", entry); + else if (xa_is_retry(entry)) + pr_cont("retry (%ld)\n", xa_to_internal(entry)); + else if (xa_is_sibling(entry)) + pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry)); + else + pr_cont("UNKNOWN ENTRY (%px)\n", entry); +} + +void xa_dump(const struct xarray *xa) +{ + void *entry = xa->xa_head; + unsigned int shift = 0; + + pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry, + xa->xa_flags, radix_tree_tagged(xa, 0), + radix_tree_tagged(xa, 1), radix_tree_tagged(xa, 2)); + if (xa_is_node(entry)) + shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT; + xa_dump_entry(entry, 0, shift); +} +#endif diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 0ad884452c5c..6935ef94e77a 100644 --- a/tools/include/linux/kernel.h +++ 
b/tools/include/linux/kernel.h @@ -70,6 +70,7 @@ #define BUG_ON(cond) assert(!(cond)) #endif #endif +#define BUG() BUG_ON(1) #if __BYTE_ORDER == __BIG_ENDIAN #define cpu_to_le16 bswap_16 diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore index d4706c0ffceb..3834899b6693 100644 --- a/tools/testing/radix-tree/.gitignore +++ b/tools/testing/radix-tree/.gitignore @@ -4,3 +4,4 @@ idr-test main multiorder radix-tree.c +xarray diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index c0cf1c79efd5..1379f1d78d0b 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -4,7 +4,7 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \ -fsanitize=undefined LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS+= -lpthread -lurcu -TARGETS = main idr-test multiorder +TARGETS = main idr-test multiorder xarray CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o @@ -25,6 +25,8 @@ main: $(OFILES) idr-test.o: ../../../lib/test_ida.c idr-test: idr-test.o $(CORE_OFILES) +xarray: $(CORE_OFILES) + multiorder: multiorder.o $(CORE_OFILES) clean: @@ -45,7 +47,7 @@ radix-tree.c: ../../../lib/radix-tree.c idr.c: ../../../lib/idr.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ -xarray.o: ../../../lib/xarray.c +xarray.o: ../../../lib/xarray.c ../../../lib/test_xarray.c generated/map-shift.h: @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 426f32f28547..5d06ac75a14d 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -14,6 +14,7 @@ #include "../../../include/linux/kconfig.h" #define printk printf +#define pr_info printk #define pr_debug printk #define pr_cont printk diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h index 73ed33658203..fd280b070fdb 100644 --- a/tools/testing/radix-tree/linux/rcupdate.h +++ b/tools/testing/radix-tree/linux/rcupdate.h @@ -6,5 +6,7 @@ #define rcu_dereference_raw(p) rcu_dereference(p) #define rcu_dereference_protected(p, cond) rcu_dereference(p) +#define rcu_dereference_check(p, cond) rcu_dereference(p) +#define RCU_INIT_POINTER(p, v) (p) = (v) #endif diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index b741686e53d6..09deaf4f0959 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -365,6 +365,7 @@ int main(int argc, char **argv) rcu_register_thread(); radix_tree_init(); + xarray_tests(); regression1_test(); regression2_test(); regression3_test(); diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 92d901eacf49..e3dc7a16f09b 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -34,6 +34,7 @@ int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *, unsigned iftag, unsigned thentag); unsigned long find_item(struct radix_tree_root *, void *item); +void xarray_tests(void); void tag_check(void); void multiorder_checks(void); void iteration_test(unsigned order, unsigned duration); diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c index 9bbd667172a7..e61e43efe463 100644 --- a/tools/testing/radix-tree/xarray.c +++ 
b/tools/testing/radix-tree/xarray.c @@ -4,4 +4,32 @@ * Copyright (c) 2018 Matthew Wilcox */ +#define XA_DEBUG +#include "test.h" + +#define module_init(x) +#define module_exit(x) +#define MODULE_AUTHOR(x) +#define MODULE_LICENSE(x) +#define dump_stack() assert(0) + #include "../../../lib/xarray.c" +#undef XA_DEBUG +#include "../../../lib/test_xarray.c" + +void xarray_tests(void) +{ + xarray_checks(); + xarray_exit(); +} + +int __weak main(void) +{ + radix_tree_init(); + xarray_tests(); + radix_tree_cpu_dead(1); + rcu_barrier(); + if (nr_allocated) + printf("nr_allocated = %d\n", nr_allocated); + return 0; +} -- cgit v1.2.3-71-gd317 From 9b89a0355144685a787b0dc5bcf7bdd6f2d02968 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 10 Nov 2017 09:34:31 -0500 Subject: xarray: Add XArray marks XArray marks are like the radix tree tags, only slightly more strongly typed. They are renamed in order to distinguish them from tagged pointers. This commit adds the basic get/set/clear operations. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 63 +++++++ lib/test_xarray.c | 34 ++++ lib/xarray.c | 232 +++++++++++++++++++++++++- tools/include/asm-generic/bitops.h | 1 + tools/include/asm-generic/bitops/atomic.h | 9 - tools/include/asm-generic/bitops/non-atomic.h | 109 ++++++++++++ tools/include/linux/spinlock.h | 10 +- 7 files changed, 445 insertions(+), 13 deletions(-) create mode 100644 tools/include/asm-generic/bitops/non-atomic.h (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index a0df8217068c..2de504ae9ba4 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -197,6 +198,20 @@ static inline int xa_err(void *entry) return 0; } +typedef unsigned __bitwise xa_mark_t; +#define XA_MARK_0 ((__force xa_mark_t)0U) +#define XA_MARK_1 ((__force xa_mark_t)1U) +#define XA_MARK_2 ((__force xa_mark_t)2U) +#define XA_PRESENT ((__force xa_mark_t)8U) +#define XA_MARK_MAX XA_MARK_2 + +/* + * Values for xa_flags. The radix tree stores its GFP flags in the xa_flags, + * and we remain compatible with that. + */ +#define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ + (__force unsigned)(mark))) + /** * struct xarray - The anchor of the XArray. * @xa_lock: Lock that protects the contents of the XArray. @@ -252,6 +267,9 @@ struct xarray { void xa_init_flags(struct xarray *, gfp_t flags); void *xa_load(struct xarray *, unsigned long index); +bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); +void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); +void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); /** * xa_init() - Initialise an empty XArray. @@ -278,6 +296,19 @@ static inline bool xa_empty(const struct xarray *xa) return xa->xa_head == NULL; } +/** + * xa_marked() - Inquire whether any entry in this array has a mark set + * @xa: Array + * @mark: Mark value + * + * Context: Any context. + * Return: %true if any entry has this mark set. 
+ */ +static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark) +{ + return xa->xa_flags & XA_FLAGS_MARK(mark); +} + #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) #define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) @@ -290,6 +321,12 @@ static inline bool xa_empty(const struct xarray *xa) #define xa_unlock_irqrestore(xa, flags) \ spin_unlock_irqrestore(&(xa)->xa_lock, flags) +/* + * Versions of the normal API which require the caller to hold the xa_lock. + */ +void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); +void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); + /* Everything below here is the Advanced API. Proceed with caution. */ /* @@ -388,6 +425,22 @@ static inline void *xa_entry_locked(const struct xarray *xa, lockdep_is_held(&xa->xa_lock)); } +/* Private */ +static inline struct xa_node *xa_parent(const struct xarray *xa, + const struct xa_node *node) +{ + return rcu_dereference_check(node->parent, + lockdep_is_held(&xa->xa_lock)); +} + +/* Private */ +static inline struct xa_node *xa_parent_locked(const struct xarray *xa, + const struct xa_node *node) +{ + return rcu_dereference_protected(node->parent, + lockdep_is_held(&xa->xa_lock)); +} + /* Private */ static inline struct xa_node *xa_to_node(const void *entry) { @@ -588,6 +641,12 @@ static inline bool xas_valid(const struct xa_state *xas) return !xas_invalid(xas); } +/* True if the pointer is something other than a node */ +static inline bool xas_not_node(struct xa_node *node) +{ + return ((unsigned long)node & 3) || !node; +} + /** * xas_reset() - Reset an XArray operation state. * @xas: XArray operation state. @@ -625,6 +684,10 @@ static inline bool xas_retry(struct xa_state *xas, const void *entry) void *xas_load(struct xa_state *); +bool xas_get_mark(const struct xa_state *, xa_mark_t); +void xas_set_mark(const struct xa_state *, xa_mark_t); +void xas_clear_mark(const struct xa_state *, xa_mark_t); + /** * xas_reload() - Refetch an entry from the xarray. * @xas: XArray operation state. 
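As an illustrative aside (not part of this patch; the array, the index and
the choice of XA_MARK_0 are invented), the normal-API mark operations
declared above might be used like this:

    #include <linux/xarray.h>

    static DEFINE_XARRAY(example_array);

    /* Marks only stick to present entries; setting one on NULL is a no-op. */
    static void example_set_dirty(unsigned long index)
    {
            xa_set_mark(&example_array, index, XA_MARK_0);
    }

    static bool example_is_dirty(unsigned long index)
    {
            return xa_get_mark(&example_array, index, XA_MARK_0);
    }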
diff --git a/lib/test_xarray.c b/lib/test_xarray.c index a7248b87617f..f8b0e9ba80a4 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -67,11 +67,45 @@ static noinline void check_xa_load(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) +{ + /* NULL elements have no marks set */ + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); + xa_set_mark(xa, index, XA_MARK_0); + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); + + /* Storing a pointer will not make a mark appear */ + XA_BUG_ON(xa, xa_store_index(xa, index, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); + xa_set_mark(xa, index, XA_MARK_0); + XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0)); + + /* Setting one mark will not set another mark */ + XA_BUG_ON(xa, xa_get_mark(xa, index + 1, XA_MARK_0)); + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_1)); + + /* Storing NULL clears marks, and they can't be set again */ + xa_erase_index(xa, index); + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); + xa_set_mark(xa, index, XA_MARK_0); + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); +} + +static noinline void check_xa_mark(struct xarray *xa) +{ + unsigned long index; + + for (index = 0; index < 16384; index += 4) + check_xa_mark_1(xa, index); +} + static RADIX_TREE(array, GFP_KERNEL); static int xarray_checks(void) { check_xa_load(&array); + check_xa_mark(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index 19cfcbc69a68..aa86c47e532f 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -5,6 +5,7 @@ * Author: Matthew Wilcox */ +#include #include #include @@ -24,6 +25,48 @@ * @entry refers to something stored in a slot in the xarray */ +static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) +{ + if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) + xa->xa_flags |= XA_FLAGS_MARK(mark); +} + +static inline void xa_mark_clear(struct xarray *xa, xa_mark_t mark) +{ + if (xa->xa_flags & XA_FLAGS_MARK(mark)) + xa->xa_flags &= ~(XA_FLAGS_MARK(mark)); +} + +static inline unsigned long *node_marks(struct xa_node *node, xa_mark_t mark) +{ + return node->marks[(__force unsigned)mark]; +} + +static inline bool node_get_mark(struct xa_node *node, + unsigned int offset, xa_mark_t mark) +{ + return test_bit(offset, node_marks(node, mark)); +} + +/* returns true if the bit was set */ +static inline bool node_set_mark(struct xa_node *node, unsigned int offset, + xa_mark_t mark) +{ + return __test_and_set_bit(offset, node_marks(node, mark)); +} + +/* returns true if the bit was set */ +static inline bool node_clear_mark(struct xa_node *node, unsigned int offset, + xa_mark_t mark) +{ + return __test_and_clear_bit(offset, node_marks(node, mark)); +} + +static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark) +{ + return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE); +} + /* extracts the offset within this node from the index */ static unsigned int get_offset(unsigned long index, struct xa_node *node) { @@ -118,6 +161,85 @@ void *xas_load(struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_load); +/** + * xas_get_mark() - Returns the state of this mark. + * @xas: XArray operation state. + * @mark: Mark number. + * + * Return: true if the mark is set, false if the mark is clear or @xas + * is in an error state. 
+ */ +bool xas_get_mark(const struct xa_state *xas, xa_mark_t mark) +{ + if (xas_invalid(xas)) + return false; + if (!xas->xa_node) + return xa_marked(xas->xa, mark); + return node_get_mark(xas->xa_node, xas->xa_offset, mark); +} +EXPORT_SYMBOL_GPL(xas_get_mark); + +/** + * xas_set_mark() - Sets the mark on this entry and its parents. + * @xas: XArray operation state. + * @mark: Mark number. + * + * Sets the specified mark on this entry, and walks up the tree setting it + * on all the ancestor entries. Does nothing if @xas has not been walked to + * an entry, or is in an error state. + */ +void xas_set_mark(const struct xa_state *xas, xa_mark_t mark) +{ + struct xa_node *node = xas->xa_node; + unsigned int offset = xas->xa_offset; + + if (xas_invalid(xas)) + return; + + while (node) { + if (node_set_mark(node, offset, mark)) + return; + offset = node->offset; + node = xa_parent_locked(xas->xa, node); + } + + if (!xa_marked(xas->xa, mark)) + xa_mark_set(xas->xa, mark); +} +EXPORT_SYMBOL_GPL(xas_set_mark); + +/** + * xas_clear_mark() - Clears the mark on this entry and its parents. + * @xas: XArray operation state. + * @mark: Mark number. + * + * Clears the specified mark on this entry, and walks back to the head + * attempting to clear it on all the ancestor entries. Does nothing if + * @xas has not been walked to an entry, or is in an error state. + */ +void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark) +{ + struct xa_node *node = xas->xa_node; + unsigned int offset = xas->xa_offset; + + if (xas_invalid(xas)) + return; + + while (node) { + if (!node_clear_mark(node, offset, mark)) + return; + if (node_any_mark(node, mark)) + return; + + offset = node->offset; + node = xa_parent_locked(xas->xa, node); + } + + if (xa_marked(xas->xa, mark)) + xa_mark_clear(xas->xa, mark); +} +EXPORT_SYMBOL_GPL(xas_clear_mark); + /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. @@ -160,6 +282,112 @@ void *xa_load(struct xarray *xa, unsigned long index) } EXPORT_SYMBOL(xa_load); +/** + * __xa_set_mark() - Set this mark on this entry while locked. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Attempting to set a mark on a NULL entry does not succeed. + * + * Context: Any context. Expects xa_lock to be held on entry. + */ +void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + XA_STATE(xas, xa, index); + void *entry = xas_load(&xas); + + if (entry) + xas_set_mark(&xas, mark); +} +EXPORT_SYMBOL_GPL(__xa_set_mark); + +/** + * __xa_clear_mark() - Clear this mark on this entry while locked. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Context: Any context. Expects xa_lock to be held on entry. + */ +void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + XA_STATE(xas, xa, index); + void *entry = xas_load(&xas); + + if (entry) + xas_clear_mark(&xas, mark); +} +EXPORT_SYMBOL_GPL(__xa_clear_mark); + +/** + * xa_get_mark() - Inquire whether this mark is set on this entry. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * This function uses the RCU read lock, so the result may be out of date + * by the time it returns. If you need the result to be stable, use a lock. + * + * Context: Any context. Takes and releases the RCU lock. + * Return: True if the entry at @index has this mark set, false if it doesn't. 
+ */ +bool xa_get_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + XA_STATE(xas, xa, index); + void *entry; + + rcu_read_lock(); + entry = xas_start(&xas); + while (xas_get_mark(&xas, mark)) { + if (!xa_is_node(entry)) + goto found; + entry = xas_descend(&xas, xa_to_node(entry)); + } + rcu_read_unlock(); + return false; + found: + rcu_read_unlock(); + return true; +} +EXPORT_SYMBOL(xa_get_mark); + +/** + * xa_set_mark() - Set this mark on this entry. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Attempting to set a mark on a NULL entry does not succeed. + * + * Context: Process context. Takes and releases the xa_lock. + */ +void xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + xa_lock(xa); + __xa_set_mark(xa, index, mark); + xa_unlock(xa); +} +EXPORT_SYMBOL(xa_set_mark); + +/** + * xa_clear_mark() - Clear this mark on this entry. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Clearing a mark always succeeds. + * + * Context: Process context. Takes and releases the xa_lock. + */ +void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + xa_lock(xa); + __xa_clear_mark(xa, index, mark); + xa_unlock(xa); +} +EXPORT_SYMBOL(xa_clear_mark); + #ifdef XA_DEBUG void xa_dump_node(const struct xa_node *node) { @@ -230,8 +458,8 @@ void xa_dump(const struct xarray *xa) unsigned int shift = 0; pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry, - xa->xa_flags, radix_tree_tagged(xa, 0), - radix_tree_tagged(xa, 1), radix_tree_tagged(xa, 2)); + xa->xa_flags, xa_marked(xa, XA_MARK_0), + xa_marked(xa, XA_MARK_1), xa_marked(xa, XA_MARK_2)); if (xa_is_node(entry)) shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT; xa_dump_entry(entry, 0, shift); diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h index 9bce3b56b5e7..5d2ab38965cc 100644 --- a/tools/include/asm-generic/bitops.h +++ b/tools/include/asm-generic/bitops.h @@ -27,5 +27,6 @@ #include #include +#include #endif /* __TOOLS_ASM_GENERIC_BITOPS_H */ diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h index 21c41ccd1266..2f6ea28764a7 100644 --- a/tools/include/asm-generic/bitops/atomic.h +++ b/tools/include/asm-generic/bitops/atomic.h @@ -15,13 +15,4 @@ static inline void clear_bit(int nr, unsigned long *addr) addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG)); } -static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) -{ - return ((1UL << (nr % __BITS_PER_LONG)) & - (((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0; -} - -#define __set_bit(nr, addr) set_bit(nr, addr) -#define __clear_bit(nr, addr) clear_bit(nr, addr) - #endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */ diff --git a/tools/include/asm-generic/bitops/non-atomic.h b/tools/include/asm-generic/bitops/non-atomic.h new file mode 100644 index 000000000000..7e10c4b50c5d --- /dev/null +++ b/tools/include/asm-generic/bitops/non-atomic.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ +#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ + +#include + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. 
+ */ +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + *p |= mask; +} + +static inline void __clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + *p &= ~mask; +} + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + *p ^= mask; +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old = *p; + + *p = old | mask; + return (old & mask) != 0; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old = *p; + + *p = old & ~mask; + return (old & mask) != 0; +} + +/* WARNING: non atomic and it can be reordered! 
*/ +static inline int __test_and_change_bit(int nr, + volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old = *p; + + *p = old ^ mask; + return (old & mask) != 0; +} + +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index 1738c0391da4..622266b197d0 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -8,8 +8,14 @@ #define spinlock_t pthread_mutex_t #define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER #define __SPIN_LOCK_UNLOCKED(x) (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER -#define spin_lock_init(x) pthread_mutex_init(x, NULL) - +#define spin_lock_init(x) pthread_mutex_init(x, NULL) + +#define spin_lock(x) pthread_mutex_lock(x) +#define spin_unlock(x) pthread_mutex_unlock(x) +#define spin_lock_bh(x) pthread_mutex_lock(x) +#define spin_unlock_bh(x) pthread_mutex_unlock(x) +#define spin_lock_irq(x) pthread_mutex_lock(x) +#define spin_unlock_irq(x) pthread_mutex_unlock(x) #define spin_lock_irqsave(x, f) (void)f, pthread_mutex_lock(x) #define spin_unlock_irqrestore(x, f) (void)f, pthread_mutex_unlock(x) -- cgit v1.2.3-71-gd317 From 58d6ea3085f2e53714810a513c61629f6d2be0a6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 10 Nov 2017 15:15:08 -0500 Subject: xarray: Add XArray unconditional store operations xa_store() differs from radix_tree_insert() in that it will overwrite an existing element in the array rather than returning an error. This is the behaviour which most users want, and those that want more complex behaviour generally want to use the xas family of routines anyway. For memory allocation, xa_store() will first attempt to request memory from the slab allocator; if memory is not immediately available, it will drop the xa_lock and allocate memory, keeping a pointer in the xa_state. It does not use the per-CPU cache, although those will continue to exist until all radix tree users are converted to the xarray. This patch also includes xa_erase() and __xa_erase() for a streamlined way to store NULL. Since there is no need to allocate memory in order to store a NULL in the XArray, we do not need to trouble the user with deciding what memory allocation flags to use. 
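As a usage sketch of the convention just described (the array, function
names and stored type are invented for illustration; only the XArray
calls themselves come from this patch):

	static DEFINE_XARRAY(cache);

	/* xa_store() may allocate, so it takes GFP flags; xa_err()
	 * turns a returned error entry into 0 or a negative errno. */
	int cache_add(unsigned long id, void *obj)
	{
		return xa_err(xa_store(&cache, id, obj, GFP_KERNEL));
	}

	/* Storing NULL never allocates, hence no GFP argument here. */
	void *cache_del(unsigned long id)
	{
		return xa_erase(&cache, id);
	}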
Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 147 ++++++- lib/radix-tree.c | 4 +- lib/test_xarray.c | 177 +++++++- lib/xarray.c | 693 +++++++++++++++++++++++++++++++ tools/include/linux/bitmap.h | 1 + tools/include/linux/spinlock.h | 2 + tools/testing/radix-tree/Makefile | 2 +- tools/testing/radix-tree/bitmap.c | 23 + tools/testing/radix-tree/linux/kernel.h | 4 + tools/testing/radix-tree/linux/lockdep.h | 11 + 10 files changed, 1055 insertions(+), 9 deletions(-) create mode 100644 tools/testing/radix-tree/bitmap.c create mode 100644 tools/testing/radix-tree/linux/lockdep.h (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 2de504ae9ba4..15eab63286ed 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -205,10 +205,17 @@ typedef unsigned __bitwise xa_mark_t; #define XA_PRESENT ((__force xa_mark_t)8U) #define XA_MARK_MAX XA_MARK_2 +enum xa_lock_type { + XA_LOCK_IRQ = 1, + XA_LOCK_BH = 2, +}; + /* * Values for xa_flags. The radix tree stores its GFP flags in the xa_flags, * and we remain compatible with that. */ +#define XA_FLAGS_LOCK_IRQ ((__force gfp_t)XA_LOCK_IRQ) +#define XA_FLAGS_LOCK_BH ((__force gfp_t)XA_LOCK_BH) #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ (__force unsigned)(mark))) @@ -267,6 +274,7 @@ struct xarray { void xa_init_flags(struct xarray *, gfp_t flags); void *xa_load(struct xarray *, unsigned long index); +void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -309,6 +317,23 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark) return xa->xa_flags & XA_FLAGS_MARK(mark); } +/** + * xa_erase() - Erase this entry from the XArray. + * @xa: XArray. + * @index: Index of entry. + * + * This function is the equivalent of calling xa_store() with %NULL as + * the third argument. The XArray does not need to allocate memory, so + * the user does not need to provide GFP flags. + * + * Context: Process context. Takes and releases the xa_lock. + * Return: The entry which used to be at this index. + */ +static inline void *xa_erase(struct xarray *xa, unsigned long index) +{ + return xa_store(xa, index, NULL, 0); +} + #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) #define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) @@ -322,11 +347,65 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark) spin_unlock_irqrestore(&(xa)->xa_lock, flags) /* - * Versions of the normal API which require the caller to hold the xa_lock. - */ + * Versions of the normal API which require the caller to hold the + * xa_lock. If the GFP flags allow it, they will drop the lock to + * allocate memory, then reacquire it afterwards. These functions + * may also re-enable interrupts if the XArray flags indicate the + * locking should be interrupt safe. + */ +void *__xa_erase(struct xarray *, unsigned long index); +void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); +/** + * xa_erase_bh() - Erase this entry from the XArray. + * @xa: XArray. + * @index: Index of entry. 
+ * + * This function is the equivalent of calling xa_store() with %NULL as + * the third argument. The XArray does not need to allocate memory, so + * the user does not need to provide GFP flags. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling softirqs. + * Return: The entry which used to be at this index. + */ +static inline void *xa_erase_bh(struct xarray *xa, unsigned long index) +{ + void *entry; + + xa_lock_bh(xa); + entry = __xa_erase(xa, index); + xa_unlock_bh(xa); + + return entry; +} + +/** + * xa_erase_irq() - Erase this entry from the XArray. + * @xa: XArray. + * @index: Index of entry. + * + * This function is the equivalent of calling xa_store() with %NULL as + * the third argument. The XArray does not need to allocate memory, so + * the user does not need to provide GFP flags. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. + * Return: The entry which used to be at this index. + */ +static inline void *xa_erase_irq(struct xarray *xa, unsigned long index) +{ + void *entry; + + xa_lock_irq(xa); + entry = __xa_erase(xa, index); + xa_unlock_irq(xa); + + return entry; +} + /* Everything below here is the Advanced API. Proceed with caution. */ /* @@ -441,6 +520,12 @@ static inline struct xa_node *xa_parent_locked(const struct xarray *xa, lockdep_is_held(&xa->xa_lock)); } +/* Private */ +static inline void *xa_mk_node(const struct xa_node *node) +{ + return (void *)((unsigned long)node | 2); +} + /* Private */ static inline struct xa_node *xa_to_node(const void *entry) { @@ -647,6 +732,12 @@ static inline bool xas_not_node(struct xa_node *node) return ((unsigned long)node & 3) || !node; } +/* True if the node represents head-of-tree, RESTART or BOUNDS */ +static inline bool xas_top(struct xa_node *node) +{ + return node <= XAS_RESTART; +} + /** * xas_reset() - Reset an XArray operation state. * @xas: XArray operation state. @@ -683,10 +774,14 @@ static inline bool xas_retry(struct xa_state *xas, const void *entry) } void *xas_load(struct xa_state *); +void *xas_store(struct xa_state *, void *entry); bool xas_get_mark(const struct xa_state *, xa_mark_t); void xas_set_mark(const struct xa_state *, xa_mark_t); void xas_clear_mark(const struct xa_state *, xa_mark_t); +void xas_init_marks(const struct xa_state *); + +bool xas_nomem(struct xa_state *, gfp_t); /** * xas_reload() - Refetch an entry from the xarray. @@ -711,4 +806,52 @@ static inline void *xas_reload(struct xa_state *xas) return xa_head(xas->xa); } +/** + * xas_set() - Set up XArray operation state for a different index. + * @xas: XArray operation state. + * @index: New index into the XArray. + * + * Move the operation state to refer to a different index. This will + * have the effect of starting a walk from the top; see xas_next() + * to move to an adjacent index. + */ +static inline void xas_set(struct xa_state *xas, unsigned long index) +{ + xas->xa_index = index; + xas->xa_node = XAS_RESTART; +} + +/** + * xas_set_order() - Set up XArray operation state for a multislot entry. + * @xas: XArray operation state. + * @index: Target of the operation. + * @order: Entry occupies 2^@order indices. + */ +static inline void xas_set_order(struct xa_state *xas, unsigned long index, + unsigned int order) +{ +#ifdef CONFIG_XARRAY_MULTI + xas->xa_index = order < BITS_PER_LONG ? 
(index >> order) << order : 0; + xas->xa_shift = order - (order % XA_CHUNK_SHIFT); + xas->xa_sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; + xas->xa_node = XAS_RESTART; +#else + BUG_ON(order > 0); + xas_set(xas, index); +#endif +} + +/** + * xas_set_update() - Set up XArray operation state for a callback. + * @xas: XArray operation state. + * @update: Function to call when updating a node. + * + * The XArray can notify a caller after it has updated an xa_node. + * This is advanced functionality and is only needed by the page cache. + */ +static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update) +{ + xas->xa_update = update; +} + #endif /* _LINUX_XARRAY_H */ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b8e961428484..3479d93c32b9 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -47,7 +47,7 @@ static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly; /* * Radix tree node cache. */ -static struct kmem_cache *radix_tree_node_cachep; +struct kmem_cache *radix_tree_node_cachep; /* * The radix tree is variable-height, so an insert operation not only has @@ -365,7 +365,7 @@ out: return ret; } -static void radix_tree_node_rcu_free(struct rcu_head *head) +void radix_tree_node_rcu_free(struct rcu_head *head) { struct radix_tree_node *node = container_of(head, struct radix_tree_node, rcu_head); diff --git a/lib/test_xarray.c b/lib/test_xarray.c index f8b0e9ba80a4..b711030fbc32 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -30,13 +30,49 @@ void xa_dump(const struct xarray *xa) { } static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) { - radix_tree_insert(xa, index, xa_mk_value(index)); - return NULL; + return xa_store(xa, index, xa_mk_value(index & LONG_MAX), gfp); } static void xa_erase_index(struct xarray *xa, unsigned long index) { - radix_tree_delete(xa, index); + XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_value(index & LONG_MAX)); + XA_BUG_ON(xa, xa_load(xa, index) != NULL); +} + +/* + * If anyone needs this, please move it to xarray.c. We have no current + * users outside the test suite because all current multislot users want + * to use the advanced API. + */ +static void *xa_store_order(struct xarray *xa, unsigned long index, + unsigned order, void *entry, gfp_t gfp) +{ + XA_STATE_ORDER(xas, xa, index, order); + void *curr; + + do { + xas_lock(&xas); + curr = xas_store(&xas, entry); + xas_unlock(&xas); + } while (xas_nomem(&xas, gfp)); + + return curr; +} + +static noinline void check_xa_err(struct xarray *xa) +{ + XA_BUG_ON(xa, xa_err(xa_store_index(xa, 0, GFP_NOWAIT)) != 0); + XA_BUG_ON(xa, xa_err(xa_erase(xa, 0)) != 0); +#ifndef __KERNEL__ + /* The kernel does not fail GFP_NOWAIT allocations */ + XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM); + XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM); +#endif + XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_KERNEL)) != 0); + XA_BUG_ON(xa, xa_err(xa_store(xa, 1, xa_mk_value(0), GFP_KERNEL)) != 0); + XA_BUG_ON(xa, xa_err(xa_erase(xa, 1)) != 0); +// kills the test-suite :-( +// XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL); } static noinline void check_xa_load(struct xarray *xa) @@ -69,6 +105,9 @@ static noinline void check_xa_load(struct xarray *xa) static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) { + unsigned int order; + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
8 : 1; + /* NULL elements have no marks set */ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); xa_set_mark(xa, index, XA_MARK_0); @@ -90,6 +129,37 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); xa_set_mark(xa, index, XA_MARK_0); XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); + + /* + * Storing a multi-index entry over entries with marks gives the + * entire entry the union of the marks + */ + BUG_ON((index % 4) != 0); + for (order = 2; order < max_order; order++) { + unsigned long base = round_down(index, 1UL << order); + unsigned long next = base + (1UL << order); + unsigned long i; + + XA_BUG_ON(xa, xa_store_index(xa, index + 1, GFP_KERNEL)); + xa_set_mark(xa, index + 1, XA_MARK_0); + XA_BUG_ON(xa, xa_store_index(xa, index + 2, GFP_KERNEL)); + xa_set_mark(xa, index + 2, XA_MARK_1); + XA_BUG_ON(xa, xa_store_index(xa, next, GFP_KERNEL)); + xa_store_order(xa, index, order, xa_mk_value(index), + GFP_KERNEL); + for (i = base; i < next; i++) { + XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0)); + XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_1)); + XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_2)); + } + XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0)); + XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1)); + XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_2)); + xa_erase_index(xa, index); + xa_erase_index(xa, next); + XA_BUG_ON(xa, !xa_empty(xa)); + } + XA_BUG_ON(xa, !xa_empty(xa)); } static noinline void check_xa_mark(struct xarray *xa) @@ -100,12 +170,111 @@ static noinline void check_xa_mark(struct xarray *xa) check_xa_mark_1(xa, index); } -static RADIX_TREE(array, GFP_KERNEL); +static noinline void check_xa_shrink(struct xarray *xa) +{ + XA_STATE(xas, xa, 1); + struct xa_node *node; + + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_store_index(xa, 0, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL); + + /* + * Check that erasing the entry at 1 shrinks the tree and properly + * marks the node as being deleted. + */ + xas_lock(&xas); + XA_BUG_ON(xa, xas_load(&xas) != xa_mk_value(1)); + node = xas.xa_node; + XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != xa_mk_value(0)); + XA_BUG_ON(xa, xas_store(&xas, NULL) != xa_mk_value(1)); + XA_BUG_ON(xa, xa_load(xa, 1) != NULL); + XA_BUG_ON(xa, xas.xa_node != XAS_BOUNDS); + XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != XA_RETRY_ENTRY); + XA_BUG_ON(xa, xas_load(&xas) != NULL); + xas_unlock(&xas); + XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0)); + xa_erase_index(xa, 0); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_multi_store(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + unsigned long i, j, k; + unsigned int max_order = (sizeof(long) == 4) ? 
30 : 60; + + /* Loading from any position returns the same value */ + xa_store_order(xa, 0, 1, xa_mk_value(0), GFP_KERNEL); + XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0)); + XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0)); + XA_BUG_ON(xa, xa_load(xa, 2) != NULL); + rcu_read_lock(); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 2); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2); + rcu_read_unlock(); + + /* Storing adjacent to the value does not alter the value */ + xa_store(xa, 3, xa, GFP_KERNEL); + XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0)); + XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0)); + XA_BUG_ON(xa, xa_load(xa, 2) != NULL); + rcu_read_lock(); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 3); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2); + rcu_read_unlock(); + + /* Overwriting multiple indexes works */ + xa_store_order(xa, 0, 2, xa_mk_value(1), GFP_KERNEL); + XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(1)); + XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(1)); + XA_BUG_ON(xa, xa_load(xa, 2) != xa_mk_value(1)); + XA_BUG_ON(xa, xa_load(xa, 3) != xa_mk_value(1)); + XA_BUG_ON(xa, xa_load(xa, 4) != NULL); + rcu_read_lock(); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 4); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 4); + rcu_read_unlock(); + + /* We can erase multiple values with a single store */ + xa_store_order(xa, 0, 63, NULL, GFP_KERNEL); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Even when the first slot is empty but the others aren't */ + xa_store_index(xa, 1, GFP_KERNEL); + xa_store_index(xa, 2, GFP_KERNEL); + xa_store_order(xa, 0, 2, NULL, GFP_KERNEL); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = 0; i < max_order; i++) { + for (j = 0; j < max_order; j++) { + xa_store_order(xa, 0, i, xa_mk_value(i), GFP_KERNEL); + xa_store_order(xa, 0, j, xa_mk_value(j), GFP_KERNEL); + + for (k = 0; k < max_order; k++) { + void *entry = xa_load(xa, (1UL << k) - 1); + if ((i < k) && (j < k)) + XA_BUG_ON(xa, entry != NULL); + else + XA_BUG_ON(xa, entry != xa_mk_value(j)); + } + + xa_erase(xa, 0); + XA_BUG_ON(xa, !xa_empty(xa)); + } + } +#endif +} + +static DEFINE_XARRAY(array); static int xarray_checks(void) { + check_xa_err(&array); check_xa_load(&array); check_xa_mark(&array); + check_xa_shrink(&array); + check_multi_store(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 
0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index aa86c47e532f..4596a95ed9cd 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -7,6 +7,8 @@ #include #include +#include +#include #include /* @@ -25,6 +27,31 @@ * @entry refers to something stored in a slot in the xarray */ +static inline unsigned int xa_lock_type(const struct xarray *xa) +{ + return (__force unsigned int)xa->xa_flags & 3; +} + +static inline void xas_lock_type(struct xa_state *xas, unsigned int lock_type) +{ + if (lock_type == XA_LOCK_IRQ) + xas_lock_irq(xas); + else if (lock_type == XA_LOCK_BH) + xas_lock_bh(xas); + else + xas_lock(xas); +} + +static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type) +{ + if (lock_type == XA_LOCK_IRQ) + xas_unlock_irq(xas); + else if (lock_type == XA_LOCK_BH) + xas_unlock_bh(xas); + else + xas_unlock(xas); +} + static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) { if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) @@ -67,6 +94,34 @@ static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark) return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE); } +#define mark_inc(mark) do { \ + mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \ +} while (0) + +/* + * xas_squash_marks() - Merge all marks to the first entry + * @xas: Array operation state. + * + * Set a mark on the first entry if any entry has it set. Clear marks on + * all sibling entries. + */ +static void xas_squash_marks(const struct xa_state *xas) +{ + unsigned int mark = 0; + unsigned int limit = xas->xa_offset + xas->xa_sibs + 1; + + if (!xas->xa_sibs) + return; + + do { + unsigned long *marks = xas->xa_node->marks[mark]; + if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit) + continue; + __set_bit(xas->xa_offset, marks); + bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs); + } while (mark++ != (__force unsigned)XA_MARK_MAX); +} + /* extracts the offset within this node from the index */ static unsigned int get_offset(unsigned long index, struct xa_node *node) { @@ -161,6 +216,516 @@ void *xas_load(struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_load); +/* Move the radix tree node cache here */ +extern struct kmem_cache *radix_tree_node_cachep; +extern void radix_tree_node_rcu_free(struct rcu_head *head); + +#define XA_RCU_FREE ((struct xarray *)1) + +static void xa_node_free(struct xa_node *node) +{ + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + node->array = XA_RCU_FREE; + call_rcu(&node->rcu_head, radix_tree_node_rcu_free); +} + +/* + * xas_destroy() - Free any resources allocated during the XArray operation. + * @xas: XArray operation state. + * + * This function is now internal-only. + */ +static void xas_destroy(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_alloc; + + if (!node) + return; + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + kmem_cache_free(radix_tree_node_cachep, node); + xas->xa_alloc = NULL; +} + +/** + * xas_nomem() - Allocate memory if needed. + * @xas: XArray operation state. + * @gfp: Memory allocation flags. + * + * If we need to add new nodes to the XArray, we try to allocate memory + * with GFP_NOWAIT while holding the lock, which will usually succeed. + * If it fails, @xas is flagged as needing memory to continue. The caller + * should drop the lock and call xas_nomem(). If xas_nomem() succeeds, + * the caller should retry the operation. + * + * Forward progress is guaranteed as one node is allocated here and + * stored in the xa_state where it will be found by xas_alloc(). 
More + * nodes will likely be found in the slab allocator, but we do not tie + * them up here. + * + * Return: true if memory was needed, and was successfully allocated. + */ +bool xas_nomem(struct xa_state *xas, gfp_t gfp) +{ + if (xas->xa_node != XA_ERROR(-ENOMEM)) { + xas_destroy(xas); + return false; + } + xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + if (!xas->xa_alloc) + return false; + XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); + xas->xa_node = XAS_RESTART; + return true; +} +EXPORT_SYMBOL_GPL(xas_nomem); + +/* + * __xas_nomem() - Drop locks and allocate memory if needed. + * @xas: XArray operation state. + * @gfp: Memory allocation flags. + * + * Internal variant of xas_nomem(). + * + * Return: true if memory was needed, and was successfully allocated. + */ +static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) + __must_hold(xas->xa->xa_lock) +{ + unsigned int lock_type = xa_lock_type(xas->xa); + + if (xas->xa_node != XA_ERROR(-ENOMEM)) { + xas_destroy(xas); + return false; + } + if (gfpflags_allow_blocking(gfp)) { + xas_unlock_type(xas, lock_type); + xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + xas_lock_type(xas, lock_type); + } else { + xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + } + if (!xas->xa_alloc) + return false; + XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); + xas->xa_node = XAS_RESTART; + return true; +} + +static void xas_update(struct xa_state *xas, struct xa_node *node) +{ + if (xas->xa_update) + xas->xa_update(node); + else + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); +} + +static void *xas_alloc(struct xa_state *xas, unsigned int shift) +{ + struct xa_node *parent = xas->xa_node; + struct xa_node *node = xas->xa_alloc; + + if (xas_invalid(xas)) + return NULL; + + if (node) { + xas->xa_alloc = NULL; + } else { + node = kmem_cache_alloc(radix_tree_node_cachep, + GFP_NOWAIT | __GFP_NOWARN); + if (!node) { + xas_set_err(xas, -ENOMEM); + return NULL; + } + } + + if (parent) { + node->offset = xas->xa_offset; + parent->count++; + XA_NODE_BUG_ON(node, parent->count > XA_CHUNK_SIZE); + xas_update(xas, parent); + } + XA_NODE_BUG_ON(node, shift > BITS_PER_LONG); + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + node->shift = shift; + node->count = 0; + node->nr_values = 0; + RCU_INIT_POINTER(node->parent, xas->xa_node); + node->array = xas->xa; + + return node; +} + +/* + * Use this to calculate the maximum index that will need to be created + * in order to add the entry described by @xas. Because we cannot store a + * multiple-index entry at index 0, the calculation is a little more complex + * than you might expect. 
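+ *
+ * Worked example (illustrative, assuming XA_CHUNK_SHIFT == 6): storing
+ * an order-6 entry at index 0 gives xa_shift == 6 and xa_sibs == 0, so
+ * mask == 63 and (max |= mask) equals mask; the increment then makes
+ * xas_max() return 64, which forces allocation of a shift-6 node whose
+ * slot 0 can hold the 64-index entry, instead of placing it directly
+ * in the head slot, which can only represent index 0.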
+ */ +static unsigned long xas_max(struct xa_state *xas) +{ + unsigned long max = xas->xa_index; + +#ifdef CONFIG_XARRAY_MULTI + if (xas->xa_shift || xas->xa_sibs) { + unsigned long mask; + mask = (((xas->xa_sibs + 1UL) << xas->xa_shift) - 1); + max |= mask; + if (mask == max) + max++; + } +#endif + + return max; +} + +/* The maximum index that can be contained in the array without expanding it */ +static unsigned long max_index(void *entry) +{ + if (!xa_is_node(entry)) + return 0; + return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1; +} + +static void xas_shrink(struct xa_state *xas) +{ + struct xarray *xa = xas->xa; + struct xa_node *node = xas->xa_node; + + for (;;) { + void *entry; + + XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); + if (node->count != 1) + break; + entry = xa_entry_locked(xa, node, 0); + if (!entry) + break; + if (!xa_is_node(entry) && node->shift) + break; + xas->xa_node = XAS_BOUNDS; + + RCU_INIT_POINTER(xa->xa_head, entry); + + node->count = 0; + node->nr_values = 0; + if (!xa_is_node(entry)) + RCU_INIT_POINTER(node->slots[0], XA_RETRY_ENTRY); + xas_update(xas, node); + xa_node_free(node); + if (!xa_is_node(entry)) + break; + node = xa_to_node(entry); + node->parent = NULL; + } +} + +/* + * xas_delete_node() - Attempt to delete an xa_node + * @xas: Array operation state. + * + * Attempts to delete the @xas->xa_node. This will fail if xa->node has + * a non-zero reference count. + */ +static void xas_delete_node(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + for (;;) { + struct xa_node *parent; + + XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); + if (node->count) + break; + + parent = xa_parent_locked(xas->xa, node); + xas->xa_node = parent; + xas->xa_offset = node->offset; + xa_node_free(node); + + if (!parent) { + xas->xa->xa_head = NULL; + xas->xa_node = XAS_BOUNDS; + return; + } + + parent->slots[xas->xa_offset] = NULL; + parent->count--; + XA_NODE_BUG_ON(parent, parent->count > XA_CHUNK_SIZE); + node = parent; + xas_update(xas, node); + } + + if (!node->parent) + xas_shrink(xas); +} + +/** + * xas_free_nodes() - Free this node and all nodes that it references + * @xas: Array operation state. + * @top: Node to free + * + * This node has been removed from the tree. We must now free it and all + * of its subnodes. There may be RCU walkers with references into the tree, + * so we must replace all entries with retry markers. 
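+ *
+ * (Lockless readers that encounter one of these retry markers see
+ * xas_retry()/xa_is_retry() report true and restart their walk from
+ * the head of the tree.)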
+ */ +static void xas_free_nodes(struct xa_state *xas, struct xa_node *top) +{ + unsigned int offset = 0; + struct xa_node *node = top; + + for (;;) { + void *entry = xa_entry_locked(xas->xa, node, offset); + + if (xa_is_node(entry)) { + node = xa_to_node(entry); + offset = 0; + continue; + } + if (entry) + RCU_INIT_POINTER(node->slots[offset], XA_RETRY_ENTRY); + offset++; + while (offset == XA_CHUNK_SIZE) { + struct xa_node *parent; + + parent = xa_parent_locked(xas->xa, node); + offset = node->offset + 1; + node->count = 0; + node->nr_values = 0; + xas_update(xas, node); + xa_node_free(node); + if (node == top) + return; + node = parent; + } + } +} + +/* + * xas_expand adds nodes to the head of the tree until it has reached + * sufficient height to be able to contain @xas->xa_index + */ +static int xas_expand(struct xa_state *xas, void *head) +{ + struct xarray *xa = xas->xa; + struct xa_node *node = NULL; + unsigned int shift = 0; + unsigned long max = xas_max(xas); + + if (!head) { + if (max == 0) + return 0; + while ((max >> shift) >= XA_CHUNK_SIZE) + shift += XA_CHUNK_SHIFT; + return shift + XA_CHUNK_SHIFT; + } else if (xa_is_node(head)) { + node = xa_to_node(head); + shift = node->shift + XA_CHUNK_SHIFT; + } + xas->xa_node = NULL; + + while (max > max_index(head)) { + xa_mark_t mark = 0; + + XA_NODE_BUG_ON(node, shift > BITS_PER_LONG); + node = xas_alloc(xas, shift); + if (!node) + return -ENOMEM; + + node->count = 1; + if (xa_is_value(head)) + node->nr_values = 1; + RCU_INIT_POINTER(node->slots[0], head); + + /* Propagate the aggregated mark info to the new child */ + for (;;) { + if (xa_marked(xa, mark)) + node_set_mark(node, 0, mark); + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } + + /* + * Now that the new node is fully initialised, we can add + * it to the tree + */ + if (xa_is_node(head)) { + xa_to_node(head)->offset = 0; + rcu_assign_pointer(xa_to_node(head)->parent, node); + } + head = xa_mk_node(node); + rcu_assign_pointer(xa->xa_head, head); + xas_update(xas, node); + + shift += XA_CHUNK_SHIFT; + } + + xas->xa_node = node; + return shift; +} + +/* + * xas_create() - Create a slot to store an entry in. + * @xas: XArray operation state. + * + * Most users will not need to call this function directly, as it is called + * by xas_store(). It is useful for doing conditional store operations + * (see the xa_cmpxchg() implementation for an example). + * + * Return: If the slot already existed, returns the contents of this slot. + * If the slot was newly created, returns NULL. If it failed to create the + * slot, returns NULL and indicates the error in @xas. 
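+ *
+ * The conditional-store shape this makes possible (quoted from the
+ * xa_cmpxchg() implementation added later in this series) is:
+ *
+ *	do {
+ *		xas_lock(&xas);
+ *		curr = xas_load(&xas);
+ *		if (curr == old)
+ *			xas_store(&xas, entry);
+ *		xas_unlock(&xas);
+ *	} while (xas_nomem(&xas, gfp));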
+ */ +static void *xas_create(struct xa_state *xas) +{ + struct xarray *xa = xas->xa; + void *entry; + void __rcu **slot; + struct xa_node *node = xas->xa_node; + int shift; + unsigned int order = xas->xa_shift; + + if (xas_top(node)) { + entry = xa_head_locked(xa); + xas->xa_node = NULL; + shift = xas_expand(xas, entry); + if (shift < 0) + return NULL; + entry = xa_head_locked(xa); + slot = &xa->xa_head; + } else if (xas_error(xas)) { + return NULL; + } else if (node) { + unsigned int offset = xas->xa_offset; + + shift = node->shift; + entry = xa_entry_locked(xa, node, offset); + slot = &node->slots[offset]; + } else { + shift = 0; + entry = xa_head_locked(xa); + slot = &xa->xa_head; + } + + while (shift > order) { + shift -= XA_CHUNK_SHIFT; + if (!entry) { + node = xas_alloc(xas, shift); + if (!node) + break; + rcu_assign_pointer(*slot, xa_mk_node(node)); + } else if (xa_is_node(entry)) { + node = xa_to_node(entry); + } else { + break; + } + entry = xas_descend(xas, node); + slot = &node->slots[xas->xa_offset]; + } + + return entry; +} + +static void update_node(struct xa_state *xas, struct xa_node *node, + int count, int values) +{ + if (!node || (!count && !values)) + return; + + node->count += count; + node->nr_values += values; + XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); + XA_NODE_BUG_ON(node, node->nr_values > XA_CHUNK_SIZE); + xas_update(xas, node); + if (count < 0) + xas_delete_node(xas); +} + +/** + * xas_store() - Store this entry in the XArray. + * @xas: XArray operation state. + * @entry: New entry. + * + * If @xas is operating on a multi-index entry, the entry returned by this + * function is essentially meaningless (it may be an internal entry or it + * may be %NULL, even if there are non-NULL entries at some of the indices + * covered by the range). This is not a problem for any current users, + * and can be changed if needed. + * + * Return: The old entry at this index. + */ +void *xas_store(struct xa_state *xas, void *entry) +{ + struct xa_node *node; + void __rcu **slot = &xas->xa->xa_head; + unsigned int offset, max; + int count = 0; + int values = 0; + void *first, *next; + bool value = xa_is_value(entry); + + if (entry) + first = xas_create(xas); + else + first = xas_load(xas); + + if (xas_invalid(xas)) + return first; + node = xas->xa_node; + if (node && (xas->xa_shift < node->shift)) + xas->xa_sibs = 0; + if ((first == entry) && !xas->xa_sibs) + return first; + + next = first; + offset = xas->xa_offset; + max = xas->xa_offset + xas->xa_sibs; + if (node) { + slot = &node->slots[offset]; + if (xas->xa_sibs) + xas_squash_marks(xas); + } + if (!entry) + xas_init_marks(xas); + + for (;;) { + /* + * Must clear the marks before setting the entry to NULL, + * otherwise xas_for_each_marked may find a NULL entry and + * stop early. rcu_assign_pointer contains a release barrier + * so the mark clearing will appear to happen before the + * entry is set to NULL. 
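+ *
+ * (Concretely: a concurrent marked lookup may see the old entry with
+ * its marks already cleared, but it can never see a NULL entry that
+ * still has a mark set, which is what would cut a marked iteration
+ * short.)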
+ */ + rcu_assign_pointer(*slot, entry); + if (xa_is_node(next)) + xas_free_nodes(xas, xa_to_node(next)); + if (!node) + break; + count += !next - !entry; + values += !xa_is_value(first) - !value; + if (entry) { + if (offset == max) + break; + if (!xa_is_sibling(entry)) + entry = xa_mk_sibling(xas->xa_offset); + } else { + if (offset == XA_CHUNK_MASK) + break; + } + next = xa_entry_locked(xas->xa, node, ++offset); + if (!xa_is_sibling(next)) { + if (!entry && (offset > max)) + break; + first = next; + } + slot++; + } + + update_node(xas, node, count, values); + return first; +} +EXPORT_SYMBOL_GPL(xas_store); + /** * xas_get_mark() - Returns the state of this mark. * @xas: XArray operation state. @@ -240,6 +805,30 @@ void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark) } EXPORT_SYMBOL_GPL(xas_clear_mark); +/** + * xas_init_marks() - Initialise all marks for the entry + * @xas: Array operations state. + * + * Initialise all marks for the entry specified by @xas. If we're tracking + * free entries with a mark, we need to set it on all entries. All other + * marks are cleared. + * + * This implementation is not as efficient as it could be; we may walk + * up the tree multiple times. + */ +void xas_init_marks(const struct xa_state *xas) +{ + xa_mark_t mark = 0; + + for (;;) { + xas_clear_mark(xas, mark); + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } +} +EXPORT_SYMBOL_GPL(xas_init_marks); + /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. @@ -253,9 +842,19 @@ EXPORT_SYMBOL_GPL(xas_clear_mark); */ void xa_init_flags(struct xarray *xa, gfp_t flags) { + unsigned int lock_type; + static struct lock_class_key xa_lock_irq; + static struct lock_class_key xa_lock_bh; + spin_lock_init(&xa->xa_lock); xa->xa_flags = flags; xa->xa_head = NULL; + + lock_type = xa_lock_type(xa); + if (lock_type == XA_LOCK_IRQ) + lockdep_set_class(&xa->xa_lock, &xa_lock_irq); + else if (lock_type == XA_LOCK_BH) + lockdep_set_class(&xa->xa_lock, &xa_lock_bh); } EXPORT_SYMBOL(xa_init_flags); @@ -282,6 +881,100 @@ void *xa_load(struct xarray *xa, unsigned long index) } EXPORT_SYMBOL(xa_load); +static void *xas_result(struct xa_state *xas, void *curr) +{ + XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr)); + if (xas_error(xas)) + curr = xas->xa_node; + return curr; +} + +/** + * __xa_erase() - Erase this entry from the XArray while locked. + * @xa: XArray. + * @index: Index into array. + * + * If the entry at this index is a multi-index entry then all indices will + * be erased, and the entry will no longer be a multi-index entry. + * This function expects the xa_lock to be held on entry. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: The old entry at this index. + */ +void *__xa_erase(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + return xas_result(&xas, xas_store(&xas, NULL)); +} +EXPORT_SYMBOL_GPL(__xa_erase); + +/** + * xa_store() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * After this function returns, loads from this index will return @entry. + * Storing into an existing multislot entry updates the entry of every index. + * The marks associated with @index are unaffected unless @entry is %NULL. + * + * Context: Process context. Takes and releases the xa_lock. May sleep + * if the @gfp flags permit. 
+ * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry + * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation + * failed. + */ +void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return XA_ERROR(-EINVAL); + + do { + xas_lock(&xas); + curr = xas_store(&xas, entry); + xas_unlock(&xas); + } while (xas_nomem(&xas, gfp)); + + return xas_result(&xas, curr); +} +EXPORT_SYMBOL(xa_store); + +/** + * __xa_store() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * You must already be holding the xa_lock when calling this function. + * It will drop the lock if needed to allocate memory, and then reacquire + * it afterwards. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: The old entry at this index or xa_err() if an error happened. + */ +void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return XA_ERROR(-EINVAL); + + do { + curr = xas_store(&xas, entry); + } while (__xas_nomem(&xas, gfp)); + + return xas_result(&xas, curr); +} +EXPORT_SYMBOL(__xa_store); + /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index e63662db131b..05dca5c203f3 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -15,6 +15,7 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); +void bitmap_clear(unsigned long *map, unsigned int start, int len); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index 622266b197d0..c934572d935c 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -37,4 +37,6 @@ static inline bool arch_spin_is_locked(arch_spinlock_t *mutex) return true; } +#include + #endif diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 1379f1d78d0b..acf1afa01c5b 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -5,7 +5,7 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \ LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder xarray -CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o +CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o diff --git a/tools/testing/radix-tree/bitmap.c b/tools/testing/radix-tree/bitmap.c new file mode 100644 index 000000000000..66ec4a24a203 --- /dev/null +++ b/tools/testing/radix-tree/bitmap.c @@ -0,0 +1,23 @@ +/* lib/bitmap.c pulls in at least two other files. 
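+ * Rather than dragging those dependencies into the radix-tree test
+ * suite, this file carries a local copy of bitmap_clear(), which
+ * xas_squash_marks() now requires.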
*/ + +#include + +void bitmap_clear(unsigned long *map, unsigned int start, int len) +{ + unsigned long *p = map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (len - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (len) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + *p &= ~mask_to_clear; + } +} diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 5d06ac75a14d..4568248222ae 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -18,4 +18,8 @@ #define pr_debug printk #define pr_cont printk +#define __acquires(x) +#define __releases(x) +#define __must_hold(x) + #endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h new file mode 100644 index 000000000000..565fccdfe6e9 --- /dev/null +++ b/tools/testing/radix-tree/linux/lockdep.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_LOCKDEP_H +#define _LINUX_LOCKDEP_H +struct lock_class_key { + unsigned int a; +}; + +static inline void lockdep_set_class(spinlock_t *lock, + struct lock_class_key *key) +{ +} +#endif /* _LINUX_LOCKDEP_H */ -- cgit v1.2.3-71-gd317 From 41aec91f55985e7f14ee75fe2f6e7bcfff0d0fae Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 10 Nov 2017 15:34:55 -0500 Subject: xarray: Add XArray conditional store operations Like cmpxchg(), xa_cmpxchg will only store to the index if the current entry matches the old entry. It returns the current entry, which is usually more useful than the errno returned by radix_tree_insert(). For the users who really only want the errno, the xa_insert() wrapper provides a more convenient calling convention. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 60 ++++++++++++++++++++++++++++++++++++++++++ lib/test_xarray.c | 20 ++++++++++++++ lib/xarray.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 15eab63286ed..7f740f675b96 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -275,6 +275,8 @@ struct xarray { void xa_init_flags(struct xarray *, gfp_t flags); void *xa_load(struct xarray *, unsigned long index); void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); +void *xa_cmpxchg(struct xarray *, unsigned long index, + void *old, void *entry, gfp_t); bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -334,6 +336,34 @@ static inline void *xa_erase(struct xarray *xa, unsigned long index) return xa_store(xa, index, NULL, 0); } +/** + * xa_insert() - Store this entry in the XArray unless another entry is + * already present. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * If you would rather see the existing entry in the array, use xa_cmpxchg(). + * This function is for users who don't care what the entry is, only that + * one is present. + * + * Context: Process context. Takes and releases the xa_lock. + * May sleep if the @gfp flags permit. + * Return: 0 if the store succeeded. -EEXIST if another entry was present. 
+ * -ENOMEM if memory could not be allocated. + */ +static inline int xa_insert(struct xarray *xa, unsigned long index, + void *entry, gfp_t gfp) +{ + void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp); + if (!curr) + return 0; + if (xa_is_err(curr)) + return xa_err(curr); + return -EEXIST; +} + #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) #define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) @@ -355,9 +385,39 @@ static inline void *xa_erase(struct xarray *xa, unsigned long index) */ void *__xa_erase(struct xarray *, unsigned long index); void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); +void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old, + void *entry, gfp_t); void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); +/** + * __xa_insert() - Store this entry in the XArray unless another entry is + * already present. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * If you would rather see the existing entry in the array, use __xa_cmpxchg(). + * This function is for users who don't care what the entry is, only that + * one is present. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if the @gfp flags permit. + * Return: 0 if the store succeeded. -EEXIST if another entry was present. + * -ENOMEM if memory could not be allocated. + */ +static inline int __xa_insert(struct xarray *xa, unsigned long index, + void *entry, gfp_t gfp) +{ + void *curr = __xa_cmpxchg(xa, index, NULL, entry, gfp); + if (!curr) + return 0; + if (xa_is_err(curr)) + return xa_err(curr); + return -EEXIST; +} + /** * xa_erase_bh() - Erase this entry from the XArray. * @xa: XArray. diff --git a/lib/test_xarray.c b/lib/test_xarray.c index b711030fbc32..fb472258b639 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -198,6 +198,25 @@ static noinline void check_xa_shrink(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_cmpxchg(struct xarray *xa) +{ + void *FIVE = xa_mk_value(5); + void *SIX = xa_mk_value(6); + void *LOTS = xa_mk_value(12345678); + + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EEXIST); + XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, SIX, FIVE, GFP_KERNEL) != LOTS); + XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, LOTS, FIVE, GFP_KERNEL) != LOTS); + XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE); + XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL); + xa_erase_index(xa, 12345678); + xa_erase_index(xa, 5); + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_multi_store(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI @@ -274,6 +293,7 @@ static int xarray_checks(void) check_xa_load(&array); check_xa_mark(&array); check_xa_shrink(&array); + check_cmpxchg(&array); check_multi_store(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); diff --git a/lib/xarray.c b/lib/xarray.c index 4596a95ed9cd..2ba5a98ec618 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -975,6 +975,77 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) } EXPORT_SYMBOL(__xa_store); +/** + * xa_cmpxchg() - Conditionally replace an entry in the XArray. 
+ * @xa: XArray. + * @index: Index into array. + * @old: Old value to test against. + * @entry: New value to place in array. + * @gfp: Memory allocation flags. + * + * If the entry at @index is the same as @old, replace it with @entry. + * If the return value is equal to @old, then the exchange was successful. + * + * Context: Process context. Takes and releases the xa_lock. May sleep + * if the @gfp flags permit. + * Return: The old value at this index or xa_err() if an error happened. + */ +void *xa_cmpxchg(struct xarray *xa, unsigned long index, + void *old, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return XA_ERROR(-EINVAL); + + do { + xas_lock(&xas); + curr = xas_load(&xas); + if (curr == old) + xas_store(&xas, entry); + xas_unlock(&xas); + } while (xas_nomem(&xas, gfp)); + + return xas_result(&xas, curr); +} +EXPORT_SYMBOL(xa_cmpxchg); + +/** + * __xa_cmpxchg() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @old: Old value to test against. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * You must already be holding the xa_lock when calling this function. + * It will drop the lock if needed to allocate memory, and then reacquire + * it afterwards. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: The old entry at this index or xa_err() if an error happened. + */ +void *__xa_cmpxchg(struct xarray *xa, unsigned long index, + void *old, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return XA_ERROR(-EINVAL); + + do { + curr = xas_load(&xas); + if (curr == old) + xas_store(&xas, entry); + } while (__xas_nomem(&xas, gfp)); + + return xas_result(&xas, curr); +} +EXPORT_SYMBOL(__xa_cmpxchg); + /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. -- cgit v1.2.3-71-gd317 From b803b42823d0d9e8b6deccf01ffc2aba5d0738df Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 14 Nov 2017 08:30:11 -0500 Subject: xarray: Add XArray iterators The xa_for_each iterator allows the user to efficiently walk a range of the array, executing the loop body once for each entry in that range that matches the filter. This commit also includes xa_find() and xa_find_after() which are helper functions for xa_for_each() but may also be useful in their own right. In the xas family of functions, we have xas_for_each(), xas_find(), xas_next_entry(), xas_for_each_tagged(), xas_find_tagged(), xas_next_tagged() and xas_pause(). 
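A short usage sketch of the new iterator (the array, entry type and
helper are invented; note the five-argument form with an explicit
filter, and that the index must be initialised to the start of the
range):

	struct my_obj *obj;
	unsigned long index = 0;

	xa_for_each(&cache, obj, index, ULONG_MAX, XA_PRESENT) {
		release_obj(obj);
		xa_erase(&cache, index);  /* deleting during the walk is safe */
	}

Passing a mark such as XA_MARK_0 as the filter instead of XA_PRESENT
restricts the walk to entries with that mark set.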
Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 165 ++++++++++++++++++++++++++++ lib/test_xarray.c | 183 +++++++++++++++++++++++++++++++ lib/xarray.c | 292 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 640 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 7f740f675b96..66b10efa5a50 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -280,6 +280,10 @@ void *xa_cmpxchg(struct xarray *, unsigned long index, bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); +void *xa_find(struct xarray *xa, unsigned long *index, + unsigned long max, xa_mark_t) __attribute__((nonnull(2))); +void *xa_find_after(struct xarray *xa, unsigned long *index, + unsigned long max, xa_mark_t) __attribute__((nonnull(2))); /** * xa_init() - Initialise an empty XArray. @@ -364,6 +368,35 @@ static inline int xa_insert(struct xarray *xa, unsigned long index, return -EEXIST; } +/** + * xa_for_each() - Iterate over a portion of an XArray. + * @xa: XArray. + * @entry: Entry retrieved from array. + * @index: Index of @entry. + * @max: Maximum index to retrieve from array. + * @filter: Selection criterion. + * + * Initialise @index to the lowest index you want to retrieve from the + * array. During the iteration, @entry will have the value of the entry + * stored in @xa at @index. The iteration will skip all entries in the + * array which do not match @filter. You may modify @index during the + * iteration if you want to skip or reprocess indices. It is safe to modify + * the array during the iteration. At the end of the iteration, @entry will + * be set to NULL and @index will have a value less than or equal to max. + * + * xa_for_each() is O(n.log(n)) while xas_for_each() is O(n). You have + * to handle your own locking with xas_for_each(), and if you have to unlock + * after each iteration, it will also end up being O(n.log(n)). xa_for_each() + * will spin if it hits a retry entry; if you intend to see retry entries, + * you should use the xas_for_each() iterator instead. The xas_for_each() + * iterator will expand into more inline code than xa_for_each(). + * + * Context: Any context. Takes and releases the RCU lock. + */ +#define xa_for_each(xa, entry, index, max, filter) \ + for (entry = xa_find(xa, &index, max, filter); entry; \ + entry = xa_find_after(xa, &index, max, filter)) + #define xa_trylock(xa) spin_trylock(&(xa)->xa_lock) #define xa_lock(xa) spin_lock(&(xa)->xa_lock) #define xa_unlock(xa) spin_unlock(&(xa)->xa_lock) @@ -835,13 +868,16 @@ static inline bool xas_retry(struct xa_state *xas, const void *entry) void *xas_load(struct xa_state *); void *xas_store(struct xa_state *, void *entry); +void *xas_find(struct xa_state *, unsigned long max); bool xas_get_mark(const struct xa_state *, xa_mark_t); void xas_set_mark(const struct xa_state *, xa_mark_t); void xas_clear_mark(const struct xa_state *, xa_mark_t); +void *xas_find_marked(struct xa_state *, unsigned long max, xa_mark_t); void xas_init_marks(const struct xa_state *); bool xas_nomem(struct xa_state *, gfp_t); +void xas_pause(struct xa_state *); /** * xas_reload() - Refetch an entry from the xarray. @@ -914,4 +950,133 @@ static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update) xas->xa_update = update; } +/** + * xas_next_entry() - Advance iterator to next present entry. 
+ * @xas: XArray operation state. + * @max: Highest index to return. + * + * xas_next_entry() is an inline function to optimise xarray traversal for + * speed. It is equivalent to calling xas_find(), and will call xas_find() + * for all the hard cases. + * + * Return: The next present entry after the one currently referred to by @xas. + */ +static inline void *xas_next_entry(struct xa_state *xas, unsigned long max) +{ + struct xa_node *node = xas->xa_node; + void *entry; + + if (unlikely(xas_not_node(node) || node->shift || + xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK))) + return xas_find(xas, max); + + do { + if (unlikely(xas->xa_index >= max)) + return xas_find(xas, max); + if (unlikely(xas->xa_offset == XA_CHUNK_MASK)) + return xas_find(xas, max); + entry = xa_entry(xas->xa, node, xas->xa_offset + 1); + if (unlikely(xa_is_internal(entry))) + return xas_find(xas, max); + xas->xa_offset++; + xas->xa_index++; + } while (!entry); + + return entry; +} + +/* Private */ +static inline unsigned int xas_find_chunk(struct xa_state *xas, bool advance, + xa_mark_t mark) +{ + unsigned long *addr = xas->xa_node->marks[(__force unsigned)mark]; + unsigned int offset = xas->xa_offset; + + if (advance) + offset++; + if (XA_CHUNK_SIZE == BITS_PER_LONG) { + if (offset < XA_CHUNK_SIZE) { + unsigned long data = *addr & (~0UL << offset); + if (data) + return __ffs(data); + } + return XA_CHUNK_SIZE; + } + + return find_next_bit(addr, XA_CHUNK_SIZE, offset); +} + +/** + * xas_next_marked() - Advance iterator to next marked entry. + * @xas: XArray operation state. + * @max: Highest index to return. + * @mark: Mark to search for. + * + * xas_next_marked() is an inline function to optimise xarray traversal for + * speed. It is equivalent to calling xas_find_marked(), and will call + * xas_find_marked() for all the hard cases. + * + * Return: The next marked entry after the one currently referred to by @xas. + */ +static inline void *xas_next_marked(struct xa_state *xas, unsigned long max, + xa_mark_t mark) +{ + struct xa_node *node = xas->xa_node; + unsigned int offset; + + if (unlikely(xas_not_node(node) || node->shift)) + return xas_find_marked(xas, max, mark); + offset = xas_find_chunk(xas, true, mark); + xas->xa_offset = offset; + xas->xa_index = (xas->xa_index & ~XA_CHUNK_MASK) + offset; + if (xas->xa_index > max) + return NULL; + if (offset == XA_CHUNK_SIZE) + return xas_find_marked(xas, max, mark); + return xa_entry(xas->xa, node, offset); +} + +/* + * If iterating while holding a lock, drop the lock and reschedule + * every %XA_CHECK_SCHED loops. + */ +enum { + XA_CHECK_SCHED = 4096, +}; + +/** + * xas_for_each() - Iterate over a range of an XArray. + * @xas: XArray operation state. + * @entry: Entry retrieved from the array. + * @max: Maximum index to retrieve from array. + * + * The loop body will be executed for each entry present in the xarray + * between the current xas position and @max. @entry will be set to + * the entry retrieved from the xarray. It is safe to delete entries + * from the array in the loop body. You should hold either the RCU lock + * or the xa_lock while iterating. If you need to drop the lock, call + * xas_pause() first. + */ +#define xas_for_each(xas, entry, max) \ + for (entry = xas_find(xas, max); entry; \ + entry = xas_next_entry(xas, max)) + +/** + * xas_for_each_marked() - Iterate over a range of an XArray. + * @xas: XArray operation state. + * @entry: Entry retrieved from the array. + * @max: Maximum index to retrieve from array. + * @mark: Mark to search for. 
+ * + * The loop body will be executed for each marked entry in the xarray + * between the current xas position and @max. @entry will be set to + * the entry retrieved from the xarray. It is safe to delete entries + * from the array in the loop body. You should hold either the RCU lock + * or the xa_lock while iterating. If you need to drop the lock, call + * xas_pause() first. + */ +#define xas_for_each_marked(xas, entry, max, mark) \ + for (entry = xas_find_marked(xas, max, mark); entry; \ + entry = xas_next_marked(xas, max, mark)) + #endif /* _LINUX_XARRAY_H */ diff --git a/lib/test_xarray.c b/lib/test_xarray.c index fb472258b639..e3c2d4d00b15 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -75,6 +75,48 @@ static noinline void check_xa_err(struct xarray *xa) // XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL); } +static noinline void check_xas_retry(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + void *entry; + + xa_store_index(xa, 0, GFP_KERNEL); + xa_store_index(xa, 1, GFP_KERNEL); + + rcu_read_lock(); + XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_value(0)); + xa_erase_index(xa, 1); + XA_BUG_ON(xa, !xa_is_retry(xas_reload(&xas))); + XA_BUG_ON(xa, xas_retry(&xas, NULL)); + XA_BUG_ON(xa, xas_retry(&xas, xa_mk_value(0))); + xas_reset(&xas); + XA_BUG_ON(xa, xas.xa_node != XAS_RESTART); + XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0)); + XA_BUG_ON(xa, xas.xa_node != NULL); + + XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, !xa_is_internal(xas_reload(&xas))); + xas.xa_node = XAS_RESTART; + XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0)); + rcu_read_unlock(); + + /* Make sure we can iterate through retry entries */ + xas_lock(&xas); + xas_set(&xas, 0); + xas_store(&xas, XA_RETRY_ENTRY); + xas_set(&xas, 1); + xas_store(&xas, XA_RETRY_ENTRY); + + xas_set(&xas, 0); + xas_for_each(&xas, entry, ULONG_MAX) { + xas_store(&xas, xa_mk_value(xas.xa_index)); + } + xas_unlock(&xas); + + xa_erase_index(xa, 0); + xa_erase_index(xa, 1); +} + static noinline void check_xa_load(struct xarray *xa) { unsigned long i, j; @@ -217,6 +259,44 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_xas_erase(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + void *entry; + unsigned long i, j; + + for (i = 0; i < 200; i++) { + for (j = i; j < 2 * i + 17; j++) { + xas_set(&xas, j); + do { + xas_lock(&xas); + xas_store(&xas, xa_mk_value(j)); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + } + + xas_set(&xas, ULONG_MAX); + do { + xas_lock(&xas); + xas_store(&xas, xa_mk_value(0)); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + xas_lock(&xas); + xas_store(&xas, NULL); + + xas_set(&xas, 0); + j = i; + xas_for_each(&xas, entry, ULONG_MAX) { + XA_BUG_ON(xa, entry != xa_mk_value(j)); + xas_store(&xas, NULL); + j++; + } + xas_unlock(&xas); + XA_BUG_ON(xa, !xa_empty(xa)); + } +} + static noinline void check_multi_store(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI @@ -285,16 +365,119 @@ static noinline void check_multi_store(struct xarray *xa) #endif } +static noinline void check_multi_find(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + unsigned long index; + + xa_store_order(xa, 12, 2, xa_mk_value(12), GFP_KERNEL); + XA_BUG_ON(xa, xa_store_index(xa, 16, GFP_KERNEL) != NULL); + + index = 0; + XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) != + xa_mk_value(12)); + XA_BUG_ON(xa, index != 12); + index = 13; + XA_BUG_ON(xa, 
xa_find(xa, &index, ULONG_MAX, XA_PRESENT) != + xa_mk_value(12)); + XA_BUG_ON(xa, (index < 12) || (index >= 16)); + XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT) != + xa_mk_value(16)); + XA_BUG_ON(xa, index != 16); + + xa_erase_index(xa, 12); + xa_erase_index(xa, 16); + XA_BUG_ON(xa, !xa_empty(xa)); +#endif +} + +static noinline void check_multi_find_2(struct xarray *xa) +{ + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 10 : 1; + unsigned int i, j; + void *entry; + + for (i = 0; i < max_order; i++) { + unsigned long index = 1UL << i; + for (j = 0; j < index; j++) { + XA_STATE(xas, xa, j + index); + xa_store_index(xa, index - 1, GFP_KERNEL); + xa_store_order(xa, index, i, xa_mk_value(index), + GFP_KERNEL); + rcu_read_lock(); + xas_for_each(&xas, entry, ULONG_MAX) { + xa_erase_index(xa, index); + } + rcu_read_unlock(); + xa_erase_index(xa, index - 1); + XA_BUG_ON(xa, !xa_empty(xa)); + } + } +} + +static noinline void check_find(struct xarray *xa) +{ + unsigned long i, j, k; + + XA_BUG_ON(xa, !xa_empty(xa)); + + /* + * Check xa_find with all pairs between 0 and 99 inclusive, + * starting at every index between 0 and 99 + */ + for (i = 0; i < 100; i++) { + XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL); + xa_set_mark(xa, i, XA_MARK_0); + for (j = 0; j < i; j++) { + XA_BUG_ON(xa, xa_store_index(xa, j, GFP_KERNEL) != + NULL); + xa_set_mark(xa, j, XA_MARK_0); + for (k = 0; k < 100; k++) { + unsigned long index = k; + void *entry = xa_find(xa, &index, ULONG_MAX, + XA_PRESENT); + if (k <= j) + XA_BUG_ON(xa, index != j); + else if (k <= i) + XA_BUG_ON(xa, index != i); + else + XA_BUG_ON(xa, entry != NULL); + + index = k; + entry = xa_find(xa, &index, ULONG_MAX, + XA_MARK_0); + if (k <= j) + XA_BUG_ON(xa, index != j); + else if (k <= i) + XA_BUG_ON(xa, index != i); + else + XA_BUG_ON(xa, entry != NULL); + } + xa_erase_index(xa, j); + XA_BUG_ON(xa, xa_get_mark(xa, j, XA_MARK_0)); + XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0)); + } + xa_erase_index(xa, i); + XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0)); + } + XA_BUG_ON(xa, !xa_empty(xa)); + check_multi_find(xa); + check_multi_find_2(xa); +} + static DEFINE_XARRAY(array); static int xarray_checks(void) { check_xa_err(&array); + check_xas_retry(&array); check_xa_load(&array); check_xa_mark(&array); check_xa_shrink(&array); + check_xas_erase(&array); check_cmpxchg(&array); check_multi_store(&array); + check_find(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 
0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index 2ba5a98ec618..24494f42daa6 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -128,6 +128,11 @@ static unsigned int get_offset(unsigned long index, struct xa_node *node) return (index >> node->shift) & XA_CHUNK_MASK; } +static void xas_set_offset(struct xa_state *xas) +{ + xas->xa_offset = get_offset(xas->xa_index, xas->xa_node); +} + /* move the index either forwards (find) or backwards (sibling slot) */ static void xas_move_index(struct xa_state *xas, unsigned long offset) { @@ -136,6 +141,12 @@ static void xas_move_index(struct xa_state *xas, unsigned long offset) xas->xa_index += offset << shift; } +static void xas_advance(struct xa_state *xas) +{ + xas->xa_offset++; + xas_move_index(xas, xas->xa_offset); +} + static void *set_bounds(struct xa_state *xas) { xas->xa_node = XAS_BOUNDS; @@ -829,6 +840,202 @@ void xas_init_marks(const struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_init_marks); +/** + * xas_pause() - Pause a walk to drop a lock. + * @xas: XArray operation state. + * + * Some users need to pause a walk and drop the lock they're holding in + * order to yield to a higher priority thread or carry out an operation + * on an entry. Those users should call this function before they drop + * the lock. It resets the @xas to be suitable for the next iteration + * of the loop after the user has reacquired the lock. If most entries + * found during a walk require you to call xas_pause(), the xa_for_each() + * iterator may be more appropriate. + * + * Note that xas_pause() only works for forward iteration. If a user needs + * to pause a reverse iteration, we will need a xas_pause_rev(). + */ +void xas_pause(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + if (xas_invalid(xas)) + return; + + if (node) { + unsigned int offset = xas->xa_offset; + while (++offset < XA_CHUNK_SIZE) { + if (!xa_is_sibling(xa_entry(xas->xa, node, offset))) + break; + } + xas->xa_index += (offset - xas->xa_offset) << node->shift; + } else { + xas->xa_index++; + } + xas->xa_node = XAS_RESTART; +} +EXPORT_SYMBOL_GPL(xas_pause); + +/** + * xas_find() - Find the next present entry in the XArray. + * @xas: XArray operation state. + * @max: Highest index to return. + * + * If the @xas has not yet been walked to an entry, return the entry + * which has an index >= xas.xa_index. If it has been walked, the entry + * currently being pointed at has been processed, and so we move to the + * next entry. + * + * If no entry is found and the array is smaller than @max, the iterator + * is set to the smallest index not yet in the array. This allows @xas + * to be immediately passed to xas_store(). + * + * Return: The entry, if found, otherwise %NULL. 
+ */ +void *xas_find(struct xa_state *xas, unsigned long max) +{ + void *entry; + + if (xas_error(xas)) + return NULL; + + if (!xas->xa_node) { + xas->xa_index = 1; + return set_bounds(xas); + } else if (xas_top(xas->xa_node)) { + entry = xas_load(xas); + if (entry || xas_not_node(xas->xa_node)) + return entry; + } else if (!xas->xa_node->shift && + xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)) { + xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1; + } + + xas_advance(xas); + + while (xas->xa_node && (xas->xa_index <= max)) { + if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { + xas->xa_offset = xas->xa_node->offset + 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + continue; + } + + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (xa_is_node(entry)) { + xas->xa_node = xa_to_node(entry); + xas->xa_offset = 0; + continue; + } + if (entry && !xa_is_sibling(entry)) + return entry; + + xas_advance(xas); + } + + if (!xas->xa_node) + xas->xa_node = XAS_BOUNDS; + return NULL; +} +EXPORT_SYMBOL_GPL(xas_find); + +/** + * xas_find_marked() - Find the next marked entry in the XArray. + * @xas: XArray operation state. + * @max: Highest index to return. + * @mark: Mark number to search for. + * + * If the @xas has not yet been walked to an entry, return the marked entry + * which has an index >= xas.xa_index. If it has been walked, the entry + * currently being pointed at has been processed, and so we return the + * first marked entry with an index > xas.xa_index. + * + * If no marked entry is found and the array is smaller than @max, @xas is + * set to the bounds state and xas->xa_index is set to the smallest index + * not yet in the array. This allows @xas to be immediately passed to + * xas_store(). + * + * If no entry is found before @max is reached, @xas is set to the restart + * state. + * + * Return: The entry, if found, otherwise %NULL. 
+ */ +void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) +{ + bool advance = true; + unsigned int offset; + void *entry; + + if (xas_error(xas)) + return NULL; + + if (!xas->xa_node) { + xas->xa_index = 1; + goto out; + } else if (xas_top(xas->xa_node)) { + advance = false; + entry = xa_head(xas->xa); + xas->xa_node = NULL; + if (xas->xa_index > max_index(entry)) + goto bounds; + if (!xa_is_node(entry)) { + if (xa_marked(xas->xa, mark)) + return entry; + xas->xa_index = 1; + goto out; + } + xas->xa_node = xa_to_node(entry); + xas->xa_offset = xas->xa_index >> xas->xa_node->shift; + } + + while (xas->xa_index <= max) { + if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { + xas->xa_offset = xas->xa_node->offset + 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + if (!xas->xa_node) + break; + advance = false; + continue; + } + + if (!advance) { + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (xa_is_sibling(entry)) { + xas->xa_offset = xa_to_sibling(entry); + xas_move_index(xas, xas->xa_offset); + } + } + + offset = xas_find_chunk(xas, advance, mark); + if (offset > xas->xa_offset) { + advance = false; + xas_move_index(xas, offset); + /* Mind the wrap */ + if ((xas->xa_index - 1) >= max) + goto max; + xas->xa_offset = offset; + if (offset == XA_CHUNK_SIZE) + continue; + } + + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (!xa_is_node(entry)) + return entry; + xas->xa_node = xa_to_node(entry); + xas_set_offset(xas); + } + +out: + if (!max) + goto max; +bounds: + xas->xa_node = XAS_BOUNDS; + return NULL; +max: + xas->xa_node = XAS_RESTART; + return NULL; +} +EXPORT_SYMBOL_GPL(xas_find_marked); + /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. @@ -1152,6 +1359,91 @@ void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) } EXPORT_SYMBOL(xa_clear_mark); +/** + * xa_find() - Search the XArray for an entry. + * @xa: XArray. + * @indexp: Pointer to an index. + * @max: Maximum index to search to. + * @filter: Selection criterion. + * + * Finds the entry in @xa which matches the @filter, and has the lowest + * index that is at least @indexp and no more than @max. + * If an entry is found, @indexp is updated to be the index of the entry. + * This function is protected by the RCU read lock, so it may not find + * entries which are being simultaneously added. It will not return an + * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The entry, if found, otherwise %NULL. + */ +void *xa_find(struct xarray *xa, unsigned long *indexp, + unsigned long max, xa_mark_t filter) +{ + XA_STATE(xas, xa, *indexp); + void *entry; + + rcu_read_lock(); + do { + if ((__force unsigned int)filter < XA_MAX_MARKS) + entry = xas_find_marked(&xas, max, filter); + else + entry = xas_find(&xas, max); + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + if (entry) + *indexp = xas.xa_index; + return entry; +} +EXPORT_SYMBOL(xa_find); + +/** + * xa_find_after() - Search the XArray for a present entry. + * @xa: XArray. + * @indexp: Pointer to an index. + * @max: Maximum index to search to. + * @filter: Selection criterion. + * + * Finds the entry in @xa which matches the @filter and has the lowest + * index that is above @indexp and no more than @max. + * If an entry is found, @indexp is updated to be the index of the entry. 
+ * This function is protected by the RCU read lock, so it may miss entries + * which are being simultaneously added. It will not return an + * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The pointer, if found, otherwise %NULL. + */ +void *xa_find_after(struct xarray *xa, unsigned long *indexp, + unsigned long max, xa_mark_t filter) +{ + XA_STATE(xas, xa, *indexp + 1); + void *entry; + + rcu_read_lock(); + for (;;) { + if ((__force unsigned int)filter < XA_MAX_MARKS) + entry = xas_find_marked(&xas, max, filter); + else + entry = xas_find(&xas, max); + if (xas.xa_shift) { + if (xas.xa_index & ((1UL << xas.xa_shift) - 1)) + continue; + } else { + if (xas.xa_offset < (xas.xa_index & XA_CHUNK_MASK)) + continue; + } + if (!xas_retry(&xas, entry)) + break; + } + rcu_read_unlock(); + + if (entry) + *indexp = xas.xa_index; + return entry; +} +EXPORT_SYMBOL(xa_find_after); + #ifdef XA_DEBUG void xa_dump_node(const struct xa_node *node) { -- cgit v1.2.3-71-gd317 From 80a0a1a9a3cde9b23851e8eb7160e2786549306a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 14 Nov 2017 16:42:22 -0500 Subject: xarray: Extract entries from an XArray The xa_extract function combines the functionality of radix_tree_gang_lookup() and radix_tree_gang_lookup_tagged(). It extracts entries matching the specified filter into a normal array. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 2 ++ lib/xarray.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 66b10efa5a50..82ceed981924 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -284,6 +284,8 @@ void *xa_find(struct xarray *xa, unsigned long *index, unsigned long max, xa_mark_t) __attribute__((nonnull(2))); void *xa_find_after(struct xarray *xa, unsigned long *index, unsigned long max, xa_mark_t) __attribute__((nonnull(2))); +unsigned int xa_extract(struct xarray *, void **dst, unsigned long start, + unsigned long max, unsigned int n, xa_mark_t); /** * xa_init() - Initialise an empty XArray. diff --git a/lib/xarray.c b/lib/xarray.c index 24494f42daa6..70e04810c8c2 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1444,6 +1444,86 @@ void *xa_find_after(struct xarray *xa, unsigned long *indexp, } EXPORT_SYMBOL(xa_find_after); +static unsigned int xas_extract_present(struct xa_state *xas, void **dst, + unsigned long max, unsigned int n) +{ + void *entry; + unsigned int i = 0; + + rcu_read_lock(); + xas_for_each(xas, entry, max) { + if (xas_retry(xas, entry)) + continue; + dst[i++] = entry; + if (i == n) + break; + } + rcu_read_unlock(); + + return i; +} + +static unsigned int xas_extract_marked(struct xa_state *xas, void **dst, + unsigned long max, unsigned int n, xa_mark_t mark) +{ + void *entry; + unsigned int i = 0; + + rcu_read_lock(); + xas_for_each_marked(xas, entry, max, mark) { + if (xas_retry(xas, entry)) + continue; + dst[i++] = entry; + if (i == n) + break; + } + rcu_read_unlock(); + + return i; +} + +/** + * xa_extract() - Copy selected entries from the XArray into a normal array. + * @xa: The source XArray to copy from. + * @dst: The buffer to copy entries into. + * @start: The first index in the XArray eligible to be selected. + * @max: The last index in the XArray eligible to be selected. + * @n: The maximum number of entries to copy. + * @filter: Selection criterion. 
+ * + * Copies up to @n entries that match @filter from the XArray. The + * copied entries will have indices between @start and @max, inclusive. + * + * The @filter may be an XArray mark value, in which case entries which are + * marked with that mark will be copied. It may also be %XA_PRESENT, in + * which case all entries which are not NULL will be copied. + * + * The entries returned may not represent a snapshot of the XArray at a + * moment in time. For example, if another thread stores to index 5, then + * index 10, calling xa_extract() may return the old contents of index 5 + * and the new contents of index 10. Indices not modified while this + * function is running will not be skipped. + * + * If you need stronger guarantees, holding the xa_lock across calls to this + * function will prevent concurrent modification. + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The number of entries copied. + */ +unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, + unsigned long max, unsigned int n, xa_mark_t filter) +{ + XA_STATE(xas, xa, start); + + if (!n) + return 0; + + if ((__force unsigned int)filter < XA_MAX_MARKS) + return xas_extract_marked(&xas, dst, max, n, filter); + return xas_extract_present(&xas, dst, max, n); +} +EXPORT_SYMBOL(xa_extract); + #ifdef XA_DEBUG void xa_dump_node(const struct xa_node *node) { -- cgit v1.2.3-71-gd317 From 687149fca1f37c447e5d161e0a4a04cb2c880cb6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 17 Nov 2017 08:16:34 -0500 Subject: xarray: Destroy an XArray This function frees all the internal memory allocated to the xarray and reinitialises it to be empty. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 1 + lib/test_xarray.c | 34 ++++++++++++++++++++++++++++++++++ lib/xarray.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 82ceed981924..0a758fa3ed2c 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -286,6 +286,7 @@ void *xa_find_after(struct xarray *xa, unsigned long *index, unsigned long max, xa_mark_t) __attribute__((nonnull(2))); unsigned int xa_extract(struct xarray *, void **dst, unsigned long start, unsigned long max, unsigned int n, xa_mark_t); +void xa_destroy(struct xarray *); /** * xa_init() - Initialise an empty XArray. 
diff --git a/lib/test_xarray.c b/lib/test_xarray.c index e3c2d4d00b15..a96f67caa1c2 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -465,6 +465,39 @@ static noinline void check_find(struct xarray *xa) check_multi_find_2(xa); } +static noinline void check_destroy(struct xarray *xa) +{ + unsigned long index; + + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Destroying an empty array is a no-op */ + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Destroying an array with a single entry */ + for (index = 0; index < 1000; index++) { + xa_store_index(xa, index, GFP_KERNEL); + XA_BUG_ON(xa, xa_empty(xa)); + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); + } + + /* Destroying an array with a single entry at ULONG_MAX */ + xa_store(xa, ULONG_MAX, xa, GFP_KERNEL); + XA_BUG_ON(xa, xa_empty(xa)); + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); + +#ifdef CONFIG_XARRAY_MULTI + /* Destroying an array with a multi-index entry */ + xa_store_order(xa, 1 << 11, 11, xa, GFP_KERNEL); + XA_BUG_ON(xa, xa_empty(xa)); + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); +#endif +} + static DEFINE_XARRAY(array); static int xarray_checks(void) @@ -478,6 +511,7 @@ static int xarray_checks(void) check_cmpxchg(&array); check_multi_store(&array); check_find(&array); + check_destroy(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index 70e04810c8c2..057dbe0d3bf6 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1524,6 +1524,34 @@ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, } EXPORT_SYMBOL(xa_extract); +/** + * xa_destroy() - Free all internal data structures. + * @xa: XArray. + * + * After calling this function, the XArray is empty and has freed all memory + * allocated for its internal data structures. You are responsible for + * freeing the objects referenced by the XArray. + * + * Context: Any context. Takes and releases the xa_lock, interrupt-safe. + */ +void xa_destroy(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + unsigned long flags; + void *entry; + + xas.xa_node = NULL; + xas_lock_irqsave(&xas, flags); + entry = xa_head_locked(xa); + RCU_INIT_POINTER(xa->xa_head, NULL); + xas_init_marks(&xas); + /* lockdep checks we're still holding the lock in xas_free_nodes() */ + if (xa_is_node(entry)) + xas_free_nodes(&xas, xa_to_node(entry)); + xas_unlock_irqrestore(&xas, flags); +} +EXPORT_SYMBOL(xa_destroy); + #ifdef XA_DEBUG void xa_dump_node(const struct xa_node *node) { -- cgit v1.2.3-71-gd317 From 64d3e9a9e0cc51957d243dd2b0adc5d74ff5e128 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 1 Dec 2017 00:06:52 -0500 Subject: xarray: Step through an XArray The xas_next and xas_prev functions move the xas index by one position, and adjust the rest of the iterator state to match it. This is more efficient than calling xas_set() as it keeps the iterator at the leaves of the tree instead of walking the iterator from the root each time. 
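A rough sketch of the calling pattern (illustration only, not part of
this patch: dump_range() is a made-up example and assumes the range
holds only single-index entries stored earlier with xa_store()):

	static void dump_range(struct xarray *xa, unsigned long first,
			unsigned long last)
	{
		XA_STATE(xas, xa, first);
		unsigned long i;

		rcu_read_lock();
		for (i = first; i <= last; i++) {
			/* the first call acts as xas_load(); later calls step by one */
			void *entry = xas_next(&xas);

			if (entry)
				pr_info("index %lu is populated\n", i);
		}
		rcu_read_unlock();
	}

Stepping backwards with xas_prev() follows the same pattern.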
Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 67 ++++++++++++++++++++++++++++ lib/test_xarray.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/xarray.c | 74 +++++++++++++++++++++++++++++++ 3 files changed, 256 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 0a758fa3ed2c..381f94a66762 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -828,6 +828,12 @@ static inline bool xas_not_node(struct xa_node *node) return ((unsigned long)node & 3) || !node; } +/* True if the node represents RESTART or an error */ +static inline bool xas_frozen(struct xa_node *node) +{ + return (unsigned long)node & 2; +} + /* True if the node represents head-of-tree, RESTART or BOUNDS */ static inline bool xas_top(struct xa_node *node) { @@ -1082,4 +1088,65 @@ enum { for (entry = xas_find_marked(xas, max, mark); entry; \ entry = xas_next_marked(xas, max, mark)) +void *__xas_next(struct xa_state *); +void *__xas_prev(struct xa_state *); + +/** + * xas_prev() - Move iterator to previous index. + * @xas: XArray operation state. + * + * If the @xas was in an error state, it will remain in an error state + * and this function will return %NULL. If the @xas has never been walked, + * it will have the effect of calling xas_load(). Otherwise one will be + * subtracted from the index and the state will be walked to the correct + * location in the array for the next operation. + * + * If the iterator was referencing index 0, this function wraps + * around to %ULONG_MAX. + * + * Return: The entry at the new index. This may be %NULL or an internal + * entry. + */ +static inline void *xas_prev(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + if (unlikely(xas_not_node(node) || node->shift || + xas->xa_offset == 0)) + return __xas_prev(xas); + + xas->xa_index--; + xas->xa_offset--; + return xa_entry(xas->xa, node, xas->xa_offset); +} + +/** + * xas_next() - Move state to next index. + * @xas: XArray operation state. + * + * If the @xas was in an error state, it will remain in an error state + * and this function will return %NULL. If the @xas has never been walked, + * it will have the effect of calling xas_load(). Otherwise one will be + * added to the index and the state will be walked to the correct + * location in the array for the next operation. + * + * If the iterator was referencing index %ULONG_MAX, this function wraps + * around to 0. + * + * Return: The entry at the new index. This may be %NULL or an internal + * entry. 
+ */ +static inline void *xas_next(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + if (unlikely(xas_not_node(node) || node->shift || + xas->xa_offset == XA_CHUNK_MASK)) + return __xas_next(xas); + + xas->xa_index++; + xas->xa_offset++; + return xa_entry(xas->xa, node, xas->xa_offset); +} + #endif /* _LINUX_XARRAY_H */ diff --git a/lib/test_xarray.c b/lib/test_xarray.c index a96f67caa1c2..85ef1882dd3c 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -465,6 +465,120 @@ static noinline void check_find(struct xarray *xa) check_multi_find_2(xa); } +static noinline void check_move_small(struct xarray *xa, unsigned long idx) +{ + XA_STATE(xas, xa, 0); + unsigned long i; + + xa_store_index(xa, 0, GFP_KERNEL); + xa_store_index(xa, idx, GFP_KERNEL); + + rcu_read_lock(); + for (i = 0; i < idx * 4; i++) { + void *entry = xas_next(&xas); + if (i <= idx) + XA_BUG_ON(xa, xas.xa_node == XAS_RESTART); + XA_BUG_ON(xa, xas.xa_index != i); + if (i == 0 || i == idx) + XA_BUG_ON(xa, entry != xa_mk_value(i)); + else + XA_BUG_ON(xa, entry != NULL); + } + xas_next(&xas); + XA_BUG_ON(xa, xas.xa_index != i); + + do { + void *entry = xas_prev(&xas); + i--; + if (i <= idx) + XA_BUG_ON(xa, xas.xa_node == XAS_RESTART); + XA_BUG_ON(xa, xas.xa_index != i); + if (i == 0 || i == idx) + XA_BUG_ON(xa, entry != xa_mk_value(i)); + else + XA_BUG_ON(xa, entry != NULL); + } while (i > 0); + + xas_set(&xas, ULONG_MAX); + XA_BUG_ON(xa, xas_next(&xas) != NULL); + XA_BUG_ON(xa, xas.xa_index != ULONG_MAX); + XA_BUG_ON(xa, xas_next(&xas) != xa_mk_value(0)); + XA_BUG_ON(xa, xas.xa_index != 0); + XA_BUG_ON(xa, xas_prev(&xas) != NULL); + XA_BUG_ON(xa, xas.xa_index != ULONG_MAX); + rcu_read_unlock(); + + xa_erase_index(xa, 0); + xa_erase_index(xa, idx); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_move(struct xarray *xa) +{ + XA_STATE(xas, xa, (1 << 16) - 1); + unsigned long i; + + for (i = 0; i < (1 << 16); i++) + XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL); + + rcu_read_lock(); + do { + void *entry = xas_prev(&xas); + i--; + XA_BUG_ON(xa, entry != xa_mk_value(i)); + XA_BUG_ON(xa, i != xas.xa_index); + } while (i != 0); + + XA_BUG_ON(xa, xas_prev(&xas) != NULL); + XA_BUG_ON(xa, xas.xa_index != ULONG_MAX); + + do { + void *entry = xas_next(&xas); + XA_BUG_ON(xa, entry != xa_mk_value(i)); + XA_BUG_ON(xa, i != xas.xa_index); + i++; + } while (i < (1 << 16)); + rcu_read_unlock(); + + for (i = (1 << 8); i < (1 << 15); i++) + xa_erase_index(xa, i); + + i = xas.xa_index; + + rcu_read_lock(); + do { + void *entry = xas_prev(&xas); + i--; + if ((i < (1 << 8)) || (i >= (1 << 15))) + XA_BUG_ON(xa, entry != xa_mk_value(i)); + else + XA_BUG_ON(xa, entry != NULL); + XA_BUG_ON(xa, i != xas.xa_index); + } while (i != 0); + + XA_BUG_ON(xa, xas_prev(&xas) != NULL); + XA_BUG_ON(xa, xas.xa_index != ULONG_MAX); + + do { + void *entry = xas_next(&xas); + if ((i < (1 << 8)) || (i >= (1 << 15))) + XA_BUG_ON(xa, entry != xa_mk_value(i)); + else + XA_BUG_ON(xa, entry != NULL); + XA_BUG_ON(xa, i != xas.xa_index); + i++; + } while (i < (1 << 16)); + rcu_read_unlock(); + + xa_destroy(xa); + + for (i = 0; i < 16; i++) + check_move_small(xa, 1UL << i); + + for (i = 2; i < 16; i++) + check_move_small(xa, (1UL << i) - 1); +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -512,6 +626,7 @@ static int xarray_checks(void) check_multi_store(&array); check_find(&array); check_destroy(&array); + check_move(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); 
return (tests_run == tests_passed) ? 0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index 057dbe0d3bf6..303c46579598 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -876,6 +876,80 @@ void xas_pause(struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_pause); +/* + * __xas_prev() - Find the previous entry in the XArray. + * @xas: XArray operation state. + * + * Helper function for xas_prev() which handles all the complex cases + * out of line. + */ +void *__xas_prev(struct xa_state *xas) +{ + void *entry; + + if (!xas_frozen(xas->xa_node)) + xas->xa_index--; + if (xas_not_node(xas->xa_node)) + return xas_load(xas); + + if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node)) + xas->xa_offset--; + + while (xas->xa_offset == 255) { + xas->xa_offset = xas->xa_node->offset - 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + if (!xas->xa_node) + return set_bounds(xas); + } + + for (;;) { + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (!xa_is_node(entry)) + return entry; + + xas->xa_node = xa_to_node(entry); + xas_set_offset(xas); + } +} +EXPORT_SYMBOL_GPL(__xas_prev); + +/* + * __xas_next() - Find the next entry in the XArray. + * @xas: XArray operation state. + * + * Helper function for xas_next() which handles all the complex cases + * out of line. + */ +void *__xas_next(struct xa_state *xas) +{ + void *entry; + + if (!xas_frozen(xas->xa_node)) + xas->xa_index++; + if (xas_not_node(xas->xa_node)) + return xas_load(xas); + + if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node)) + xas->xa_offset++; + + while (xas->xa_offset == XA_CHUNK_SIZE) { + xas->xa_offset = xas->xa_node->offset + 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + if (!xas->xa_node) + return set_bounds(xas); + } + + for (;;) { + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (!xa_is_node(entry)) + return entry; + + xas->xa_node = xa_to_node(entry); + xas_set_offset(xas); + } +} +EXPORT_SYMBOL_GPL(__xas_next); + /** * xas_find() - Find the next present entry in the XArray. * @xas: XArray operation state. -- cgit v1.2.3-71-gd317 From 4e99d4e9579d3b950bf4b38d0d64eb1b9be78761 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 1 Jun 2018 22:46:02 -0400 Subject: xarray: Add xas_for_each_conflict This iterator iterates over each entry that is stored in the index or indices specified by the xa_state. This is intended for use for a conditional store of a multiindex entry, or to allow entries which are about to be removed from the xarray to be disposed of properly. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 17 +++++++++++++ lib/test_xarray.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/xarray.c | 61 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 381f94a66762..2664e718dbd3 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -878,6 +878,7 @@ static inline bool xas_retry(struct xa_state *xas, const void *entry) void *xas_load(struct xa_state *); void *xas_store(struct xa_state *, void *entry); void *xas_find(struct xa_state *, unsigned long max); +void *xas_find_conflict(struct xa_state *); bool xas_get_mark(const struct xa_state *, xa_mark_t); void xas_set_mark(const struct xa_state *, xa_mark_t); @@ -1088,6 +1089,22 @@ enum { for (entry = xas_find_marked(xas, max, mark); entry; \ entry = xas_next_marked(xas, max, mark)) +/** + * xas_for_each_conflict() - Iterate over a range of an XArray. 
+ * @xas: XArray operation state. + * @entry: Entry retrieved from the array. + * + * The loop body will be executed for each entry in the XArray that lies + * within the range specified by @xas. If the loop completes successfully, + * any entries that lie in this range will be replaced by @entry. The caller + * may break out of the loop; if they do so, the contents of the XArray will + * be unchanged. The operation may fail due to an out of memory condition. + * The caller may also call xa_set_err() to exit the loop while setting an + * error to record the reason. + */ +#define xas_for_each_conflict(xas, entry) \ + while ((entry = xas_find_conflict(xas))) + void *__xas_next(struct xa_state *); void *__xas_prev(struct xa_state *); diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 85ef1882dd3c..8eba3de1baea 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -365,6 +365,73 @@ static noinline void check_multi_store(struct xarray *xa) #endif } +static noinline void __check_store_iter(struct xarray *xa, unsigned long start, + unsigned int order, unsigned int present) +{ + XA_STATE_ORDER(xas, xa, start, order); + void *entry; + unsigned int count = 0; + +retry: + xas_lock(&xas); + xas_for_each_conflict(&xas, entry) { + XA_BUG_ON(xa, !xa_is_value(entry)); + XA_BUG_ON(xa, entry < xa_mk_value(start)); + XA_BUG_ON(xa, entry > xa_mk_value(start + (1UL << order) - 1)); + count++; + } + xas_store(&xas, xa_mk_value(start)); + xas_unlock(&xas); + if (xas_nomem(&xas, GFP_KERNEL)) { + count = 0; + goto retry; + } + XA_BUG_ON(xa, xas_error(&xas)); + XA_BUG_ON(xa, count != present); + XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_value(start)); + XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) != + xa_mk_value(start)); + xa_erase_index(xa, start); +} + +static noinline void check_store_iter(struct xarray *xa) +{ + unsigned int i, j; + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; + + for (i = 0; i < max_order; i++) { + unsigned int min = 1 << i; + unsigned int max = (2 << i) - 1; + __check_store_iter(xa, 0, i, 0); + XA_BUG_ON(xa, !xa_empty(xa)); + __check_store_iter(xa, min, i, 0); + XA_BUG_ON(xa, !xa_empty(xa)); + + xa_store_index(xa, min, GFP_KERNEL); + __check_store_iter(xa, min, i, 1); + XA_BUG_ON(xa, !xa_empty(xa)); + xa_store_index(xa, max, GFP_KERNEL); + __check_store_iter(xa, min, i, 1); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (j = 0; j < min; j++) + xa_store_index(xa, j, GFP_KERNEL); + __check_store_iter(xa, 0, i, min); + XA_BUG_ON(xa, !xa_empty(xa)); + for (j = 0; j < min; j++) + xa_store_index(xa, min + j, GFP_KERNEL); + __check_store_iter(xa, min, i, min); + XA_BUG_ON(xa, !xa_empty(xa)); + } +#ifdef CONFIG_XARRAY_MULTI + xa_store_index(xa, 63, GFP_KERNEL); + xa_store_index(xa, 65, GFP_KERNEL); + __check_store_iter(xa, 64, 2, 1); + xa_erase_index(xa, 63); +#endif + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_multi_find(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI @@ -627,6 +694,7 @@ static int xarray_checks(void) check_find(&array); check_destroy(&array); check_move(&array); + check_store_iter(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index 303c46579598..41f8ebc651f5 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1110,6 +1110,67 @@ max: } EXPORT_SYMBOL_GPL(xas_find_marked); +/** + * xas_find_conflict() - Find the next present entry in a range. + * @xas: XArray operation state. 
+ * + * The @xas describes both a range and a position within that range. + * + * Context: Any context. Expects xa_lock to be held. + * Return: The next entry in the range covered by @xas or %NULL. + */ +void *xas_find_conflict(struct xa_state *xas) +{ + void *curr; + + if (xas_error(xas)) + return NULL; + + if (!xas->xa_node) + return NULL; + + if (xas_top(xas->xa_node)) { + curr = xas_start(xas); + if (!curr) + return NULL; + while (xa_is_node(curr)) { + struct xa_node *node = xa_to_node(curr); + curr = xas_descend(xas, node); + } + if (curr) + return curr; + } + + if (xas->xa_node->shift > xas->xa_shift) + return NULL; + + for (;;) { + if (xas->xa_node->shift == xas->xa_shift) { + if ((xas->xa_offset & xas->xa_sibs) == xas->xa_sibs) + break; + } else if (xas->xa_offset == XA_CHUNK_MASK) { + xas->xa_offset = xas->xa_node->offset; + xas->xa_node = xa_parent_locked(xas->xa, xas->xa_node); + if (!xas->xa_node) + break; + continue; + } + curr = xa_entry_locked(xas->xa, xas->xa_node, ++xas->xa_offset); + if (xa_is_sibling(curr)) + continue; + while (xa_is_node(curr)) { + xas->xa_node = xa_to_node(curr); + xas->xa_offset = 0; + curr = xa_entry_locked(xas->xa, xas->xa_node, 0); + } + if (curr) + return curr; + } + xas->xa_offset -= xas->xa_sibs; + return NULL; +} +EXPORT_SYMBOL_GPL(xas_find_conflict); + /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. -- cgit v1.2.3-71-gd317 From 2264f5132fe45571139727ebdeb78696b35d1506 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 4 Dec 2017 00:11:48 -0500 Subject: xarray: Add xas_create_range This hopefully temporary function is useful for users who have not yet been converted to multi-index entries. Signed-off-by: Matthew Wilcox --- include/linux/xarray.h | 13 ++++++ lib/test_xarray.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/xarray.c | 50 +++++++++++++++++++++ 3 files changed, 182 insertions(+) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 2664e718dbd3..deae17aa0ed7 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -822,6 +822,17 @@ static inline bool xas_valid(const struct xa_state *xas) return !xas_invalid(xas); } +/** + * xas_is_node() - Does the xas point to a node? + * @xas: XArray operation state. + * + * Return: %true if the xas currently references a node. + */ +static inline bool xas_is_node(const struct xa_state *xas) +{ + return xas_valid(xas) && xas->xa_node; +} + /* True if the pointer is something other than a node */ static inline bool xas_not_node(struct xa_node *node) { @@ -889,6 +900,8 @@ void xas_init_marks(const struct xa_state *); bool xas_nomem(struct xa_state *, gfp_t); void xas_pause(struct xa_state *); +void xas_create_range(struct xa_state *); + /** * xas_reload() - Refetch an entry from the xarray. * @xas: XArray operation state. 
diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 8eba3de1baea..703370015d10 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -646,6 +646,124 @@ static noinline void check_move(struct xarray *xa) check_move_small(xa, (1UL << i) - 1); } +static noinline void xa_store_many_order(struct xarray *xa, + unsigned long index, unsigned order) +{ + XA_STATE_ORDER(xas, xa, index, order); + unsigned int i = 0; + + do { + xas_lock(&xas); + XA_BUG_ON(xa, xas_find_conflict(&xas)); + xas_create_range(&xas); + if (xas_error(&xas)) + goto unlock; + for (i = 0; i < (1U << order); i++) { + XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(index + i))); + xas_next(&xas); + } +unlock: + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + XA_BUG_ON(xa, xas_error(&xas)); +} + +static noinline void check_create_range_1(struct xarray *xa, + unsigned long index, unsigned order) +{ + unsigned long i; + + xa_store_many_order(xa, index, order); + for (i = index; i < index + (1UL << order); i++) + xa_erase_index(xa, i); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_create_range_2(struct xarray *xa, unsigned order) +{ + unsigned long i; + unsigned long nr = 1UL << order; + + for (i = 0; i < nr * nr; i += nr) + xa_store_many_order(xa, i, order); + for (i = 0; i < nr * nr; i++) + xa_erase_index(xa, i); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_create_range_3(void) +{ + XA_STATE(xas, NULL, 0); + xas_set_err(&xas, -EEXIST); + xas_create_range(&xas); + XA_BUG_ON(NULL, xas_error(&xas) != -EEXIST); +} + +static noinline void check_create_range_4(struct xarray *xa, + unsigned long index, unsigned order) +{ + XA_STATE_ORDER(xas, xa, index, order); + unsigned long base = xas.xa_index; + unsigned long i = 0; + + xa_store_index(xa, index, GFP_KERNEL); + do { + xas_lock(&xas); + xas_create_range(&xas); + if (xas_error(&xas)) + goto unlock; + for (i = 0; i < (1UL << order); i++) { + void *old = xas_store(&xas, xa_mk_value(base + i)); + if (xas.xa_index == index) + XA_BUG_ON(xa, old != xa_mk_value(base + i)); + else + XA_BUG_ON(xa, old != NULL); + xas_next(&xas); + } +unlock: + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + XA_BUG_ON(xa, xas_error(&xas)); + + for (i = base; i < base + (1UL << order); i++) + xa_erase_index(xa, i); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_create_range(struct xarray *xa) +{ + unsigned int order; + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 
12 : 1; + + for (order = 0; order < max_order; order++) { + check_create_range_1(xa, 0, order); + check_create_range_1(xa, 1U << order, order); + check_create_range_1(xa, 2U << order, order); + check_create_range_1(xa, 3U << order, order); + check_create_range_1(xa, 1U << 24, order); + if (order < 10) + check_create_range_2(xa, order); + + check_create_range_4(xa, 0, order); + check_create_range_4(xa, 1U << order, order); + check_create_range_4(xa, 2U << order, order); + check_create_range_4(xa, 3U << order, order); + check_create_range_4(xa, 1U << 24, order); + + check_create_range_4(xa, 1, order); + check_create_range_4(xa, (1U << order) + 1, order); + check_create_range_4(xa, (2U << order) + 1, order); + check_create_range_4(xa, (2U << order) - 1, order); + check_create_range_4(xa, (3U << order) + 1, order); + check_create_range_4(xa, (3U << order) - 1, order); + check_create_range_4(xa, (1U << 24) + 1, order); + } + + check_create_range_3(); +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -694,6 +812,7 @@ static int xarray_checks(void) check_find(&array); check_destroy(&array); check_move(&array); + check_create_range(&array); check_store_iter(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); diff --git a/lib/xarray.c b/lib/xarray.c index 41f8ebc651f5..ff37516fe832 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -637,6 +637,56 @@ static void *xas_create(struct xa_state *xas) return entry; } +/** + * xas_create_range() - Ensure that stores to this range will succeed + * @xas: XArray operation state. + * + * Creates all of the slots in the range covered by @xas. Sets @xas to + * create single-index entries and positions it at the beginning of the + * range. This is for the benefit of users which have not yet been + * converted to use multi-index entries. + */ +void xas_create_range(struct xa_state *xas) +{ + unsigned long index = xas->xa_index; + unsigned char shift = xas->xa_shift; + unsigned char sibs = xas->xa_sibs; + + xas->xa_index |= ((sibs + 1) << shift) - 1; + if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift) + xas->xa_offset |= sibs; + xas->xa_shift = 0; + xas->xa_sibs = 0; + + for (;;) { + xas_create(xas); + if (xas_error(xas)) + goto restore; + if (xas->xa_index <= (index | XA_CHUNK_MASK)) + goto success; + xas->xa_index -= XA_CHUNK_SIZE; + + for (;;) { + struct xa_node *node = xas->xa_node; + xas->xa_node = xa_parent_locked(xas->xa, node); + xas->xa_offset = node->offset - 1; + if (node->offset != 0) + break; + } + } + +restore: + xas->xa_shift = shift; + xas->xa_sibs = sibs; + xas->xa_index = index; + return; +success: + xas->xa_index = index; + if (xas->xa_node) + xas_set_offset(xas); +} +EXPORT_SYMBOL_GPL(xas_create_range); + static void update_node(struct xa_state *xas, struct xa_node *node, int count, int values) { -- cgit v1.2.3-71-gd317 From 9f14d4f1f1045f161fd4db8a8e194b7825c2874a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 1 Oct 2018 14:54:59 -0400 Subject: xarray: Add xa_reserve and xa_release This function reserves a slot in the XArray for users which need to acquire multiple locks before storing their entry in the tree and so cannot use a plain xa_store(). 
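A sketch of the sort of caller this is aimed at (illustration only, not
part of this patch; struct foo and its lock are invented for the example):

	static int publish_foo(struct xarray *xa, unsigned long index,
			struct foo *foo)
	{
		int err = xa_reserve(xa, index, GFP_KERNEL);

		if (err)
			return err;

		spin_lock(&foo->lock);	/* the other lock we must hold */
		if (foo->dead) {
			spin_unlock(&foo->lock);
			xa_release(xa, index);	/* hand the slot back */
			return -ENOENT;
		}
		/* the reserved slot already exists, so no allocation here */
		xa_store(xa, index, foo, GFP_NOWAIT);
		spin_unlock(&foo->lock);
		return 0;
	}

Until the final xa_store(), lookups of the reserved index see NULL.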
Signed-off-by: Matthew Wilcox --- Documentation/core-api/xarray.rst | 6 +++++ include/linux/xarray.h | 34 ++++++++++++++++++++++++++-- lib/test_xarray.c | 40 +++++++++++++++++++++++++++++++++ lib/xarray.c | 47 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 125 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index 4b101b4f3aa1..2b397da84bcd 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -293,6 +293,12 @@ to :c:func:`xas_retry`, and retry the operation if it returns ``true``. of this RCU period. You should restart the lookup from the head of the array. + * - Zero + - :c:func:`xa_is_zero` + - Zero entries appear as ``NULL`` through the Normal API, but occupy + an entry in the XArray which can be used to reserve the index for + future use. + Other internal entries may be added in the future. As far as possible, they will be handled by :c:func:`xas_retry`. diff --git a/include/linux/xarray.h b/include/linux/xarray.h index deae17aa0ed7..bed63d3cfea8 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -32,7 +32,8 @@ * The following internal entries have a special meaning: * * 0-62: Sibling entries - * 256: Retry entry + * 256: Zero entry + * 257: Retry entry * * Errors are also represented as internal entries, but use the negative * space (-4094 to -2). They're never stored in the slots array; only @@ -277,6 +278,7 @@ void *xa_load(struct xarray *, unsigned long index); void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *xa_cmpxchg(struct xarray *, unsigned long index, void *old, void *entry, gfp_t); +int xa_reserve(struct xarray *, unsigned long index, gfp_t); bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -371,6 +373,20 @@ static inline int xa_insert(struct xarray *xa, unsigned long index, return -EEXIST; } +/** + * xa_release() - Release a reserved entry. + * @xa: XArray. + * @index: Index of entry. + * + * After calling xa_reserve(), you can call this function to release the + * reservation. If the entry at @index has been stored to, this function + * will do nothing. + */ +static inline void xa_release(struct xarray *xa, unsigned long index) +{ + xa_cmpxchg(xa, index, NULL, NULL, 0); +} + /** * xa_for_each() - Iterate over a portion of an XArray. * @xa: XArray. @@ -658,7 +674,19 @@ static inline bool xa_is_sibling(const void *entry) (entry < xa_mk_sibling(XA_CHUNK_SIZE - 1)); } -#define XA_RETRY_ENTRY xa_mk_internal(256) +#define XA_ZERO_ENTRY xa_mk_internal(256) +#define XA_RETRY_ENTRY xa_mk_internal(257) + +/** + * xa_is_zero() - Is the entry a zero entry? + * @entry: Entry retrieved from the XArray + * + * Return: %true if the entry is a zero entry. + */ +static inline bool xa_is_zero(const void *entry) +{ + return unlikely(entry == XA_ZERO_ENTRY); +} /** * xa_is_retry() - Is the entry a retry entry? 
@@ -880,6 +908,8 @@ static inline void xas_reset(struct xa_state *xas) */ static inline bool xas_retry(struct xa_state *xas, const void *entry) { + if (xa_is_zero(entry)) + return true; if (!xa_is_retry(entry)) return false; xas_reset(xas); diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 703370015d10..6aafd411a5c3 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -259,6 +259,45 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_reserve(struct xarray *xa) +{ + void *entry; + unsigned long index = 0; + + /* An array with a reserved entry is not empty */ + XA_BUG_ON(xa, !xa_empty(xa)); + xa_reserve(xa, 12345678, GFP_KERNEL); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_load(xa, 12345678)); + xa_release(xa, 12345678); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Releasing a used entry does nothing */ + xa_reserve(xa, 12345678, GFP_KERNEL); + XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL); + xa_release(xa, 12345678); + xa_erase_index(xa, 12345678); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* cmpxchg sees a reserved entry as NULL */ + xa_reserve(xa, 12345678, GFP_KERNEL); + XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, NULL, xa_mk_value(12345678), + GFP_NOWAIT) != NULL); + xa_release(xa, 12345678); + xa_erase_index(xa, 12345678); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Can iterate through a reserved entry */ + xa_store_index(xa, 5, GFP_KERNEL); + xa_reserve(xa, 6, GFP_KERNEL); + xa_store_index(xa, 7, GFP_KERNEL); + + xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) { + XA_BUG_ON(xa, index != 5 && index != 7); + } + xa_destroy(xa); +} + static noinline void check_xas_erase(struct xarray *xa) { XA_STATE(xas, xa, 0); @@ -808,6 +847,7 @@ static int xarray_checks(void) check_xa_shrink(&array); check_xas_erase(&array); check_cmpxchg(&array); + check_reserve(&array); check_multi_store(&array); check_find(&array); check_destroy(&array); diff --git a/lib/xarray.c b/lib/xarray.c index ff37516fe832..546461914282 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1266,6 +1266,8 @@ void *xa_load(struct xarray *xa, unsigned long index) rcu_read_lock(); do { entry = xas_load(&xas); + if (xa_is_zero(entry)) + entry = NULL; } while (xas_retry(&xas, entry)); rcu_read_unlock(); @@ -1275,6 +1277,8 @@ EXPORT_SYMBOL(xa_load); static void *xas_result(struct xa_state *xas, void *curr) { + if (xa_is_zero(curr)) + return NULL; XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr)); if (xas_error(xas)) curr = xas->xa_node; @@ -1394,6 +1398,8 @@ void *xa_cmpxchg(struct xarray *xa, unsigned long index, do { xas_lock(&xas); curr = xas_load(&xas); + if (curr == XA_ZERO_ENTRY) + curr = NULL; if (curr == old) xas_store(&xas, entry); xas_unlock(&xas); @@ -1430,6 +1436,8 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, do { curr = xas_load(&xas); + if (curr == XA_ZERO_ENTRY) + curr = NULL; if (curr == old) xas_store(&xas, entry); } while (__xas_nomem(&xas, gfp)); @@ -1438,6 +1446,43 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, } EXPORT_SYMBOL(__xa_cmpxchg); +/** + * xa_reserve() - Reserve this index in the XArray. + * @xa: XArray. + * @index: Index into array. + * @gfp: Memory allocation flags. + * + * Ensures there is somewhere to store an entry at @index in the array. + * If there is already something stored at @index, this function does + * nothing. If there was nothing there, the entry is marked as reserved. + * Loads from @index will continue to see a %NULL pointer until a + * subsequent store to @index. 
+ * + * If you do not use the entry that you have reserved, call xa_release() + * or xa_erase() to free any unnecessary memory. + * + * Context: Process context. Takes and releases the xa_lock, IRQ or BH safe + * if specified in XArray flags. May sleep if the @gfp flags permit. + * Return: 0 if the reservation succeeded or -ENOMEM if it failed. + */ +int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + unsigned int lock_type = xa_lock_type(xa); + void *curr; + + do { + xas_lock_type(&xas, lock_type); + curr = xas_load(&xas); + if (!curr) + xas_store(&xas, XA_ZERO_ENTRY); + xas_unlock_type(&xas, lock_type); + } while (xas_nomem(&xas, gfp)); + + return xas_error(&xas); +} +EXPORT_SYMBOL(xa_reserve); + /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. @@ -1797,6 +1842,8 @@ void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift) pr_cont("retry (%ld)\n", xa_to_internal(entry)); else if (xa_is_sibling(entry)) pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry)); + else if (xa_is_zero(entry)) + pr_cont("zero (%ld)\n", xa_to_internal(entry)); else pr_cont("UNKNOWN ENTRY (%px)\n", entry); } -- cgit v1.2.3-71-gd317 From 371c752dc66948714ee3b66c3306f3ff1ff71d2e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 4 Jul 2018 10:50:12 -0400 Subject: xarray: Track free entries in an XArray Add the optional ability to track which entries in an XArray are free and provide xa_alloc() to replace most of the functionality of the IDR. Signed-off-by: Matthew Wilcox --- Documentation/core-api/xarray.rst | 23 +++++++-- include/linux/xarray.h | 101 ++++++++++++++++++++++++++++++++++++++ lib/test_xarray.c | 61 +++++++++++++++++++++++ lib/xarray.c | 88 +++++++++++++++++++++++++++++++-- 4 files changed, 266 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index 2b397da84bcd..463e4c798ec1 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -103,12 +103,25 @@ Finally, you can remove all entries from an XArray by calling to free the entries first. You can do this by iterating over all present entries in the XArray using the :c:func:`xa_for_each` iterator. +ID assignment +------------- + +You can call :c:func:`xa_alloc` to store the entry at any unused index +in the XArray. If you need to modify the array from interrupt context, +you can use :c:func:`xa_alloc_bh` or :c:func:`xa_alloc_irq` to disable +interrupts while allocating the ID. Unlike :c:func:`xa_store`, allocating +a ``NULL`` pointer does not delete an entry. Instead it reserves an +entry like :c:func:`xa_reserve` and you can release it using either +:c:func:`xa_erase` or :c:func:`xa_release`. To use ID assignment, the +XArray must be defined with :c:func:`DEFINE_XARRAY_ALLOC`, or initialised +by passing ``XA_FLAGS_ALLOC`` to :c:func:`xa_init_flags`, + Memory allocation ----------------- -The :c:func:`xa_store`, :c:func:`xa_cmpxchg`, :c:func:`xa_reserve` -and :c:func:`xa_insert` functions take a gfp_t parameter in case -the XArray needs to allocate memory to store this entry. +The :c:func:`xa_store`, :c:func:`xa_cmpxchg`, :c:func:`xa_alloc`, +:c:func:`xa_reserve` and :c:func:`xa_insert` functions take a gfp_t +parameter in case the XArray needs to allocate memory to store this entry. If the entry is being deleted, no memory allocation needs to be performed, and the GFP flags specified will be ignored. 
@@ -143,6 +156,9 @@ Takes xa_lock internally: * :c:func:`xa_erase_bh` * :c:func:`xa_erase_irq` * :c:func:`xa_cmpxchg` + * :c:func:`xa_alloc` + * :c:func:`xa_alloc_bh` + * :c:func:`xa_alloc_irq` * :c:func:`xa_destroy` * :c:func:`xa_set_mark` * :c:func:`xa_clear_mark` @@ -152,6 +168,7 @@ Assumes xa_lock held on entry: * :c:func:`__xa_insert` * :c:func:`__xa_erase` * :c:func:`__xa_cmpxchg` + * :c:func:`__xa_alloc` * :c:func:`__xa_set_mark` * :c:func:`__xa_clear_mark` diff --git a/include/linux/xarray.h b/include/linux/xarray.h index bed63d3cfea8..e0b57af52e9b 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -205,6 +205,7 @@ typedef unsigned __bitwise xa_mark_t; #define XA_MARK_2 ((__force xa_mark_t)2U) #define XA_PRESENT ((__force xa_mark_t)8U) #define XA_MARK_MAX XA_MARK_2 +#define XA_FREE_MARK XA_MARK_0 enum xa_lock_type { XA_LOCK_IRQ = 1, @@ -217,9 +218,12 @@ enum xa_lock_type { */ #define XA_FLAGS_LOCK_IRQ ((__force gfp_t)XA_LOCK_IRQ) #define XA_FLAGS_LOCK_BH ((__force gfp_t)XA_LOCK_BH) +#define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U) #define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \ (__force unsigned)(mark))) +#define XA_FLAGS_ALLOC (XA_FLAGS_TRACK_FREE | XA_FLAGS_MARK(XA_FREE_MARK)) + /** * struct xarray - The anchor of the XArray. * @xa_lock: Lock that protects the contents of the XArray. @@ -273,6 +277,15 @@ struct xarray { */ #define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0) +/** + * DEFINE_XARRAY_ALLOC() - Define an XArray which can allocate IDs. + * @name: A string that names your XArray. + * + * This is intended for file scope definitions of allocating XArrays. + * See also DEFINE_XARRAY(). + */ +#define DEFINE_XARRAY_ALLOC(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC) + void xa_init_flags(struct xarray *, gfp_t flags); void *xa_load(struct xarray *, unsigned long index); void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); @@ -439,6 +452,7 @@ void *__xa_erase(struct xarray *, unsigned long index); void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old, void *entry, gfp_t); +int __xa_alloc(struct xarray *, u32 *id, u32 max, void *entry, gfp_t); void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); @@ -518,6 +532,93 @@ static inline void *xa_erase_irq(struct xarray *xa, unsigned long index) return entry; } +/** + * xa_alloc() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @max: Maximum ID to allocate (inclusive). + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @id and @max. + * Updates the @id pointer with the index, then stores the entry at that + * index. A concurrent lookup will not see an uninitialised @id. + * + * Context: Process context. Takes and releases the xa_lock. May sleep if + * the @gfp flags permit. + * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if + * there is no more space in the XArray. + */ +static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, + gfp_t gfp) +{ + int err; + + xa_lock(xa); + err = __xa_alloc(xa, id, max, entry, gfp); + xa_unlock(xa); + + return err; +} + +/** + * xa_alloc_bh() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @max: Maximum ID to allocate (inclusive). + * @entry: New entry. 
+ * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @id and @max. + * Updates the @id pointer with the index, then stores the entry at that + * index. A concurrent lookup will not see an uninitialised @id. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling softirqs. May sleep if the @gfp flags permit. + * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if + * there is no more space in the XArray. + */ +static inline int xa_alloc_bh(struct xarray *xa, u32 *id, u32 max, void *entry, + gfp_t gfp) +{ + int err; + + xa_lock_bh(xa); + err = __xa_alloc(xa, id, max, entry, gfp); + xa_unlock_bh(xa); + + return err; +} + +/** + * xa_alloc_irq() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @max: Maximum ID to allocate (inclusive). + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @id and @max. + * Updates the @id pointer with the index, then stores the entry at that + * index. A concurrent lookup will not see an uninitialised @id. + * + * Context: Process context. Takes and releases the xa_lock while + * disabling interrupts. May sleep if the @gfp flags permit. + * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if + * there is no more space in the XArray. + */ +static inline int xa_alloc_irq(struct xarray *xa, u32 *id, u32 max, void *entry, + gfp_t gfp) +{ + int err; + + xa_lock_irq(xa); + err = __xa_alloc(xa, id, max, entry, gfp); + xa_unlock_irq(xa); + + return err; +} + /* Everything below here is the Advanced API. Proceed with caution. */ /* diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 6aafd411a5c3..a752e6a37e6f 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -33,6 +33,15 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) return xa_store(xa, index, xa_mk_value(index & LONG_MAX), gfp); } +static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + u32 id = 0; + + XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_value(index & LONG_MAX), + gfp) != 0); + XA_BUG_ON(xa, id != index); +} + static void xa_erase_index(struct xarray *xa, unsigned long index) { XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_value(index & LONG_MAX)); @@ -404,6 +413,57 @@ static noinline void check_multi_store(struct xarray *xa) #endif } +static DEFINE_XARRAY_ALLOC(xa0); + +static noinline void check_xa_alloc(void) +{ + int i; + u32 id; + + /* An empty array should assign 0 to the first alloc */ + xa_alloc_index(&xa0, 0, GFP_KERNEL); + + /* Erasing it should make the array empty again */ + xa_erase_index(&xa0, 0); + XA_BUG_ON(&xa0, !xa_empty(&xa0)); + + /* And it should assign 0 again */ + xa_alloc_index(&xa0, 0, GFP_KERNEL); + + /* The next assigned ID should be 1 */ + xa_alloc_index(&xa0, 1, GFP_KERNEL); + xa_erase_index(&xa0, 1); + + /* Storing a value should mark it used */ + xa_store_index(&xa0, 1, GFP_KERNEL); + xa_alloc_index(&xa0, 2, GFP_KERNEL); + + /* If we then erase 0, it should be free */ + xa_erase_index(&xa0, 0); + xa_alloc_index(&xa0, 0, GFP_KERNEL); + + xa_erase_index(&xa0, 1); + xa_erase_index(&xa0, 2); + + for (i = 1; i < 5000; i++) { + xa_alloc_index(&xa0, i, GFP_KERNEL); + } + + xa_destroy(&xa0); + + id = 0xfffffffeU; + XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0), + GFP_KERNEL) != 0); + XA_BUG_ON(&xa0, id != 0xfffffffeU); + XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0), + 
GFP_KERNEL) != 0); + XA_BUG_ON(&xa0, id != 0xffffffffU); + XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0), + GFP_KERNEL) != -ENOSPC); + XA_BUG_ON(&xa0, id != 0xffffffffU); + xa_destroy(&xa0); +} + static noinline void __check_store_iter(struct xarray *xa, unsigned long start, unsigned int order, unsigned int present) { @@ -849,6 +909,7 @@ static int xarray_checks(void) check_cmpxchg(&array); check_reserve(&array); check_multi_store(&array); + check_xa_alloc(); check_find(&array); check_destroy(&array); check_move(&array); diff --git a/lib/xarray.c b/lib/xarray.c index 546461914282..9a0d49d4b5f0 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -52,6 +52,11 @@ static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type) xas_unlock(xas); } +static inline bool xa_track_free(const struct xarray *xa) +{ + return xa->xa_flags & XA_FLAGS_TRACK_FREE; +} + static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) { if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) @@ -94,6 +99,11 @@ static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark) return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE); } +static inline void node_mark_all(struct xa_node *node, xa_mark_t mark) +{ + bitmap_fill(node_marks(node, mark), XA_CHUNK_SIZE); +} + #define mark_inc(mark) do { \ mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \ } while (0) @@ -416,6 +426,8 @@ static void xas_shrink(struct xa_state *xas) xas->xa_node = XAS_BOUNDS; RCU_INIT_POINTER(xa->xa_head, entry); + if (xa_track_free(xa) && !node_get_mark(node, 0, XA_FREE_MARK)) + xa_mark_clear(xa, XA_FREE_MARK); node->count = 0; node->nr_values = 0; @@ -549,8 +561,15 @@ static int xas_expand(struct xa_state *xas, void *head) /* Propagate the aggregated mark info to the new child */ for (;;) { - if (xa_marked(xa, mark)) + if (xa_track_free(xa) && mark == XA_FREE_MARK) { + node_mark_all(node, XA_FREE_MARK); + if (!xa_marked(xa, XA_FREE_MARK)) { + node_clear_mark(node, 0, XA_FREE_MARK); + xa_mark_set(xa, XA_FREE_MARK); + } + } else if (xa_marked(xa, mark)) { node_set_mark(node, 0, mark); + } if (mark == XA_MARK_MAX) break; mark_inc(mark); @@ -624,6 +643,8 @@ static void *xas_create(struct xa_state *xas) node = xas_alloc(xas, shift); if (!node) break; + if (xa_track_free(xa)) + node_mark_all(node, XA_FREE_MARK); rcu_assign_pointer(*slot, xa_mk_node(node)); } else if (xa_is_node(entry)) { node = xa_to_node(entry); @@ -882,7 +903,10 @@ void xas_init_marks(const struct xa_state *xas) xa_mark_t mark = 0; for (;;) { - xas_clear_mark(xas, mark); + if (xa_track_free(xas->xa) && mark == XA_FREE_MARK) + xas_set_mark(xas, mark); + else + xas_clear_mark(xas, mark); if (mark == XA_MARK_MAX) break; mark_inc(mark); @@ -1333,6 +1357,8 @@ void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) do { xas_lock(&xas); curr = xas_store(&xas, entry); + if (xa_track_free(xa) && entry) + xas_clear_mark(&xas, XA_FREE_MARK); xas_unlock(&xas); } while (xas_nomem(&xas, gfp)); @@ -1365,6 +1391,8 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) do { curr = xas_store(&xas, entry); + if (xa_track_free(xa) && entry) + xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); return xas_result(&xas, curr); @@ -1400,8 +1428,11 @@ void *xa_cmpxchg(struct xarray *xa, unsigned long index, curr = xas_load(&xas); if (curr == XA_ZERO_ENTRY) curr = NULL; - if (curr == old) + if (curr == old) { xas_store(&xas, entry); + if (xa_track_free(xa) && entry) + xas_clear_mark(&xas, 
XA_FREE_MARK); + } xas_unlock(&xas); } while (xas_nomem(&xas, gfp)); @@ -1438,8 +1469,11 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, curr = xas_load(&xas); if (curr == XA_ZERO_ENTRY) curr = NULL; - if (curr == old) + if (curr == old) { xas_store(&xas, entry); + if (xa_track_free(xa) && entry) + xas_clear_mark(&xas, XA_FREE_MARK); + } } while (__xas_nomem(&xas, gfp)); return xas_result(&xas, curr); @@ -1483,6 +1517,52 @@ int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) } EXPORT_SYMBOL(xa_reserve); +/** + * __xa_alloc() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @max: Maximum ID to allocate (inclusive). + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @id and @max. + * Updates the @id pointer with the index, then stores the entry at that + * index. A concurrent lookup will not see an uninitialised @id. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if + * there is no more space in the XArray. + */ +int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, 0); + int err; + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return -EINVAL; + if (WARN_ON_ONCE(!xa_track_free(xa))) + return -EINVAL; + + if (!entry) + entry = XA_ZERO_ENTRY; + + do { + xas.xa_index = *id; + xas_find_marked(&xas, max, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) + xas_set_err(&xas, -ENOSPC); + xas_store(&xas, entry); + xas_clear_mark(&xas, XA_FREE_MARK); + } while (__xas_nomem(&xas, gfp)); + + err = xas_error(&xas); + if (!err) + *id = xas.xa_index; + return err; +} +EXPORT_SYMBOL(__xa_alloc); + /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. -- cgit v1.2.3-71-gd317 From f32f004cddf86d63a9c42994bbce9f4e2f07c9fa Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 4 Jul 2018 15:42:46 -0400 Subject: ida: Convert to XArray Use the XA_TRACK_FREE ability to track which entries have a free bit, similarly to how it uses the radix tree's IDR_FREE tag. This eliminates the per-cpu ida_bitmap preload, and fixes the memory consumption regression I introduced when making the IDR able to store any pointer. Signed-off-by: Matthew Wilcox --- include/linux/idr.h | 18 +- lib/idr.c | 379 +++++++++++++++++++----------------- lib/radix-tree.c | 71 ------- tools/testing/radix-tree/idr-test.c | 8 +- 4 files changed, 213 insertions(+), 263 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 3ec8628ce17f..60daf34b625d 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -214,8 +214,7 @@ static inline void idr_preload_end(void) ++id, (entry) = idr_get_next((idr), &(id))) /* - * IDA - IDR based id allocator, use when translation from id to - * pointer isn't necessary. + * IDA - ID Allocator, use when translation from id to pointer isn't necessary. 
*/ #define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ #define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long)) @@ -225,14 +224,14 @@ struct ida_bitmap { unsigned long bitmap[IDA_BITMAP_LONGS]; }; -DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap); - struct ida { - struct radix_tree_root ida_rt; + struct xarray xa; }; +#define IDA_INIT_FLAGS (XA_FLAGS_LOCK_IRQ | XA_FLAGS_ALLOC) + #define IDA_INIT(name) { \ - .ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT), \ + .xa = XARRAY_INIT(name, IDA_INIT_FLAGS) \ } #define DEFINE_IDA(name) struct ida name = IDA_INIT(name) @@ -292,7 +291,7 @@ static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp) static inline void ida_init(struct ida *ida) { - INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT); + xa_init_flags(&ida->xa, IDA_INIT_FLAGS); } #define ida_simple_get(ida, start, end, gfp) \ @@ -301,9 +300,6 @@ static inline void ida_init(struct ida *ida) static inline bool ida_is_empty(const struct ida *ida) { - return radix_tree_empty(&ida->ida_rt); + return xa_empty(&ida->xa); } - -/* in lib/radix-tree.c */ -int ida_pre_get(struct ida *ida, gfp_t gfp_mask); #endif /* __IDR_H__ */ diff --git a/lib/idr.c b/lib/idr.c index 9a366b5be2c2..3c20ad9b0463 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -6,8 +6,6 @@ #include #include -DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); - /** * idr_alloc_u32() - Allocate an ID. * @idr: IDR handle. @@ -320,6 +318,9 @@ EXPORT_SYMBOL(idr_replace); * free the individual IDs in it. You can use ida_is_empty() to find * out whether the IDA has any IDs currently allocated. * + * The IDA handles its own locking. It is safe to call any of the IDA + * functions without synchronisation in your code. + * * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward * limitation, it should be quite straightforward to raise the maximum. */ @@ -327,151 +328,197 @@ EXPORT_SYMBOL(idr_replace); /* * Developer's notes: * - * The IDA uses the functionality provided by the IDR & radix tree to store - * bitmaps in each entry. The IDR_FREE tag means there is at least one bit - * free, unlike the IDR where it means at least one entry is free. + * The IDA uses the functionality provided by the XArray to store bitmaps in + * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap + * have been set. * - * I considered telling the radix tree that each slot is an order-10 node - * and storing the bit numbers in the radix tree, but the radix tree can't - * allow a single multiorder entry at index 0, which would significantly - * increase memory consumption for the IDA. So instead we divide the index - * by the number of bits in the leaf bitmap before doing a radix tree lookup. + * I considered telling the XArray that each slot is an order-10 node + * and indexing by bit number, but the XArray can't allow a single multi-index + * entry in the head, which would significantly increase memory consumption + * for the IDA. So instead we divide the index by the number of bits in the + * leaf bitmap before doing a radix tree lookup. * * As an optimisation, if there are only a few low bits set in any given * leaf, instead of allocating a 128-byte bitmap, we store the bits - * directly in the entry. - * - * We allow the radix tree 'exceptional' count to get out of date. Nothing - * in the IDA nor the radix tree code checks it. 
If it becomes important - * to maintain an accurate exceptional count, switch the rcu_assign_pointer() - * calls to radix_tree_iter_replace() which will correct the exceptional - * count. - * - * The IDA always requires a lock to alloc/free. If we add a 'test_bit' + * as a value entry. Value entries never have the XA_FREE_MARK cleared + * because we can always convert them into a bitmap entry. + * + * It would be possible to optimise further; once we've run out of a + * single 128-byte bitmap, we currently switch to a 576-byte node, put + * the 128-byte bitmap in the first entry and then start allocating extra + * 128-byte entries. We could instead use the 512 bytes of the node's + * data as a bitmap before moving to that scheme. I do not believe this + * is a worthwhile optimisation; Rasmus Villemoes surveyed the current + * users of the IDA and almost none of them use more than 1024 entries. + * Those that do use more than the 8192 IDs that the 512 bytes would + * provide. + * + * The IDA always uses a lock to alloc/free. If we add a 'test_bit' * equivalent, it will still need locking. Going to RCU lookup would require * using RCU to free bitmaps, and that's not trivial without embedding an * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte * bitmap, which is excessive. */ -#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1) - -static int ida_get_new_above(struct ida *ida, int start) +/** + * ida_alloc_range() - Allocate an unused ID. + * @ida: IDA handle. + * @min: Lowest ID to allocate. + * @max: Highest ID to allocate. + * @gfp: Memory allocation flags. + * + * Allocate an ID between @min and @max, inclusive. The allocated ID will + * not exceed %INT_MAX, even if @max is larger. + * + * Context: Any context. + * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, + * or %-ENOSPC if there are no free IDs. 
+ */ +int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, + gfp_t gfp) { - struct radix_tree_root *root = &ida->ida_rt; - void __rcu **slot; - struct radix_tree_iter iter; - struct ida_bitmap *bitmap; - unsigned long index; - unsigned bit; - int new; - - index = start / IDA_BITMAP_BITS; - bit = start % IDA_BITMAP_BITS; - - slot = radix_tree_iter_init(&iter, index); - for (;;) { - if (slot) - slot = radix_tree_next_slot(slot, &iter, - RADIX_TREE_ITER_TAGGED); - if (!slot) { - slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX); - if (IS_ERR(slot)) { - if (slot == ERR_PTR(-ENOMEM)) - return -EAGAIN; - return PTR_ERR(slot); + XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS); + unsigned bit = min % IDA_BITMAP_BITS; + unsigned long flags; + struct ida_bitmap *bitmap, *alloc = NULL; + + if ((int)min < 0) + return -ENOSPC; + + if ((int)max < 0) + max = INT_MAX; + +retry: + xas_lock_irqsave(&xas, flags); +next: + bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK); + if (xas.xa_index > min / IDA_BITMAP_BITS) + bit = 0; + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; + + if (xa_is_value(bitmap)) { + unsigned long tmp = xa_to_value(bitmap); + + if (bit < BITS_PER_XA_VALUE) { + bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit); + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; + if (bit < BITS_PER_XA_VALUE) { + tmp |= 1UL << bit; + xas_store(&xas, xa_mk_value(tmp)); + goto out; } } - if (iter.index > index) - bit = 0; - new = iter.index * IDA_BITMAP_BITS; - bitmap = rcu_dereference_raw(*slot); - if (xa_is_value(bitmap)) { - unsigned long tmp = xa_to_value(bitmap); - int vbit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, - bit); - if (vbit < BITS_PER_XA_VALUE) { - tmp |= 1UL << vbit; - rcu_assign_pointer(*slot, xa_mk_value(tmp)); - return new + vbit; - } - bitmap = this_cpu_xchg(ida_bitmap, NULL); - if (!bitmap) - return -EAGAIN; - bitmap->bitmap[0] = tmp; - rcu_assign_pointer(*slot, bitmap); + bitmap = alloc; + if (!bitmap) + bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); + if (!bitmap) + goto alloc; + bitmap->bitmap[0] = tmp; + xas_store(&xas, bitmap); + if (xas_error(&xas)) { + bitmap->bitmap[0] = 0; + goto out; } + } - if (bitmap) { - bit = find_next_zero_bit(bitmap->bitmap, - IDA_BITMAP_BITS, bit); - new += bit; - if (new < 0) - return -ENOSPC; - if (bit == IDA_BITMAP_BITS) - continue; + if (bitmap) { + bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit); + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; + if (bit == IDA_BITMAP_BITS) + goto next; - __set_bit(bit, bitmap->bitmap); - if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) - radix_tree_iter_tag_clear(root, &iter, - IDR_FREE); + __set_bit(bit, bitmap->bitmap); + if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) + xas_clear_mark(&xas, XA_FREE_MARK); + } else { + if (bit < BITS_PER_XA_VALUE) { + bitmap = xa_mk_value(1UL << bit); } else { - new += bit; - if (new < 0) - return -ENOSPC; - if (bit < BITS_PER_XA_VALUE) { - bitmap = xa_mk_value(1UL << bit); - } else { - bitmap = this_cpu_xchg(ida_bitmap, NULL); - if (!bitmap) - return -EAGAIN; - __set_bit(bit, bitmap->bitmap); - } - radix_tree_iter_replace(root, &iter, slot, bitmap); + bitmap = alloc; + if (!bitmap) + bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); + if (!bitmap) + goto alloc; + __set_bit(bit, bitmap->bitmap); } - - return new; + xas_store(&xas, bitmap); + } +out: + xas_unlock_irqrestore(&xas, flags); + if (xas_nomem(&xas, gfp)) { + xas.xa_index = min / IDA_BITMAP_BITS; + bit = min % 
IDA_BITMAP_BITS; + goto retry; } + if (bitmap != alloc) + kfree(alloc); + if (xas_error(&xas)) + return xas_error(&xas); + return xas.xa_index * IDA_BITMAP_BITS + bit; +alloc: + xas_unlock_irqrestore(&xas, flags); + alloc = kzalloc(sizeof(*bitmap), gfp); + if (!alloc) + return -ENOMEM; + xas_set(&xas, min / IDA_BITMAP_BITS); + bit = min % IDA_BITMAP_BITS; + goto retry; +nospc: + xas_unlock_irqrestore(&xas, flags); + return -ENOSPC; } +EXPORT_SYMBOL(ida_alloc_range); -static void ida_remove(struct ida *ida, int id) +/** + * ida_free() - Release an allocated ID. + * @ida: IDA handle. + * @id: Previously allocated ID. + * + * Context: Any context. + */ +void ida_free(struct ida *ida, unsigned int id) { - unsigned long index = id / IDA_BITMAP_BITS; - unsigned offset = id % IDA_BITMAP_BITS; + XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS); + unsigned bit = id % IDA_BITMAP_BITS; struct ida_bitmap *bitmap; - unsigned long *btmp; - struct radix_tree_iter iter; - void __rcu **slot; + unsigned long flags; - slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index); - if (!slot) - goto err; + BUG_ON((int)id < 0); + + xas_lock_irqsave(&xas, flags); + bitmap = xas_load(&xas); - bitmap = rcu_dereference_raw(*slot); if (xa_is_value(bitmap)) { - btmp = (unsigned long *)slot; - offset += 1; /* Intimate knowledge of the value encoding */ - if (offset >= BITS_PER_LONG) + unsigned long v = xa_to_value(bitmap); + if (bit >= BITS_PER_XA_VALUE) + goto err; + if (!(v & (1UL << bit))) goto err; + v &= ~(1UL << bit); + if (!v) + goto delete; + xas_store(&xas, xa_mk_value(v)); } else { - btmp = bitmap->bitmap; - } - if (!test_bit(offset, btmp)) - goto err; - - __clear_bit(offset, btmp); - radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); - if (xa_is_value(bitmap)) { - if (xa_to_value(rcu_dereference_raw(*slot)) == 0) - radix_tree_iter_delete(&ida->ida_rt, &iter, slot); - } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) { - kfree(bitmap); - radix_tree_iter_delete(&ida->ida_rt, &iter, slot); + if (!test_bit(bit, bitmap->bitmap)) + goto err; + __clear_bit(bit, bitmap->bitmap); + xas_set_mark(&xas, XA_FREE_MARK); + if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) { + kfree(bitmap); +delete: + xas_store(&xas, NULL); + } } + xas_unlock_irqrestore(&xas, flags); return; err: + xas_unlock_irqrestore(&xas, flags); WARN(1, "ida_free called for id=%d which is not allocated.\n", id); } +EXPORT_SYMBOL(ida_free); /** * ida_destroy() - Free all IDs. @@ -486,80 +533,60 @@ static void ida_remove(struct ida *ida, int id) */ void ida_destroy(struct ida *ida) { + XA_STATE(xas, &ida->xa, 0); + struct ida_bitmap *bitmap; unsigned long flags; - struct radix_tree_iter iter; - void __rcu **slot; - xa_lock_irqsave(&ida->ida_rt, flags); - radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { - struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); + xas_lock_irqsave(&xas, flags); + xas_for_each(&xas, bitmap, ULONG_MAX) { if (!xa_is_value(bitmap)) kfree(bitmap); - radix_tree_iter_delete(&ida->ida_rt, &iter, slot); + xas_store(&xas, NULL); } - xa_unlock_irqrestore(&ida->ida_rt, flags); + xas_unlock_irqrestore(&xas, flags); } EXPORT_SYMBOL(ida_destroy); -/** - * ida_alloc_range() - Allocate an unused ID. - * @ida: IDA handle. - * @min: Lowest ID to allocate. - * @max: Highest ID to allocate. - * @gfp: Memory allocation flags. - * - * Allocate an ID between @min and @max, inclusive. The allocated ID will - * not exceed %INT_MAX, even if @max is larger. - * - * Context: Any context. 
- * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, - * or %-ENOSPC if there are no free IDs. - */ -int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, - gfp_t gfp) -{ - int id = 0; - unsigned long flags; - - if ((int)min < 0) - return -ENOSPC; +#ifndef __KERNEL__ +extern void xa_dump_index(unsigned long index, unsigned int shift); +#define IDA_CHUNK_SHIFT ilog2(IDA_BITMAP_BITS) - if ((int)max < 0) - max = INT_MAX; - -again: - xa_lock_irqsave(&ida->ida_rt, flags); - id = ida_get_new_above(ida, min); - if (id > (int)max) { - ida_remove(ida, id); - id = -ENOSPC; - } - xa_unlock_irqrestore(&ida->ida_rt, flags); +static void ida_dump_entry(void *entry, unsigned long index) +{ + unsigned long i; + + if (!entry) + return; + + if (xa_is_node(entry)) { + struct xa_node *node = xa_to_node(entry); + unsigned int shift = node->shift + IDA_CHUNK_SHIFT + + XA_CHUNK_SHIFT; + + xa_dump_index(index * IDA_BITMAP_BITS, shift); + xa_dump_node(node); + for (i = 0; i < XA_CHUNK_SIZE; i++) + ida_dump_entry(node->slots[i], + index | (i << node->shift)); + } else if (xa_is_value(entry)) { + xa_dump_index(index * IDA_BITMAP_BITS, ilog2(BITS_PER_LONG)); + pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry); + } else { + struct ida_bitmap *bitmap = entry; - if (unlikely(id == -EAGAIN)) { - if (!ida_pre_get(ida, gfp)) - return -ENOMEM; - goto again; + xa_dump_index(index * IDA_BITMAP_BITS, IDA_CHUNK_SHIFT); + pr_cont("bitmap: %p data", bitmap); + for (i = 0; i < IDA_BITMAP_LONGS; i++) + pr_cont(" %lx", bitmap->bitmap[i]); + pr_cont("\n"); } - - return id; } -EXPORT_SYMBOL(ida_alloc_range); -/** - * ida_free() - Release an allocated ID. - * @ida: IDA handle. - * @id: Previously allocated ID. - * - * Context: Any context. - */ -void ida_free(struct ida *ida, unsigned int id) +static void ida_dump(struct ida *ida) { - unsigned long flags; - - BUG_ON((int)id < 0); - xa_lock_irqsave(&ida->ida_rt, flags); - ida_remove(ida, id); - xa_unlock_irqrestore(&ida->ida_rt, flags); + struct xarray *xa = &ida->xa; + pr_debug("ida: %p node %p free %d\n", ida, xa->xa_head, + xa->xa_flags >> ROOT_TAG_SHIFT); + ida_dump_entry(xa->xa_head, 0); } -EXPORT_SYMBOL(ida_free); +#endif diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3479d93c32b9..68702061464f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -255,54 +255,6 @@ static unsigned long next_index(unsigned long index, return (index & ~node_maxindex(node)) + (offset << node->shift); } -#ifndef __KERNEL__ -static void dump_ida_node(void *entry, unsigned long index) -{ - unsigned long i; - - if (!entry) - return; - - if (radix_tree_is_internal_node(entry)) { - struct radix_tree_node *node = entry_to_node(entry); - - pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n", - node, node->offset, index * IDA_BITMAP_BITS, - ((index | node_maxindex(node)) + 1) * - IDA_BITMAP_BITS - 1, - node->parent, node->tags[0][0], node->shift, - node->count); - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) - dump_ida_node(node->slots[i], - index | (i << node->shift)); - } else if (xa_is_value(entry)) { - pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n", - entry, (int)(index & RADIX_TREE_MAP_MASK), - index * IDA_BITMAP_BITS, - index * IDA_BITMAP_BITS + BITS_PER_XA_VALUE, - xa_to_value(entry)); - } else { - struct ida_bitmap *bitmap = entry; - - pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap, - (int)(index & RADIX_TREE_MAP_MASK), - index * IDA_BITMAP_BITS, - (index + 1) * 
IDA_BITMAP_BITS - 1); - for (i = 0; i < IDA_BITMAP_LONGS; i++) - pr_cont(" %lx", bitmap->bitmap[i]); - pr_cont("\n"); - } -} - -static void ida_dump(struct ida *ida) -{ - struct radix_tree_root *root = &ida->ida_rt; - pr_debug("ida: %p node %p free %d\n", ida, root->xa_head, - root->xa_flags >> ROOT_TAG_SHIFT); - dump_ida_node(root->xa_head, 0); -} -#endif - /* * This assumes that the caller has performed appropriate preallocation, and * that the caller has pinned this thread of control to the current CPU. @@ -2039,27 +1991,6 @@ void idr_preload(gfp_t gfp_mask) } EXPORT_SYMBOL(idr_preload); -int ida_pre_get(struct ida *ida, gfp_t gfp) -{ - /* - * The IDA API has no preload_end() equivalent. Instead, - * ida_get_new() can return -EAGAIN, prompting the caller - * to return to the ida_pre_get() step. - */ - if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE)) - preempt_enable(); - - if (!this_cpu_read(ida_bitmap)) { - struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp); - if (!bitmap) - return 0; - if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap)) - kfree(bitmap); - } - - return 1; -} - void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max) @@ -2201,8 +2132,6 @@ static int radix_tree_cpu_dead(unsigned int cpu) kmem_cache_free(radix_tree_node_cachep, node); rtp->nr--; } - kfree(per_cpu(ida_bitmap, cpu)); - per_cpu(ida_bitmap, cpu) = NULL; return 0; } diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index a5a4494922a6..1b63bdb7688f 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -402,16 +402,15 @@ void ida_check_nomem(void) */ void ida_check_conv_user(void) { -#if 0 DEFINE_IDA(ida); unsigned long i; - radix_tree_cpu_dead(1); for (i = 0; i < 1000000; i++) { int id = ida_alloc(&ida, GFP_NOWAIT); if (id == -ENOMEM) { - IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) != - BITS_PER_XA_VALUE); + IDA_BUG_ON(&ida, ((i % IDA_BITMAP_BITS) != + BITS_PER_XA_VALUE) && + ((i % IDA_BITMAP_BITS) != 0)); id = ida_alloc(&ida, GFP_KERNEL); } else { IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) == @@ -420,7 +419,6 @@ void ida_check_conv_user(void) IDA_BUG_ON(&ida, id != i); } ida_destroy(&ida); -#endif } void ida_check_random(void) -- cgit v1.2.3-71-gd317 From eb797a8ee0ab4cd03df556980ce7bf167cadaa50 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 5 Mar 2018 22:46:03 -0500 Subject: page cache: Rearrange address_space Change i_pages from a radix_tree_root to an xarray, convert the documentation into kernel-doc format and change the order of the elements to pack them better on 64-bit systems. Signed-off-by: Matthew Wilcox --- include/linux/fs.h | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6c0b4a1c22ff..d126cad0f621 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -403,24 +403,40 @@ int pagecache_write_end(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); +/** + * struct address_space - Contents of a cacheable, mappable object. + * @host: Owner, either the inode or the block_device. + * @i_pages: Cached pages. + * @gfp_mask: Memory allocation flags to use for allocating pages. + * @i_mmap_writable: Number of VM_SHARED mappings. + * @i_mmap: Tree of private and shared mappings. + * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable. 
+ * @nrpages: Number of page entries, protected by the i_pages lock. + * @nrexceptional: Shadow or DAX entries, protected by the i_pages lock. + * @writeback_index: Writeback starts here. + * @a_ops: Methods. + * @flags: Error bits and flags (AS_*). + * @wb_err: The most recent error which has occurred. + * @private_lock: For use by the owner of the address_space. + * @private_list: For use by the owner of the address_space. + * @private_data: For use by the owner of the address_space. + */ struct address_space { - struct inode *host; /* owner: inode, block_device */ - struct radix_tree_root i_pages; /* cached pages */ - atomic_t i_mmap_writable;/* count VM_SHARED mappings */ - struct rb_root_cached i_mmap; /* tree of private and shared mappings */ - struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ - /* Protected by the i_pages lock */ - unsigned long nrpages; /* number of total pages */ - /* number of shadow or DAX exceptional entries */ + struct inode *host; + struct xarray i_pages; + gfp_t gfp_mask; + atomic_t i_mmap_writable; + struct rb_root_cached i_mmap; + struct rw_semaphore i_mmap_rwsem; + unsigned long nrpages; unsigned long nrexceptional; - pgoff_t writeback_index;/* writeback starts here */ - const struct address_space_operations *a_ops; /* methods */ - unsigned long flags; /* error bits */ - spinlock_t private_lock; /* for use by the address_space */ - gfp_t gfp_mask; /* implicit gfp mask for allocations */ - struct list_head private_list; /* for use by the address_space */ - void *private_data; /* ditto */ + pgoff_t writeback_index; + const struct address_space_operations *a_ops; + unsigned long flags; errseq_t wb_err; + spinlock_t private_lock; + struct list_head private_list; + void *private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* * On most architectures that alignment is already the case; but -- cgit v1.2.3-71-gd317 From 0d3f92966629e536b0c5c2355c1ada8e21c245f6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 21 Nov 2017 14:07:06 -0500 Subject: page cache: Convert hole search to XArray The page cache offers the ability to search for a miss in the previous or next N locations. Rather than teach the XArray about the page cache's definition of a miss, use xas_prev() and xas_next() to search the page array. This should be more efficient as it does not have to start the lookup from the top for each index. 
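For illustration, a caller that wants to know how many pages are cached
contiguously from a given index can now write something like the sketch
below. cached_run_length() is not part of this patch; only
page_cache_next_miss() is.

	/* Sketch only: length of the cached run starting at @start. */
	static unsigned long cached_run_length(struct address_space *mapping,
					       pgoff_t start, unsigned long max_scan)
	{
		pgoff_t miss;

		rcu_read_lock();
		miss = page_cache_next_miss(mapping, start, max_scan);
		rcu_read_unlock();

		/* 0 if @start itself is a gap; ignores rare wrap-around. */
		return miss - start;
	}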
Signed-off-by: Matthew Wilcox --- fs/nfs/blocklayout/blocklayout.c | 2 +- include/linux/pagemap.h | 4 +- mm/filemap.c | 110 ++++++++++++++++++--------------------- mm/readahead.c | 4 +- 4 files changed, 55 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 06cb0c1d9aee..d3781cd983f6 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -896,7 +896,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (end != inode->i_mapping->nrpages) { rcu_read_lock(); - end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); + end = page_cache_next_miss(mapping, idx + 1, ULONG_MAX); rcu_read_unlock(); } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b1bd2186e6d2..cf9ad413eee9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -241,9 +241,9 @@ static inline gfp_t readahead_gfp_mask(struct address_space *x) typedef int filler_t(void *, struct page *); -pgoff_t page_cache_next_hole(struct address_space *mapping, +pgoff_t page_cache_next_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan); -pgoff_t page_cache_prev_hole(struct address_space *mapping, +pgoff_t page_cache_prev_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan); #define FGP_ACCESSED 0x00000001 diff --git a/mm/filemap.c b/mm/filemap.c index 4de14e75c4ec..714d3d0f60f5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1326,86 +1326,76 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, } /** - * page_cache_next_hole - find the next hole (not-present entry) - * @mapping: mapping - * @index: index - * @max_scan: maximum range to search - * - * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the - * lowest indexed hole. - * - * Returns: the index of the hole if found, otherwise returns an index - * outside of the set specified (in which case 'return - index >= - * max_scan' will be true). In rare cases of index wrap-around, 0 will - * be returned. - * - * page_cache_next_hole may be called under rcu_read_lock. However, - * like radix_tree_gang_lookup, this will not atomically search a - * snapshot of the tree at a single point in time. For example, if a - * hole is created at index 5, then subsequently a hole is created at - * index 10, page_cache_next_hole covering both indexes may return 10 - * if called under rcu_read_lock. + * page_cache_next_miss() - Find the next gap in the page cache. + * @mapping: Mapping. + * @index: Index. + * @max_scan: Maximum range to search. + * + * Search the range [index, min(index + max_scan - 1, ULONG_MAX)] for the + * gap with the lowest index. + * + * This function may be called under the rcu_read_lock. However, this will + * not atomically search a snapshot of the cache at a single point in time. + * For example, if a gap is created at index 5, then subsequently a gap is + * created at index 10, page_cache_next_miss covering both indices may + * return 10 if called under the rcu_read_lock. + * + * Return: The index of the gap if found, otherwise an index outside the + * range specified (in which case 'return - index >= max_scan' will be true). + * In the rare case of index wrap-around, 0 will be returned. 
*/ -pgoff_t page_cache_next_hole(struct address_space *mapping, +pgoff_t page_cache_next_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan) { - unsigned long i; + XA_STATE(xas, &mapping->i_pages, index); - for (i = 0; i < max_scan; i++) { - struct page *page; - - page = radix_tree_lookup(&mapping->i_pages, index); - if (!page || xa_is_value(page)) + while (max_scan--) { + void *entry = xas_next(&xas); + if (!entry || xa_is_value(entry)) break; - index++; - if (index == 0) + if (xas.xa_index == 0) break; } - return index; + return xas.xa_index; } -EXPORT_SYMBOL(page_cache_next_hole); +EXPORT_SYMBOL(page_cache_next_miss); /** - * page_cache_prev_hole - find the prev hole (not-present entry) - * @mapping: mapping - * @index: index - * @max_scan: maximum range to search - * - * Search backwards in the range [max(index-max_scan+1, 0), index] for - * the first hole. - * - * Returns: the index of the hole if found, otherwise returns an index - * outside of the set specified (in which case 'index - return >= - * max_scan' will be true). In rare cases of wrap-around, ULONG_MAX - * will be returned. - * - * page_cache_prev_hole may be called under rcu_read_lock. However, - * like radix_tree_gang_lookup, this will not atomically search a - * snapshot of the tree at a single point in time. For example, if a - * hole is created at index 10, then subsequently a hole is created at - * index 5, page_cache_prev_hole covering both indexes may return 5 if - * called under rcu_read_lock. + * page_cache_prev_miss() - Find the next gap in the page cache. + * @mapping: Mapping. + * @index: Index. + * @max_scan: Maximum range to search. + * + * Search the range [max(index - max_scan + 1, 0), index] for the + * gap with the highest index. + * + * This function may be called under the rcu_read_lock. However, this will + * not atomically search a snapshot of the cache at a single point in time. + * For example, if a gap is created at index 10, then subsequently a gap is + * created at index 5, page_cache_prev_miss() covering both indices may + * return 5 if called under the rcu_read_lock. + * + * Return: The index of the gap if found, otherwise an index outside the + * range specified (in which case 'index - return >= max_scan' will be true). + * In the rare case of wrap-around, ULONG_MAX will be returned. 
*/ -pgoff_t page_cache_prev_hole(struct address_space *mapping, +pgoff_t page_cache_prev_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan) { - unsigned long i; - - for (i = 0; i < max_scan; i++) { - struct page *page; + XA_STATE(xas, &mapping->i_pages, index); - page = radix_tree_lookup(&mapping->i_pages, index); - if (!page || xa_is_value(page)) + while (max_scan--) { + void *entry = xas_prev(&xas); + if (!entry || xa_is_value(entry)) break; - index--; - if (index == ULONG_MAX) + if (xas.xa_index == ULONG_MAX) break; } - return index; + return xas.xa_index; } -EXPORT_SYMBOL(page_cache_prev_hole); +EXPORT_SYMBOL(page_cache_prev_miss); /** * find_get_entry - find and get a page cache entry diff --git a/mm/readahead.c b/mm/readahead.c index de657077d41d..fc4dd364b37a 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -336,7 +336,7 @@ static pgoff_t count_history_pages(struct address_space *mapping, pgoff_t head; rcu_read_lock(); - head = page_cache_prev_hole(mapping, offset - 1, max); + head = page_cache_prev_miss(mapping, offset - 1, max); rcu_read_unlock(); return offset - 1 - head; @@ -425,7 +425,7 @@ ondemand_readahead(struct address_space *mapping, pgoff_t start; rcu_read_lock(); - start = page_cache_next_hole(mapping, offset + 1, max_pages); + start = page_cache_next_miss(mapping, offset + 1, max_pages); rcu_read_unlock(); if (!start || start - offset > max_pages) -- cgit v1.2.3-71-gd317 From 74d609585d8bd6083bd9d75bc1fd2c0d3851bcc5 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 17 Nov 2017 10:01:45 -0500 Subject: page cache: Add and replace pages using the XArray Use the XArray APIs to add and replace pages in the page cache. This removes two uses of the radix tree preload API and is significantly shorter code. It also removes the last user of __radix_tree_create() outside radix-tree.c itself, so make it static. 
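In outline, the new insertion path is the standard XArray retry idiom:
store under the lock, and let xas_nomem() allocate memory and retry on
-ENOMEM. This is a simplified sketch; the real function additionally
updates nrpages/nrexceptional and returns any shadow entry it displaced.

	XA_STATE(xas, &mapping->i_pages, offset);
	void *old;

	do {
		xas_lock_irq(&xas);
		old = xas_load(&xas);
		if (old && !xa_is_value(old))
			xas_set_err(&xas, -EEXIST);	/* a page is already present */
		xas_store(&xas, page);		/* records -ENOMEM in xas on failure */
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));

	if (xas_error(&xas))
		return xas_error(&xas);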
Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 3 - include/linux/swap.h | 8 ++- lib/radix-tree.c | 6 +- mm/filemap.c | 139 +++++++++++++++++++-------------------------- 4 files changed, 66 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 15388b7e38b9..d55de428a589 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -231,9 +231,6 @@ static inline int radix_tree_exception(void *arg) return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); } -int __radix_tree_create(struct radix_tree_root *, unsigned long index, - unsigned order, struct radix_tree_node **nodep, - void __rcu ***slotp); int __radix_tree_insert(struct radix_tree_root *, unsigned long index, unsigned order, void *); static inline int radix_tree_insert(struct radix_tree_root *root, diff --git a/include/linux/swap.h b/include/linux/swap.h index 8e2c11e692ba..6ea50cf41b4c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -299,8 +299,12 @@ void *workingset_eviction(struct address_space *mapping, struct page *page); bool workingset_refault(void *shadow); void workingset_activation(struct page *page); -/* Do not use directly, use workingset_lookup_update */ -void workingset_update_node(struct radix_tree_node *node); +/* Only track the nodes of mappings with shadow entries */ +void workingset_update_node(struct xa_node *node); +#define mapping_set_update(xas, mapping) do { \ + if (!dax_mapping(mapping) && !shmem_mapping(mapping)) \ + xas_set_update(xas, workingset_update_node); \ +} while (0) /* Returns workingset_update_node() if the mapping has shadow entries. */ #define workingset_lookup_update(mapping) \ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 68702061464f..8a58051eb5b3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -700,9 +700,9 @@ static bool delete_node(struct radix_tree_root *root, * * Returns -ENOMEM, or 0 for success. */ -int __radix_tree_create(struct radix_tree_root *root, unsigned long index, - unsigned order, struct radix_tree_node **nodep, - void __rcu ***slotp) +static int __radix_tree_create(struct radix_tree_root *root, + unsigned long index, unsigned order, + struct radix_tree_node **nodep, void __rcu ***slotp) { struct radix_tree_node *node = NULL, *child; void __rcu **slot = (void __rcu **)&root->xa_head; diff --git a/mm/filemap.c b/mm/filemap.c index 714d3d0f60f5..b63caebd1367 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -111,35 +111,6 @@ * ->tasklist_lock (memory_failure, collect_procs_ao) */ -static int page_cache_tree_insert(struct address_space *mapping, - struct page *page, void **shadowp) -{ - struct radix_tree_node *node; - void **slot; - int error; - - error = __radix_tree_create(&mapping->i_pages, page->index, 0, - &node, &slot); - if (error) - return error; - if (*slot) { - void *p; - - p = radix_tree_deref_slot_protected(slot, - &mapping->i_pages.xa_lock); - if (!xa_is_value(p)) - return -EEXIST; - - mapping->nrexceptional--; - if (shadowp) - *shadowp = p; - } - __radix_tree_replace(&mapping->i_pages, node, slot, page, - workingset_lookup_update(mapping)); - mapping->nrpages++; - return 0; -} - static void page_cache_tree_delete(struct address_space *mapping, struct page *page, void *shadow) { @@ -775,51 +746,44 @@ EXPORT_SYMBOL(file_write_and_wait_range); * locked. This function does not add the new page to the LRU, the * caller must do that. * - * The remove + add is atomic. 
The only way this function can fail is - * memory allocation failure. + * The remove + add is atomic. This function cannot fail. */ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) { - int error; + struct address_space *mapping = old->mapping; + void (*freepage)(struct page *) = mapping->a_ops->freepage; + pgoff_t offset = old->index; + XA_STATE(xas, &mapping->i_pages, offset); + unsigned long flags; VM_BUG_ON_PAGE(!PageLocked(old), old); VM_BUG_ON_PAGE(!PageLocked(new), new); VM_BUG_ON_PAGE(new->mapping, new); - error = radix_tree_preload(gfp_mask & GFP_RECLAIM_MASK); - if (!error) { - struct address_space *mapping = old->mapping; - void (*freepage)(struct page *); - unsigned long flags; - - pgoff_t offset = old->index; - freepage = mapping->a_ops->freepage; + get_page(new); + new->mapping = mapping; + new->index = offset; - get_page(new); - new->mapping = mapping; - new->index = offset; + xas_lock_irqsave(&xas, flags); + xas_store(&xas, new); - xa_lock_irqsave(&mapping->i_pages, flags); - __delete_from_page_cache(old, NULL); - error = page_cache_tree_insert(mapping, new, NULL); - BUG_ON(error); - - /* - * hugetlb pages do not participate in page cache accounting. - */ - if (!PageHuge(new)) - __inc_node_page_state(new, NR_FILE_PAGES); - if (PageSwapBacked(new)) - __inc_node_page_state(new, NR_SHMEM); - xa_unlock_irqrestore(&mapping->i_pages, flags); - mem_cgroup_migrate(old, new); - radix_tree_preload_end(); - if (freepage) - freepage(old); - put_page(old); - } + old->mapping = NULL; + /* hugetlb pages do not participate in page cache accounting. */ + if (!PageHuge(old)) + __dec_node_page_state(new, NR_FILE_PAGES); + if (!PageHuge(new)) + __inc_node_page_state(new, NR_FILE_PAGES); + if (PageSwapBacked(old)) + __dec_node_page_state(new, NR_SHMEM); + if (PageSwapBacked(new)) + __inc_node_page_state(new, NR_SHMEM); + xas_unlock_irqrestore(&xas, flags); + mem_cgroup_migrate(old, new); + if (freepage) + freepage(old); + put_page(old); - return error; + return 0; } EXPORT_SYMBOL_GPL(replace_page_cache_page); @@ -828,12 +792,15 @@ static int __add_to_page_cache_locked(struct page *page, pgoff_t offset, gfp_t gfp_mask, void **shadowp) { + XA_STATE(xas, &mapping->i_pages, offset); int huge = PageHuge(page); struct mem_cgroup *memcg; int error; + void *old; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapBacked(page), page); + mapping_set_update(&xas, mapping); if (!huge) { error = mem_cgroup_try_charge(page, current->mm, @@ -842,39 +809,47 @@ static int __add_to_page_cache_locked(struct page *page, return error; } - error = radix_tree_maybe_preload(gfp_mask & GFP_RECLAIM_MASK); - if (error) { - if (!huge) - mem_cgroup_cancel_charge(page, memcg, false); - return error; - } - get_page(page); page->mapping = mapping; page->index = offset; - xa_lock_irq(&mapping->i_pages); - error = page_cache_tree_insert(mapping, page, shadowp); - radix_tree_preload_end(); - if (unlikely(error)) - goto err_insert; + do { + xas_lock_irq(&xas); + old = xas_load(&xas); + if (old && !xa_is_value(old)) + xas_set_err(&xas, -EEXIST); + xas_store(&xas, page); + if (xas_error(&xas)) + goto unlock; + + if (xa_is_value(old)) { + mapping->nrexceptional--; + if (shadowp) + *shadowp = old; + } + mapping->nrpages++; + + /* hugetlb pages do not participate in page cache accounting */ + if (!huge) + __inc_node_page_state(page, NR_FILE_PAGES); +unlock: + xas_unlock_irq(&xas); + } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK)); + + if (xas_error(&xas)) + goto error; - /* hugetlb 
pages do not participate in page cache accounting. */ - if (!huge) - __inc_node_page_state(page, NR_FILE_PAGES); - xa_unlock_irq(&mapping->i_pages); if (!huge) mem_cgroup_commit_charge(page, memcg, false, false); trace_mm_filemap_add_to_page_cache(page); return 0; -err_insert: +error: page->mapping = NULL; /* Leave page->index set: truncation relies upon it */ - xa_unlock_irq(&mapping->i_pages); if (!huge) mem_cgroup_cancel_charge(page, memcg, false); put_page(page); - return error; + return xas_error(&xas); } /** -- cgit v1.2.3-71-gd317 From 3ece58a270cd1e5026282abe778bd50db7a11d08 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 16 May 2018 18:00:33 -0400 Subject: page cache: Convert find_get_pages_contig to XArray There's no direct replacement for radix_tree_for_each_contig() in the XArray API as it's an unusual thing to do. Instead, open-code a loop using xas_next(). This removes the only user of radix_tree_for_each_contig() so delete the iterator from the API and the test suite code for it. Signed-off-by: Matthew Wilcox --- .clang-format | 1 - include/linux/radix-tree.h | 17 ----------- mm/filemap.c | 53 ++++++++++++++-------------------- tools/testing/radix-tree/regression3.c | 23 --------------- 4 files changed, 22 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/.clang-format b/.clang-format index 1d5da22e0ba5..e6080f5834a3 100644 --- a/.clang-format +++ b/.clang-format @@ -323,7 +323,6 @@ ForEachMacros: - 'protocol_for_each_card' - 'protocol_for_each_dev' - 'queue_for_each_hw_ctx' - - 'radix_tree_for_each_contig' - 'radix_tree_for_each_slot' - 'radix_tree_for_each_tagged' - 'rbtree_postorder_for_each_entry_safe' diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index d55de428a589..023e888e1163 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -522,23 +522,6 @@ static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot, slot || (slot = radix_tree_next_chunk(root, iter, 0)) ; \ slot = radix_tree_next_slot(slot, iter, 0)) -/** - * radix_tree_for_each_contig - iterate over contiguous slots - * - * @slot: the void** variable for pointer to slot - * @root: the struct radix_tree_root pointer - * @iter: the struct radix_tree_iter pointer - * @start: iteration starting index - * - * @slot points to radix tree slot, @iter->index contains its index. 
- */ -#define radix_tree_for_each_contig(slot, root, iter, start) \ - for (slot = radix_tree_iter_init(iter, start) ; \ - slot || (slot = radix_tree_next_chunk(root, iter, \ - RADIX_TREE_ITER_CONTIG)) ; \ - slot = radix_tree_next_slot(slot, iter, \ - RADIX_TREE_ITER_CONTIG)) - /** * radix_tree_for_each_tagged - iterate over tagged slots * diff --git a/mm/filemap.c b/mm/filemap.c index b72c39fe61c2..089b67598100 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1721,57 +1721,43 @@ out: unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, unsigned int nr_pages, struct page **pages) { - struct radix_tree_iter iter; - void **slot; + XA_STATE(xas, &mapping->i_pages, index); + struct page *page; unsigned int ret = 0; if (unlikely(!nr_pages)) return 0; rcu_read_lock(); - radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, index) { - struct page *head, *page; -repeat: - page = radix_tree_deref_slot(slot); - /* The hole, there no reason to continue */ - if (unlikely(!page)) - break; - - if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) { - slot = radix_tree_iter_retry(&iter); - continue; - } - /* - * A shadow entry of a recently evicted page, - * or a swap entry from shmem/tmpfs. Stop - * looking for contiguous pages. - */ + for (page = xas_load(&xas); page; page = xas_next(&xas)) { + struct page *head; + if (xas_retry(&xas, page)) + continue; + /* + * If the entry has been swapped out, we can stop looking. + * No current caller is looking for DAX entries. + */ + if (xa_is_value(page)) break; - } head = compound_head(page); if (!page_cache_get_speculative(head)) - goto repeat; + goto retry; /* The page was split under us? */ - if (compound_head(page) != head) { - put_page(head); - goto repeat; - } + if (compound_head(page) != head) + goto put_page; /* Has the page moved? */ - if (unlikely(page != *slot)) { - put_page(head); - goto repeat; - } + if (unlikely(page != xas_reload(&xas))) + goto put_page; /* * must check mapping and index after taking the ref. * otherwise we can get both false positives and false * negatives, which is just confusing to the caller. 
*/ - if (page->mapping == NULL || page_to_pgoff(page) != iter.index) { + if (!page->mapping || page_to_pgoff(page) != xas.xa_index) { put_page(page); break; } @@ -1779,6 +1765,11 @@ repeat: pages[ret] = page; if (++ret == nr_pages) break; + continue; +put_page: + put_page(head); +retry: + xas_reset(&xas); } rcu_read_unlock(); return ret; diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c index ace2543c3eda..9f9a3b280f56 100644 --- a/tools/testing/radix-tree/regression3.c +++ b/tools/testing/radix-tree/regression3.c @@ -69,21 +69,6 @@ void regression3_test(void) continue; } } - radix_tree_delete(&root, 1); - - first = true; - radix_tree_for_each_contig(slot, &root, &iter, 0) { - printv(2, "contig %ld %p\n", iter.index, *slot); - if (first) { - radix_tree_insert(&root, 1, ptr); - first = false; - } - if (radix_tree_deref_retry(*slot)) { - printv(2, "retry at %ld\n", iter.index); - slot = radix_tree_iter_retry(&iter); - continue; - } - } radix_tree_for_each_slot(slot, &root, &iter, 0) { printv(2, "slot %ld %p\n", iter.index, *slot); @@ -93,14 +78,6 @@ void regression3_test(void) } } - radix_tree_for_each_contig(slot, &root, &iter, 0) { - printv(2, "contig %ld %p\n", iter.index, *slot); - if (!iter.index) { - printv(2, "next at %ld\n", iter.index); - slot = radix_tree_iter_resume(slot, &iter); - } - } - radix_tree_tag_set(&root, 0, 0); radix_tree_tag_set(&root, 1, 0); radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) { -- cgit v1.2.3-71-gd317 From a6906972fe67fb6f73ba04088f7897227fd1cd8f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 16 May 2018 18:12:54 -0400 Subject: page cache; Convert find_get_pages_range_tag to XArray The 'end' parameter of the xas_for_each iterator avoids a useless iteration at the end of the range. Signed-off-by: Matthew Wilcox --- include/linux/pagemap.h | 4 +-- mm/filemap.c | 68 +++++++++++++++++++------------------------------ 2 files changed, 28 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index cf9ad413eee9..c9bbb9a05764 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -363,10 +363,10 @@ static inline unsigned find_get_pages(struct address_space *mapping, unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, - pgoff_t end, int tag, unsigned int nr_pages, + pgoff_t end, xa_mark_t tag, unsigned int nr_pages, struct page **pages); static inline unsigned find_get_pages_tag(struct address_space *mapping, - pgoff_t *index, int tag, unsigned int nr_pages, + pgoff_t *index, xa_mark_t tag, unsigned int nr_pages, struct page **pages) { return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag, diff --git a/mm/filemap.c b/mm/filemap.c index 089b67598100..0df28aa6411c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1789,74 +1789,58 @@ EXPORT_SYMBOL(find_get_pages_contig); * @tag. We update @index to index the next page for the traversal. 
*/ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, - pgoff_t end, int tag, unsigned int nr_pages, + pgoff_t end, xa_mark_t tag, unsigned int nr_pages, struct page **pages) { - struct radix_tree_iter iter; - void **slot; + XA_STATE(xas, &mapping->i_pages, *index); + struct page *page; unsigned ret = 0; if (unlikely(!nr_pages)) return 0; rcu_read_lock(); - radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, *index, tag) { - struct page *head, *page; - - if (iter.index > end) - break; -repeat: - page = radix_tree_deref_slot(slot); - if (unlikely(!page)) + xas_for_each_marked(&xas, page, end, tag) { + struct page *head; + if (xas_retry(&xas, page)) continue; - - if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) { - slot = radix_tree_iter_retry(&iter); - continue; - } - /* - * A shadow entry of a recently evicted page. - * - * Those entries should never be tagged, but - * this tree walk is lockless and the tags are - * looked up in bulk, one radix tree node at a - * time, so there is a sizable window for page - * reclaim to evict a page we saw tagged. - * - * Skip over it. - */ + /* + * Shadow entries should never be tagged, but this iteration + * is lockless so there is a window for page reclaim to evict + * a page we saw tagged. Skip over it. + */ + if (xa_is_value(page)) continue; - } head = compound_head(page); if (!page_cache_get_speculative(head)) - goto repeat; + goto retry; /* The page was split under us? */ - if (compound_head(page) != head) { - put_page(head); - goto repeat; - } + if (compound_head(page) != head) + goto put_page; /* Has the page moved? */ - if (unlikely(page != *slot)) { - put_page(head); - goto repeat; - } + if (unlikely(page != xas_reload(&xas))) + goto put_page; pages[ret] = page; if (++ret == nr_pages) { - *index = pages[ret - 1]->index + 1; + *index = page->index + 1; goto out; } + continue; +put_page: + put_page(head); +retry: + xas_reset(&xas); } /* - * We come here when we got at @end. We take care to not overflow the + * We come here when we got to @end. We take care to not overflow the * index @index as it confuses some of the callers. This breaks the - * iteration when there is page at index -1 but that is already broken - * anyway. + * iteration when there is a page at index -1 but that is already + * broken anyway. */ if (end == (pgoff_t)-1) *index = (pgoff_t)-1; -- cgit v1.2.3-71-gd317 From c1901cd33cf407d77a181f8dd4ffff98041ef480 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 16 May 2018 23:56:04 -0400 Subject: page cache: Convert find_get_entries_tag to XArray Slightly shorter and simpler code. 
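The lookup reduces to the common xas_for_each_marked() idiom. In
simplified form (the real code also takes a speculative reference on
each non-value page and rechecks the slot afterwards):

	XA_STATE(xas, &mapping->i_pages, start);
	struct page *page;

	rcu_read_lock();
	xas_for_each_marked(&xas, page, ULONG_MAX, tag) {
		if (xas_retry(&xas, page))
			continue;	/* raced with a node being reclaimed */
		/*
		 * Value entries (shadow, swap or DAX) carry no struct page
		 * and are returned without raising a page count.
		 */
		indices[ret] = xas.xa_index;
		entries[ret] = page;
		if (++ret == nr_entries)
			break;
	}
	rcu_read_unlock();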
Signed-off-by: Matthew Wilcox --- include/linux/pagemap.h | 2 +- mm/filemap.c | 54 ++++++++++++++++++++++--------------------------- 2 files changed, 25 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index c9bbb9a05764..226f96f0dee0 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -373,7 +373,7 @@ static inline unsigned find_get_pages_tag(struct address_space *mapping, nr_pages, pages); } unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start, - int tag, unsigned int nr_entries, + xa_mark_t tag, unsigned int nr_entries, struct page **entries, pgoff_t *indices); struct page *grab_cache_page_write_begin(struct address_space *mapping, diff --git a/mm/filemap.c b/mm/filemap.c index 0df28aa6411c..35ae011971e1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1866,57 +1866,51 @@ EXPORT_SYMBOL(find_get_pages_range_tag); * @tag. */ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start, - int tag, unsigned int nr_entries, + xa_mark_t tag, unsigned int nr_entries, struct page **entries, pgoff_t *indices) { - void **slot; + XA_STATE(xas, &mapping->i_pages, start); + struct page *page; unsigned int ret = 0; - struct radix_tree_iter iter; if (!nr_entries) return 0; rcu_read_lock(); - radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start, tag) { - struct page *head, *page; -repeat: - page = radix_tree_deref_slot(slot); - if (unlikely(!page)) + xas_for_each_marked(&xas, page, ULONG_MAX, tag) { + struct page *head; + if (xas_retry(&xas, page)) continue; - if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) { - slot = radix_tree_iter_retry(&iter); - continue; - } - - /* - * A shadow entry of a recently evicted page, a swap - * entry from shmem/tmpfs or a DAX entry. Return it - * without attempting to raise page count. - */ + /* + * A shadow entry of a recently evicted page, a swap + * entry from shmem/tmpfs or a DAX entry. Return it + * without attempting to raise page count. + */ + if (xa_is_value(page)) goto export; - } head = compound_head(page); if (!page_cache_get_speculative(head)) - goto repeat; + goto retry; /* The page was split under us? */ - if (compound_head(page) != head) { - put_page(head); - goto repeat; - } + if (compound_head(page) != head) + goto put_page; /* Has the page moved? */ - if (unlikely(page != *slot)) { - put_page(head); - goto repeat; - } + if (unlikely(page != xas_reload(&xas))) + goto put_page; + export: - indices[ret] = iter.index; + indices[ret] = xas.xa_index; entries[ret] = page; if (++ret == nr_entries) break; + continue; +put_page: + put_page(head); +retry: + xas_reset(&xas); } rcu_read_unlock(); return ret; -- cgit v1.2.3-71-gd317 From ff9c745b81ff1e482167fd73558450e66ad43a33 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 22 Nov 2017 11:41:23 -0500 Subject: mm: Convert page-writeback to XArray Includes moving mapping_tagged() to fs.h as a static inline, and changing it to return bool. 
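Making it a static inline also makes cheap checks like the following
possible without a function call. mapping_needs_writeback() is
illustrative only, not part of this patch.

	/* Sketch: does this mapping have anything to write back? */
	static bool mapping_needs_writeback(struct address_space *mapping)
	{
		return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		       mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
	}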
Signed-off-by: Matthew Wilcox --- include/linux/fs.h | 17 +++++++------ mm/page-writeback.c | 72 +++++++++++++++++++---------------------------------- 2 files changed, 36 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d126cad0f621..e10278e4db66 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -483,15 +483,18 @@ struct block_device { struct mutex bd_fsfreeze_mutex; } __randomize_layout; +/* XArray tags, for tagging dirty and writeback pages in the pagecache. */ +#define PAGECACHE_TAG_DIRTY XA_MARK_0 +#define PAGECACHE_TAG_WRITEBACK XA_MARK_1 +#define PAGECACHE_TAG_TOWRITE XA_MARK_2 + /* - * Radix-tree tags, for tagging dirty and writeback pages within the pagecache - * radix trees + * Returns true if any of the pages in the mapping are marked with the tag. */ -#define PAGECACHE_TAG_DIRTY 0 -#define PAGECACHE_TAG_WRITEBACK 1 -#define PAGECACHE_TAG_TOWRITE 2 - -int mapping_tagged(struct address_space *mapping, int tag); +static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag) +{ + return xa_marked(&mapping->i_pages, tag); +} static inline void i_mmap_lock_write(struct address_space *mapping) { diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 84ae9bf5858a..fc6e5743b0bf 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2097,34 +2097,25 @@ void __init page_writeback_init(void) * dirty pages in the file (thus it is important for this function to be quick * so that it can tag pages faster than a dirtying process can create them). */ -/* - * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce the i_pages lock - * latency. - */ void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end) { -#define WRITEBACK_TAG_BATCH 4096 - unsigned long tagged = 0; - struct radix_tree_iter iter; - void **slot; + XA_STATE(xas, &mapping->i_pages, start); + unsigned int tagged = 0; + void *page; - xa_lock_irq(&mapping->i_pages); - radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start, - PAGECACHE_TAG_DIRTY) { - if (iter.index > end) - break; - radix_tree_iter_tag_set(&mapping->i_pages, &iter, - PAGECACHE_TAG_TOWRITE); - tagged++; - if ((tagged % WRITEBACK_TAG_BATCH) != 0) + xas_lock_irq(&xas); + xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) { + xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE); + if (++tagged % XA_CHECK_SCHED) continue; - slot = radix_tree_iter_resume(slot, &iter); - xa_unlock_irq(&mapping->i_pages); + + xas_pause(&xas); + xas_unlock_irq(&xas); cond_resched(); - xa_lock_irq(&mapping->i_pages); + xas_lock_irq(&xas); } - xa_unlock_irq(&mapping->i_pages); + xas_unlock_irq(&xas); } EXPORT_SYMBOL(tag_pages_for_writeback); @@ -2164,7 +2155,7 @@ int write_cache_pages(struct address_space *mapping, pgoff_t done_index; int cycled; int range_whole = 0; - int tag; + xa_mark_t tag; pagevec_init(&pvec); if (wbc->range_cyclic) { @@ -2445,7 +2436,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, /* * For address_spaces which do not use buffers. Just tag the page as dirty in - * its radix tree. + * the xarray. * * This is also used when a single buffer is being dirtied: we want to set the * page dirty in that case, but not all the buffers. 
This is a "bottom-up" @@ -2471,7 +2462,7 @@ int __set_page_dirty_nobuffers(struct page *page) BUG_ON(page_mapping(page) != mapping); WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); account_page_dirtied(page, mapping); - radix_tree_tag_set(&mapping->i_pages, page_index(page), + __xa_set_mark(&mapping->i_pages, page_index(page), PAGECACHE_TAG_DIRTY); xa_unlock_irqrestore(&mapping->i_pages, flags); unlock_page_memcg(page); @@ -2634,13 +2625,13 @@ EXPORT_SYMBOL(__cancel_dirty_page); * Returns true if the page was previously dirty. * * This is for preparing to put the page under writeout. We leave the page - * tagged as dirty in the radix tree so that a concurrent write-for-sync + * tagged as dirty in the xarray so that a concurrent write-for-sync * can discover it via a PAGECACHE_TAG_DIRTY walk. The ->writepage * implementation will run either set_page_writeback() or set_page_dirty(), - * at which stage we bring the page's dirty flag and radix-tree dirty tag + * at which stage we bring the page's dirty flag and xarray dirty tag * back into sync. * - * This incoherency between the page's dirty flag and radix-tree tag is + * This incoherency between the page's dirty flag and xarray tag is * unfortunate, but it only exists while the page is locked. */ int clear_page_dirty_for_io(struct page *page) @@ -2721,7 +2712,7 @@ int test_clear_page_writeback(struct page *page) xa_lock_irqsave(&mapping->i_pages, flags); ret = TestClearPageWriteback(page); if (ret) { - radix_tree_tag_clear(&mapping->i_pages, page_index(page), + __xa_clear_mark(&mapping->i_pages, page_index(page), PAGECACHE_TAG_WRITEBACK); if (bdi_cap_account_writeback(bdi)) { struct bdi_writeback *wb = inode_to_wb(inode); @@ -2761,11 +2752,13 @@ int __test_set_page_writeback(struct page *page, bool keep_write) lock_page_memcg(page); if (mapping && mapping_use_writeback_tags(mapping)) { + XA_STATE(xas, &mapping->i_pages, page_index(page)); struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); unsigned long flags; - xa_lock_irqsave(&mapping->i_pages, flags); + xas_lock_irqsave(&xas, flags); + xas_load(&xas); ret = TestSetPageWriteback(page); if (!ret) { bool on_wblist; @@ -2773,8 +2766,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write) on_wblist = mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); - radix_tree_tag_set(&mapping->i_pages, page_index(page), - PAGECACHE_TAG_WRITEBACK); + xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK); if (bdi_cap_account_writeback(bdi)) inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK); @@ -2787,12 +2779,10 @@ int __test_set_page_writeback(struct page *page, bool keep_write) sb_mark_inode_writeback(mapping->host); } if (!PageDirty(page)) - radix_tree_tag_clear(&mapping->i_pages, page_index(page), - PAGECACHE_TAG_DIRTY); + xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); if (!keep_write) - radix_tree_tag_clear(&mapping->i_pages, page_index(page), - PAGECACHE_TAG_TOWRITE); - xa_unlock_irqrestore(&mapping->i_pages, flags); + xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); + xas_unlock_irqrestore(&xas, flags); } else { ret = TestSetPageWriteback(page); } @@ -2806,16 +2796,6 @@ int __test_set_page_writeback(struct page *page, bool keep_write) } EXPORT_SYMBOL(__test_set_page_writeback); -/* - * Return true if any of the pages in the mapping are marked with the - * passed tag. 
- */ -int mapping_tagged(struct address_space *mapping, int tag) -{ - return radix_tree_tagged(&mapping->i_pages, tag); -} -EXPORT_SYMBOL(mapping_tagged); - /** * wait_for_stable_page() - wait for writeback to finish, if necessary. * @page: The page to wait on. -- cgit v1.2.3-71-gd317 From a97e7904c0806309fd77103005bb7820c3f1c5e4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 24 Nov 2017 14:24:59 -0500 Subject: mm: Convert workingset to XArray We construct an XA_STATE and use it to delete the node with xas_store() rather than adding a special function for this unique use case. Includes a test that simulates this usage for the test suite. Signed-off-by: Matthew Wilcox --- include/linux/swap.h | 9 -------- lib/test_xarray.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ mm/workingset.c | 51 +++++++++++++++++------------------------ 3 files changed, 86 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 6ea50cf41b4c..112cebcf0402 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -306,15 +306,6 @@ void workingset_update_node(struct xa_node *node); xas_set_update(xas, workingset_update_node); \ } while (0) -/* Returns workingset_update_node() if the mapping has shadow entries. */ -#define workingset_lookup_update(mapping) \ -({ \ - radix_tree_update_node_t __helper = workingset_update_node; \ - if (dax_mapping(mapping) || shmem_mapping(mapping)) \ - __helper = NULL; \ - __helper; \ -}) - /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; extern unsigned long totalreserve_pages; diff --git a/lib/test_xarray.c b/lib/test_xarray.c index a752e6a37e6f..128c6489082f 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -863,6 +863,67 @@ static noinline void check_create_range(struct xarray *xa) check_create_range_3(); } +static LIST_HEAD(shadow_nodes); + +static void test_update_node(struct xa_node *node) +{ + if (node->count && node->count == node->nr_values) { + if (list_empty(&node->private_list)) + list_add(&shadow_nodes, &node->private_list); + } else { + if (!list_empty(&node->private_list)) + list_del_init(&node->private_list); + } +} + +static noinline void shadow_remove(struct xarray *xa) +{ + struct xa_node *node; + + xa_lock(xa); + while ((node = list_first_entry_or_null(&shadow_nodes, + struct xa_node, private_list))) { + XA_STATE(xas, node->array, 0); + XA_BUG_ON(xa, node->array != xa); + list_del_init(&node->private_list); + xas.xa_node = xa_parent_locked(node->array, node); + xas.xa_offset = node->offset; + xas.xa_shift = node->shift + XA_CHUNK_SHIFT; + xas_set_update(&xas, test_update_node); + xas_store(&xas, NULL); + } + xa_unlock(xa); +} + +static noinline void check_workingset(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + xas_set_update(&xas, test_update_node); + + do { + xas_lock(&xas); + xas_store(&xas, xa_mk_value(0)); + xas_next(&xas); + xas_store(&xas, xa_mk_value(1)); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + XA_BUG_ON(xa, list_empty(&shadow_nodes)); + + xas_lock(&xas); + xas_next(&xas); + xas_store(&xas, &xas); + XA_BUG_ON(xa, !list_empty(&shadow_nodes)); + + xas_store(&xas, xa_mk_value(2)); + xas_unlock(&xas); + XA_BUG_ON(xa, list_empty(&shadow_nodes)); + + shadow_remove(xa); + XA_BUG_ON(xa, !list_empty(&shadow_nodes)); + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -916,6 +977,10 @@ static int xarray_checks(void) 
check_create_range(&array); check_store_iter(&array); + check_workingset(&array, 0); + check_workingset(&array, 64); + check_workingset(&array, 4096); + printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; } diff --git a/mm/workingset.c b/mm/workingset.c index c201cbb8c00f..5cfb29ec3fd9 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -148,7 +148,7 @@ * and activations is maintained (node->inactive_age). * * On eviction, a snapshot of this counter (along with some bits to - * identify the node) is stored in the now empty page cache radix tree + * identify the node) is stored in the now empty page cache * slot of the evicted page. This is called a shadow entry. * * On cache misses for which there are shadow entries, an eligible @@ -162,7 +162,7 @@ /* * Eviction timestamps need to be able to cover the full range of - * actionable refaults. However, bits are tight in the radix tree + * actionable refaults. However, bits are tight in the xarray * entry, and after storing the identifier for the lruvec there might * not be enough left to represent every single actionable refault. In * that case, we have to sacrifice granularity for distance, and group @@ -339,7 +339,7 @@ out: static struct list_lru shadow_nodes; -void workingset_update_node(struct radix_tree_node *node) +void workingset_update_node(struct xa_node *node) { /* * Track non-empty nodes that contain only shadow entries; @@ -368,7 +368,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, nodes = list_lru_shrink_count(&shadow_nodes, sc); /* - * Approximate a reasonable limit for the radix tree nodes + * Approximate a reasonable limit for the nodes * containing shadow entries. We don't need to keep more * shadow entries than possible pages on the active list, * since refault distances bigger than that are dismissed. @@ -383,11 +383,11 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, * worst-case density of 1/8th. Below that, not all eligible * refaults can be detected anymore. * - * On 64-bit with 7 radix_tree_nodes per page and 64 slots + * On 64-bit with 7 xa_nodes per page and 64 slots * each, this will reclaim shadow entries when they consume * ~1.8% of available memory: * - * PAGE_SIZE / radix_tree_nodes / node_entries * 8 / PAGE_SIZE + * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE */ if (sc->memcg) { cache = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, @@ -396,7 +396,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, cache = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) + node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE); } - max_nodes = cache >> (RADIX_TREE_MAP_SHIFT - 3); + max_nodes = cache >> (XA_CHUNK_SHIFT - 3); if (!nodes) return SHRINK_EMPTY; @@ -409,11 +409,11 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, static enum lru_status shadow_lru_isolate(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, - void *arg) + void *arg) __must_hold(lru_lock) { + struct xa_node *node = container_of(item, struct xa_node, private_list); + XA_STATE(xas, node->array, 0); struct address_space *mapping; - struct radix_tree_node *node; - unsigned int i; int ret; /* @@ -421,14 +421,13 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, * the shadow node LRU under the i_pages lock and the * lru_lock. 
Because the page cache tree is emptied before * the inode can be destroyed, holding the lru_lock pins any - * address_space that has radix tree nodes on the LRU. + * address_space that has nodes on the LRU. * * We can then safely transition to the i_pages lock to * pin only the address_space of the particular node we want * to reclaim, take the node off-LRU, and drop the lru_lock. */ - node = container_of(item, struct xa_node, private_list); mapping = container_of(node->array, struct address_space, i_pages); /* Coming from the list, invert the lock order */ @@ -450,25 +449,17 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, goto out_invalid; if (WARN_ON_ONCE(node->count != node->nr_values)) goto out_invalid; - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { - if (node->slots[i]) { - if (WARN_ON_ONCE(!xa_is_value(node->slots[i]))) - goto out_invalid; - if (WARN_ON_ONCE(!node->nr_values)) - goto out_invalid; - if (WARN_ON_ONCE(!mapping->nrexceptional)) - goto out_invalid; - node->slots[i] = NULL; - node->nr_values--; - node->count--; - mapping->nrexceptional--; - } - } - if (WARN_ON_ONCE(node->nr_values)) - goto out_invalid; + mapping->nrexceptional -= node->nr_values; + xas.xa_node = xa_parent_locked(&mapping->i_pages, node); + xas.xa_offset = node->offset; + xas.xa_shift = node->shift + XA_CHUNK_SHIFT; + xas_set_update(&xas, workingset_update_node); + /* + * We could store a shadow entry here which was the minimum of the + * shadow entries we were tracking ... + */ + xas_store(&xas, NULL); inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); - __radix_tree_delete_node(&mapping->i_pages, node, - workingset_lookup_update(mapping)); out_invalid: xa_unlock_irq(&mapping->i_pages); -- cgit v1.2.3-71-gd317 From 4e17ec250fce0eba9b70a91c9622da2748a3ec50 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 29 Nov 2017 08:32:39 -0500 Subject: mm: Convert delete_from_swap_cache to XArray Both callers of __delete_from_swap_cache have the swp_entry_t already, so pass that in to make constructing the XA_STATE easier. Signed-off-by: Matthew Wilcox --- include/linux/swap.h | 5 +++-- mm/swap_state.c | 24 ++++++++++-------------- mm/vmscan.c | 2 +- 3 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 112cebcf0402..cb479bf5842e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -403,7 +403,7 @@ extern void show_swap_cache_info(void); extern int add_to_swap(struct page *page); extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); extern int __add_to_swap_cache(struct page *page, swp_entry_t entry); -extern void __delete_from_swap_cache(struct page *); +extern void __delete_from_swap_cache(struct page *, swp_entry_t entry); extern void delete_from_swap_cache(struct page *); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); @@ -557,7 +557,8 @@ static inline int add_to_swap_cache(struct page *page, swp_entry_t entry, return -1; } -static inline void __delete_from_swap_cache(struct page *page) +static inline void __delete_from_swap_cache(struct page *page, + swp_entry_t entry) { } diff --git a/mm/swap_state.c b/mm/swap_state.c index 850314c2c3ab..f393c994cc60 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -154,23 +154,22 @@ unlock: * This must be called only on pages that have * been verified to be in the swap cache. 
*/ -void __delete_from_swap_cache(struct page *page) +void __delete_from_swap_cache(struct page *page, swp_entry_t entry) { - struct address_space *address_space; + struct address_space *address_space = swap_address_space(entry); int i, nr = hpage_nr_pages(page); - swp_entry_t entry; - pgoff_t idx; + pgoff_t idx = swp_offset(entry); + XA_STATE(xas, &address_space->i_pages, idx); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageSwapCache(page), page); VM_BUG_ON_PAGE(PageWriteback(page), page); - entry.val = page_private(page); - address_space = swap_address_space(entry); - idx = swp_offset(entry); for (i = 0; i < nr; i++) { - radix_tree_delete(&address_space->i_pages, idx + i); + void *entry = xas_store(&xas, NULL); + VM_BUG_ON_PAGE(entry != page + i, entry); set_page_private(page + i, 0); + xas_next(&xas); } ClearPageSwapCache(page); address_space->nrpages -= nr; @@ -243,14 +242,11 @@ fail: */ void delete_from_swap_cache(struct page *page) { - swp_entry_t entry; - struct address_space *address_space; - - entry.val = page_private(page); + swp_entry_t entry = { .val = page_private(page) }; + struct address_space *address_space = swap_address_space(entry); - address_space = swap_address_space(entry); xa_lock_irq(&address_space->i_pages); - __delete_from_swap_cache(page); + __delete_from_swap_cache(page, entry); xa_unlock_irq(&address_space->i_pages); put_swap_page(page, entry); diff --git a/mm/vmscan.c b/mm/vmscan.c index c7ce2c161225..80f731cf974e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -923,7 +923,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; mem_cgroup_swapout(page, swap); - __delete_from_swap_cache(page); + __delete_from_swap_cache(page, swap); xa_unlock_irqrestore(&mapping->i_pages, flags); put_swap_page(page, swap); } else { -- cgit v1.2.3-71-gd317 From 10bbd235859bf483f9a8a4ebe95463d700bae394 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 5 Dec 2017 17:30:38 -0500 Subject: pagevec: Use xa_mark_t Removes sparse warnings. 
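A sketch of the kind of caller this typing change is aimed at, assuming the post-series API; my_writeback_scan is a hypothetical helper and error handling is elided. Declaring the tag as xa_mark_t rather than a plain int is what lets sparse type-check the mark arguments:

#include <linux/pagevec.h>
#include <linux/writeback.h>

static void my_writeback_scan(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	struct pagevec pvec;
	pgoff_t index = 0;
	xa_mark_t tag;	/* previously a plain int */
	unsigned nr;

	/* Pick the tag the same way write_cache_pages() does. */
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;

	pagevec_init(&pvec);
	nr = pagevec_lookup_range_tag(&pvec, mapping, &index,
				      (pgoff_t)-1, tag);
	while (nr--) {
		/* ... write back pvec.pages[nr] ... */
	}
	pagevec_release(&pvec);
}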
Signed-off-by: Matthew Wilcox --- fs/btrfs/extent_io.c | 4 ++-- fs/ext4/inode.c | 2 +- fs/f2fs/data.c | 2 +- fs/gfs2/aops.c | 2 +- include/linux/pagevec.h | 8 +++++--- mm/swap.c | 4 ++-- 6 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4dd6faab02bb..fc7ca7d991ad 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3778,7 +3778,7 @@ int btree_write_cache_pages(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ int scanned = 0; - int tag; + xa_mark_t tag; pagevec_init(&pvec); if (wbc->range_cyclic) { @@ -3903,7 +3903,7 @@ static int extent_write_cache_pages(struct address_space *mapping, pgoff_t done_index; int range_whole = 0; int scanned = 0; - int tag; + xa_mark_t tag; /* * We have to hold onto the inode so that ordered extents can do their diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d767e993591d..57bad3edfbed 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2613,7 +2613,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) long left = mpd->wbc->nr_to_write; pgoff_t index = mpd->first_page; pgoff_t end = mpd->last_page; - int tag; + xa_mark_t tag; int i, err = 0; int blkbits = mpd->inode->i_blkbits; ext4_lblk_t lblk; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 382c1ef9a9e4..5b760809eecc 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2003,7 +2003,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, pgoff_t last_idx = ULONG_MAX; int cycled; int range_whole = 0; - int tag; + xa_mark_t tag; pagevec_init(&pvec); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 31e8270d0b26..8afbb35559b9 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -366,7 +366,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, pgoff_t done_index; int cycled; int range_whole = 0; - int tag; + xa_mark_t tag; pagevec_init(&pvec); if (wbc->range_cyclic) { diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 6dc456ac6136..081d934eda64 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -9,6 +9,8 @@ #ifndef _LINUX_PAGEVEC_H #define _LINUX_PAGEVEC_H +#include + /* 15 pointers + header align the pagevec structure to a power of two */ #define PAGEVEC_SIZE 15 @@ -40,12 +42,12 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec, unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, - int tag); + xa_mark_t tag); unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, - int tag, unsigned max_pages); + xa_mark_t tag, unsigned max_pages); static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, - struct address_space *mapping, pgoff_t *index, int tag) + struct address_space *mapping, pgoff_t *index, xa_mark_t tag) { return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag); } diff --git a/mm/swap.c b/mm/swap.c index 4c5c7fcc6e46..6861f3140a13 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -1002,7 +1002,7 @@ EXPORT_SYMBOL(pagevec_lookup_range); unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, - int tag) + xa_mark_t tag) { pvec->nr = find_get_pages_range_tag(mapping, index, end, tag, PAGEVEC_SIZE, pvec->pages); @@ -1012,7 +1012,7 @@ EXPORT_SYMBOL(pagevec_lookup_range_tag); unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, 
pgoff_t end, - int tag, unsigned max_pages) + xa_mark_t tag, unsigned max_pages) { pvec->nr = find_get_pages_range_tag(mapping, index, end, tag, min_t(unsigned int, max_pages, PAGEVEC_SIZE), pvec->pages); -- cgit v1.2.3-71-gd317 From 7b8d046fba91d62beb8a8f78244aaa3c23a60cdd Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 1 Dec 2017 22:13:06 -0500 Subject: shmem: Convert shmem_alloc_hugepage to XArray xa_find() is a slightly easier API to use than radix_tree_gang_lookup_slot() because it contains its own RCU locking. This commit removes the last user of radix_tree_gang_lookup_slot() so remove the function too. Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 6 +----- lib/radix-tree.c | 44 +------------------------------------------- mm/shmem.c | 14 ++++---------- 3 files changed, 6 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 023e888e1163..8a4280bc350f 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -147,12 +147,11 @@ static inline unsigned int iter_shift(const struct radix_tree_iter *iter) * radix_tree_lookup_slot * radix_tree_tag_get * radix_tree_gang_lookup - * radix_tree_gang_lookup_slot * radix_tree_gang_lookup_tag * radix_tree_gang_lookup_tag_slot * radix_tree_tagged * - * The first 8 functions are able to be called locklessly, using RCU. The + * The first 7 functions are able to be called locklessly, using RCU. The * caller must ensure calls to these functions are made within rcu_read_lock() * regions. Other readers (lock-free or otherwise) and modifications may be * running concurrently. @@ -263,9 +262,6 @@ void radix_tree_clear_tags(struct radix_tree_root *, struct radix_tree_node *, unsigned int radix_tree_gang_lookup(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items); -unsigned int radix_tree_gang_lookup_slot(const struct radix_tree_root *, - void __rcu ***results, unsigned long *indices, - unsigned long first_index, unsigned int max_items); int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8a58051eb5b3..2f9c0e45eeb7 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1097,7 +1097,7 @@ void __radix_tree_replace(struct radix_tree_root *root, * @slot: pointer to slot * @item: new item to store in the slot. * - * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(), + * For use with radix_tree_lookup_slot() and * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked * across slot lookup and replacement. * @@ -1731,48 +1731,6 @@ radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, } EXPORT_SYMBOL(radix_tree_gang_lookup); -/** - * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree - * @root: radix tree root - * @results: where the results of the lookup are placed - * @indices: where their indices should be placed (but usually NULL) - * @first_index: start the lookup from this key - * @max_items: place up to this many items at *results - * - * Performs an index-ascending scan of the tree for present items. Places - * their slots at *@results and returns the number of items which were - * placed at *@results. - * - * The implementation is naive. - * - * Like radix_tree_gang_lookup as far as RCU and locking goes. 
Slots must - * be dereferenced with radix_tree_deref_slot, and if using only RCU - * protection, radix_tree_deref_slot may fail requiring a retry. - */ -unsigned int -radix_tree_gang_lookup_slot(const struct radix_tree_root *root, - void __rcu ***results, unsigned long *indices, - unsigned long first_index, unsigned int max_items) -{ - struct radix_tree_iter iter; - void __rcu **slot; - unsigned int ret = 0; - - if (unlikely(!max_items)) - return 0; - - radix_tree_for_each_slot(slot, root, &iter, first_index) { - results[ret] = slot; - if (indices) - indices[ret] = iter.index; - if (++ret == max_items) - break; - } - - return ret; -} -EXPORT_SYMBOL(radix_tree_gang_lookup_slot); - /** * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree * based on a tag diff --git a/mm/shmem.c b/mm/shmem.c index 8633bd3dc433..608c9252248f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1431,23 +1431,17 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp, struct shmem_inode_info *info, pgoff_t index) { struct vm_area_struct pvma; - struct inode *inode = &info->vfs_inode; - struct address_space *mapping = inode->i_mapping; - pgoff_t idx, hindex; - void __rcu **results; + struct address_space *mapping = info->vfs_inode.i_mapping; + pgoff_t hindex; struct page *page; if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) return NULL; hindex = round_down(index, HPAGE_PMD_NR); - rcu_read_lock(); - if (radix_tree_gang_lookup_slot(&mapping->i_pages, &results, &idx, - hindex, 1) && idx < hindex + HPAGE_PMD_NR) { - rcu_read_unlock(); + if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1, + XA_PRESENT)) return NULL; - } - rcu_read_unlock(); shmem_pseudo_vma_init(&pvma, info, hindex); page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN, -- cgit v1.2.3-71-gd317 From 1cf56f9d670b88b2e947a7ccdb8ba32e6477915d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 9 Apr 2018 16:24:45 -0400 Subject: radix tree: Remove radix_tree_update_node_t The only user of this functionality was the workingset code, and it's now been converted to the XArray. Remove __radix_tree_delete_node() entirely as it was also only used by the workingset code. 
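The replacement pattern, condensed from the workingset conversion earlier in this series: aim an XA_STATE at the node's own slot in its parent, attach the update callback, and store NULL. A sketch assuming the caller already holds the xa_lock:

static void my_delete_node(struct xa_node *node)
{
	XA_STATE(xas, node->array, 0);

	xas.xa_node = xa_parent_locked(node->array, node);
	xas.xa_offset = node->offset;
	xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
	xas_set_update(&xas, workingset_update_node);
	xas_store(&xas, NULL);	/* frees the node, shrinking the tree */
}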
Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 7 +----- lib/idr.c | 2 +- lib/radix-tree.c | 42 +++++++---------------------------- tools/testing/radix-tree/multiorder.c | 2 +- 4 files changed, 11 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 8a4280bc350f..7513d0df984b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -242,17 +242,12 @@ void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index, void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *, unsigned long index); -typedef void (*radix_tree_update_node_t)(struct radix_tree_node *); void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *, - void __rcu **slot, void *entry, - radix_tree_update_node_t update_node); + void __rcu **slot, void *entry); void radix_tree_iter_replace(struct radix_tree_root *, const struct radix_tree_iter *, void __rcu **slot, void *entry); void radix_tree_replace_slot(struct radix_tree_root *, void __rcu **slot, void *entry); -void __radix_tree_delete_node(struct radix_tree_root *, - struct radix_tree_node *, - radix_tree_update_node_t update_node); void radix_tree_iter_delete(struct radix_tree_root *, struct radix_tree_iter *iter, void __rcu **slot); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); diff --git a/lib/idr.c b/lib/idr.c index 3c20ad9b0463..cb1db9b8d3f6 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -297,7 +297,7 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id) if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) return ERR_PTR(-ENOENT); - __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL); + __radix_tree_replace(&idr->idr_rt, node, slot, ptr); return entry; } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 2f9c0e45eeb7..c4c252185734 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -562,8 +562,7 @@ out: * radix_tree_shrink - shrink radix tree to minimum height * @root radix tree root */ -static inline bool radix_tree_shrink(struct radix_tree_root *root, - radix_tree_update_node_t update_node) +static inline bool radix_tree_shrink(struct radix_tree_root *root) { bool shrunk = false; @@ -631,8 +630,6 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, node->count = 0; if (!radix_tree_is_internal_node(child)) { node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; - if (update_node) - update_node(node); } WARN_ON_ONCE(!list_empty(&node->private_list)); @@ -644,8 +641,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, } static bool delete_node(struct radix_tree_root *root, - struct radix_tree_node *node, - radix_tree_update_node_t update_node) + struct radix_tree_node *node) { bool deleted = false; @@ -655,7 +651,7 @@ static bool delete_node(struct radix_tree_root *root, if (node->count) { if (node_to_entry(node) == rcu_dereference_raw(root->xa_head)) - deleted |= radix_tree_shrink(root, update_node); + deleted |= radix_tree_shrink(root); return deleted; } @@ -1059,15 +1055,13 @@ static int calculate_count(struct radix_tree_root *root, * @node: pointer to tree node * @slot: pointer to slot in @node * @item: new item to store in the slot. - * @update_node: callback for changing leaf nodes * * For use with __radix_tree_lookup(). Caller must hold tree write locked * across slot lookup and replacement. 
*/ void __radix_tree_replace(struct radix_tree_root *root, struct radix_tree_node *node, - void __rcu **slot, void *item, - radix_tree_update_node_t update_node) + void __rcu **slot, void *item) { void *old = rcu_dereference_raw(*slot); int values = !!xa_is_value(item) - !!xa_is_value(old); @@ -1085,10 +1079,7 @@ void __radix_tree_replace(struct radix_tree_root *root, if (!node) return; - if (update_node) - update_node(node); - - delete_node(root, node, update_node); + delete_node(root, node); } /** @@ -1110,7 +1101,7 @@ void __radix_tree_replace(struct radix_tree_root *root, void radix_tree_replace_slot(struct radix_tree_root *root, void __rcu **slot, void *item) { - __radix_tree_replace(root, NULL, slot, item, NULL); + __radix_tree_replace(root, NULL, slot, item); } EXPORT_SYMBOL(radix_tree_replace_slot); @@ -1127,7 +1118,7 @@ void radix_tree_iter_replace(struct radix_tree_root *root, const struct radix_tree_iter *iter, void __rcu **slot, void *item) { - __radix_tree_replace(root, iter->node, slot, item, NULL); + __radix_tree_replace(root, iter->node, slot, item); } #ifdef CONFIG_RADIX_TREE_MULTIORDER @@ -1807,23 +1798,6 @@ radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); -/** - * __radix_tree_delete_node - try to free node after clearing a slot - * @root: radix tree root - * @node: node containing @index - * @update_node: callback for changing leaf nodes - * - * After clearing the slot at @index in @node from radix tree - * rooted at @root, call this function to attempt freeing the - * node and shrinking the tree. - */ -void __radix_tree_delete_node(struct radix_tree_root *root, - struct radix_tree_node *node, - radix_tree_update_node_t update_node) -{ - delete_node(root, node, update_node); -} - static bool __radix_tree_delete(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot) { @@ -1839,7 +1813,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, node_tag_clear(root, node, tag, offset); replace_slot(slot, NULL, node, -1, values); - return node && delete_node(root, node, NULL); + return node && delete_node(root, node); } /** diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 60786fa55302..0e0ff26c9bcb 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -618,7 +618,7 @@ static void multiorder_account(void) __radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5)); __radix_tree_lookup(&tree, 1 << 5, &node, &slot); assert(node->count == node->nr_values * 2); - __radix_tree_replace(&tree, node, slot, NULL, NULL); + __radix_tree_replace(&tree, node, slot, NULL); assert(node->nr_values == 0); item_kill_tree(&tree); -- cgit v1.2.3-71-gd317 From 2956c6644bfd9aab9f6b21a12e1bd75876d9dd73 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 19 May 2018 16:47:47 -0400 Subject: radix tree: Remove split/join code radix_tree_split and radix_tree_join were never used upstream. Remove them; if they're needed in future they will be replaced by XArray equivalents. 
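For reference, the closest XArray analogue of the join use case is a multi-index entry; below is a sketch under that assumption (my_store_order and my_item are hypothetical names), not a drop-in replacement for either removed function:

static int my_store_order(struct xarray *xa, unsigned long index,
			  unsigned int order, void *my_item)
{
	XA_STATE_ORDER(xas, xa, index, order);

	do {
		xas_lock(&xas);
		/* One entry now covers the 2^order aligned range. */
		xas_store(&xas, my_item);
		xas_unlock(&xas);
	} while (xas_nomem(&xas, GFP_KERNEL));

	return xas_error(&xas);
}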
Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 6 - lib/radix-tree.c | 171 +---------------------- tools/testing/radix-tree/benchmark.c | 91 ------------- tools/testing/radix-tree/multiorder.c | 247 ---------------------------------- 4 files changed, 2 insertions(+), 513 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 7513d0df984b..44f9abdcb5ab 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -284,12 +284,6 @@ static inline void radix_tree_preload_end(void) preempt_enable(); } -int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t); -int radix_tree_split(struct radix_tree_root *, unsigned long index, - unsigned new_order); -int radix_tree_join(struct radix_tree_root *, unsigned long index, - unsigned new_order, void *); - void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index c4c252185734..c52e0bef5264 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -415,28 +415,6 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) } EXPORT_SYMBOL(radix_tree_maybe_preload); -#ifdef CONFIG_RADIX_TREE_MULTIORDER -/* - * Preload with enough objects to ensure that we can split a single entry - * of order @old_order into many entries of size @new_order - */ -int radix_tree_split_preload(unsigned int old_order, unsigned int new_order, - gfp_t gfp_mask) -{ - unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT); - unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) - - (new_order / RADIX_TREE_MAP_SHIFT); - unsigned nr = 0; - - WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); - BUG_ON(new_order >= old_order); - - while (layers--) - nr = nr * RADIX_TREE_MAP_SIZE + 1; - return __radix_tree_preload(gfp_mask, top * nr); -} -#endif - /* * The same as function above, but preload number of nodes required to insert * (1 << order) continuous naturally-aligned elements. @@ -1111,8 +1089,8 @@ EXPORT_SYMBOL(radix_tree_replace_slot); * @slot: pointer to slot * @item: new item to store in the slot. * - * For use with radix_tree_split() and radix_tree_for_each_slot(). - * Caller must hold tree write locked across split and replacement. + * For use with radix_tree_for_each_slot(). + * Caller must hold tree write locked. */ void radix_tree_iter_replace(struct radix_tree_root *root, const struct radix_tree_iter *iter, @@ -1121,151 +1099,6 @@ void radix_tree_iter_replace(struct radix_tree_root *root, __radix_tree_replace(root, iter->node, slot, item); } -#ifdef CONFIG_RADIX_TREE_MULTIORDER -/** - * radix_tree_join - replace multiple entries with one multiorder entry - * @root: radix tree root - * @index: an index inside the new entry - * @order: order of the new entry - * @item: new entry - * - * Call this function to replace several entries with one larger entry. - * The existing entries are presumed to not need freeing as a result of - * this call. - * - * The replacement entry will have all the tags set on it that were set - * on any of the entries it is replacing. 
- */ -int radix_tree_join(struct radix_tree_root *root, unsigned long index, - unsigned order, void *item) -{ - struct radix_tree_node *node; - void __rcu **slot; - int error; - - BUG_ON(radix_tree_is_internal_node(item)); - - error = __radix_tree_create(root, index, order, &node, &slot); - if (!error) - error = insert_entries(node, slot, item, order, true); - if (error > 0) - error = 0; - - return error; -} - -/** - * radix_tree_split - Split an entry into smaller entries - * @root: radix tree root - * @index: An index within the large entry - * @order: Order of new entries - * - * Call this function as the first step in replacing a multiorder entry - * with several entries of lower order. After this function returns, - * loop over the relevant portion of the tree using radix_tree_for_each_slot() - * and call radix_tree_iter_replace() to set up each new entry. - * - * The tags from this entry are replicated to all the new entries. - * - * The radix tree should be locked against modification during the entire - * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which - * should prompt RCU walkers to restart the lookup from the root. - */ -int radix_tree_split(struct radix_tree_root *root, unsigned long index, - unsigned order) -{ - struct radix_tree_node *parent, *node, *child; - void __rcu **slot; - unsigned int offset, end; - unsigned n, tag, tags = 0; - gfp_t gfp = root_gfp_mask(root); - - if (!__radix_tree_lookup(root, index, &parent, &slot)) - return -ENOENT; - if (!parent) - return -ENOENT; - - offset = get_slot_offset(parent, slot); - - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tag_get(parent, tag, offset)) - tags |= 1 << tag; - - for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { - if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end]))) - break; - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tags & (1 << tag)) - tag_set(parent, tag, end); - /* rcu_assign_pointer ensures tags are set before RETRY */ - rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY); - } - rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY); - parent->nr_values -= (end - offset); - - if (order == parent->shift) - return 0; - if (order > parent->shift) { - while (offset < end) - offset += insert_entries(parent, &parent->slots[offset], - RADIX_TREE_RETRY, order, true); - return 0; - } - - node = parent; - - for (;;) { - if (node->shift > order) { - child = radix_tree_node_alloc(gfp, node, root, - node->shift - RADIX_TREE_MAP_SHIFT, - offset, 0, 0); - if (!child) - goto nomem; - if (node != parent) { - node->count++; - rcu_assign_pointer(node->slots[offset], - node_to_entry(child)); - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tags & (1 << tag)) - tag_set(node, tag, offset); - } - - node = child; - offset = 0; - continue; - } - - n = insert_entries(node, &node->slots[offset], - RADIX_TREE_RETRY, order, false); - BUG_ON(n > RADIX_TREE_MAP_SIZE); - - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tags & (1 << tag)) - tag_set(node, tag, offset); - offset += n; - - while (offset == RADIX_TREE_MAP_SIZE) { - if (node == parent) - break; - offset = node->offset; - child = node; - node = node->parent; - rcu_assign_pointer(node->slots[offset], - node_to_entry(child)); - offset++; - } - if ((node == parent) && (offset == end)) - return 0; - } - - nomem: - /* Shouldn't happen; did user forget to preload? 
*/ - /* TODO: free all the allocated nodes */ - WARN_ON(1); - return -ENOMEM; -} -#endif - static void node_tag_set(struct radix_tree_root *root, struct radix_tree_node *node, unsigned int tag, unsigned int offset) diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index 99c40f3ed133..35741b9c2a4a 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -146,90 +146,6 @@ static void benchmark_size(unsigned long size, unsigned long step, int order) rcu_barrier(); } -static long long __benchmark_split(unsigned long index, - int old_order, int new_order) -{ - struct timespec start, finish; - long long nsec; - RADIX_TREE(tree, GFP_ATOMIC); - - item_insert_order(&tree, index, old_order); - - clock_gettime(CLOCK_MONOTONIC, &start); - radix_tree_split(&tree, index, new_order); - clock_gettime(CLOCK_MONOTONIC, &finish); - nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + - (finish.tv_nsec - start.tv_nsec); - - item_kill_tree(&tree); - - return nsec; - -} - -static void benchmark_split(unsigned long size, unsigned long step) -{ - int i, j, idx; - long long nsec = 0; - - - for (idx = 0; idx < size; idx += step) { - for (i = 3; i < 11; i++) { - for (j = 0; j < i; j++) { - nsec += __benchmark_split(idx, i, j); - } - } - } - - printv(2, "Size %8ld, step %8ld, split time %10lld ns\n", - size, step, nsec); - -} - -static long long __benchmark_join(unsigned long index, - unsigned order1, unsigned order2) -{ - unsigned long loc; - struct timespec start, finish; - long long nsec; - void *item, *item2 = item_create(index + 1, order1); - RADIX_TREE(tree, GFP_KERNEL); - - item_insert_order(&tree, index, order2); - item = radix_tree_lookup(&tree, index); - - clock_gettime(CLOCK_MONOTONIC, &start); - radix_tree_join(&tree, index + 1, order1, item2); - clock_gettime(CLOCK_MONOTONIC, &finish); - nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + - (finish.tv_nsec - start.tv_nsec); - - loc = find_item(&tree, item); - if (loc == -1) - free(item); - - item_kill_tree(&tree); - - return nsec; -} - -static void benchmark_join(unsigned long step) -{ - int i, j, idx; - long long nsec = 0; - - for (idx = 0; idx < 1 << 10; idx += step) { - for (i = 1; i < 15; i++) { - for (j = 0; j < i; j++) { - nsec += __benchmark_join(idx, i, j); - } - } - } - - printv(2, "Size %8d, step %8ld, join time %10lld ns\n", - 1 << 10, step, nsec); -} - void benchmark(void) { unsigned long size[] = {1 << 10, 1 << 20, 0}; @@ -247,11 +163,4 @@ void benchmark(void) for (c = 0; size[c]; c++) for (s = 0; step[s]; s++) benchmark_size(size[c], step[s] << 9, 9); - - for (c = 0; size[c]; c++) - for (s = 0; step[s]; s++) - benchmark_split(size[c], step[s]); - - for (s = 0; step[s]; s++) - benchmark_join(step[s]); } diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 0e0ff26c9bcb..221e042d1b89 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -356,251 +356,6 @@ void multiorder_tagged_iteration(void) item_kill_tree(&tree); } -/* - * Basic join checks: make sure we can't find an entry in the tree after - * a larger entry has replaced it - */ -static void multiorder_join1(unsigned long index, - unsigned order1, unsigned order2) -{ - unsigned long loc; - void *item, *item2 = item_create(index + 1, order1); - RADIX_TREE(tree, GFP_KERNEL); - - item_insert_order(&tree, index, order2); - item = radix_tree_lookup(&tree, index); - radix_tree_join(&tree, index + 1, order1, item2); - loc = 
find_item(&tree, item); - if (loc == -1) - free(item); - item = radix_tree_lookup(&tree, index + 1); - assert(item == item2); - item_kill_tree(&tree); -} - -/* - * Check that the accounting of value entries is handled correctly - * by joining a value entry to a normal pointer. - */ -static void multiorder_join2(unsigned order1, unsigned order2) -{ - RADIX_TREE(tree, GFP_KERNEL); - struct radix_tree_node *node; - void *item1 = item_create(0, order1); - void *item2; - - item_insert_order(&tree, 0, order2); - radix_tree_insert(&tree, 1 << order2, xa_mk_value(5)); - item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); - assert(item2 == xa_mk_value(5)); - assert(node->nr_values == 1); - - item2 = radix_tree_lookup(&tree, 0); - free(item2); - - radix_tree_join(&tree, 0, order1, item1); - item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); - assert(item2 == item1); - assert(node->nr_values == 0); - item_kill_tree(&tree); -} - -/* - * This test revealed an accounting bug for value entries at one point. - * Nodes were being freed back into the pool with an elevated exception count - * by radix_tree_join() and then radix_tree_split() was failing to zero the - * count of value entries. - */ -static void multiorder_join3(unsigned int order) -{ - RADIX_TREE(tree, GFP_KERNEL); - struct radix_tree_node *node; - void **slot; - struct radix_tree_iter iter; - unsigned long i; - - for (i = 0; i < (1 << order); i++) { - radix_tree_insert(&tree, i, xa_mk_value(5)); - } - - radix_tree_join(&tree, 0, order, xa_mk_value(7)); - rcu_barrier(); - - radix_tree_split(&tree, 0, 0); - - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_iter_replace(&tree, &iter, slot, xa_mk_value(5)); - } - - __radix_tree_lookup(&tree, 0, &node, NULL); - assert(node->nr_values == node->count); - - item_kill_tree(&tree); -} - -static void multiorder_join(void) -{ - int i, j, idx; - - for (idx = 0; idx < 1024; idx = idx * 2 + 3) { - for (i = 1; i < 15; i++) { - for (j = 0; j < i; j++) { - multiorder_join1(idx, i, j); - } - } - } - - for (i = 1; i < 15; i++) { - for (j = 0; j < i; j++) { - multiorder_join2(i, j); - } - } - - for (i = 3; i < 10; i++) { - multiorder_join3(i); - } -} - -static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc) -{ - struct radix_tree_preload *rtp = &radix_tree_preloads; - if (rtp->nr != 0) - printv(2, "split(%u %u) remaining %u\n", old_order, new_order, - rtp->nr); - /* - * Can't check for equality here as some nodes may have been - * RCU-freed while we ran. But we should never finish with more - * nodes allocated since they should have all been preloaded. 
- */ - if (nr_allocated > alloc) - printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order, - alloc, nr_allocated); -} - -static void __multiorder_split(int old_order, int new_order) -{ - RADIX_TREE(tree, GFP_ATOMIC); - void **slot; - struct radix_tree_iter iter; - unsigned alloc; - struct item *item; - - radix_tree_preload(GFP_KERNEL); - assert(item_insert_order(&tree, 0, old_order) == 0); - radix_tree_preload_end(); - - /* Wipe out the preloaded cache or it'll confuse check_mem() */ - radix_tree_cpu_dead(0); - - item = radix_tree_tag_set(&tree, 0, 2); - - radix_tree_split_preload(old_order, new_order, GFP_KERNEL); - alloc = nr_allocated; - radix_tree_split(&tree, 0, new_order); - check_mem(old_order, new_order, alloc); - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_iter_replace(&tree, &iter, slot, - item_create(iter.index, new_order)); - } - radix_tree_preload_end(); - - item_kill_tree(&tree); - free(item); -} - -static void __multiorder_split2(int old_order, int new_order) -{ - RADIX_TREE(tree, GFP_KERNEL); - void **slot; - struct radix_tree_iter iter; - struct radix_tree_node *node; - void *item; - - __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5)); - - item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == xa_mk_value(5)); - assert(node->nr_values > 0); - - radix_tree_split(&tree, 0, new_order); - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_iter_replace(&tree, &iter, slot, - item_create(iter.index, new_order)); - } - - item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item != xa_mk_value(5)); - assert(node->nr_values == 0); - - item_kill_tree(&tree); -} - -static void __multiorder_split3(int old_order, int new_order) -{ - RADIX_TREE(tree, GFP_KERNEL); - void **slot; - struct radix_tree_iter iter; - struct radix_tree_node *node; - void *item; - - __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5)); - - item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == xa_mk_value(5)); - assert(node->nr_values > 0); - - radix_tree_split(&tree, 0, new_order); - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_iter_replace(&tree, &iter, slot, xa_mk_value(7)); - } - - item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == xa_mk_value(7)); - assert(node->nr_values > 0); - - item_kill_tree(&tree); - - __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5)); - - item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == xa_mk_value(5)); - assert(node->nr_values > 0); - - radix_tree_split(&tree, 0, new_order); - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - if (iter.index == (1 << new_order)) - radix_tree_iter_replace(&tree, &iter, slot, - xa_mk_value(7)); - else - radix_tree_iter_replace(&tree, &iter, slot, NULL); - } - - item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL); - assert(item == xa_mk_value(7)); - assert(node->count == node->nr_values); - do { - node = node->parent; - if (!node) - break; - assert(node->count == 1); - assert(node->nr_values == 0); - } while (1); - - item_kill_tree(&tree); -} - -static void multiorder_split(void) -{ - int i, j; - - for (i = 3; i < 11; i++) - for (j = 0; j < i; j++) { - __multiorder_split(i, j); - __multiorder_split2(i, j); - __multiorder_split3(i, j); - } -} - static void multiorder_account(void) { RADIX_TREE(tree, GFP_KERNEL); @@ -702,8 +457,6 @@ void multiorder_checks(void) multiorder_tag_tests(); multiorder_iteration(); multiorder_tagged_iteration(); - multiorder_join(); - multiorder_split(); 
multiorder_account(); multiorder_iteration_race(); -- cgit v1.2.3-71-gd317 From 8cf2f98411e3a0865026a1061af637161b16d32b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 19 May 2018 16:47:47 -0400 Subject: radix tree: Remove radix_tree_maybe_preload_order This function was only used by the page cache which is now converted to the XArray. Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 1 - lib/radix-tree.c | 74 ---------------------------------------------- 2 files changed, 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 44f9abdcb5ab..664d50c5c2ff 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -259,7 +259,6 @@ unsigned int radix_tree_gang_lookup(const struct radix_tree_root *, unsigned int max_items); int radix_tree_preload(gfp_t gfp_mask); int radix_tree_maybe_preload(gfp_t gfp_mask); -int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *, unsigned long index, unsigned int tag); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index c52e0bef5264..b57ddc3dbbbd 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -41,9 +41,6 @@ #include -/* Number of nodes in fully populated tree of given height */ -static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly; - /* * Radix tree node cache. */ @@ -415,51 +412,6 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) } EXPORT_SYMBOL(radix_tree_maybe_preload); -/* - * The same as function above, but preload number of nodes required to insert - * (1 << order) continuous naturally-aligned elements. - */ -int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) -{ - unsigned long nr_subtrees; - int nr_nodes, subtree_height; - - /* Preloading doesn't help anything with this gfp mask, skip it */ - if (!gfpflags_allow_blocking(gfp_mask)) { - preempt_disable(); - return 0; - } - - /* - * Calculate number and height of fully populated subtrees it takes to - * store (1 << order) elements. - */ - nr_subtrees = 1 << order; - for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE; - subtree_height++) - nr_subtrees >>= RADIX_TREE_MAP_SHIFT; - - /* - * The worst case is zero height tree with a single item at index 0 and - * then inserting items starting at ULONG_MAX - (1 << order). - * - * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to - * 0-index item. - */ - nr_nodes = RADIX_TREE_MAX_PATH; - - /* Plus branch to fully populated subtrees. */ - nr_nodes += RADIX_TREE_MAX_PATH - subtree_height; - - /* Root node is shared. */ - nr_nodes--; - - /* Plus nodes required to build subtrees. 
*/ - nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height]; - - return __radix_tree_preload(gfp_mask, nr_nodes); -} - static unsigned radix_tree_load_root(const struct radix_tree_root *root, struct radix_tree_node **nodep, unsigned long *maxindex) { @@ -1859,31 +1811,6 @@ radix_tree_node_ctor(void *arg) INIT_LIST_HEAD(&node->private_list); } -static __init unsigned long __maxindex(unsigned int height) -{ - unsigned int width = height * RADIX_TREE_MAP_SHIFT; - int shift = RADIX_TREE_INDEX_BITS - width; - - if (shift < 0) - return ~0UL; - if (shift >= BITS_PER_LONG) - return 0UL; - return ~0UL >> shift; -} - -static __init void radix_tree_init_maxnodes(void) -{ - unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1]; - unsigned int i, j; - - for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) - height_to_maxindex[i] = __maxindex(i); - for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) { - for (j = i; j > 0; j--) - height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1; - } -} - static int radix_tree_cpu_dead(unsigned int cpu) { struct radix_tree_preload *rtp; @@ -1911,7 +1838,6 @@ void __init radix_tree_init(void) sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, radix_tree_node_ctor); - radix_tree_init_maxnodes(); ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", NULL, radix_tree_cpu_dead); WARN_ON(ret < 0); -- cgit v1.2.3-71-gd317 From adb9d9c4ccb1ff0bf1d376e65f36aa5573c75c1a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 9 Apr 2018 16:52:21 -0400 Subject: radix tree: Remove radix_tree_clear_tags The page cache was the only user of this interface and it has now been converted to the XArray. Transform the test into a test of xas_init_marks(). Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 2 -- lib/radix-tree.c | 13 ------------ lib/test_xarray.c | 40 ++++++++++++++++++++++++++++++++++++ tools/testing/radix-tree/tag_check.c | 29 -------------------------- 4 files changed, 40 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 664d50c5c2ff..7b009af3bb1e 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -252,8 +252,6 @@ void radix_tree_iter_delete(struct radix_tree_root *, struct radix_tree_iter *iter, void __rcu **slot); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete(struct radix_tree_root *, unsigned long); -void radix_tree_clear_tags(struct radix_tree_root *, struct radix_tree_node *, - void __rcu **slot); unsigned int radix_tree_gang_lookup(const struct radix_tree_root *, void **results, unsigned long first_index, unsigned int max_items); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b57ddc3dbbbd..e92f2b6ae9c5 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1670,19 +1670,6 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) } EXPORT_SYMBOL(radix_tree_delete); -void radix_tree_clear_tags(struct radix_tree_root *root, - struct radix_tree_node *node, - void __rcu **slot) -{ - if (node) { - unsigned int tag, offset = get_slot_offset(node, slot); - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - node_tag_clear(root, node, tag, offset); - } else { - root_tag_clear_all(root); - } -} - /** * radix_tree_tagged - test whether any items in the tree are tagged * @root: radix tree root diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 815daffdd8c9..ca86141641cb 100644 --- a/lib/test_xarray.c +++ 
b/lib/test_xarray.c @@ -213,12 +213,52 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_xa_mark_2(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + unsigned long index; + unsigned int count = 0; + void *entry; + + xa_store_index(xa, 0, GFP_KERNEL); + xa_set_mark(xa, 0, XA_MARK_0); + xas_lock(&xas); + xas_load(&xas); + xas_init_marks(&xas); + xas_unlock(&xas); + XA_BUG_ON(xa, !xa_get_mark(xa, 0, XA_MARK_0) == 0); + + for (index = 3500; index < 4500; index++) { + xa_store_index(xa, index, GFP_KERNEL); + xa_set_mark(xa, index, XA_MARK_0); + } + + xas_reset(&xas); + rcu_read_lock(); + xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) + count++; + rcu_read_unlock(); + XA_BUG_ON(xa, count != 1000); + + xas_lock(&xas); + xas_for_each(&xas, entry, ULONG_MAX) { + xas_init_marks(&xas); + XA_BUG_ON(xa, !xa_get_mark(xa, xas.xa_index, XA_MARK_0)); + XA_BUG_ON(xa, !xas_get_mark(&xas, XA_MARK_0)); + } + xas_unlock(&xas); + + xa_destroy(xa); +} + static noinline void check_xa_mark(struct xarray *xa) { unsigned long index; for (index = 0; index < 16384; index += 4) check_xa_mark_1(xa, index); + + check_xa_mark_2(xa); } static noinline void check_xa_shrink(struct xarray *xa) diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index 543181e4847b..56a42f1c5ab0 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -331,34 +331,6 @@ static void single_check(void) item_kill_tree(&tree); } -void radix_tree_clear_tags_test(void) -{ - unsigned long index; - struct radix_tree_node *node; - struct radix_tree_iter iter; - void **slot; - - RADIX_TREE(tree, GFP_KERNEL); - - item_insert(&tree, 0); - item_tag_set(&tree, 0, 0); - __radix_tree_lookup(&tree, 0, &node, &slot); - radix_tree_clear_tags(&tree, node, slot); - assert(item_tag_get(&tree, 0, 0) == 0); - - for (index = 0; index < 1000; index++) { - item_insert(&tree, index); - item_tag_set(&tree, index, 0); - } - - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_clear_tags(&tree, iter.node, slot); - assert(item_tag_get(&tree, iter.index, 0) == 0); - } - - item_kill_tree(&tree); -} - void tag_check(void) { single_check(); @@ -376,5 +348,4 @@ void tag_check(void) thrash_tags(); rcu_barrier(); printv(2, "after thrash_tags: %d allocated\n", nr_allocated); - radix_tree_clear_tags_test(); } -- cgit v1.2.3-71-gd317 From 372266ba0267803564824b1c09f1bb7f3f3fc761 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 18 Aug 2018 07:09:22 -0400 Subject: radix tree test suite: Convert tag_tagged_items to XArray The tag_tagged_items() function is supposed to test the page-writeback tagging code. Since that has been converted to the XArray, there's not much point in testing the radix tree's tagging code. This requires using the pthread mutex embedded in the xarray instead of an external lock, so remove the pthread mutexes which protect xarrays/radix trees. Also remove radix_tree_iter_tag_set() as this was the last user. 
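A sketch of what the converted test helper might look like, reconstructed from the tag_pages_for_writeback() pattern earlier in this series; it relies on the xarray's embedded lock rather than an external pthread mutex, and struct item is the test suite's entry type:

static unsigned long tag_tagged_items(struct xarray *xa, unsigned long start,
		unsigned long end, unsigned batch, xa_mark_t iftag,
		xa_mark_t thentag)
{
	XA_STATE(xas, xa, start);
	unsigned long tagged = 0;
	struct item *item;

	if (batch < 1)
		batch = 1;

	xas_lock_irq(&xas);
	xas_for_each_marked(&xas, item, end, iftag) {
		xas_set_mark(&xas, thentag);
		if (++tagged % batch)
			continue;
		/* Drop the lock periodically so writers can get in. */
		xas_pause(&xas);
		xas_unlock_irq(&xas);
		xas_lock_irq(&xas);
	}
	xas_unlock_irq(&xas);

	return tagged;
}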
Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 2 -- lib/radix-tree.c | 12 ---------- tools/testing/radix-tree/iteration_check.c | 16 ++++++------- tools/testing/radix-tree/main.c | 4 ++-- tools/testing/radix-tree/multiorder.c | 12 +++++----- tools/testing/radix-tree/regression1.c | 17 +++++++------- tools/testing/radix-tree/regression2.c | 8 +++---- tools/testing/radix-tree/tag_check.c | 4 ++-- tools/testing/radix-tree/test.c | 37 ++++++++++++------------------ tools/testing/radix-tree/test.h | 5 ++-- 10 files changed, 46 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 7b009af3bb1e..9a1460488163 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -264,8 +264,6 @@ void *radix_tree_tag_clear(struct radix_tree_root *, unsigned long index, unsigned int tag); int radix_tree_tag_get(const struct radix_tree_root *, unsigned long index, unsigned int tag); -void radix_tree_iter_tag_set(struct radix_tree_root *, - const struct radix_tree_iter *iter, unsigned int tag); void radix_tree_iter_tag_clear(struct radix_tree_root *, const struct radix_tree_iter *iter, unsigned int tag); unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *, diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e92f2b6ae9c5..f107dd2698e3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1108,18 +1108,6 @@ void *radix_tree_tag_set(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_tag_set); -/** - * radix_tree_iter_tag_set - set a tag on the current iterator entry - * @root: radix tree root - * @iter: iterator state - * @tag: tag to set - */ -void radix_tree_iter_tag_set(struct radix_tree_root *root, - const struct radix_tree_iter *iter, unsigned int tag) -{ - node_tag_set(root, iter->node, tag, iter_offset(iter)); -} - static void node_tag_clear(struct radix_tree_root *root, struct radix_tree_node *node, unsigned int tag, unsigned int offset) diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c index a92bab513701..d047327bb9ef 100644 --- a/tools/testing/radix-tree/iteration_check.c +++ b/tools/testing/radix-tree/iteration_check.c @@ -18,10 +18,9 @@ #define NUM_THREADS 5 #define MAX_IDX 100 -#define TAG 0 -#define NEW_TAG 1 +#define TAG XA_MARK_0 +#define NEW_TAG XA_MARK_1 -static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_t threads[NUM_THREADS]; static unsigned int seeds[3]; static RADIX_TREE(tree, GFP_KERNEL); @@ -38,7 +37,7 @@ static void *add_entries_fn(void *arg) int order; for (pgoff = 0; pgoff < MAX_IDX; pgoff++) { - pthread_mutex_lock(&tree_lock); + xa_lock(&tree); for (order = max_order; order >= 0; order--) { if (item_insert_order(&tree, pgoff, order) == 0) { @@ -46,7 +45,7 @@ static void *add_entries_fn(void *arg) break; } } - pthread_mutex_unlock(&tree_lock); + xa_unlock(&tree); } } @@ -150,9 +149,9 @@ static void *remove_entries_fn(void *arg) pgoff = rand_r(&seeds[2]) % MAX_IDX; - pthread_mutex_lock(&tree_lock); + xa_lock(&tree); item_delete(&tree, pgoff); - pthread_mutex_unlock(&tree_lock); + xa_unlock(&tree); } rcu_unregister_thread(); @@ -165,8 +164,7 @@ static void *tag_entries_fn(void *arg) rcu_register_thread(); while (!test_complete) { - tag_tagged_items(&tree, &tree_lock, 0, MAX_IDX, 10, TAG, - NEW_TAG); + tag_tagged_items(&tree, 0, MAX_IDX, 10, TAG, NEW_TAG); } rcu_unregister_thread(); return NULL; diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c 
index 79589ea570ab..77a44c54998f 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -214,7 +214,7 @@ void copy_tag_check(void) } // printf("\ncopying tags...\n"); - tagged = tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1); + tagged = tag_tagged_items(&tree, start, end, ITEMS, XA_MARK_0, XA_MARK_1); // printf("checking copied tags\n"); assert(tagged == count); @@ -223,7 +223,7 @@ void copy_tag_check(void) /* Copy tags in several rounds */ // printf("\ncopying tags...\n"); tmp = rand() % (count / 10 + 2); - tagged = tag_tagged_items(&tree, NULL, start, end, tmp, 0, 2); + tagged = tag_tagged_items(&tree, start, end, tmp, XA_MARK_0, XA_MARK_2); assert(tagged == count); // printf("%lu %lu %lu\n", tagged, tmp, count); diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 221e042d1b89..0436554a099a 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -59,7 +59,7 @@ static void __multiorder_tag_test(int index, int order) assert(!radix_tree_tag_get(&tree, i, 1)); } - assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1); + assert(tag_tagged_items(&tree, 0, ~0UL, 10, XA_MARK_0, XA_MARK_1) == 1); assert(radix_tree_tag_clear(&tree, index, 0)); for_each_index(i, base, order) { @@ -87,7 +87,7 @@ static void __multiorder_tag_test2(unsigned order, unsigned long index2) assert(radix_tree_tag_set(&tree, 0, 0)); assert(radix_tree_tag_set(&tree, index2, 0)); - assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 2); + assert(tag_tagged_items(&tree, 0, ~0UL, 10, XA_MARK_0, XA_MARK_1) == 2); item_kill_tree(&tree); } @@ -318,8 +318,8 @@ void multiorder_tagged_iteration(void) } } - assert(tag_tagged_items(&tree, NULL, 0, ~0UL, TAG_ENTRIES, 1, 2) == - TAG_ENTRIES); + assert(tag_tagged_items(&tree, 0, ~0UL, TAG_ENTRIES, XA_MARK_1, + XA_MARK_2) == TAG_ENTRIES); for (j = 0; j < 256; j++) { int mask, k; @@ -345,8 +345,8 @@ void multiorder_tagged_iteration(void) } } - assert(tag_tagged_items(&tree, NULL, 1, ~0UL, MT_NUM_ENTRIES * 2, 1, 0) - == TAG_ENTRIES); + assert(tag_tagged_items(&tree, 1, ~0UL, MT_NUM_ENTRIES * 2, XA_MARK_1, + XA_MARK_0) == TAG_ENTRIES); i = 0; radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) { assert(iter.index == tag_index[i]); diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c index b4a4a7168986..a61c7bcbc72d 100644 --- a/tools/testing/radix-tree/regression1.c +++ b/tools/testing/radix-tree/regression1.c @@ -44,7 +44,6 @@ #include "regression.h" static RADIX_TREE(mt_tree, GFP_KERNEL); -static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER; struct page { pthread_mutex_t lock; @@ -126,29 +125,29 @@ static void *regression1_fn(void *arg) struct page *p; p = page_alloc(0); - pthread_mutex_lock(&mt_lock); + xa_lock(&mt_tree); radix_tree_insert(&mt_tree, 0, p); - pthread_mutex_unlock(&mt_lock); + xa_unlock(&mt_tree); p = page_alloc(1); - pthread_mutex_lock(&mt_lock); + xa_lock(&mt_tree); radix_tree_insert(&mt_tree, 1, p); - pthread_mutex_unlock(&mt_lock); + xa_unlock(&mt_tree); - pthread_mutex_lock(&mt_lock); + xa_lock(&mt_tree); p = radix_tree_delete(&mt_tree, 1); pthread_mutex_lock(&p->lock); p->count--; pthread_mutex_unlock(&p->lock); - pthread_mutex_unlock(&mt_lock); + xa_unlock(&mt_tree); page_free(p); - pthread_mutex_lock(&mt_lock); + xa_lock(&mt_tree); p = radix_tree_delete(&mt_tree, 0); pthread_mutex_lock(&p->lock); p->count--; pthread_mutex_unlock(&p->lock); - pthread_mutex_unlock(&mt_lock); + 
xa_unlock(&mt_tree); page_free(p); } } else { diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c index 424b91c77831..f2c7e640a919 100644 --- a/tools/testing/radix-tree/regression2.c +++ b/tools/testing/radix-tree/regression2.c @@ -53,9 +53,9 @@ #include "regression.h" #include "test.h" -#define PAGECACHE_TAG_DIRTY 0 -#define PAGECACHE_TAG_WRITEBACK 1 -#define PAGECACHE_TAG_TOWRITE 2 +#define PAGECACHE_TAG_DIRTY XA_MARK_0 +#define PAGECACHE_TAG_WRITEBACK XA_MARK_1 +#define PAGECACHE_TAG_TOWRITE XA_MARK_2 static RADIX_TREE(mt_tree, GFP_KERNEL); unsigned long page_count = 0; @@ -92,7 +92,7 @@ void regression2_test(void) /* 1. */ start = 0; end = max_slots - 2; - tag_tagged_items(&mt_tree, NULL, start, end, 1, + tag_tagged_items(&mt_tree, start, end, 1, PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE); /* 2. */ diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index 56a42f1c5ab0..f898957b1a19 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -24,7 +24,7 @@ __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag) item_tag_set(tree, index, tag); ret = item_tag_get(tree, index, tag); assert(ret != 0); - ret = tag_tagged_items(tree, NULL, first, ~0UL, 10, tag, !tag); + ret = tag_tagged_items(tree, first, ~0UL, 10, tag, !tag); assert(ret == 1); ret = item_tag_get(tree, index, !tag); assert(ret != 0); @@ -321,7 +321,7 @@ static void single_check(void) assert(ret == 0); verify_tag_consistency(&tree, 0); verify_tag_consistency(&tree, 1); - ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1); + ret = tag_tagged_items(&tree, first, 10, 10, XA_MARK_0, XA_MARK_1); assert(ret == 1); ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1); assert(ret == 1); diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 470419bfd49d..d70adcd03d35 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -176,35 +176,28 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, } /* Use the same pattern as tag_pages_for_writeback() in mm/page-writeback.c */ -int tag_tagged_items(struct radix_tree_root *root, pthread_mutex_t *lock, - unsigned long start, unsigned long end, unsigned batch, - unsigned iftag, unsigned thentag) +int tag_tagged_items(struct xarray *xa, unsigned long start, unsigned long end, + unsigned batch, xa_mark_t iftag, xa_mark_t thentag) { - unsigned long tagged = 0; - struct radix_tree_iter iter; - void **slot; + XA_STATE(xas, xa, start); + unsigned int tagged = 0; + struct item *item; if (batch == 0) batch = 1; - if (lock) - pthread_mutex_lock(lock); - radix_tree_for_each_tagged(slot, root, &iter, start, iftag) { - if (iter.index > end) - break; - radix_tree_iter_tag_set(root, &iter, thentag); - tagged++; - if ((tagged % batch) != 0) + xas_lock_irq(&xas); + xas_for_each_marked(&xas, item, end, iftag) { + xas_set_mark(&xas, thentag); + if (++tagged % batch) continue; - slot = radix_tree_iter_resume(slot, &iter); - if (lock) { - pthread_mutex_unlock(lock); - rcu_barrier(); - pthread_mutex_lock(lock); - } + + xas_pause(&xas); + xas_unlock_irq(&xas); + rcu_barrier(); + xas_lock_irq(&xas); } - if (lock) - pthread_mutex_unlock(lock); + xas_unlock_irq(&xas); return tagged; } diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 9532c18c6cb1..100e9a456f91 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -29,9 
+29,8 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, unsigned long nr, int chunk); void item_kill_tree(struct radix_tree_root *root); -int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *, - unsigned long start, unsigned long end, unsigned batch, - unsigned iftag, unsigned thentag); +int tag_tagged_items(struct xarray *, unsigned long start, unsigned long end, + unsigned batch, xa_mark_t iftag, xa_mark_t thentag); void xarray_tests(void); void tag_check(void); -- cgit v1.2.3-71-gd317 From 0e9446c35a80931044b6d8d2d74a9cabd248539f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 15 Aug 2018 14:13:29 -0400 Subject: xarray: Add range store functionality This version of xa_store_range() really only supports load and store. Our only user only needs basic load and store functionality, so there's no need to do the extra work to support marking and overlapping stores correctly yet. Signed-off-by: Matthew Wilcox --- Documentation/core-api/xarray.rst | 8 ++++ include/linux/xarray.h | 2 + lib/test_xarray.c | 34 ++++++++++++++ lib/xarray.c | 97 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 139 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index 463e4c798ec1..a4e705108f42 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -98,6 +98,13 @@ the XArray by calling :c:func:`xa_for_each`. You may prefer to use :c:func:`xa_find` or :c:func:`xa_find_after` to move to the next present entry in the XArray. +Calling :c:func:`xa_store_range` stores the same entry in a range +of indices. If you do this, some of the other operations will behave +in a slightly odd way. For example, marking the entry at one index +may result in the entry being marked at some, but not all of the other +indices. Storing into one index may result in the entry retrieved by +some, but not all of the other indices changing. + Finally, you can remove all entries from an XArray by calling :c:func:`xa_destroy`. If the XArray entries are pointers, you may wish to free the entries first. 
You can do this by iterating over all present @@ -156,6 +163,7 @@ Takes xa_lock internally: * :c:func:`xa_erase_bh` * :c:func:`xa_erase_irq` * :c:func:`xa_cmpxchg` + * :c:func:`xa_store_range` * :c:func:`xa_alloc` * :c:func:`xa_alloc_bh` * :c:func:`xa_alloc_irq` diff --git a/include/linux/xarray.h b/include/linux/xarray.h index e0b57af52e9b..d9514928ddac 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -292,6 +292,8 @@ void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t); void *xa_cmpxchg(struct xarray *, unsigned long index, void *old, void *entry, gfp_t); int xa_reserve(struct xarray *, unsigned long index, gfp_t); +void *xa_store_range(struct xarray *, unsigned long first, unsigned long last, + void *entry, gfp_t); bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t); void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t); diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 0f06a93b4d0e..aa47754150ce 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1039,6 +1039,39 @@ static noinline void check_create_range(struct xarray *xa) check_create_range_3(); } +static noinline void __check_store_range(struct xarray *xa, unsigned long first, + unsigned long last) +{ +#ifdef CONFIG_XARRAY_MULTI + xa_store_range(xa, first, last, xa_mk_value(first), GFP_KERNEL); + + XA_BUG_ON(xa, xa_load(xa, first) != xa_mk_value(first)); + XA_BUG_ON(xa, xa_load(xa, last) != xa_mk_value(first)); + XA_BUG_ON(xa, xa_load(xa, first - 1) != NULL); + XA_BUG_ON(xa, xa_load(xa, last + 1) != NULL); + + xa_store_range(xa, first, last, NULL, GFP_KERNEL); +#endif + + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_store_range(struct xarray *xa) +{ + unsigned long i, j; + + for (i = 0; i < 128; i++) { + for (j = i; j < 128; j++) { + __check_store_range(xa, i, j); + __check_store_range(xa, 128 + i, 128 + j); + __check_store_range(xa, 4095 + i, 4095 + j); + __check_store_range(xa, 4096 + i, 4096 + j); + __check_store_range(xa, 123456 + i, 123456 + j); + __check_store_range(xa, UINT_MAX + i, UINT_MAX + j); + } + } +} + static LIST_HEAD(shadow_nodes); static void test_update_node(struct xa_node *node) @@ -1184,6 +1217,7 @@ static int xarray_checks(void) check_destroy(&array); check_move(&array); check_create_range(&array); + check_store_range(&array); check_store_iter(&array); check_workingset(&array, 0); diff --git a/lib/xarray.c b/lib/xarray.c index 9a0d49d4b5f0..8b176f009c08 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -376,6 +376,14 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift) return node; } +#ifdef CONFIG_XARRAY_MULTI +/* Returns the number of indices covered by a given xa_state */ +static unsigned long xas_size(const struct xa_state *xas) +{ + return (xas->xa_sibs + 1UL) << xas->xa_shift; +} +#endif + /* * Use this to calculate the maximum index that will need to be created * in order to add the entry described by @xas. 
Because we cannot store a @@ -388,8 +396,7 @@ static unsigned long xas_max(struct xa_state *xas) #ifdef CONFIG_XARRAY_MULTI if (xas->xa_shift || xas->xa_sibs) { - unsigned long mask; - mask = (((xas->xa_sibs + 1UL) << xas->xa_shift) - 1); + unsigned long mask = xas_size(xas) - 1; max |= mask; if (mask == max) max++; @@ -1517,6 +1524,92 @@ int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) } EXPORT_SYMBOL(xa_reserve); +#ifdef CONFIG_XARRAY_MULTI +static void xas_set_range(struct xa_state *xas, unsigned long first, + unsigned long last) +{ + unsigned int shift = 0; + unsigned long sibs = last - first; + unsigned int offset = XA_CHUNK_MASK; + + xas_set(xas, first); + + while ((first & XA_CHUNK_MASK) == 0) { + if (sibs < XA_CHUNK_MASK) + break; + if ((sibs == XA_CHUNK_MASK) && (offset < XA_CHUNK_MASK)) + break; + shift += XA_CHUNK_SHIFT; + if (offset == XA_CHUNK_MASK) + offset = sibs & XA_CHUNK_MASK; + sibs >>= XA_CHUNK_SHIFT; + first >>= XA_CHUNK_SHIFT; + } + + offset = first & XA_CHUNK_MASK; + if (offset + sibs > XA_CHUNK_MASK) + sibs = XA_CHUNK_MASK - offset; + if ((((first + sibs + 1) << shift) - 1) > last) + sibs -= 1; + + xas->xa_shift = shift; + xas->xa_sibs = sibs; +} + +/** + * xa_store_range() - Store this entry at a range of indices in the XArray. + * @xa: XArray. + * @first: First index to affect. + * @last: Last index to affect. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * After this function returns, loads from any index between @first and @last, + * inclusive, will return @entry. + * Storing into an existing multislot entry updates the entry of every index. + * The marks associated with these indices are unaffected unless @entry is %NULL. + * + * Context: Process context. Takes and releases the xa_lock. May sleep + * if the @gfp flags permit. + * Return: %NULL on success, xa_err(-EINVAL) if @entry cannot be stored in + * an XArray, or xa_err(-ENOMEM) if memory allocation failed. + */ +void *xa_store_range(struct xarray *xa, unsigned long first, + unsigned long last, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, 0); + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return XA_ERROR(-EINVAL); + if (last < first) + return XA_ERROR(-EINVAL); + + do { + xas_lock(&xas); + if (entry) { + unsigned int order = (last == ~0UL) ? 64 : + ilog2(last + 1); + xas_set_order(&xas, last, order); + xas_create(&xas); + if (xas_error(&xas)) + goto unlock; + } + do { + xas_set_range(&xas, first, last); + xas_store(&xas, entry); + if (xas_error(&xas)) + goto unlock; + first += xas_size(&xas); + } while (first <= last); +unlock: + xas_unlock(&xas); + } while (xas_nomem(&xas, gfp)); + + return xas_result(&xas, NULL); +} +EXPORT_SYMBOL(xa_store_range); +#endif /* CONFIG_XARRAY_MULTI */ + /** * __xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. -- cgit v1.2.3-71-gd317 From 3a08cd52c37c793ffc199f6fc2ecfc368e284b2d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 22 Sep 2018 16:14:30 -0400 Subject: radix tree: Remove multiorder support All users have now been converted to the XArray. Removing the support reduces code size and ensures new users will use the XArray instead.
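As a rough illustration of what new users do instead (a sketch, not code from this patch; multi-index entries require CONFIG_XARRAY_MULTI): where a multiorder radix tree stored one entry over 2^order aligned indices, an XArray stores a multi-index entry:

	/* Store one entry covering the 2^order indices around 'index'.
	 * XA_STATE_ORDER() rounds 'index' down to the order boundary. */
	XA_STATE_ORDER(xas, &xa, index, order);

	do {
		xas_lock(&xas);
		xas_store(&xas, entry);
		xas_unlock(&xas);
	} while (xas_nomem(&xas, GFP_KERNEL));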
Signed-off-by: Matthew Wilcox --- include/linux/radix-tree.h | 40 +---- lib/Kconfig | 4 - lib/radix-tree.c | 215 ++------------------------ mm/Kconfig | 4 +- tools/testing/radix-tree/generated/autoconf.h | 1 - 5 files changed, 19 insertions(+), 245 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 9a1460488163..06c4c7a6c09c 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -96,7 +96,6 @@ static inline bool radix_tree_empty(const struct radix_tree_root *root) * @next_index: one beyond the last index for this chunk * @tags: bit-mask for tag-iterating * @node: node that contains current slot - * @shift: shift for the node that holds our slots * * This radix tree iterator works in terms of "chunks" of slots. A chunk is a * subinterval of slots contained within one radix tree leaf node. It is @@ -110,20 +109,8 @@ struct radix_tree_iter { unsigned long next_index; unsigned long tags; struct radix_tree_node *node; -#ifdef CONFIG_RADIX_TREE_MULTIORDER - unsigned int shift; -#endif }; -static inline unsigned int iter_shift(const struct radix_tree_iter *iter) -{ -#ifdef CONFIG_RADIX_TREE_MULTIORDER - return iter->shift; -#else - return 0; -#endif -} - /** * Radix-tree synchronization * @@ -230,13 +217,8 @@ static inline int radix_tree_exception(void *arg) return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); } -int __radix_tree_insert(struct radix_tree_root *, unsigned long index, - unsigned order, void *); -static inline int radix_tree_insert(struct radix_tree_root *root, - unsigned long index, void *entry) -{ - return __radix_tree_insert(root, index, 0, entry); -} +int radix_tree_insert(struct radix_tree_root *, unsigned long index, + void *); void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index, struct radix_tree_node **nodep, void __rcu ***slotp); void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); @@ -384,7 +366,7 @@ void __rcu **radix_tree_iter_retry(struct radix_tree_iter *iter) static inline unsigned long __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots) { - return iter->index + (slots << iter_shift(iter)); + return iter->index + slots; } /** @@ -409,20 +391,8 @@ void __rcu **__must_check radix_tree_iter_resume(void __rcu **slot, static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { - return (iter->next_index - iter->index) >> iter_shift(iter); -} - -#ifdef CONFIG_RADIX_TREE_MULTIORDER -void __rcu **__radix_tree_next_slot(void __rcu **slot, - struct radix_tree_iter *iter, unsigned flags); -#else -/* Can't happen without sibling entries, but the compiler can't tell that */ -static inline void __rcu **__radix_tree_next_slot(void __rcu **slot, - struct radix_tree_iter *iter, unsigned flags) -{ - return slot; + return iter->next_index - iter->index; } -#endif /** * radix_tree_next_slot - find next slot in chunk @@ -482,8 +452,6 @@ static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot, return NULL; found: - if (unlikely(radix_tree_is_internal_node(rcu_dereference_raw(*slot)))) - return __radix_tree_next_slot(slot, iter, flags); return slot; } diff --git a/lib/Kconfig b/lib/Kconfig index 40bfa6ccd294..a9965f4af4dd 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -405,10 +405,6 @@ config XARRAY_MULTI Support entries which occupy multiple consecutive indices in the XArray. 
-config RADIX_TREE_MULTIORDER - bool - select XARRAY_MULTI - config ASSOCIATIVE_ARRAY bool help diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f107dd2698e3..1106bb6aa01e 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -110,11 +110,6 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent, unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); - if (xa_is_sibling(entry)) { - offset = xa_to_sibling(entry); - entry = rcu_dereference_raw(parent->slots[offset]); - } - *nodep = (void *)entry; return offset; } @@ -229,7 +224,7 @@ radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, static unsigned int iter_offset(const struct radix_tree_iter *iter) { - return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK; + return iter->index & RADIX_TREE_MAP_MASK; } /* @@ -506,16 +501,13 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) /* * The candidate node has more than one child, or its child - * is not at the leftmost slot, or the child is a multiorder - * entry, we cannot shrink. + * is not at the leftmost slot, we cannot shrink. */ if (node->count != 1) break; child = rcu_dereference_raw(node->slots[0]); if (!child) break; - if (!radix_tree_is_internal_node(child) && node->shift) - break; /* * For an IDR, we must not shrink entry 0 into the root in @@ -613,7 +605,6 @@ static bool delete_node(struct radix_tree_root *root, * __radix_tree_create - create a slot in a radix tree * @root: radix tree root * @index: index key - * @order: index occupies 2^order aligned slots * @nodep: returns node * @slotp: returns slot * @@ -627,21 +618,19 @@ static bool delete_node(struct radix_tree_root *root, * Returns -ENOMEM, or 0 for success. */ static int __radix_tree_create(struct radix_tree_root *root, - unsigned long index, unsigned order, - struct radix_tree_node **nodep, void __rcu ***slotp) + unsigned long index, struct radix_tree_node **nodep, + void __rcu ***slotp) { struct radix_tree_node *node = NULL, *child; void __rcu **slot = (void __rcu **)&root->xa_head; unsigned long maxindex; unsigned int shift, offset = 0; - unsigned long max = index | ((1UL << order) - 1); + unsigned long max = index; gfp_t gfp = root_gfp_mask(root); shift = radix_tree_load_root(root, &child, &maxindex); /* Make sure the tree is high enough. */ - if (order > 0 && max == ((1UL << order) - 1)) - max++; if (max > maxindex) { int error = radix_tree_extend(root, gfp, max, shift); if (error < 0) @@ -650,7 +639,7 @@ static int __radix_tree_create(struct radix_tree_root *root, child = rcu_dereference_raw(root->xa_head); } - while (shift > order) { + while (shift > 0) { shift -= RADIX_TREE_MAP_SHIFT; if (child == NULL) { /* Have to add a child node. 
*/ @@ -711,70 +700,8 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) } } -#ifdef CONFIG_RADIX_TREE_MULTIORDER -static inline int insert_entries(struct radix_tree_node *node, - void __rcu **slot, void *item, unsigned order, bool replace) -{ - void *sibling; - unsigned i, n, tag, offset, tags = 0; - - if (node) { - if (order > node->shift) - n = 1 << (order - node->shift); - else - n = 1; - offset = get_slot_offset(node, slot); - } else { - n = 1; - offset = 0; - } - - if (n > 1) { - offset = offset & ~(n - 1); - slot = &node->slots[offset]; - } - sibling = xa_mk_sibling(offset); - - for (i = 0; i < n; i++) { - if (slot[i]) { - if (replace) { - node->count--; - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tag_get(node, tag, offset + i)) - tags |= 1 << tag; - } else - return -EEXIST; - } - } - - for (i = 0; i < n; i++) { - struct radix_tree_node *old = rcu_dereference_raw(slot[i]); - if (i) { - rcu_assign_pointer(slot[i], sibling); - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tags & (1 << tag)) - tag_clear(node, tag, offset + i); - } else { - rcu_assign_pointer(slot[i], item); - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tags & (1 << tag)) - tag_set(node, tag, offset); - } - if (xa_is_node(old)) - radix_tree_free_nodes(old); - if (xa_is_value(old)) - node->nr_values--; - } - if (node) { - node->count += n; - if (xa_is_value(item)) - node->nr_values += n; - } - return n; -} -#else static inline int insert_entries(struct radix_tree_node *node, - void __rcu **slot, void *item, unsigned order, bool replace) + void __rcu **slot, void *item, bool replace) { if (*slot) return -EEXIST; @@ -786,19 +713,17 @@ static inline int insert_entries(struct radix_tree_node *node, } return 1; } -#endif /** * __radix_tree_insert - insert into a radix tree * @root: radix tree root * @index: index key - * @order: key covers the 2^order indices around index * @item: item to insert * * Insert an item into the radix tree at position @index. 
*/ -int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, - unsigned order, void *item) +int radix_tree_insert(struct radix_tree_root *root, unsigned long index, + void *item) { struct radix_tree_node *node; void __rcu **slot; @@ -806,11 +731,11 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, BUG_ON(radix_tree_is_internal_node(item)); - error = __radix_tree_create(root, index, order, &node, &slot); + error = __radix_tree_create(root, index, &node, &slot); if (error) return error; - error = insert_entries(node, slot, item, order, false); + error = insert_entries(node, slot, item, false); if (error < 0) return error; @@ -825,7 +750,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, return 0; } -EXPORT_SYMBOL(__radix_tree_insert); +EXPORT_SYMBOL(radix_tree_insert); /** * __radix_tree_lookup - lookup an item in a radix tree @@ -917,32 +842,12 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) } EXPORT_SYMBOL(radix_tree_lookup); -static inline void replace_sibling_entries(struct radix_tree_node *node, - void __rcu **slot, int count, int values) -{ -#ifdef CONFIG_RADIX_TREE_MULTIORDER - unsigned offset = get_slot_offset(node, slot); - void *ptr = xa_mk_sibling(offset); - - while (++offset < RADIX_TREE_MAP_SIZE) { - if (rcu_dereference_raw(node->slots[offset]) != ptr) - break; - if (count < 0) { - node->slots[offset] = NULL; - node->count--; - } - node->nr_values += values; - } -#endif -} - static void replace_slot(void __rcu **slot, void *item, struct radix_tree_node *node, int count, int values) { if (node && (count || values)) { node->count += count; node->nr_values += values; - replace_sibling_entries(node, slot, count, values); } rcu_assign_pointer(*slot, item); @@ -1223,14 +1128,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_tag_get); -static inline void __set_iter_shift(struct radix_tree_iter *iter, - unsigned int shift) -{ -#ifdef CONFIG_RADIX_TREE_MULTIORDER - iter->shift = shift; -#endif -} - /* Construct iter->tags bit-mask from node->tags[tag] array */ static void set_iter_tags(struct radix_tree_iter *iter, struct radix_tree_node *node, unsigned offset, @@ -1257,92 +1154,11 @@ static void set_iter_tags(struct radix_tree_iter *iter, } } -#ifdef CONFIG_RADIX_TREE_MULTIORDER -static void __rcu **skip_siblings(struct radix_tree_node **nodep, - void __rcu **slot, struct radix_tree_iter *iter) -{ - while (iter->index < iter->next_index) { - *nodep = rcu_dereference_raw(*slot); - if (*nodep && !xa_is_sibling(*nodep)) - return slot; - slot++; - iter->index = __radix_tree_iter_add(iter, 1); - iter->tags >>= 1; - } - - *nodep = NULL; - return NULL; -} - -void __rcu **__radix_tree_next_slot(void __rcu **slot, - struct radix_tree_iter *iter, unsigned flags) -{ - unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; - struct radix_tree_node *node; - - slot = skip_siblings(&node, slot, iter); - - while (radix_tree_is_internal_node(node)) { - unsigned offset; - unsigned long next_index; - - if (node == RADIX_TREE_RETRY) - return slot; - node = entry_to_node(node); - iter->node = node; - iter->shift = node->shift; - - if (flags & RADIX_TREE_ITER_TAGGED) { - offset = radix_tree_find_next_bit(node, tag, 0); - if (offset == RADIX_TREE_MAP_SIZE) - return NULL; - slot = &node->slots[offset]; - iter->index = __radix_tree_iter_add(iter, offset); - set_iter_tags(iter, node, offset, tag); - node = rcu_dereference_raw(*slot); - } else { - offset = 0; - slot = 
&node->slots[0]; - for (;;) { - node = rcu_dereference_raw(*slot); - if (node) - break; - slot++; - offset++; - if (offset == RADIX_TREE_MAP_SIZE) - return NULL; - } - iter->index = __radix_tree_iter_add(iter, offset); - } - if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0)) - goto none; - next_index = (iter->index | shift_maxindex(iter->shift)) + 1; - if (next_index < iter->next_index) - iter->next_index = next_index; - } - - return slot; - none: - iter->next_index = 0; - return NULL; -} -EXPORT_SYMBOL(__radix_tree_next_slot); -#else -static void __rcu **skip_siblings(struct radix_tree_node **nodep, - void __rcu **slot, struct radix_tree_iter *iter) -{ - return slot; -} -#endif - void __rcu **radix_tree_iter_resume(void __rcu **slot, struct radix_tree_iter *iter) { - struct radix_tree_node *node; - slot++; iter->index = __radix_tree_iter_add(iter, 1); - skip_siblings(&node, slot, iter); iter->next_index = iter->index; iter->tags = 0; return NULL; @@ -1393,7 +1209,6 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, iter->next_index = maxindex + 1; iter->tags = 1; iter->node = NULL; - __set_iter_shift(iter, 0); return (void __rcu **)&root->xa_head; } @@ -1414,8 +1229,6 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, while (++offset < RADIX_TREE_MAP_SIZE) { void *slot = rcu_dereference_raw( node->slots[offset]); - if (xa_is_sibling(slot)) - continue; if (slot) break; } @@ -1436,10 +1249,9 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, } while (node->shift && radix_tree_is_internal_node(child)); /* Update the iterator state */ - iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); + iter->index = (index &~ node_maxindex(node)) | offset; iter->next_index = (index | node_maxindex(node)) + 1; iter->node = node; - __set_iter_shift(iter, node->shift); if (flags & RADIX_TREE_ITER_TAGGED) set_iter_tags(iter, node, offset, tag); @@ -1750,7 +1562,6 @@ void __rcu **idr_get_free(struct radix_tree_root *root, else iter->next_index = 1; iter->node = node; - __set_iter_shift(iter, shift); set_iter_tags(iter, node, offset, IDR_FREE); return slot; diff --git a/mm/Kconfig b/mm/Kconfig index de64ea658716..02301a89089e 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -379,7 +379,7 @@ config TRANSPARENT_HUGEPAGE bool "Transparent Hugepage Support" depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE select COMPACTION - select RADIX_TREE_MULTIORDER + select XARRAY_MULTI help Transparent Hugepages allows the kernel to use huge pages and huge tlb transparently to the applications whenever possible. 
@@ -671,7 +671,7 @@ config ZONE_DEVICE depends on MEMORY_HOTREMOVE depends on SPARSEMEM_VMEMMAP depends on ARCH_HAS_ZONE_DEVICE - select RADIX_TREE_MULTIORDER + select XARRAY_MULTI help Device memory hotplug support allows for establishing pmem, diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h index ca8e03ad19ac..2218b3cc184e 100644 --- a/tools/testing/radix-tree/generated/autoconf.h +++ b/tools/testing/radix-tree/generated/autoconf.h @@ -1,2 +1 @@ -#define CONFIG_RADIX_TREE_MULTIORDER 1 #define CONFIG_XARRAY_MULTI 1 -- cgit v1.2.3-71-gd317 From 94e6992bb560be8bffb47f287194adf070b57695 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 26 Sep 2018 18:03:16 +0200 Subject: libceph: bump CEPH_MSG_MAX_DATA_LEN If the read is large enough, we end up spinning in the messenger: libceph: osd0 192.168.122.1:6801 io error libceph: osd0 192.168.122.1:6801 io error libceph: osd0 192.168.122.1:6801 io error This is a receive side limit, so only reads were affected. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 49c93b9308d7..68bb09c29ce8 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -81,7 +81,13 @@ struct ceph_options { #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) -#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) + +/* + * Handle the largest possible rbd object in one message. + * There is no limit on the size of cephfs objects, but it has to obey + * rsize and wsize mount options anyway. + */ +#define CEPH_MSG_MAX_DATA_LEN (32*1024*1024) #define CEPH_AUTH_NAME_DEFAULT "guest" -- cgit v1.2.3-71-gd317 From 24639ce56040a8ea890ad8836068c1ad8bd177c7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 26 Sep 2018 19:12:07 +0200 Subject: libceph: osd_req_op_cls_init() doesn't need to take opcode Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 3 +-- include/linux/ceph/osd_client.h | 5 ++--- net/ceph/osd_client.c | 9 ++++----- 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f2fe692dda40..9cc7ee3b427f 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2374,8 +2374,7 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) if (!obj_req->osd_req) return -ENOMEM; - ret = osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd", - "copyup"); + ret = osd_req_op_cls_init(obj_req->osd_req, 0, "rbd", "copyup"); if (ret) return ret; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 02096da01845..f3e828460c91 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -444,9 +444,8 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); -extern int osd_req_op_cls_init(struct ceph_osd_request *osd_req, - unsigned int which, u16 opcode, - const char *class, const char *method); +int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, + const char *class, const char *method); extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, const char *name, const void *value, size_t size, u8 cmp_op, u8 cmp_mode); diff 
--git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 60934bd8796c..a871b234cd90 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -767,15 +767,14 @@ void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, EXPORT_SYMBOL(osd_req_op_extent_dup_last); int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, - u16 opcode, const char *class, const char *method) + const char *class, const char *method) { - struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, - opcode, 0); + struct ceph_osd_req_op *op; struct ceph_pagelist *pagelist; size_t payload_len = 0; size_t size; - BUG_ON(opcode != CEPH_OSD_OP_CALL); + op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS); if (!pagelist) @@ -4962,7 +4961,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc, if (ret) goto out_put_req; - ret = osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method); + ret = osd_req_op_cls_init(req, 0, class, method); if (ret) goto out_put_req; -- cgit v1.2.3-71-gd317 From 33165d472310262d8c79c7e4d1a17dc60cea7e35 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 28 Sep 2018 15:38:34 +0200 Subject: libceph: introduce ceph_pagelist_alloc() struct ceph_pagelist cannot be embedded into anything else because it has its own refcount. Merge allocation and initialization together. Signed-off-by: Ilya Dryomov --- fs/ceph/acl.c | 3 +-- fs/ceph/mds_client.c | 3 +-- fs/ceph/xattr.c | 3 +-- include/linux/ceph/pagelist.h | 11 +---------- net/ceph/osd_client.c | 14 ++++---------- net/ceph/pagelist.c | 20 ++++++++++++++++++++ 6 files changed, 28 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 8a9b562ae4ca..5f0103f40079 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -206,10 +206,9 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode, tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL); if (!tmp_buf) goto out_err; - pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_KERNEL); + pagelist = ceph_pagelist_alloc(GFP_KERNEL); if (!pagelist) goto out_err; - ceph_pagelist_init(pagelist); err = ceph_pagelist_reserve(pagelist, PAGE_SIZE); if (err) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index bc43c822426a..580a79b9a91f 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3126,10 +3126,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, pr_info("mds%d reconnect start\n", mds); - pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); + pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) goto fail_nopagelist; - ceph_pagelist_init(pagelist); reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false); if (!reply) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 5cc8b94f8206..316f6ad10644 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -951,11 +951,10 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name, if (size > 0) { /* copy value into pagelist */ - pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); + pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) return -ENOMEM; - ceph_pagelist_init(pagelist); err = ceph_pagelist_append(pagelist, value, size); if (err) goto out; diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h index d0223364349f..5dead8486fd8 100644 --- a/include/linux/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h @@ -23,16 +23,7 @@ struct ceph_pagelist_cursor { size_t room; /* room remaining to reset to */ }; -static inline void 
ceph_pagelist_init(struct ceph_pagelist *pl) -{ - INIT_LIST_HEAD(&pl->head); - pl->mapped_tail = NULL; - pl->length = 0; - pl->room = 0; - INIT_LIST_HEAD(&pl->free_list); - pl->num_pages_free = 0; - refcount_set(&pl->refcnt, 1); -} +struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags); extern void ceph_pagelist_release(struct ceph_pagelist *pl); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index a871b234cd90..db2ebc9e5f1f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -776,12 +776,10 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0); - pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS); + pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) return -ENOMEM; - ceph_pagelist_init(pagelist); - op->cls.class_name = class; size = strlen(class); BUG_ON(size > (size_t) U8_MAX); @@ -814,12 +812,10 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR); - pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS); + pagelist = ceph_pagelist_alloc(GFP_NOFS); if (!pagelist) return -ENOMEM; - ceph_pagelist_init(pagelist); - payload_len = strlen(name); op->xattr.name_len = payload_len; ceph_pagelist_append(pagelist, name, payload_len); @@ -4598,11 +4594,10 @@ static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which, op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0); - pl = kmalloc(sizeof(*pl), GFP_NOIO); + pl = ceph_pagelist_alloc(GFP_NOIO); if (!pl) return -ENOMEM; - ceph_pagelist_init(pl); ret = ceph_pagelist_encode_64(pl, notify_id); ret |= ceph_pagelist_encode_64(pl, cookie); if (payload) { @@ -4669,11 +4664,10 @@ static int osd_req_op_notify_init(struct ceph_osd_request *req, int which, op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); op->notify.cookie = cookie; - pl = kmalloc(sizeof(*pl), GFP_NOIO); + pl = ceph_pagelist_alloc(GFP_NOIO); if (!pl) return -ENOMEM; - ceph_pagelist_init(pl); ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */ ret |= ceph_pagelist_encode_32(pl, timeout); ret |= ceph_pagelist_encode_32(pl, payload_len); diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c index 2ea0564771d2..65e34f78b05d 100644 --- a/net/ceph/pagelist.c +++ b/net/ceph/pagelist.c @@ -6,6 +6,26 @@ #include #include +struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags) +{ + struct ceph_pagelist *pl; + + pl = kmalloc(sizeof(*pl), gfp_flags); + if (!pl) + return NULL; + + INIT_LIST_HEAD(&pl->head); + pl->mapped_tail = NULL; + pl->length = 0; + pl->room = 0; + INIT_LIST_HEAD(&pl->free_list); + pl->num_pages_free = 0; + refcount_set(&pl->refcnt, 1); + + return pl; +} +EXPORT_SYMBOL(ceph_pagelist_alloc); + static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) { if (pl->mapped_tail) { -- cgit v1.2.3-71-gd317 From 0d9c1ab3be4c0187663096a6a084421d0a1e45c6 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 15 Oct 2018 17:38:23 +0200 Subject: libceph: preallocate message data items Currently message data items are allocated with ceph_msg_data_create() in setup_request_data() inside send_request(). send_request() has never been allowed to fail, so each allocation is followed by a BUG_ON: data = ceph_msg_data_create(...); BUG_ON(!data); It's been this way since support for multiple message data items was added in commit 6644ed7b7e04 ("libceph: make message data be a pointer") in 3.10. 
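For illustration, with the interface introduced below the caller sizes the data item array when the message is allocated, and appending an item simply claims the next preallocated slot. A sketch, where front_len, num_data_items, pages, length and alignment are placeholders:

	msg = ceph_msg_new2(CEPH_MSG_OSD_OP, front_len, num_data_items,
			    GFP_NOFS, true);
	if (!msg)
		return -ENOMEM;

	/* Cannot fail any more: claims the next slot in msg->data[]. */
	ceph_msg_data_add_pages(msg, pages, length, alignment);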
There is no reason to delay the allocation of message data items until the last possible moment and we certainly don't need a linked list of them as they are only ever appended to the end and never erased. Make ceph_msg_new2() take max_data_items and adapt the rest of the code. Reported-by: Jerry Lee Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 4 +- include/linux/ceph/messenger.h | 24 ++-------- include/linux/ceph/msgpool.h | 11 +++-- net/ceph/messenger.c | 106 +++++++++++++++-------------------------- net/ceph/msgpool.c | 25 ++++++---- net/ceph/osd_client.c | 102 +++++++++++++++++++++++++++++++++------ 6 files changed, 157 insertions(+), 115 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 97de674ea377..67a9aeb2f4ec 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2071,7 +2071,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, if (req->r_old_dentry_drop) len += req->r_old_dentry->d_name.len; - msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS, false); + msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false); if (!msg) { msg = ERR_PTR(-ENOMEM); goto out_free2; @@ -3129,7 +3129,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, if (!pagelist) goto fail_nopagelist; - reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false); + reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false); if (!reply) goto fail_nomsg; diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index fc2b4491ee0a..800a2128d411 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -82,22 +82,6 @@ enum ceph_msg_data_type { CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */ }; -static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type) -{ - switch (type) { - case CEPH_MSG_DATA_NONE: - case CEPH_MSG_DATA_PAGES: - case CEPH_MSG_DATA_PAGELIST: -#ifdef CONFIG_BLOCK - case CEPH_MSG_DATA_BIO: -#endif /* CONFIG_BLOCK */ - case CEPH_MSG_DATA_BVECS: - return true; - default: - return false; - } -} - #ifdef CONFIG_BLOCK struct ceph_bio_iter { @@ -181,7 +165,6 @@ struct ceph_bvec_iter { } while (0) struct ceph_msg_data { - struct list_head links; /* ceph_msg->data */ enum ceph_msg_data_type type; union { #ifdef CONFIG_BLOCK @@ -202,7 +185,6 @@ struct ceph_msg_data { struct ceph_msg_data_cursor { size_t total_resid; /* across all data items */ - struct list_head *data_head; /* = &ceph_msg->data */ struct ceph_msg_data *data; /* current data item */ size_t resid; /* bytes not yet consumed */ @@ -240,7 +222,9 @@ struct ceph_msg { struct ceph_buffer *middle; size_t data_length; - struct list_head data; + struct ceph_msg_data *data; + int num_data_items; + int max_data_items; struct ceph_msg_data_cursor cursor; struct ceph_connection *con; @@ -381,6 +365,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, void ceph_msg_data_add_bvecs(struct ceph_msg *msg, struct ceph_bvec_iter *bvec_pos); +struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, + gfp_t flags, bool can_fail); extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, bool can_fail); diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h index 76c98a512758..729cdf700eae 100644 --- a/include/linux/ceph/msgpool.h +++ b/include/linux/ceph/msgpool.h @@ -13,14 +13,15 @@ struct ceph_msgpool { mempool_t *pool; int type; /* 
preallocated message type */ int front_len; /* preallocated payload size */ + int max_data_items; }; -extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type, - int front_len, int size, bool blocking, - const char *name); +int ceph_msgpool_init(struct ceph_msgpool *pool, int type, + int front_len, int max_data_items, int size, + const char *name); extern void ceph_msgpool_destroy(struct ceph_msgpool *pool); -extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *, - int front_len); +struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len, + int max_data_items); extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *); #endif diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 76684edc43ef..88e35830198c 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -156,7 +156,6 @@ static bool con_flag_test_and_set(struct ceph_connection *con, /* Slab caches for frequently-allocated structures */ static struct kmem_cache *ceph_msg_cache; -static struct kmem_cache *ceph_msg_data_cache; /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; @@ -235,23 +234,11 @@ static int ceph_msgr_slab_init(void) if (!ceph_msg_cache) return -ENOMEM; - BUG_ON(ceph_msg_data_cache); - ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0); - if (ceph_msg_data_cache) - return 0; - - kmem_cache_destroy(ceph_msg_cache); - ceph_msg_cache = NULL; - - return -ENOMEM; + return 0; } static void ceph_msgr_slab_exit(void) { - BUG_ON(!ceph_msg_data_cache); - kmem_cache_destroy(ceph_msg_data_cache); - ceph_msg_data_cache = NULL; - BUG_ON(!ceph_msg_cache); kmem_cache_destroy(ceph_msg_cache); ceph_msg_cache = NULL; @@ -1141,16 +1128,13 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor) static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length) { struct ceph_msg_data_cursor *cursor = &msg->cursor; - struct ceph_msg_data *data; BUG_ON(!length); BUG_ON(length > msg->data_length); - BUG_ON(list_empty(&msg->data)); + BUG_ON(!msg->num_data_items); - cursor->data_head = &msg->data; cursor->total_resid = length; - data = list_first_entry(&msg->data, struct ceph_msg_data, links); - cursor->data = data; + cursor->data = msg->data; __ceph_msg_data_cursor_init(cursor); } @@ -1231,8 +1215,7 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, if (!cursor->resid && cursor->total_resid) { WARN_ON(!cursor->last_piece); - BUG_ON(list_is_last(&cursor->data->links, cursor->data_head)); - cursor->data = list_next_entry(cursor->data, links); + cursor->data++; __ceph_msg_data_cursor_init(cursor); new_piece = true; } @@ -1248,9 +1231,6 @@ static size_t sizeof_footer(struct ceph_connection *con) static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { - BUG_ON(!msg); - BUG_ON(!data_len); - /* Initialize data cursor */ ceph_msg_data_cursor_init(msg, (size_t)data_len); @@ -1590,7 +1570,7 @@ static int write_partial_message_data(struct ceph_connection *con) dout("%s %p msg %p\n", __func__, con, msg); - if (list_empty(&msg->data)) + if (!msg->num_data_items) return -EINVAL; /* @@ -2347,8 +2327,7 @@ static int read_partial_msg_data(struct ceph_connection *con) u32 crc = 0; int ret; - BUG_ON(!msg); - if (list_empty(&msg->data)) + if (!msg->num_data_items) return -EIO; if (do_datacrc) @@ -3256,32 +3235,16 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con, return false; } -static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type) +static struct 
ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg) { - struct ceph_msg_data *data; - - if (WARN_ON(!ceph_msg_data_type_valid(type))) - return NULL; - - data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS); - if (!data) - return NULL; - - data->type = type; - INIT_LIST_HEAD(&data->links); - - return data; + BUG_ON(msg->num_data_items >= msg->max_data_items); + return &msg->data[msg->num_data_items++]; } static void ceph_msg_data_destroy(struct ceph_msg_data *data) { - if (!data) - return; - - WARN_ON(!list_empty(&data->links)); if (data->type == CEPH_MSG_DATA_PAGELIST) ceph_pagelist_release(data->pagelist); - kmem_cache_free(ceph_msg_data_cache, data); } void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, @@ -3292,13 +3255,12 @@ void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, BUG_ON(!pages); BUG_ON(!length); - data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_PAGES; data->pages = pages; data->length = length; data->alignment = alignment & ~PAGE_MASK; - list_add_tail(&data->links, &msg->data); msg->data_length += length; } EXPORT_SYMBOL(ceph_msg_data_add_pages); @@ -3311,12 +3273,11 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg, BUG_ON(!pagelist); BUG_ON(!pagelist->length); - data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_PAGELIST; refcount_inc(&pagelist->refcnt); data->pagelist = pagelist; - list_add_tail(&data->links, &msg->data); msg->data_length += pagelist->length; } EXPORT_SYMBOL(ceph_msg_data_add_pagelist); @@ -3327,12 +3288,11 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, { struct ceph_msg_data *data; - data = ceph_msg_data_create(CEPH_MSG_DATA_BIO); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_BIO; data->bio_pos = *bio_pos; data->bio_length = length; - list_add_tail(&data->links, &msg->data); msg->data_length += length; } EXPORT_SYMBOL(ceph_msg_data_add_bio); @@ -3343,11 +3303,10 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg, { struct ceph_msg_data *data; - data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS); - BUG_ON(!data); + data = ceph_msg_data_add(msg); + data->type = CEPH_MSG_DATA_BVECS; data->bvec_pos = *bvec_pos; - list_add_tail(&data->links, &msg->data); msg->data_length += bvec_pos->iter.bi_size; } EXPORT_SYMBOL(ceph_msg_data_add_bvecs); @@ -3356,8 +3315,8 @@ EXPORT_SYMBOL(ceph_msg_data_add_bvecs); * construct a new message with given type, size * the new msg has a ref count of 1. 
*/ -struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, - bool can_fail) +struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, + gfp_t flags, bool can_fail) { struct ceph_msg *m; @@ -3371,7 +3330,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, INIT_LIST_HEAD(&m->list_head); kref_init(&m->kref); - INIT_LIST_HEAD(&m->data); /* front */ if (front_len) { @@ -3386,6 +3344,15 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, } m->front_alloc_len = m->front.iov_len = front_len; + if (max_data_items) { + m->data = kmalloc_array(max_data_items, sizeof(*m->data), + flags); + if (!m->data) + goto out2; + + m->max_data_items = max_data_items; + } + dout("ceph_msg_new %p front %d\n", m, front_len); return m; @@ -3402,6 +3369,13 @@ out: } return NULL; } +EXPORT_SYMBOL(ceph_msg_new2); + +struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, + bool can_fail) +{ + return ceph_msg_new2(type, front_len, 0, flags, can_fail); +} EXPORT_SYMBOL(ceph_msg_new); /* @@ -3497,13 +3471,14 @@ static void ceph_msg_free(struct ceph_msg *m) { dout("%s %p\n", __func__, m); kvfree(m->front.iov_base); + kfree(m->data); kmem_cache_free(ceph_msg_cache, m); } static void ceph_msg_release(struct kref *kref) { struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); - struct ceph_msg_data *data, *next; + int i; dout("%s %p\n", __func__, m); WARN_ON(!list_empty(&m->list_head)); @@ -3516,11 +3491,8 @@ static void ceph_msg_release(struct kref *kref) m->middle = NULL; } - list_for_each_entry_safe(data, next, &m->data, links) { - list_del_init(&data->links); - ceph_msg_data_destroy(data); - } - m->data_length = 0; + for (i = 0; i < m->num_data_items; i++) + ceph_msg_data_destroy(&m->data[i]); if (m->pool) ceph_msgpool_put(m->pool, m); diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index 3dddc074f0d7..e3ecb80cd182 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -14,7 +14,8 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg) struct ceph_msgpool *pool = arg; struct ceph_msg *msg; - msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true); + msg = ceph_msg_new2(pool->type, pool->front_len, pool->max_data_items, + gfp_mask, true); if (!msg) { dout("msgpool_alloc %s failed\n", pool->name); } else { @@ -35,11 +36,13 @@ static void msgpool_free(void *element, void *arg) } int ceph_msgpool_init(struct ceph_msgpool *pool, int type, - int front_len, int size, bool blocking, const char *name) + int front_len, int max_data_items, int size, + const char *name) { dout("msgpool %s init\n", name); pool->type = type; pool->front_len = front_len; + pool->max_data_items = max_data_items; pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool); if (!pool->pool) return -ENOMEM; @@ -53,18 +56,21 @@ void ceph_msgpool_destroy(struct ceph_msgpool *pool) mempool_destroy(pool->pool); } -struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, - int front_len) +struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len, + int max_data_items) { struct ceph_msg *msg; - if (front_len > pool->front_len) { - dout("msgpool_get %s need front %d, pool size is %d\n", - pool->name, front_len, pool->front_len); + if (front_len > pool->front_len || + max_data_items > pool->max_data_items) { + pr_warn_ratelimited("%s need %d/%d, pool %s has %d/%d\n", + __func__, front_len, max_data_items, pool->name, + pool->front_len, pool->max_data_items); WARN_ON_ONCE(1); /* try to alloc a fresh message */ - return 
ceph_msg_new(pool->type, front_len, GFP_NOFS, false); + return ceph_msg_new2(pool->type, front_len, max_data_items, + GFP_NOFS, false); } msg = mempool_alloc(pool->pool, GFP_NOFS); @@ -80,6 +86,9 @@ void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) msg->front.iov_len = pool->front_len; msg->hdr.front_len = cpu_to_le32(pool->front_len); + msg->data_length = 0; + msg->num_data_items = 0; + kref_init(&msg->kref); /* retake single ref */ mempool_free(msg, pool->pool); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 7ac7f21ff317..cf0bd2cce848 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -614,12 +614,15 @@ static int ceph_oloc_encoding_size(const struct ceph_object_locator *oloc) return 8 + 4 + 4 + 4 + (oloc->pool_ns ? oloc->pool_ns->len : 0); } -int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) +static int __ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp, + int num_request_data_items, + int num_reply_data_items) { struct ceph_osd_client *osdc = req->r_osdc; struct ceph_msg *msg; int msg_size; + WARN_ON(req->r_request || req->r_reply); WARN_ON(ceph_oid_empty(&req->r_base_oid)); WARN_ON(ceph_oloc_empty(&req->r_base_oloc)); @@ -641,9 +644,11 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) msg_size += 4 + 8; /* retry_attempt, features */ if (req->r_mempool) - msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size); + msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size, + num_request_data_items); else - msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp, true); + msg = ceph_msg_new2(CEPH_MSG_OSD_OP, msg_size, + num_request_data_items, gfp, true); if (!msg) return -ENOMEM; @@ -656,9 +661,11 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) msg_size += req->r_num_ops * sizeof(struct ceph_osd_op); if (req->r_mempool) - msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size); + msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size, + num_reply_data_items); else - msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size, gfp, true); + msg = ceph_msg_new2(CEPH_MSG_OSD_OPREPLY, msg_size, + num_reply_data_items, gfp, true); if (!msg) return -ENOMEM; @@ -666,7 +673,6 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) return 0; } -EXPORT_SYMBOL(ceph_osdc_alloc_messages); static bool osd_req_opcode_valid(u16 opcode) { @@ -679,6 +685,64 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE) } } +static void get_num_data_items(struct ceph_osd_request *req, + int *num_request_data_items, + int *num_reply_data_items) +{ + struct ceph_osd_req_op *op; + + *num_request_data_items = 0; + *num_reply_data_items = 0; + + for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) { + switch (op->op) { + /* request */ + case CEPH_OSD_OP_WRITE: + case CEPH_OSD_OP_WRITEFULL: + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_CMPXATTR: + case CEPH_OSD_OP_NOTIFY_ACK: + *num_request_data_items += 1; + break; + + /* reply */ + case CEPH_OSD_OP_STAT: + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_LIST_WATCHERS: + *num_reply_data_items += 1; + break; + + /* both */ + case CEPH_OSD_OP_NOTIFY: + *num_request_data_items += 1; + *num_reply_data_items += 1; + break; + case CEPH_OSD_OP_CALL: + *num_request_data_items += 2; + *num_reply_data_items += 1; + break; + + default: + WARN_ON(!osd_req_opcode_valid(op->op)); + break; + } + } +} + +/* + * oid, oloc and OSD op opcode(s) must be filled in before this function + * is called. 
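+ * (Editor's note, an illustrative addition: for a request whose only op
+ * is CEPH_OSD_OP_CALL, for instance, get_num_data_items() above reserves
+ * two request data items and one reply data item.)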
+ */ +int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp) +{ + int num_request_data_items, num_reply_data_items; + + get_num_data_items(req, &num_request_data_items, &num_reply_data_items); + return __ceph_osdc_alloc_messages(req, gfp, num_request_data_items, + num_reply_data_items); +} +EXPORT_SYMBOL(ceph_osdc_alloc_messages); + /* * This is an osd op init function for opcodes that have no data or * other information associated with them. It also serves as a @@ -1035,7 +1099,15 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, if (flags & CEPH_OSD_FLAG_WRITE) req->r_data_offset = off; - r = ceph_osdc_alloc_messages(req, GFP_NOFS); + if (num_ops > 1) + /* + * This is a special case for ceph_writepages_start(), but it + * also covers ceph_uninline_data(). If more multi-op request + * use cases emerge, we will need a separate helper. + */ + r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0); + else + r = ceph_osdc_alloc_messages(req, GFP_NOFS); if (r) goto fail; @@ -1842,13 +1914,16 @@ static bool should_plug_request(struct ceph_osd_request *req) return true; } +/* + * Keep get_num_data_items() in sync with this function. + */ static void setup_request_data(struct ceph_osd_request *req, struct ceph_msg *msg) { u32 data_len = 0; int i; - if (!list_empty(&msg->data)) + if (msg->num_data_items) return; WARN_ON(msg->data_length); @@ -4325,9 +4400,7 @@ static void handle_watch_notify(struct ceph_osd_client *osdc, lreq->notify_id, notify_id); } else if (!completion_done(&lreq->notify_finish_wait)) { struct ceph_msg_data *data = - list_first_entry_or_null(&msg->data, - struct ceph_msg_data, - links); + msg->num_data_items ? &msg->data[0] : NULL; if (data) { if (lreq->preply_pages) { @@ -5036,11 +5109,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) goto out_map; err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP, - PAGE_SIZE, 10, true, "osd_op"); + PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, "osd_op"); if (err < 0) goto out_mempool; err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY, - PAGE_SIZE, 10, true, "osd_op_reply"); + PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, + "osd_op_reply"); if (err < 0) goto out_msgpool; @@ -5310,7 +5384,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr) u32 front_len = le32_to_cpu(hdr->front_len); u32 data_len = le32_to_cpu(hdr->data_len); - m = ceph_msg_new(type, front_len, GFP_NOIO, false); + m = ceph_msg_new2(type, front_len, 1, GFP_NOIO, false); if (!m) return NULL; -- cgit v1.2.3-71-gd317 From 23ddf9bea900b4ce39e5707e1ddf66062cfe6d91 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Mon, 15 Oct 2018 16:45:58 +0100 Subject: libceph: support the RADOS copy-from operation Add support for performing remote object copies using the 'copy-from' operation. [ Add COPY_FROM to get_num_data_items(). 
] Signed-off-by: Luis Henriques Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 17 ++++++++ include/linux/ceph/rados.h | 28 +++++++++++++ net/ceph/osd_client.c | 90 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index f3e828460c91..7a2af5034278 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -136,6 +136,13 @@ struct ceph_osd_req_op { u64 expected_object_size; u64 expected_write_size; } alloc_hint; + struct { + u64 snapid; + u64 src_version; + u8 flags; + u32 src_fadvise_flags; + struct ceph_osd_data osd_data; + } copy_from; }; }; @@ -510,6 +517,16 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct timespec64 *mtime, struct page **pages, int nr_pages); +int ceph_osdc_copy_from(struct ceph_osd_client *osdc, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + struct ceph_object_id *dst_oid, + struct ceph_object_locator *dst_oloc, + u32 dst_fadvise_flags, + u8 copy_from_flags); + /* watch/notify */ struct ceph_osd_linger_request * ceph_osdc_watch(struct ceph_osd_client *osdc, diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index f1988387c5ad..3eb0e55665b4 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -410,6 +410,14 @@ enum { enum { CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */ + CEPH_OSD_OP_FLAG_FADVISE_RANDOM = 0x4, /* the op is random */ + CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL = 0x8, /* the op is sequential */ + CEPH_OSD_OP_FLAG_FADVISE_WILLNEED = 0x10,/* data will be accessed in + the near future */ + CEPH_OSD_OP_FLAG_FADVISE_DONTNEED = 0x20,/* data will not be accessed + in the near future */ + CEPH_OSD_OP_FLAG_FADVISE_NOCACHE = 0x40,/* data will be accessed only + once by this client */ }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ @@ -431,6 +439,15 @@ enum { CEPH_OSD_CMPXATTR_MODE_U64 = 2 }; +enum { + CEPH_OSD_COPY_FROM_FLAG_FLUSH = 1, /* part of a flush operation */ + CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY = 2, /* ignore pool overlay */ + CEPH_OSD_COPY_FROM_FLAG_IGNORE_CACHE = 4, /* ignore osd cache logic */ + CEPH_OSD_COPY_FROM_FLAG_MAP_SNAP_CLONE = 8, /* map snap direct to + * cloneid */ + CEPH_OSD_COPY_FROM_FLAG_RWORDERED = 16, /* order with write */ +}; + enum { CEPH_OSD_WATCH_OP_UNWATCH = 0, CEPH_OSD_WATCH_OP_LEGACY_WATCH = 1, @@ -497,6 +514,17 @@ struct ceph_osd_op { __le64 expected_object_size; __le64 expected_write_size; } __attribute__ ((packed)) alloc_hint; + struct { + __le64 snapid; + __le64 src_version; + __u8 flags; /* CEPH_OSD_COPY_FROM_FLAG_* */ + /* + * CEPH_OSD_OP_FLAG_FADVISE_*: fadvise flags + * for src object, flags for dest object are in + * ceph_osd_op::flags. 
+ */ + __le32 src_fadvise_flags; + } __attribute__ ((packed)) copy_from; }; __le32 payload_len; } __attribute__ ((packed)); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index a0148de51cc6..d23a9f81f3d7 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -410,6 +410,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req, case CEPH_OSD_OP_LIST_WATCHERS: ceph_osd_data_release(&op->list_watchers.response_data); break; + case CEPH_OSD_OP_COPY_FROM: + ceph_osd_data_release(&op->copy_from.osd_data); + break; default: break; } @@ -702,6 +705,7 @@ static void get_num_data_items(struct ceph_osd_request *req, case CEPH_OSD_OP_SETXATTR: case CEPH_OSD_OP_CMPXATTR: case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_COPY_FROM: *num_request_data_items += 1; break; @@ -1016,6 +1020,14 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, case CEPH_OSD_OP_CREATE: case CEPH_OSD_OP_DELETE: break; + case CEPH_OSD_OP_COPY_FROM: + dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid); + dst->copy_from.src_version = + cpu_to_le64(src->copy_from.src_version); + dst->copy_from.flags = src->copy_from.flags; + dst->copy_from.src_fadvise_flags = + cpu_to_le32(src->copy_from.src_fadvise_flags); + break; default: pr_err("unsupported osd opcode %s\n", ceph_osd_op_name(src->op)); @@ -1947,6 +1959,10 @@ static void setup_request_data(struct ceph_osd_request *req) ceph_osdc_msg_data_add(request_msg, &op->notify_ack.request_data); break; + case CEPH_OSD_OP_COPY_FROM: + ceph_osdc_msg_data_add(request_msg, + &op->copy_from.osd_data); + break; /* reply */ case CEPH_OSD_OP_STAT: @@ -5255,6 +5271,80 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, } EXPORT_SYMBOL(ceph_osdc_writepages); +static int osd_req_op_copy_from_init(struct ceph_osd_request *req, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + u32 dst_fadvise_flags, + u8 copy_from_flags) +{ + struct ceph_osd_req_op *op; + struct page **pages; + void *p, *end; + + pages = ceph_alloc_page_vector(1, GFP_KERNEL); + if (IS_ERR(pages)) + return PTR_ERR(pages); + + op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM, dst_fadvise_flags); + op->copy_from.snapid = src_snapid; + op->copy_from.src_version = src_version; + op->copy_from.flags = copy_from_flags; + op->copy_from.src_fadvise_flags = src_fadvise_flags; + + p = page_address(pages[0]); + end = p + PAGE_SIZE; + ceph_encode_string(&p, end, src_oid->name, src_oid->name_len); + encode_oloc(&p, end, src_oloc); + op->indata_len = PAGE_SIZE - (end - p); + + ceph_osd_data_pages_init(&op->copy_from.osd_data, pages, + op->indata_len, 0, false, true); + return 0; +} + +int ceph_osdc_copy_from(struct ceph_osd_client *osdc, + u64 src_snapid, u64 src_version, + struct ceph_object_id *src_oid, + struct ceph_object_locator *src_oloc, + u32 src_fadvise_flags, + struct ceph_object_id *dst_oid, + struct ceph_object_locator *dst_oloc, + u32 dst_fadvise_flags, + u8 copy_from_flags) +{ + struct ceph_osd_request *req; + int ret; + + req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->r_flags = CEPH_OSD_FLAG_WRITE; + + ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc); + ceph_oid_copy(&req->r_t.base_oid, dst_oid); + + ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid, + src_oloc, src_fadvise_flags, + dst_fadvise_flags, copy_from_flags); + if (ret) + goto out; + + ret = ceph_osdc_alloc_messages(req, GFP_KERNEL); + 
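+	/*
+	 * (Editor's note: allocating the messages only after
+	 * osd_req_op_copy_from_init() matters, since get_num_data_items()
+	 * walks r_ops and must count the COPY_FROM request payload set
+	 * up above.)
+	 */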
if (ret) + goto out; + + ceph_osdc_start_request(osdc, req, false); + ret = ceph_osdc_wait_request(osdc, req); + +out: + ceph_osdc_put_request(req); + return ret; +} +EXPORT_SYMBOL(ceph_osdc_copy_from); + int __init ceph_osdc_setup(void) { size_t size = sizeof(struct ceph_osd_request) + -- cgit v1.2.3-71-gd317 From 721fb6fbfd2132164c2e8777cc837f9b2c1794dc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 17 Oct 2018 13:07:05 +0200 Subject: fsnotify: Fix busy inodes during unmount Detaching of mark connector from fsnotify_put_mark() can race with unmounting of the filesystem like: CPU1 CPU2 fsnotify_put_mark() spin_lock(&conn->lock); ... inode = fsnotify_detach_connector_from_object(conn) spin_unlock(&conn->lock); generic_shutdown_super() fsnotify_unmount_inodes() sees connector detached for inode -> nothing to do evict_inode() barfs on pending inode reference iput(inode); Resulting in "Busy inodes after unmount" message and possible kernel oops. Make fsnotify_unmount_inodes() properly wait for outstanding inode references from detached connectors. Note that the accounting of outstanding inode references in the superblock can cause some cacheline contention on the counter. OTOH it happens only during deletion of the last notification mark from an inode (or during unlinking of watched inode) and that is not too bad. I have measured time to create & delete inotify watch 100000 times from 64 processes in parallel (each process having its own inotify group and its own file on a shared superblock) on a 64 CPU machine. Average and standard deviation of 15 runs look like: Avg Stddev Vanilla 9.817400 0.276165 Fixed 9.710467 0.228294 So there's no statistically significant difference. Fixes: 6b3f05d24d35 ("fsnotify: Detach mark from object list when last reference is dropped") CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 3 +++ fs/notify/mark.c | 39 +++++++++++++++++++++++++++++++-------- include/linux/fs.h | 3 +++ 3 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 875975504409..2172ba516c61 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -96,6 +96,9 @@ static void fsnotify_unmount_inodes(struct super_block *sb) if (iput_inode) iput(iput_inode); + /* Wait for outstanding inode references from connectors */ + wait_var_event(&sb->s_fsnotify_inode_refs, + !atomic_long_read(&sb->s_fsnotify_inode_refs)); } void fsnotify_sb_delete(struct super_block *sb) diff --git a/fs/notify/mark.c b/fs/notify/mark.c index b5172ccb2e60..d2dd16cb5989 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -181,17 +181,20 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static struct inode *fsnotify_detach_connector_from_object( - struct fsnotify_mark_connector *conn) +static void *fsnotify_detach_connector_from_object( + struct fsnotify_mark_connector *conn, + unsigned int *type) { struct inode *inode = NULL; + *type = conn->type; if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) return NULL; if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) { inode = fsnotify_conn_inode(conn); inode->i_fsnotify_mask = 0; + atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs); } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { @@ -215,10 +218,29 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) fsnotify_put_group(group); } +/* Drop object reference originally 
held by a connector */
+static void fsnotify_drop_object(unsigned int type, void *objp)
+{
+	struct inode *inode;
+	struct super_block *sb;
+
+	if (!objp)
+		return;
+	/* Currently only inode references are passed to be dropped */
+	if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
+		return;
+	inode = objp;
+	sb = inode->i_sb;
+	iput(inode);
+	if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
+		wake_up_var(&sb->s_fsnotify_inode_refs);
+}
+
 void fsnotify_put_mark(struct fsnotify_mark *mark)
 {
 	struct fsnotify_mark_connector *conn;
-	struct inode *inode = NULL;
+	void *objp = NULL;
+	unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
 	bool free_conn = false;

 	/* Catch marks that were actually never attached to object */
@@ -238,7 +260,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 	conn = mark->connector;
 	hlist_del_init_rcu(&mark->obj_list);
 	if (hlist_empty(&conn->list)) {
-		inode = fsnotify_detach_connector_from_object(conn);
+		objp = fsnotify_detach_connector_from_object(conn, &type);
 		free_conn = true;
 	} else {
 		__fsnotify_recalc_mask(conn);
@@ -246,7 +268,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 	mark->connector = NULL;
 	spin_unlock(&conn->lock);

-	iput(inode);
+	fsnotify_drop_object(type, objp);

 	if (free_conn) {
 		spin_lock(&destroy_lock);
@@ -713,7 +735,8 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
 {
 	struct fsnotify_mark_connector *conn;
 	struct fsnotify_mark *mark, *old_mark = NULL;
-	struct inode *inode;
+	void *objp;
+	unsigned int type;

 	conn = fsnotify_grab_connector(connp);
 	if (!conn)
@@ -739,11 +762,11 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
 	 * mark references get dropped. It would lead to strange results such
 	 * as delaying inode deletion or blocking unmount.
 	 */
-	inode = fsnotify_detach_connector_from_object(conn);
+	objp = fsnotify_detach_connector_from_object(conn, &type);
 	spin_unlock(&conn->lock);
 	if (old_mark)
 		fsnotify_put_mark(old_mark);
-	iput(inode);
+	fsnotify_drop_object(type, objp);
 }

 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6da94deb957f..00b23b21e78a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1437,6 +1437,9 @@ struct super_block {
 	/* Number of inodes with nlink == 0 but still referenced */
 	atomic_long_t s_remove_count;

+	/* Pending fsnotify inode refs */
+	atomic_long_t s_fsnotify_inode_refs;
+
 	/* Being remounted read-only */
 	int s_readonly_remount;
--
cgit v1.2.3-71-gd317

From a7fe5190c03f8137ef08db84a58dd4daf2c4785d Mon Sep 17 00:00:00 2001
From: Daniel Lezcano
Date: Thu, 4 Oct 2018 14:04:03 +0200
Subject: cpuidle: menu: Remove get_loadavg() from the performance multiplier

The function get_loadavg() almost always returns zero. To be more
precise: out of 1023379 calls to the function, the load was equal to
zero 1020728 times, greater than 100 only 610 times, and between 0 and 5
for the remainder.

In 2011, get_loadavg() was removed from the Android tree because of the
above [1]. At that time, the load was:

unsigned long this_cpu_load(void)
{
	struct rq *this = this_rq();
	return this->cpu_load[0];
}

In 2014, the code was changed by commit 372ba8cb46b2 (cpuidle: menu:
Lookup CPU runqueues less) and the load is:

void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
{
	struct rq *rq = this_rq();
	*nr_waiters = atomic_read(&rq->nr_iowait);
	*load = rq->load.weight;
}

with the same result. Both measurements show that using the load in this
code path no longer matters. Remove it.
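(Editor's illustration, not part of the original changelog: a minimal
user-space sketch of the multiplier before and after this change; the
iowaiter count and the load-derived term are made-up values.)

#include <stdio.h>

/* Old heuristic: 1 + 2 * load-derived term + 10 * nr_iowaiters. */
static int perf_mult_old(unsigned long nr_iowaiters, int loadavg_term)
{
	return 1 + 2 * loadavg_term + 10 * nr_iowaiters;
}

/* New heuristic: the load term is gone. */
static int perf_mult_new(unsigned long nr_iowaiters)
{
	return 1 + 10 * nr_iowaiters;
}

int main(void)
{
	unsigned long nr_iowaiters = 2;	/* hypothetical */

	/* With the load term almost always zero, both return 21 here. */
	printf("old=%d new=%d\n",
	       perf_mult_old(nr_iowaiters, 0), perf_mult_new(nr_iowaiters));
	return 0;
}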
[1] https://android.googlesource.com/kernel/common/+/4dedd9f124703207895777ac6e91dacde0f7cc17 Signed-off-by: Daniel Lezcano Acked-by: Mel Gorman Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 25 ++++++------------------- include/linux/sched/stat.h | 1 - kernel/sched/core.c | 7 ------- 3 files changed, 6 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 575a68f31761..76df4f947f07 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -134,11 +134,6 @@ struct menu_device { #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) -static inline int get_loadavg(unsigned long load) -{ - return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10; -} - static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters) { int bucket = 0; @@ -172,18 +167,10 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters * to be, the higher this multiplier, and thus the higher * the barrier to go to an expensive C state. */ -static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load) +static inline int performance_multiplier(unsigned long nr_iowaiters) { - int mult = 1; - - /* for higher loadavg, we are more reluctant */ - - mult += 2 * get_loadavg(load); - - /* for IO wait tasks (per cpu!) we add 5x each */ - mult += 10 * nr_iowaiters; - - return mult; + /* for IO wait tasks (per cpu!) we add 10x each */ + return 1 + 10 * nr_iowaiters; } static DEFINE_PER_CPU(struct menu_device, menu_devices); @@ -301,7 +288,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int idx; unsigned int interactivity_req; unsigned int predicted_us; - unsigned long nr_iowaiters, cpu_load; + unsigned long nr_iowaiters; ktime_t delta_next; if (data->needs_update) { @@ -312,7 +299,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* determine the expected residency time, round up */ data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next)); - get_iowait_load(&nr_iowaiters, &cpu_load); + nr_iowaiters = nr_iowait_cpu(dev->cpu); data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); if (unlikely(drv->state_count <= 1 || latency_req == 0) || @@ -356,7 +343,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * Use the performance multiplier and the user-configurable * latency_req to determine the maximum exit latency. 
 */
-		interactivity_req = predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
+		interactivity_req = predicted_us / performance_multiplier(nr_iowaiters);
 		if (latency_req > interactivity_req)
 			latency_req = interactivity_req;
 	}
diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
index 04f1321d14c4..f30954cc059d 100644
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -20,7 +20,6 @@ extern unsigned long nr_running(void);
 extern bool single_task_running(void);
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
-extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);

 static inline int sched_info_on(void)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9245c56b8f5f..c8d5c279be14 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2887,13 +2887,6 @@ unsigned long nr_iowait_cpu(int cpu)
 	return atomic_read(&cpu_rq(cpu)->nr_iowait);
 }

-void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
-{
-	struct rq *rq = this_rq();
-	*nr_waiters = atomic_read(&rq->nr_iowait);
-	*load = rq->load.weight;
-}
-
 /*
  * IO-wait accounting, and how its mostly bollocks (on SMP).
  *
--
cgit v1.2.3-71-gd317

From ede95a63b5e84ddeea6b0c473b36ab8bfd8c6ce3 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Tue, 23 Oct 2018 01:11:04 +0200
Subject: bpf: add bpf_jit_limit knob to restrict unpriv allocations

Rick reported that the BPF JIT could potentially fill the entire module
space with BPF programs from unprivileged users, which would prevent
later attempts to load normal kernel modules or privileged BPF programs,
for example. If the JIT was enabled but failed to generate the image,
then before commit 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON
config") we would always fall back to the BPF interpreter. Nowadays,
when CONFIG_BPF_JIT_ALWAYS_ON is set, the load instead aborts with a
failure, since the BPF interpreter has been compiled out.

Add a global limit and enforce it for unprivileged users: if the BPF
interpreter has been compiled out, we fail once the limit has been
reached; otherwise we fall back to the BPF interpreter earlier, without
using module memory. In a next step, fair sharing among unprivileged
users can be addressed, in particular for the case where we would fail
hard once the limit is reached.

Fixes: 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON config")
Fixes: 0a14842f5a3c ("net: filter: Just In Time compiler for x86-64")
Co-Developed-by: Rick Edgecombe
Signed-off-by: Daniel Borkmann
Acked-by: Alexei Starovoitov
Cc: Eric Dumazet
Cc: Jann Horn
Cc: Kees Cook
Cc: LKML
Signed-off-by: Alexei Starovoitov
---
 Documentation/sysctl/net.txt | 8 ++++++++
 include/linux/filter.h | 1 +
 kernel/bpf/core.c | 49 +++++++++++++++++++++++++++++++++++++++++---
 net/core/sysctl_net_core.c | 10 +++++++--
 4 files changed, 63 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 9ecde517728c..2793d4eac55f 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -92,6 +92,14 @@ Values :
	0 - disable JIT kallsyms export (default value)
	1 - enable JIT kallsyms export for privileged users only

+bpf_jit_limit
+-------------
+
+This enforces a global limit for memory allocations to the BPF JIT
+compiler in order to reject unprivileged JIT requests once it has
+been surpassed. bpf_jit_limit contains the value of the global limit
+in bytes.
+ dev_weight -------------- diff --git a/include/linux/filter.h b/include/linux/filter.h index 91b4c934f02e..de629b706d1d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -854,6 +854,7 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; +extern int bpf_jit_limit; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7c7eeea8cffc..6377225b2082 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -365,10 +365,13 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) } #ifdef CONFIG_BPF_JIT +# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000) + /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; int bpf_jit_kallsyms __read_mostly; +int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT; static __always_inline void bpf_get_prog_addr_region(const struct bpf_prog *prog, @@ -577,27 +580,64 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, return ret; } +static atomic_long_t bpf_jit_current; + +#if defined(MODULES_VADDR) +static int __init bpf_jit_charge_init(void) +{ + /* Only used as heuristic here to derive limit. */ + bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2, + PAGE_SIZE), INT_MAX); + return 0; +} +pure_initcall(bpf_jit_charge_init); +#endif + +static int bpf_jit_charge_modmem(u32 pages) +{ + if (atomic_long_add_return(pages, &bpf_jit_current) > + (bpf_jit_limit >> PAGE_SHIFT)) { + if (!capable(CAP_SYS_ADMIN)) { + atomic_long_sub(pages, &bpf_jit_current); + return -EPERM; + } + } + + return 0; +} + +static void bpf_jit_uncharge_modmem(u32 pages) +{ + atomic_long_sub(pages, &bpf_jit_current); +} + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_binary_header *hdr; - unsigned int size, hole, start; + u32 size, hole, start, pages; /* Most of BPF filters are really small, but if some of them * fill a page, allow at least 128 extra bytes to insert a * random section of illegal instructions. */ size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); + pages = size / PAGE_SIZE; + + if (bpf_jit_charge_modmem(pages)) + return NULL; hdr = module_alloc(size); - if (hdr == NULL) + if (!hdr) { + bpf_jit_uncharge_modmem(pages); return NULL; + } /* Fill space with illegal/arch-dep instructions. 
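 * (Editor's note: the bpf_jit_charge_modmem() charge taken above is still
 * held here; it is released on the module_alloc() failure path and later
 * in bpf_jit_binary_free().)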
*/ bpf_fill_ill_insns(hdr, size); - hdr->pages = size / PAGE_SIZE; + hdr->pages = pages; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); start = (get_random_int() % hole) & ~(alignment - 1); @@ -610,7 +650,10 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, void bpf_jit_binary_free(struct bpf_binary_header *hdr) { + u32 pages = hdr->pages; + module_memfree(hdr); + bpf_jit_uncharge_modmem(pages); } /* This symbol is only overridden by archs that have different diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b1a2c5e38530..37b4667128a3 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -279,7 +279,6 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, return ret; } -# ifdef CONFIG_HAVE_EBPF_JIT static int proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -290,7 +289,6 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } -# endif #endif static struct ctl_table net_core_table[] = { @@ -397,6 +395,14 @@ static struct ctl_table net_core_table[] = { .extra2 = &one, }, # endif + { + .procname = "bpf_jit_limit", + .data = &bpf_jit_limit, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax_bpf_restricted, + .extra1 = &one, + }, #endif { .procname = "netdev_tstamp_prequeue", -- cgit v1.2.3-71-gd317 From ae74136b4bb64440a55117e12065b8c282ab6c1a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Oct 2018 12:01:22 -0400 Subject: SUNRPC: Allow cache lookups to use RCU protection rather than the r/w spinlock Instead of the reader/writer spinlock, allow cache lookups to use RCU for looking up entries. This is more efficient since modifications can occur while other entries are being looked up. Note that for now, we keep the reader/writer spinlock until all users have been converted to use RCU-safe freeing of their cache entries. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/cache.h | 12 ++++++ net/sunrpc/cache.c | 93 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 91 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 40d2822f0e2f..cf3e17ee2786 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -167,6 +167,9 @@ extern const struct file_operations cache_file_operations_pipefs; extern const struct file_operations content_file_operations_pipefs; extern const struct file_operations cache_flush_operations_pipefs; +extern struct cache_head * +sunrpc_cache_lookup_rcu(struct cache_detail *detail, + struct cache_head *key, int hash); extern struct cache_head * sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash); @@ -186,6 +189,12 @@ static inline struct cache_head *cache_get(struct cache_head *h) return h; } +static inline struct cache_head *cache_get_rcu(struct cache_head *h) +{ + if (kref_get_unless_zero(&h->ref)) + return h; + return NULL; +} static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { @@ -227,6 +236,9 @@ extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *); extern void *cache_seq_start(struct seq_file *file, loff_t *pos); extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos); extern void cache_seq_stop(struct seq_file *file, void *p); +extern void *cache_seq_start_rcu(struct seq_file *file, loff_t *pos); +extern void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos); +extern void cache_seq_stop_rcu(struct seq_file *file, void *p); extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index aa8e62d61f4d..7593afed9036 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,6 +54,27 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } +static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail, + struct cache_head *key, + int hash) +{ + struct hlist_head *head = &detail->hash_table[hash]; + struct cache_head *tmp; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp, head, cache_list) { + if (detail->match(tmp, key)) { + if (cache_is_expired(detail, tmp)) + continue; + tmp = cache_get_rcu(tmp); + rcu_read_unlock(); + return tmp; + } + } + rcu_read_unlock(); + return NULL; +} + static struct cache_head *sunrpc_cache_find(struct cache_detail *detail, struct cache_head *key, int hash) { @@ -61,7 +82,6 @@ static struct cache_head *sunrpc_cache_find(struct cache_detail *detail, struct cache_head *tmp; read_lock(&detail->hash_lock); - hlist_for_each_entry(tmp, head, cache_list) { if (detail->match(tmp, key)) { if (cache_is_expired(detail, tmp)) @@ -96,10 +116,10 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, write_lock(&detail->hash_lock); /* check if entry appeared while we slept */ - hlist_for_each_entry(tmp, head, cache_list) { + hlist_for_each_entry_rcu(tmp, head, cache_list) { if (detail->match(tmp, key)) { if (cache_is_expired(detail, tmp)) { - hlist_del_init(&tmp->cache_list); + hlist_del_init_rcu(&tmp->cache_list); detail->entries --; freeme = tmp; break; @@ -111,7 +131,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, } } - hlist_add_head(&new->cache_list, head); + hlist_add_head_rcu(&new->cache_list, head); 
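	/*
	 * (Editor's note: the _rcu publication above must come only after
	 * cache_init() and ->init() have fully initialized the new entry,
	 * because lockless readers may traverse it immediately.)
	 */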
detail->entries++; cache_get(new); write_unlock(&detail->hash_lock); @@ -121,6 +141,19 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, return new; } +struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail, + struct cache_head *key, int hash) +{ + struct cache_head *ret; + + ret = sunrpc_cache_find_rcu(detail, key, hash); + if (ret) + return ret; + /* Didn't find anything, insert an empty entry */ + return sunrpc_cache_add_entry(detail, key, hash); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu); + struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { @@ -134,6 +167,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, } EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); + static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); static void cache_fresh_locked(struct cache_head *head, time_t expiry, @@ -450,7 +484,7 @@ static int cache_clean(void) if (!cache_is_expired(current_detail, ch)) continue; - hlist_del_init(&ch->cache_list); + hlist_del_init_rcu(&ch->cache_list); current_detail->entries--; rv = 1; break; @@ -521,7 +555,7 @@ void cache_purge(struct cache_detail *detail) for (i = 0; i < detail->hash_size; i++) { head = &detail->hash_table[i]; hlist_for_each_entry_safe(ch, tmp, head, cache_list) { - hlist_del_init(&ch->cache_list); + hlist_del_init_rcu(&ch->cache_list); detail->entries--; set_bit(CACHE_CLEANED, &ch->flags); @@ -1306,21 +1340,19 @@ EXPORT_SYMBOL_GPL(qword_get); * get a header, then pass each real item in the cache */ -void *cache_seq_start(struct seq_file *m, loff_t *pos) - __acquires(cd->hash_lock) +static void *__cache_seq_start(struct seq_file *m, loff_t *pos) { loff_t n = *pos; unsigned int hash, entry; struct cache_head *ch; struct cache_detail *cd = m->private; - read_lock(&cd->hash_lock); if (!n--) return SEQ_START_TOKEN; hash = n >> 32; entry = n & ((1LL<<32) - 1); - hlist_for_each_entry(ch, &cd->hash_table[hash], cache_list) + hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list) if (!entry--) return ch; n &= ~((1LL<<32) - 1); @@ -1332,9 +1364,19 @@ void *cache_seq_start(struct seq_file *m, loff_t *pos) if (hash >= cd->hash_size) return NULL; *pos = n+1; - return hlist_entry_safe(cd->hash_table[hash].first, + return hlist_entry_safe(rcu_dereference_raw( + hlist_first_rcu(&cd->hash_table[hash])), struct cache_head, cache_list); } + +void *cache_seq_start(struct seq_file *m, loff_t *pos) + __acquires(cd->hash_lock) +{ + struct cache_detail *cd = m->private; + + read_lock(&cd->hash_lock); + return __cache_seq_start(m, pos); +} EXPORT_SYMBOL_GPL(cache_seq_start); void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) @@ -1350,7 +1392,8 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) *pos += 1LL<<32; } else { ++*pos; - return hlist_entry_safe(ch->cache_list.next, + return hlist_entry_safe(rcu_dereference_raw( + hlist_next_rcu(&ch->cache_list)), struct cache_head, cache_list); } *pos &= ~((1LL<<32) - 1); @@ -1362,7 +1405,8 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) if (hash >= cd->hash_size) return NULL; ++*pos; - return hlist_entry_safe(cd->hash_table[hash].first, + return hlist_entry_safe(rcu_dereference_raw( + hlist_first_rcu(&cd->hash_table[hash])), struct cache_head, cache_list); } EXPORT_SYMBOL_GPL(cache_seq_next); @@ -1375,6 +1419,27 @@ void cache_seq_stop(struct seq_file *m, void *p) } EXPORT_SYMBOL_GPL(cache_seq_stop); +void *cache_seq_start_rcu(struct seq_file *m, loff_t 
*pos) + __acquires(RCU) +{ + rcu_read_lock(); + return __cache_seq_start(m, pos); +} +EXPORT_SYMBOL_GPL(cache_seq_start_rcu); + +void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos) +{ + return cache_seq_next(file, p, pos); +} +EXPORT_SYMBOL_GPL(cache_seq_next_rcu); + +void cache_seq_stop_rcu(struct seq_file *m, void *p) + __releases(RCU) +{ + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(cache_seq_stop_rcu); + static int c_show(struct seq_file *m, void *p) { struct cache_head *cp = p; @@ -1863,7 +1928,7 @@ void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) { write_lock(&cd->hash_lock); if (!hlist_unhashed(&h->cache_list)){ - hlist_del_init(&h->cache_list); + hlist_del_init_rcu(&h->cache_list); cd->entries--; write_unlock(&cd->hash_lock); cache_put(h, cd); -- cgit v1.2.3-71-gd317 From d48cf356a13073853f19be6ca5ebbecfc2762ebe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:51 -0400 Subject: SUNRPC: Remove non-RCU protected lookup Clean up the cache code by removing the non-RCU protected lookup. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- Documentation/filesystems/nfs/rpc-cache.txt | 6 +-- include/linux/sunrpc/cache.h | 6 --- net/sunrpc/cache.c | 61 ++--------------------------- 3 files changed, 7 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/nfs/rpc-cache.txt b/Documentation/filesystems/nfs/rpc-cache.txt index ebcaaee21616..c4dac829db0f 100644 --- a/Documentation/filesystems/nfs/rpc-cache.txt +++ b/Documentation/filesystems/nfs/rpc-cache.txt @@ -84,7 +84,7 @@ Creating a Cache A message from user space has arrived to fill out a cache entry. It is in 'buf' of length 'len'. cache_parse should parse this, find the item in the - cache with sunrpc_cache_lookup, and update the item + cache with sunrpc_cache_lookup_rcu, and update the item with sunrpc_cache_update. @@ -95,7 +95,7 @@ Creating a Cache Using a cache ------------- -To find a value in a cache, call sunrpc_cache_lookup passing a pointer +To find a value in a cache, call sunrpc_cache_lookup_rcu passing a pointer to the cache_head in a sample item with the 'key' fields filled in. This will be passed to ->match to identify the target entry. If no entry is found, a new entry will be create, added to the cache, and @@ -116,7 +116,7 @@ item does become valid, the deferred copy of the request will be revisited (->revisit). It is expected that this method will reschedule the request for processing. -The value returned by sunrpc_cache_lookup can also be passed to +The value returned by sunrpc_cache_lookup_rcu can also be passed to sunrpc_cache_update to set the content for the item. A second item is passed which should hold the content. If the item found by _lookup has valid data, then it is discarded and a new item is created. 
This diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index cf3e17ee2786..c3d67e893430 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -171,9 +171,6 @@ extern struct cache_head * sunrpc_cache_lookup_rcu(struct cache_detail *detail, struct cache_head *key, int hash); extern struct cache_head * -sunrpc_cache_lookup(struct cache_detail *detail, - struct cache_head *key, int hash); -extern struct cache_head * sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); @@ -233,9 +230,6 @@ extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *); /* Must store cache_detail in seq_file->private if using next three functions */ -extern void *cache_seq_start(struct seq_file *file, loff_t *pos); -extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos); -extern void cache_seq_stop(struct seq_file *file, void *p); extern void *cache_seq_start_rcu(struct seq_file *file, loff_t *pos); extern void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos); extern void cache_seq_stop_rcu(struct seq_file *file, void *p); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 7593afed9036..593cf8607414 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -75,27 +75,6 @@ static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail, return NULL; } -static struct cache_head *sunrpc_cache_find(struct cache_detail *detail, - struct cache_head *key, int hash) -{ - struct hlist_head *head = &detail->hash_table[hash]; - struct cache_head *tmp; - - read_lock(&detail->hash_lock); - hlist_for_each_entry(tmp, head, cache_list) { - if (detail->match(tmp, key)) { - if (cache_is_expired(detail, tmp)) - /* This entry is expired, we will discard it. 
*/ - break; - cache_get(tmp); - read_unlock(&detail->hash_lock); - return tmp; - } - } - read_unlock(&detail->hash_lock); - return NULL; -} - static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, struct cache_head *key, int hash) @@ -154,20 +133,6 @@ struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail, } EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu); -struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, - struct cache_head *key, int hash) -{ - struct cache_head *ret; - - ret = sunrpc_cache_find(detail, key, hash); - if (ret) - return ret; - /* Didn't find anything, insert an empty entry */ - return sunrpc_cache_add_entry(detail, key, hash); -} -EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); - - static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); static void cache_fresh_locked(struct cache_head *head, time_t expiry, @@ -1369,17 +1334,7 @@ static void *__cache_seq_start(struct seq_file *m, loff_t *pos) struct cache_head, cache_list); } -void *cache_seq_start(struct seq_file *m, loff_t *pos) - __acquires(cd->hash_lock) -{ - struct cache_detail *cd = m->private; - - read_lock(&cd->hash_lock); - return __cache_seq_start(m, pos); -} -EXPORT_SYMBOL_GPL(cache_seq_start); - -void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) +static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) { struct cache_head *ch = p; int hash = (*pos >> 32); @@ -1411,14 +1366,6 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) } EXPORT_SYMBOL_GPL(cache_seq_next); -void cache_seq_stop(struct seq_file *m, void *p) - __releases(cd->hash_lock) -{ - struct cache_detail *cd = m->private; - read_unlock(&cd->hash_lock); -} -EXPORT_SYMBOL_GPL(cache_seq_stop); - void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos) __acquires(RCU) { @@ -1466,9 +1413,9 @@ static int c_show(struct seq_file *m, void *p) } static const struct seq_operations cache_content_op = { - .start = cache_seq_start, - .next = cache_seq_next, - .stop = cache_seq_stop, + .start = cache_seq_start_rcu, + .next = cache_seq_next_rcu, + .stop = cache_seq_stop_rcu, .show = c_show, }; -- cgit v1.2.3-71-gd317 From 1863d77f15da0addcd293a1719fa5d3ef8cde3ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:52 -0400 Subject: SUNRPC: Replace the cache_detail->hash_lock with a regular spinlock Now that the reader functions are all RCU protected, use a regular spinlock rather than a reader/writer lock. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/cache.h | 2 +- net/sunrpc/cache.c | 46 ++++++++++++++++++++++---------------------- 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index c3d67e893430..5a3e95017fc6 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -67,7 +67,7 @@ struct cache_detail { struct module * owner; int hash_size; struct hlist_head * hash_table; - rwlock_t hash_lock; + spinlock_t hash_lock; char *name; void (*cache_put)(struct kref *); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 593cf8607414..f96345b1180e 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -92,7 +92,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, cache_init(new, detail); detail->init(new, key); - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); /* check if entry appeared while we slept */ hlist_for_each_entry_rcu(tmp, head, cache_list) { @@ -104,7 +104,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, break; } cache_get(tmp); - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); cache_put(new, detail); return tmp; } @@ -113,7 +113,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, hlist_add_head_rcu(&new->cache_list, head); detail->entries++; cache_get(new); - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); if (freeme) cache_put(freeme, detail); @@ -167,18 +167,18 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, struct cache_head *tmp; if (!test_bit(CACHE_VALID, &old->flags)) { - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); if (!test_bit(CACHE_VALID, &old->flags)) { if (test_bit(CACHE_NEGATIVE, &new->flags)) set_bit(CACHE_NEGATIVE, &old->flags); else detail->update(old, new); cache_fresh_locked(old, new->expiry_time, detail); - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); cache_fresh_unlocked(old, detail); return old; } - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); } /* We need to insert a new entry */ tmp = detail->alloc(); @@ -189,7 +189,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, cache_init(tmp, detail); detail->init(tmp, old); - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); if (test_bit(CACHE_NEGATIVE, &new->flags)) set_bit(CACHE_NEGATIVE, &tmp->flags); else @@ -199,7 +199,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, cache_get(tmp); cache_fresh_locked(tmp, new->expiry_time, detail); cache_fresh_locked(old, 0, detail); - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); cache_fresh_unlocked(tmp, detail); cache_fresh_unlocked(old, detail); cache_put(old, detail); @@ -239,7 +239,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h { int rv; - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); rv = cache_is_valid(h); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); @@ -247,7 +247,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h detail); rv = -ENOENT; } - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); cache_fresh_unlocked(h, detail); return rv; } @@ -357,7 +357,7 @@ static struct delayed_work cache_cleaner; void sunrpc_init_cache_detail(struct cache_detail *cd) { - rwlock_init(&cd->hash_lock); + 
spin_lock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); spin_lock(&cache_list_lock); cd->nextcheck = 0; @@ -377,11 +377,11 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd) { cache_purge(cd); spin_lock(&cache_list_lock); - write_lock(&cd->hash_lock); + spin_lock(&cd->hash_lock); if (current_detail == cd) current_detail = NULL; list_del_init(&cd->others); - write_unlock(&cd->hash_lock); + spin_unlock(&cd->hash_lock); spin_unlock(&cache_list_lock); if (list_empty(&cache_list)) { /* module must be being unloaded so its safe to kill the worker */ @@ -438,7 +438,7 @@ static int cache_clean(void) struct hlist_head *head; struct hlist_node *tmp; - write_lock(¤t_detail->hash_lock); + spin_lock(¤t_detail->hash_lock); /* Ok, now to clean this strand */ @@ -455,7 +455,7 @@ static int cache_clean(void) break; } - write_unlock(¤t_detail->hash_lock); + spin_unlock(¤t_detail->hash_lock); d = current_detail; if (!ch) current_index ++; @@ -510,9 +510,9 @@ void cache_purge(struct cache_detail *detail) struct hlist_node *tmp = NULL; int i = 0; - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); if (!detail->entries) { - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); return; } @@ -524,13 +524,13 @@ void cache_purge(struct cache_detail *detail) detail->entries--; set_bit(CACHE_CLEANED, &ch->flags); - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); cache_fresh_unlocked(ch, detail); cache_put(ch, detail); - write_lock(&detail->hash_lock); + spin_lock(&detail->hash_lock); } } - write_unlock(&detail->hash_lock); + spin_unlock(&detail->hash_lock); } EXPORT_SYMBOL_GPL(cache_purge); @@ -1873,13 +1873,13 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) { - write_lock(&cd->hash_lock); + spin_lock(&cd->hash_lock); if (!hlist_unhashed(&h->cache_list)){ hlist_del_init_rcu(&h->cache_list); cd->entries--; - write_unlock(&cd->hash_lock); + spin_unlock(&cd->hash_lock); cache_put(h, cd); } else - write_unlock(&cd->hash_lock); + spin_unlock(&cd->hash_lock); } EXPORT_SYMBOL_GPL(sunrpc_cache_unhash); -- cgit v1.2.3-71-gd317 From 3ae2cefb613b00d613677c05ffa384b4f660f468 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 1 Oct 2018 14:16:11 -0400 Subject: svcrdma: Increase the default connection credit limit Reduce queuing on clients by allowing more credits by default. 64 is the default NFSv4.1 slot table size on Linux clients. This size prevents the credit limit from putting RPC requests to sleep again after they have already slept waiting for a session slot. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index fd78f78df5c6..e6e26918504c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -113,13 +113,14 @@ struct svcxprt_rdma { /* sc_flags */ #define RDMAXPRT_CONN_PENDING 3 -#define RPCRDMA_LISTEN_BACKLOG 10 -#define RPCRDMA_MAX_REQUESTS 32 - -/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our - * current NFSv4.1 implementation supports one backchannel slot. 
+/*
+ * Default connection parameters
+ */
+enum {
+	RPCRDMA_LISTEN_BACKLOG	= 10,
+	RPCRDMA_MAX_REQUESTS	= 64,
+	RPCRDMA_MAX_BC_REQUESTS	= 2,
+};

 #define RPCSVC_MAXPAYLOAD_RDMA	RPCSVC_MAXPAYLOAD
--
cgit v1.2.3-71-gd317

From f813f21971b96f61a789dd48151f92220fdd2e0a Mon Sep 17 00:00:00 2001
From: Jérôme Glisse
Date: Tue, 30 Oct 2018 15:04:06 -0700
Subject: mm/hmm: fix utf8 ...
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "HMM updates, improvements and fixes", v2

A few fixes that only affect HMM users. Improve the synchronization
callback so that we match what other mmu_notifier listeners do, and add
proper support for the new blockable flag in the process.

For curious folks here are branches to leverage HMM in various existing
device drivers:

https://cgit.freedesktop.org/~glisse/linux/log/?h=hmm-nouveau-v01
https://cgit.freedesktop.org/~glisse/linux/log/?h=hmm-radeon-v00
https://cgit.freedesktop.org/~glisse/linux/log/?h=hmm-intel-v00

More to come (amd gpu, Mellanox, ...) I expect more of the preparatory
work for nouveau will be merged in 4.20 (like we have been doing since
4.16) and I will wait until this patchset is upstream before pushing the
patches that actually make use of HMM (to avoid complex tree
inter-dependency).

This patch (of 6):

Somehow the utf8 encoding must have been broken.

Link: http://lkml.kernel.org/r/20181019160442.18723-2-jglisse@redhat.com
Signed-off-by: Jérôme Glisse
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/hmm.h | 2 +-
 mm/hmm.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index dde947083d4e..42b1ae915915 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -11,7 +11,7 @@
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
- * Authors: Jérôme Glisse
+ * Authors: Jérôme Glisse
 */
/*
 * Heterogeneous Memory Management (HMM)
diff --git a/mm/hmm.c b/mm/hmm.c
index 774d684fa2b4..de9840f60100 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -11,7 +11,7 @@
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
- * Authors: Jérôme Glisse
+ * Authors: Jérôme Glisse
 */
/*
 * Refer to include/linux/hmm.h for information about heterogeneous memory
--
cgit v1.2.3-71-gd317

From 44532d4c591c10d6907ac5030373bc306617d92b Mon Sep 17 00:00:00 2001
From: Jérôme Glisse
Date: Tue, 30 Oct 2018 15:04:24 -0700
Subject: mm/hmm: use a structure for update callback parameters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use a structure to gather all the parameters for the update callback.
This makes it easier to add new parameters, since it avoids having to
update every callback function signature.

The hmm_update structure is always associated with an mmu_notifier
callback, so we are not planning to group multiple updates together. Nor
do we care about the page size for the range, as the range will fully
cover the page being invalidated (this is a mmu_notifier property).
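(Editor's illustration, not part of the original changelog: a hedged
sketch of how a mirror driver might implement the callback under the new
contract; all my_drv_* names and the lock are invented for the example.)

static int my_drv_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
					     const struct hmm_update *update)
{
	struct my_drv *drv = container_of(mirror, struct my_drv, mirror);

	/* Honor non-blockable invalidations by bailing out early. */
	if (!update->blockable) {
		if (!mutex_trylock(&drv->lock))
			return -EAGAIN;
	} else {
		mutex_lock(&drv->lock);
	}

	/* Tear down the device mappings covering [start, end). */
	my_drv_invalidate_range(drv, update->start, update->end);
	mutex_unlock(&drv->lock);
	return 0;
}

static const struct hmm_mirror_ops my_drv_mirror_ops = {
	.sync_cpu_device_pagetables = my_drv_sync_cpu_device_pagetables,
};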
Link: http://lkml.kernel.org/r/20181019160442.18723-6-jglisse@redhat.com Signed-off-by: Jérôme Glisse Cc: Ralph Campbell Cc: John Hubbard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 31 ++++++++++++++++++++++--------- mm/hmm.c | 33 ++++++++++++++++++++++----------- 2 files changed, 44 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 42b1ae915915..c6fb869a81c0 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -274,13 +274,28 @@ static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range, struct hmm_mirror; /* - * enum hmm_update_type - type of update + * enum hmm_update_event - type of update * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why) */ -enum hmm_update_type { +enum hmm_update_event { HMM_UPDATE_INVALIDATE, }; +/* + * struct hmm_update - HMM update informations for callback + * + * @start: virtual start address of the range to update + * @end: virtual end address of the range to update + * @event: event triggering the update (what is happening) + * @blockable: can the callback block/sleep ? + */ +struct hmm_update { + unsigned long start; + unsigned long end; + enum hmm_update_event event; + bool blockable; +}; + /* * struct hmm_mirror_ops - HMM mirror device operations callback * @@ -300,9 +315,9 @@ struct hmm_mirror_ops { /* sync_cpu_device_pagetables() - synchronize page tables * * @mirror: pointer to struct hmm_mirror - * @update_type: type of update that occurred to the CPU page table - * @start: virtual start address of the range to update - * @end: virtual end address of the range to update + * @update: update informations (see struct hmm_update) + * Returns: -EAGAIN if update.blockable false and callback need to + * block, 0 otherwise. * * This callback ultimately originates from mmu_notifiers when the CPU * page table is updated. The device driver must update its page table @@ -313,10 +328,8 @@ struct hmm_mirror_ops { * page tables are completely updated (TLBs flushed, etc); this is a * synchronous call. 
*/ - void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror, - enum hmm_update_type update_type, - unsigned long start, - unsigned long end); + int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror, + const struct hmm_update *update); }; /* diff --git a/mm/hmm.c b/mm/hmm.c index a3d532ff7c3d..b4e9afdc2181 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -126,10 +126,8 @@ void hmm_mm_destroy(struct mm_struct *mm) kfree(mm->hmm); } -static void hmm_invalidate_range(struct hmm *hmm, - enum hmm_update_type action, - unsigned long start, - unsigned long end) +static int hmm_invalidate_range(struct hmm *hmm, + const struct hmm_update *update) { struct hmm_mirror *mirror; struct hmm_range *range; @@ -138,22 +136,30 @@ static void hmm_invalidate_range(struct hmm *hmm, list_for_each_entry(range, &hmm->ranges, list) { unsigned long addr, idx, npages; - if (end < range->start || start >= range->end) + if (update->end < range->start || update->start >= range->end) continue; range->valid = false; - addr = max(start, range->start); + addr = max(update->start, range->start); idx = (addr - range->start) >> PAGE_SHIFT; - npages = (min(range->end, end) - addr) >> PAGE_SHIFT; + npages = (min(range->end, update->end) - addr) >> PAGE_SHIFT; memset(&range->pfns[idx], 0, sizeof(*range->pfns) * npages); } spin_unlock(&hmm->lock); down_read(&hmm->mirrors_sem); - list_for_each_entry(mirror, &hmm->mirrors, list) - mirror->ops->sync_cpu_device_pagetables(mirror, action, - start, end); + list_for_each_entry(mirror, &hmm->mirrors, list) { + int ret; + + ret = mirror->ops->sync_cpu_device_pagetables(mirror, update); + if (!update->blockable && ret == -EAGAIN) { + up_read(&hmm->mirrors_sem); + return -EAGAIN; + } + } up_read(&hmm->mirrors_sem); + + return 0; } static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm) @@ -202,11 +208,16 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn, unsigned long start, unsigned long end) { + struct hmm_update update; struct hmm *hmm = mm->hmm; VM_BUG_ON(!hmm); - hmm_invalidate_range(mm->hmm, HMM_UPDATE_INVALIDATE, start, end); + update.start = start; + update.end = end; + update.event = HMM_UPDATE_INVALIDATE; + update.blockable = true; + hmm_invalidate_range(hmm, &update); } static const struct mmu_notifier_ops hmm_mmu_notifier_ops = { -- cgit v1.2.3-71-gd317 From 7275b097851a5e2e0dd4da039c7e96b59ac5314e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 30 Oct 2018 15:04:59 -0700 Subject: linux/bitmap.h: handle constant zero-size bitmaps correctly The static inlines in bitmap.h do not handle a compile-time constant nbits==0 correctly (they dereference the passed src or dst pointers, despite only 0 words being valid to access). I had the 0-day buildbot chew on a patch [1] that would cause build failures for such cases without complaining, suggesting that we don't have any such users currently, at least for the 70 .config/arch combinations that was built. Should any turn up, make sure they use the out-of-line versions, which do handle nbits==0 correctly. This is of course not the most efficient, but it's much less churn than teaching all the static inlines an "if (zero_const_nbits())", and since we don't have any current instances, this doesn't affect existing code at all. 
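To make the guarded-against case concrete (an illustrative sketch, not taken from the patch): a bitmap whose length is a compile-time constant 0 has no words that are valid to access.

/* Illustrative only: a zero-length bitmap, e.g. an empty struct member. */
DECLARE_BITMAP(empty, 0);	/* expands to: unsigned long empty[0]; */

static void example(void)
{
	/*
	 * Before this patch, small_const_nbits(0) evaluated to true, so
	 * the inline bitmap_zero() compiled to "*dst = 0UL" and wrote to
	 * storage that does not exist. With (nbits) > 0 now required,
	 * a constant 0 takes the generic path instead, whose computed
	 * length is 0 bytes, so nothing is dereferenced.
	 */
	bitmap_zero(empty, 0);
}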
[1] lkml.kernel.org/r/20180815085539.27485-1-linux@rasmusvillemoes.dk Link: http://lkml.kernel.org/r/20180818131623.8755-3-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Yury Norov Cc: Rasmus Villemoes Cc: Sudeep Holla Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index acf5e8df3504..a9805bacbd7c 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -204,8 +204,13 @@ extern int bitmap_print_to_pagebuf(bool list, char *buf, #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) +/* + * The static inlines below do not handle constant nbits==0 correctly, + * so make such users (should any ever turn up) call the out-of-line + * versions. + */ #define small_const_nbits(nbits) \ - (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { -- cgit v1.2.3-71-gd317 From c8cebc553368209426f7279736db4f88f1853396 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 30 Oct 2018 15:05:02 -0700 Subject: linux/bitmap.h: remove redundant uses of small_const_nbits() In the _zero, _fill and _copy functions, the small_const_nbits branch is redundant. If nbits is small and const, gcc knows full well that BITS_TO_LONGS(nbits) is 1, so len is also a compile-time constant (sizeof(long)), and calling memset or memcpy with a length argument of sizeof(long) makes gcc generate the expected code anyway: #include <string.h> void a(unsigned long *x) { memset(x, 0, 8); } void b(unsigned long *x) { memset(x, 0xff, 8); } void c(unsigned long *x, const unsigned long *y) { memcpy(x, y, 8); } turns into 0000000000000000 <a>: 0: 48 c7 07 00 00 00 00 movq $0x0,(%rdi) 7: c3 retq 8: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1) f: 00 0000000000000010 <b>: 10: 48 c7 07 ff ff ff ff movq $0xffffffffffffffff,(%rdi) 17: c3 retq 18: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1) 1f: 00 0000000000000020 <c>: 20: 48 8b 06 mov (%rsi),%rax 23: 48 89 07 mov %rax,(%rdi) 26: c3 retq Link: http://lkml.kernel.org/r/20180818131623.8755-4-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Yury Norov Cc: Rasmus Villemoes Cc: Sudeep Holla Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index a9805bacbd7c..004cd42a3c4d 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -214,33 +214,21 @@ extern int bitmap_print_to_pagebuf(bool list, char *buf, static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { - if (small_const_nbits(nbits)) - *dst = 0UL; - else { - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - memset(dst, 0, len); - } + unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memset(dst, 0, len); } static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { - if (small_const_nbits(nbits)) - *dst = ~0UL; - else { - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - memset(dst, 0xff, len); - } + unsigned int len = BITS_TO_LONGS(nbits) *
sizeof(unsigned long); + memset(dst, 0xff, len); } static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { - if (small_const_nbits(nbits)) - *dst = *src; - else { - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - memcpy(dst, src, len); - } + unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memcpy(dst, src, len); } /* -- cgit v1.2.3-71-gd317 From d9873969fa8725dc6a5a21ab788c057fd8719751 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 30 Oct 2018 15:05:07 -0700 Subject: linux/bitmap.h: fix type of nbits in bitmap_shift_right() Most other bitmap APIs, including the OOL version __bitmap_shift_right, take an unsigned nbits. This was accidentally left out of commit 2fbad29917c98. Link: http://lkml.kernel.org/r/20180818131623.8755-5-linux@rasmusvillemoes.dk Fixes: 2fbad29917c98 ("lib: bitmap: change bitmap_shift_right to take unsigned parameters") Signed-off-by: Rasmus Villemoes Reported-by: Yury Norov Reviewed-by: Andy Shevchenko Cc: Rasmus Villemoes Cc: Sudeep Holla Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 004cd42a3c4d..4032680e629d 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -391,7 +391,7 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, } static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, - unsigned int shift, int nbits) + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; -- cgit v1.2.3-71-gd317 From 41e7b1661ffbf562d3aa2b7ce4ad283db50b711a Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 30 Oct 2018 15:05:10 -0700 Subject: linux/bitmap.h: relax comment on compile-time constant nbits It's not clear what's so horrible about emitting a function call to handle a run-time-sized bitmap. Moreover, gcc also emits a function call for a compile-time-constant-but-huge nbits, so the comment isn't even accurate. Link: http://lkml.kernel.org/r/20180818131623.8755-6-linux@rasmusvillemoes.dk Signed-off-by: Rasmus Villemoes Reviewed-by: Andy Shevchenko Cc: Yury Norov Cc: Rasmus Villemoes Cc: Sudeep Holla Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 4032680e629d..f58e97446abc 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -28,8 +28,8 @@ * The available bitmap operations and their rough meaning in the * case that the bitmap is a single unsigned long are thus: * - * Note that nbits should be always a compile time evaluable constant. - * Otherwise many inlines will generate horrible code. + * The generated code is more efficient when nbits is known at + * compile-time and at most BITS_PER_LONG. * * :: * -- cgit v1.2.3-71-gd317 From 7e5ca363a5a1ec42c54dc1e0644b361a2daf984c Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 30 Oct 2018 15:05:42 -0700 Subject: lib/rbtree.c: fix typo in comment of rb_insert_augmented() The function name in the comment is not correct.
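To illustrate the corrected pattern (a minimal sketch, not from the patch): update the augmented data on the way down, link the node, then call rb_insert_augmented(). struct my_node is hypothetical, and my_callbacks is assumed to have been generated elsewhere, for example with RB_DECLARE_CALLBACKS().

struct my_node {
	struct rb_node rb;
	unsigned long key;
	unsigned long subtree_max;	/* augmented data */
};

static void my_insert(struct my_node *node, struct rb_root *root)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;

	node->subtree_max = node->key;
	while (*link) {
		struct my_node *p = rb_entry(*link, struct my_node, rb);

		parent = *link;
		/* Update augmented information on the path down. */
		if (p->subtree_max < node->key)
			p->subtree_max = node->key;
		link = node->key < p->key ? &p->rb.rb_left : &p->rb.rb_right;
	}

	rb_link_node(&node->rb, parent, link);
	/* rb_insert_augmented(), not rb_augment_inserted() */
	rb_insert_augmented(&node->rb, root, &my_callbacks);
}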
Link: http://lkml.kernel.org/r/20181010021344.60433-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree_augmented.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index af8a61be2d8d..9510c677ac70 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -51,8 +51,8 @@ extern void __rb_insert_augmented(struct rb_node *node, * * On insertion, the user must update the augmented information on the path * leading to the inserted node, then call rb_link_node() as usual and - * rb_augment_inserted() instead of the usual rb_insert_color() call. - * If rb_augment_inserted() rebalances the rbtree, it will callback into + * rb_insert_augmented() instead of the usual rb_insert_color() call. + * If rb_insert_augmented() rebalances the rbtree, it will callback into * a user provided function to update the augmented information on the * affected subtrees. */ -- cgit v1.2.3-71-gd317 From 89976005536c47a788bdd6cef5a4e9fef3c0de32 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 30 Oct 2018 15:05:49 -0700 Subject: include/linux/compat.h: mark expected switch fall-throughs In preparation for enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Link: http://lkml.kernel.org/r/20181013115048.GA3262@embeddedor.com Signed-off-by: Gustavo A. R. Silva Acked-by: Kees Cook Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index d30e4dbd4be2..06e77473f175 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -488,8 +488,11 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, compat_sigset_t v; switch (_NSIG_WORDS) { case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3]; + /* fall through */ case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2]; + /* fall through */ case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1]; + /* fall through */ case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0]; } return copy_to_user(compat, &v, size) ? -EFAULT : 0; -- cgit v1.2.3-71-gd317 From 3819ddec1f8c8a5daf215bec4067f8fd1dc40d6d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 30 Oct 2018 15:07:10 -0700 Subject: include/linux/signal.h: mark expected switch fall-throughs In preparation for enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Link: http://lkml.kernel.org/r/20181013114847.GA3160@embeddedor.com Signed-off-by: Gustavo A. R. Silva Acked-by: Kees Cook Reviewed-by: Michael Ellerman Cc: Oleg Nesterov Cc: "Eric W.
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 200ed96a05af..f428e86f4800 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -129,9 +129,11 @@ static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \ b3 = b->sig[3]; b2 = b->sig[2]; \ r->sig[3] = op(a3, b3); \ r->sig[2] = op(a2, b2); \ + /* fall through */ \ case 2: \ a1 = a->sig[1]; b1 = b->sig[1]; \ r->sig[1] = op(a1, b1); \ + /* fall through */ \ case 1: \ a0 = a->sig[0]; b0 = b->sig[0]; \ r->sig[0] = op(a0, b0); \ @@ -161,7 +163,9 @@ static inline void name(sigset_t *set) \ switch (_NSIG_WORDS) { \ case 4: set->sig[3] = op(set->sig[3]); \ set->sig[2] = op(set->sig[2]); \ + /* fall through */ \ case 2: set->sig[1] = op(set->sig[1]); \ + /* fall through */ \ case 1: set->sig[0] = op(set->sig[0]); \ break; \ default: \ @@ -182,6 +186,7 @@ static inline void sigemptyset(sigset_t *set) memset(set, 0, sizeof(sigset_t)); break; case 2: set->sig[1] = 0; + /* fall through */ case 1: set->sig[0] = 0; break; } @@ -194,6 +199,7 @@ static inline void sigfillset(sigset_t *set) memset(set, -1, sizeof(sigset_t)); break; case 2: set->sig[1] = -1; + /* fall through */ case 1: set->sig[0] = -1; break; } -- cgit v1.2.3-71-gd317 From 69a60bc75fe73511af89328ded1b33bc4a625a5c Mon Sep 17 00:00:00 2001 From: Alexander Pateenok Date: Tue, 30 Oct 2018 15:07:36 -0700 Subject: percpu: remove PER_CPU_DEF_ATTRIBUTES macro The macro is not used: $ grep -r PER_CPU_DEF_ATTRIBUTES include/linux/percpu-defs.h: __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak \ include/linux/percpu-defs.h: __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES \ include/asm-generic/percpu.h:#ifndef PER_CPU_DEF_ATTRIBUTES include/asm-generic/percpu.h:#define PER_CPU_DEF_ATTRIBUTES It was added with b01e8dc34379 ("alpha: fix percpu build breakage") and removed in 2009 with b01e8dc34379..6088464cf1ae. Link: http://lkml.kernel.org/r/20180821164904.qqhcduimjznods66@K55DR.localdomain Signed-off-by: Alexander Pateenok Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/percpu.h | 4 ---- include/linux/percpu-defs.h | 6 ++---- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 1817a8415a5e..c2de013b2cf4 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -62,10 +62,6 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_ATTRIBUTES #endif -#ifndef PER_CPU_DEF_ATTRIBUTES -#define PER_CPU_DEF_ATTRIBUTES -#endif - #define raw_cpu_generic_read(pcp) \ ({ \ *raw_cpu_ptr(&(pcp)); \ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 2d2096ba1cfe..1ce8e264a269 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -91,8 +91,7 @@ extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ extern __PCPU_ATTRS(sec) __typeof__(type) name; \ - __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak \ - __typeof__(type) name + __PCPU_ATTRS(sec) __weak __typeof__(type) name #else /* * Normal declaration and definition macros. 
@@ -101,8 +100,7 @@ extern __PCPU_ATTRS(sec) __typeof__(type) name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ - __PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES \ - __typeof__(type) name + __PCPU_ATTRS(sec) __typeof__(type) name #endif /* -- cgit v1.2.3-71-gd317 From b4a991ec584b3ce333342c431c3cc4fef8a690d7 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:07:40 -0700 Subject: mm: remove CONFIG_NO_BOOTMEM All architectures select NO_BOOTMEM which essentially becomes 'Y' for any kernel configuration and therefore it can be removed. [alexander.h.duyck@linux.intel.com: remove now defunct NO_BOOTMEM from depends list for deferred init] Link: http://lkml.kernel.org/r/20180925201814.3576.15105.stgit@localhost.localdomain Link: http://lkml.kernel.org/r/1536927045-23536-3-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Signed-off-by: Alexander Duyck Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/Kconfig | 1 - arch/arc/Kconfig | 1 - arch/arm/Kconfig | 1 - arch/arm64/Kconfig | 1 - arch/csky/Kconfig | 1 - arch/h8300/Kconfig | 1 - arch/hexagon/Kconfig | 1 - arch/ia64/Kconfig | 1 - arch/m68k/Kconfig | 1 - arch/microblaze/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/nds32/Kconfig | 1 - arch/nios2/Kconfig | 1 - arch/openrisc/Kconfig | 1 - arch/parisc/Kconfig | 1 - arch/powerpc/Kconfig | 1 - arch/riscv/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/sh/Kconfig | 1 - arch/sparc/Kconfig | 1 - arch/um/Kconfig | 1 - arch/unicore32/Kconfig | 1 - arch/x86/Kconfig | 3 --- arch/xtensa/Kconfig | 1 - include/linux/bootmem.h | 36 ++---------------------------------- include/linux/mmzone.h | 5 +---- mm/Kconfig | 4 ---- mm/Makefile | 7 +------ mm/memblock.c | 2 -- 29 files changed, 4 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 620b0a711ee4..04de6be101bc 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -32,7 +32,6 @@ config ALPHA select OLD_SIGSUSPEND select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67 select HAVE_MEMBLOCK - select NO_BOOTMEM help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index e98c6b8e6186..56995f356f69 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -44,7 +44,6 @@ config ARC select HANDLE_DOMAIN_IRQ select IRQ_DOMAIN select MODULES_USE_ELF_RELA - select NO_BOOTMEM select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b8c6062ca0c1..37d4c40f405a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -100,7 +100,6 @@ config ARM select IRQ_FORCED_THREADING select MODULES_USE_ELF_REL select NEED_DMA_MAP_STATE - select NO_BOOTMEM select OF_EARLY_FLATTREE if OF select OF_RESERVED_MEM if OF select OLD_SIGACTION diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 964f682a2b7b..a8f36c7041ff 100644 --- a/arch/arm64/Kconfig +++ 
b/arch/arm64/Kconfig @@ -161,7 +161,6 @@ config ARM64 select MULTI_IRQ_HANDLER select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH - select NO_BOOTMEM select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 0a0558567eaa..46966f576249 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -39,7 +39,6 @@ config CSKY select HAVE_MEMBLOCK select MAY_HAVE_SPARSE_IRQ select MODULES_USE_ELF_RELA if MODULES - select NO_BOOTMEM select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 0b334b671e90..5e89d40be8cd 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -16,7 +16,6 @@ config H8300 select OF_IRQ select OF_EARLY_FLATTREE select HAVE_MEMBLOCK - select NO_BOOTMEM select TIMER_OF select H8300_TMR8 select HAVE_KERNEL_GZIP diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 7b25d7c8fa49..7823f15e17b2 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -23,7 +23,6 @@ config HEXAGON select HAVE_ARCH_TRACEHOOK select HAVE_MEMBLOCK select ARCH_DISCARD_MEMBLOCK - select NO_BOOTMEM select NEED_SG_DMA_LENGTH select NO_IOPORT_MAP select GENERIC_IOMAP diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 8b4a0c1748c0..2bf4ef792f2c 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -28,7 +28,6 @@ config IA64 select HAVE_ARCH_TRACEHOOK select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP - select NO_BOOTMEM select HAVE_VIRT_CPU_ACCOUNTING select ARCH_HAS_DMA_MARK_CLEAN select ARCH_HAS_SG_CHAIN diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index c7b2a8d60a41..26edf2542295 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -29,7 +29,6 @@ config M68K select DMA_DIRECT_OPS if HAS_DMA select HAVE_MEMBLOCK select ARCH_DISCARD_MEMBLOCK - select NO_BOOTMEM config CPU_BIG_ENDIAN def_bool y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 164a4857737a..5c2777bb5555 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -28,7 +28,6 @@ config MICROBLAZE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER - select NO_BOOTMEM select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_OPROFILE diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 80778b40f8fa..366934157632 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -78,7 +78,6 @@ config MIPS select RTC_LIB select SYSCTL_EXCEPTION_TRACE select VIRT_TO_BUS - select NO_BOOTMEM menu "Machine selection" diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 56992330026a..93c0b587ee23 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -36,7 +36,6 @@ config NDS32 select MODULES_USE_ELF_RELA select OF select OF_EARLY_FLATTREE - select NO_BOOTMEM select NO_IOPORT_MAP select RTC_LIB select THREAD_INFO_IN_TASK diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 2df0c57f2833..b01febeb5ce3 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -25,7 +25,6 @@ config NIOS2 select CPU_NO_EFFICIENT_FFS select HAVE_MEMBLOCK select ARCH_DISCARD_MEMBLOCK - select NO_BOOTMEM config GENERIC_CSUM def_bool y diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index a655ae280637..cbfaf15f9d70 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -32,7 +32,6 @@ config OPENRISC select HAVE_DEBUG_STACKOVERFLOW select OR1K_PIC select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 - select NO_BOOTMEM select ARCH_USE_QUEUED_SPINLOCKS select ARCH_USE_QUEUED_RWLOCKS select OMPIC if SMP diff 
--git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index f1cd12afd943..7418ebeeb4a4 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -16,7 +16,6 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select HAVE_MEMBLOCK - select NO_BOOTMEM select BUG select BUILDTIME_EXTABLE_SORT select HAVE_PERF_EVENTS diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e84943d24e5c..5798ffb1379b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -231,7 +231,6 @@ config PPC select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_SG_DMA_LENGTH - select NO_BOOTMEM select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fe451348ae57..f7c6b7cf124a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -35,7 +35,6 @@ config RISCV select HAVE_GENERIC_DMA_COHERENT select HAVE_PERF_EVENTS select IRQ_DOMAIN - select NO_BOOTMEM select RISCV_ISA_A if SMP select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8b25e1f45b27..c7af83a15b2d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -175,7 +175,6 @@ config S390 select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select MODULES_USE_ELF_RELA - select NO_BOOTMEM select OLD_SIGACTION select OLD_SIGSUSPEND3 select SPARSE_IRQ diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 475d786a65b0..7aaea9690e18 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -11,7 +11,6 @@ config SUPERH select HAVE_IDE if HAS_IOPORT_MAP select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP - select NO_BOOTMEM select ARCH_DISCARD_MEMBLOCK select HAVE_OPROFILE select HAVE_GENERIC_DMA_COHERENT diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 7e2aa59fcc29..98c0996f3c2c 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -46,7 +46,6 @@ config SPARC select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH select HAVE_MEMBLOCK - select NO_BOOTMEM config SPARC32 def_bool !64BIT diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 10c15b8853ae..ce3d56299b28 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -13,7 +13,6 @@ config UML select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_DEBUG_KMEMLEAK select HAVE_MEMBLOCK - select NO_BOOTMEM select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 0c5111b206bd..3a3b40f79558 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -6,7 +6,6 @@ config UNICORE32 select ARCH_MIGHT_HAVE_PC_SERIO select DMA_DIRECT_OPS select HAVE_MEMBLOCK - select NO_BOOTMEM select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ffebfc3f43c1..0354f52ef30e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -834,9 +834,6 @@ config JAILHOUSE_GUEST endif #HYPERVISOR_GUEST -config NO_BOOTMEM - def_bool y - source "arch/x86/Kconfig.cpu" config HPET_TIMER diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index ea5d8d03e53b..df43d2e76842 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -34,7 +34,6 @@ config XTENSA select HAVE_STACKPROTECTOR select IRQ_DOMAIN select MODULES_USE_ELF_RELA - select NO_BOOTMEM select PERF_USE_VMALLOC select VIRT_TO_BUS help diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 42515195d7d8..1f005b5c1555 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -26,34 +26,6 @@ extern unsigned long 
max_pfn; */ extern unsigned long long max_possible_pfn; -#ifndef CONFIG_NO_BOOTMEM -/** - * struct bootmem_data - per-node information used by the bootmem allocator - * @node_min_pfn: the starting physical address of the node's memory - * @node_low_pfn: the end physical address of the directly addressable memory - * @node_bootmem_map: is a bitmap pointer - the bits represent all physical - * memory pages (including holes) on the node. - * @last_end_off: the offset within the page of the end of the last allocation; - * if 0, the page used is full - * @hint_idx: the PFN of the page used with the last allocation; - * together with using this with the @last_end_offset field, - * a test can be made to see if allocations can be merged - * with the page used for the last allocation rather than - * using up a full new page. - * @list: list entry in the linked list ordered by the memory addresses - */ -typedef struct bootmem_data { - unsigned long node_min_pfn; - unsigned long node_low_pfn; - void *node_bootmem_map; - unsigned long last_end_off; - unsigned long hint_idx; - struct list_head list; -} bootmem_data_t; - -extern bootmem_data_t bootmem_node_data[]; -#endif - extern unsigned long bootmem_bootmap_pages(unsigned long); extern unsigned long init_bootmem_node(pg_data_t *pgdat, @@ -125,12 +97,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long align, unsigned long goal) __malloc; -#ifdef CONFIG_NO_BOOTMEM /* We are using top down, so it is safe to use 0 here */ #define BOOTMEM_LOW_LIMIT 0 -#else -#define BOOTMEM_LOW_LIMIT __pa(MAX_DMA_ADDRESS) -#endif #ifndef ARCH_LOW_ADDRESS_LIMIT #define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL @@ -165,7 +133,7 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) -#if defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) +#if defined(CONFIG_HAVE_MEMBLOCK) /* FIXME: use MEMBLOCK_ALLOC_* variants here */ #define BOOTMEM_ALLOC_ACCESSIBLE 0 @@ -373,7 +341,7 @@ static inline void __init memblock_free_late( { free_bootmem_late(base, size); } -#endif /* defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) */ +#endif /* defined(CONFIG_HAVE_MEMBLOCK) */ extern void *alloc_large_system_hash(const char *tablename, unsigned long bucketsize, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9f0caccd5833..847705a6d0ec 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -633,9 +633,6 @@ typedef struct pglist_data { struct page_ext *node_page_ext; #endif #endif -#ifndef CONFIG_NO_BOOTMEM - struct bootmem_data *bdata; -#endif #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * Must be held any time you expect node_start_pfn, node_present_pages @@ -869,7 +866,7 @@ static inline int is_highmem_idx(enum zone_type idx) } /** - * is_highmem - helper function to quickly check if a struct zone is a + * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references * to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum. 
* @zone - pointer to struct zone variable diff --git a/mm/Kconfig b/mm/Kconfig index 02301a89089e..f7cb608f9ab2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -142,9 +142,6 @@ config HAVE_GENERIC_GUP config ARCH_DISCARD_MEMBLOCK bool -config NO_BOOTMEM - bool - config MEMORY_ISOLATION bool @@ -634,7 +631,6 @@ config MAX_STACK_SIZE_MB config DEFERRED_STRUCT_PAGE_INIT bool "Defer initialisation of struct pages to kthreads" default n - depends on NO_BOOTMEM depends on SPARSEMEM depends on !NEED_PER_CPU_KM depends on 64BIT diff --git a/mm/Makefile b/mm/Makefile index 6485d5745dd7..ea5333886593 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -42,12 +42,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ debug.o $(mmu-y) obj-y += init-mm.o - -ifdef CONFIG_NO_BOOTMEM - obj-y += nobootmem.o -else - obj-y += bootmem.o -endif +obj-y += nobootmem.o ifdef CONFIG_MMU obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o diff --git a/mm/memblock.c b/mm/memblock.c index a85315083b5a..d6897fbe021f 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1318,7 +1318,6 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } -#if defined(CONFIG_NO_BOOTMEM) /** * memblock_virt_alloc_internal - allocate boot memory block * @size: size of memory block to be allocated in bytes @@ -1524,7 +1523,6 @@ void * __init memblock_virt_alloc_try_nid( __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr); return NULL; } -#endif /** * __memblock_free_early - free boot memory block -- cgit v1.2.3-71-gd317 From aca52c39838910605b1063a2243f553aa2a02d5c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:07:44 -0700 Subject: mm: remove CONFIG_HAVE_MEMBLOCK All architectures use memblock for early memory management. There is no need for the CONFIG_HAVE_MEMBLOCK configuration option. [rppt@linux.vnet.ibm.com: of/fdt: fixup #ifdefs] Link: http://lkml.kernel.org/r/20180919103457.GA20545@rapoport-lnx [rppt@linux.vnet.ibm.com: csky: fixups after bootmem removal] Link: http://lkml.kernel.org/r/20180926112744.GC4628@rapoport-lnx [rppt@linux.vnet.ibm.com: remove stale #else and the code it protects] Link: http://lkml.kernel.org/r/1538067825-24835-1-git-send-email-rppt@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1536927045-23536-4-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michal Hocko Tested-by: Jonathan Cameron Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. 
Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/Kconfig | 1 - arch/arc/Kconfig | 1 - arch/arm/Kconfig | 1 - arch/arm64/Kconfig | 1 - arch/c6x/Kconfig | 1 - arch/csky/Kconfig | 1 - arch/csky/kernel/setup.c | 1 - arch/csky/mm/highmem.c | 4 +- arch/csky/mm/init.c | 3 +- arch/h8300/Kconfig | 1 - arch/hexagon/Kconfig | 1 - arch/ia64/Kconfig | 1 - arch/m68k/Kconfig | 1 - arch/microblaze/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/nds32/Kconfig | 1 - arch/nios2/Kconfig | 1 - arch/openrisc/Kconfig | 1 - arch/parisc/Kconfig | 1 - arch/powerpc/Kconfig | 1 - arch/riscv/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/sh/Kconfig | 1 - arch/sparc/Kconfig | 1 - arch/um/Kconfig | 1 - arch/unicore32/Kconfig | 1 - arch/x86/Kconfig | 1 - arch/xtensa/Kconfig | 1 - drivers/of/fdt.c | 21 ------- drivers/of/of_reserved_mem.c | 13 +---- drivers/staging/android/ion/Kconfig | 2 +- fs/pstore/Kconfig | 1 - include/linux/bootmem.h | 112 ------------------------------------ include/linux/memblock.h | 7 --- include/linux/mm.h | 2 +- lib/Kconfig.debug | 3 +- mm/Kconfig | 5 +- mm/Makefile | 2 +- mm/nobootmem.c | 4 -- mm/page_alloc.c | 5 +- 40 files changed, 11 insertions(+), 199 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 04de6be101bc..5b4f88363453 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -31,7 +31,6 @@ config ALPHA select ODD_RT_SIGACTION select OLD_SIGSUSPEND select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67 - select HAVE_MEMBLOCK help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 56995f356f69..c9e2a1323536 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -37,7 +37,6 @@ config ARC select HAVE_KERNEL_LZMA select HAVE_KPROBES select HAVE_KRETPROBES - select HAVE_MEMBLOCK select HAVE_MOD_ARCH_SPECIFIC select HAVE_OPROFILE select HAVE_PERF_EVENTS diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 37d4c40f405a..91be74d8df65 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -82,7 +82,6 @@ config ARM select HAVE_KERNEL_XZ select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M select HAVE_KRETPROBES if (HAVE_KPROBES) - select HAVE_MEMBLOCK select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPROFILE if (HAVE_PERF_EVENTS) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a8f36c7041ff..787d7850e064 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -139,7 +139,6 @@ config ARM64 select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP if NUMA select HAVE_NMI select HAVE_PATA_PLATFORM diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index f65a084607fd..84420109113d 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -13,7 +13,6 @@ config C6X select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK - select HAVE_MEMBLOCK select SPARSE_IRQ select IRQ_DOMAIN select OF diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 46966f576249..cb64f8dacd08 100644 --- a/arch/csky/Kconfig +++ 
b/arch/csky/Kconfig @@ -36,7 +36,6 @@ config CSKY select HAVE_C_RECORDMCOUNT select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS - select HAVE_MEMBLOCK select MAY_HAVE_SPARSE_IRQ select MODULES_USE_ELF_RELA if MODULES select OF diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index a5e3ab1d5360..dff8b89444ec 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -3,7 +3,6 @@ #include #include -#include #include #include #include diff --git a/arch/csky/mm/highmem.c b/arch/csky/mm/highmem.c index e168ac087ccb..53b1bfa4c462 100644 --- a/arch/csky/mm/highmem.c +++ b/arch/csky/mm/highmem.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -140,7 +140,7 @@ static void __init fixrange_init(unsigned long start, unsigned long end, pmd = (pmd_t *)pud; for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) { if (pmd_none(*pmd)) { - pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); set_pmd(pmd, __pmd(__pa(pte))); BUG_ON(pte != pte_offset_kernel(pmd, 0)); } diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index ce2711e050ad..dc07c078f9b8 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -47,7 +46,7 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - free_all_bootmem(); + memblock_free_all(); #ifdef CONFIG_HIGHMEM for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 5e89d40be8cd..d19c6b16cd5d 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -15,7 +15,6 @@ config H8300 select OF select OF_IRQ select OF_EARLY_FLATTREE - select HAVE_MEMBLOCK select TIMER_OF select H8300_TMR8 select HAVE_KERNEL_GZIP diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 7823f15e17b2..2b688af379e6 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -21,7 +21,6 @@ config HEXAGON select GENERIC_IRQ_SHOW select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK - select HAVE_MEMBLOCK select ARCH_DISCARD_MEMBLOCK select NEED_SG_DMA_LENGTH select NO_IOPORT_MAP diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 2bf4ef792f2c..36773def6920 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -26,7 +26,6 @@ config IA64 select HAVE_FUNCTION_TRACER select TTY select HAVE_ARCH_TRACEHOOK - select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING select ARCH_HAS_DMA_MARK_CLEAN diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 26edf2542295..1bc9f1ba759a 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -27,7 +27,6 @@ config M68K select OLD_SIGSUSPEND3 select OLD_SIGACTION select DMA_DIRECT_OPS if HAS_DMA - select HAVE_MEMBLOCK select ARCH_DISCARD_MEMBLOCK config CPU_BIG_ENDIAN diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 5c2777bb5555..effed2efd306 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -28,7 +28,6 @@ config MICROBLAZE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER - select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_OPROFILE select IRQ_DOMAIN diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 366934157632..8272ea4c7264 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -60,7 +60,6 @@ config MIPS select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KPROBES select HAVE_KRETPROBES - select HAVE_MEMBLOCK select 
HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 93c0b587ee23..7a04adacb2f0 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -29,7 +29,6 @@ config NDS32 select HANDLE_DOMAIN_IRQ select HAVE_ARCH_TRACEHOOK select HAVE_DEBUG_KMEMLEAK - select HAVE_MEMBLOCK select HAVE_REGS_AND_STACK_ACCESS_API select IRQ_DOMAIN select LOCKDEP_SUPPORT diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index b01febeb5ce3..7e95506e957a 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -23,7 +23,6 @@ config NIOS2 select SPARSE_IRQ select USB_ARCH_HAS_HCD if USB_SUPPORT select CPU_NO_EFFICIENT_FFS - select HAVE_MEMBLOCK select ARCH_DISCARD_MEMBLOCK config GENERIC_CSUM diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index cbfaf15f9d70..285f7d05c8ed 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -12,7 +12,6 @@ config OPENRISC select OF_EARLY_FLATTREE select IRQ_DOMAIN select HANDLE_DOMAIN_IRQ - select HAVE_MEMBLOCK select GPIOLIB select HAVE_ARCH_TRACEHOOK select SPARSE_IRQ diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 7418ebeeb4a4..92a339ee28b3 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -15,7 +15,6 @@ config PARISC select RTC_CLASS select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE - select HAVE_MEMBLOCK select BUG select BUILDTIME_EXTABLE_SORT select HAVE_PERF_EVENTS diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5798ffb1379b..2d51b2bd4aa1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -206,7 +206,6 @@ config PPC select HAVE_KRETPROBES select HAVE_LD_DEAD_CODE_DATA_ELIMINATION select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS - select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index f7c6b7cf124a..d86842c21710 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -28,7 +28,6 @@ config RISCV select GENERIC_STRNLEN_USER select GENERIC_SMP_IDLE_THREAD select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A - select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_CONTIGUOUS select HAVE_FUTEX_CMPXCHG if FUTEX diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c7af83a15b2d..5173366af8f3 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -163,7 +163,6 @@ config S390 select HAVE_LIVEPATCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_PHYS_MAP select HAVE_MOD_ARCH_SPECIFIC diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 7aaea9690e18..f82a4da7adf3 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -9,7 +9,6 @@ config SUPERH select CLKDEV_LOOKUP select DMA_DIRECT_OPS select HAVE_IDE if HAS_IOPORT_MAP - select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select ARCH_DISCARD_MEMBLOCK select HAVE_OPROFILE diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 98c0996f3c2c..490b2c95c212 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -45,7 +45,6 @@ config SPARC select LOCKDEP_SMALL if LOCKDEP select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH - select HAVE_MEMBLOCK config SPARC32 def_bool !64BIT diff --git a/arch/um/Kconfig b/arch/um/Kconfig index ce3d56299b28..6b9938919f0b 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -12,7 +12,6 @@ config UML select HAVE_UID16 select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_DEBUG_KMEMLEAK - select HAVE_MEMBLOCK select 
GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 3a3b40f79558..a4c05159dca5 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -5,7 +5,6 @@ config UNICORE32 select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select DMA_DIRECT_OPS - select HAVE_MEMBLOCK select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0354f52ef30e..c51c989c19c0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -169,7 +169,6 @@ config X86 select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 - select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MIXED_BREAKPOINTS_REGS select HAVE_MOD_ARCH_SPECIFIC diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index df43d2e76842..60c141af222b 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -28,7 +28,6 @@ config XTENSA select HAVE_FUTEX_CMPXCHG if !MMU select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_MEMBLOCK select HAVE_OPROFILE select HAVE_PERF_EVENTS select HAVE_STACKPROTECTOR diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 76c83c1ffeda..4f915cea6f75 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1115,7 +1115,6 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, return 1; } -#ifdef CONFIG_HAVE_MEMBLOCK #ifndef MIN_MEMBLOCK_ADDR #define MIN_MEMBLOCK_ADDR __pa(PAGE_OFFSET) #endif @@ -1178,26 +1177,6 @@ int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base, return memblock_reserve(base, size); } -#else -void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) -{ - WARN_ON(1); -} - -int __init __weak early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size) -{ - return -ENOSYS; -} - -int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base, - phys_addr_t size, bool nomap) -{ - pr_err("Reserved memory not supported, ignoring range %pa - %pa%s\n", - &base, &size, nomap ? " (nomap)" : ""); - return -ENOSYS; -} -#endif - static void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) { return memblock_virt_alloc(size, align); diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 895c83e0c7b6..d6255c276a41 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -20,13 +20,12 @@ #include #include #include +#include #define MAX_RESERVED_REGIONS 32 static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS]; static int reserved_mem_count; -#if defined(CONFIG_HAVE_MEMBLOCK) -#include int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap, phys_addr_t *res_base) @@ -54,16 +53,6 @@ int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, return memblock_remove(base, size); return 0; } -#else -int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, - phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap, - phys_addr_t *res_base) -{ - pr_err("Reserved memory not supported, ignoring region 0x%llx%s\n", - size, nomap ? 
" (nomap)" : ""); - return -ENOSYS; -} -#endif /** * res_mem_save_node() - save fdt node for second pass initialization diff --git a/drivers/staging/android/ion/Kconfig b/drivers/staging/android/ion/Kconfig index c16dd16afe6a..0fdda6f62953 100644 --- a/drivers/staging/android/ion/Kconfig +++ b/drivers/staging/android/ion/Kconfig @@ -1,6 +1,6 @@ menuconfig ION bool "Ion Memory Manager" - depends on HAVE_MEMBLOCK && HAS_DMA && MMU + depends on HAS_DMA && MMU select GENERIC_ALLOCATOR select DMA_SHARED_BUFFER help diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 503086f7f7c1..0d19d191ae70 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -141,7 +141,6 @@ config PSTORE_RAM tristate "Log panic/oops to a RAM buffer" depends on PSTORE depends on HAS_IOMEM - depends on HAVE_MEMBLOCK select REED_SOLOMON select REED_SOLOMON_ENC8 select REED_SOLOMON_DEC8 diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 1f005b5c1555..ee61ac355104 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -132,9 +132,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) - -#if defined(CONFIG_HAVE_MEMBLOCK) - /* FIXME: use MEMBLOCK_ALLOC_* variants here */ #define BOOTMEM_ALLOC_ACCESSIBLE 0 #define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0) @@ -234,115 +231,6 @@ static inline void __init memblock_free_late( __memblock_free_late(base, size); } -#else - -#define BOOTMEM_ALLOC_ACCESSIBLE 0 - - -/* Fall back to all the existing bootmem APIs */ -static inline void * __init memblock_virt_alloc( - phys_addr_t size, phys_addr_t align) -{ - if (!align) - align = SMP_CACHE_BYTES; - return __alloc_bootmem(size, align, BOOTMEM_LOW_LIMIT); -} - -static inline void * __init memblock_virt_alloc_raw( - phys_addr_t size, phys_addr_t align) -{ - if (!align) - align = SMP_CACHE_BYTES; - return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT); -} - -static inline void * __init memblock_virt_alloc_nopanic( - phys_addr_t size, phys_addr_t align) -{ - if (!align) - align = SMP_CACHE_BYTES; - return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT); -} - -static inline void * __init memblock_virt_alloc_low( - phys_addr_t size, phys_addr_t align) -{ - if (!align) - align = SMP_CACHE_BYTES; - return __alloc_bootmem_low(size, align, 0); -} - -static inline void * __init memblock_virt_alloc_low_nopanic( - phys_addr_t size, phys_addr_t align) -{ - if (!align) - align = SMP_CACHE_BYTES; - return __alloc_bootmem_low_nopanic(size, align, 0); -} - -static inline void * __init memblock_virt_alloc_from_nopanic( - phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) -{ - return __alloc_bootmem_nopanic(size, align, min_addr); -} - -static inline void * __init memblock_virt_alloc_node( - phys_addr_t size, int nid) -{ - return __alloc_bootmem_node(NODE_DATA(nid), size, SMP_CACHE_BYTES, - BOOTMEM_LOW_LIMIT); -} - -static inline void * __init memblock_virt_alloc_node_nopanic( - phys_addr_t size, int nid) -{ - return __alloc_bootmem_node_nopanic(NODE_DATA(nid), size, - SMP_CACHE_BYTES, - BOOTMEM_LOW_LIMIT); -} - -static inline void * __init memblock_virt_alloc_try_nid(phys_addr_t size, - phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid) -{ - return __alloc_bootmem_node_high(NODE_DATA(nid), size, align, - min_addr); -} - -static inline void * __init memblock_virt_alloc_try_nid_raw( - phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, phys_addr_t max_addr, int nid) 
-{ - return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align, - min_addr, max_addr); -} - -static inline void * __init memblock_virt_alloc_try_nid_nopanic( - phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, phys_addr_t max_addr, int nid) -{ - return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align, - min_addr, max_addr); -} - -static inline void __init memblock_free_early( - phys_addr_t base, phys_addr_t size) -{ - free_bootmem(base, size); -} - -static inline void __init memblock_free_early_nid( - phys_addr_t base, phys_addr_t size, int nid) -{ - free_bootmem_node(NODE_DATA(nid), base, size); -} - -static inline void __init memblock_free_late( - phys_addr_t base, phys_addr_t size) -{ - free_bootmem_late(base, size); -} -#endif /* defined(CONFIG_HAVE_MEMBLOCK) */ - extern void *alloc_large_system_hash(const char *tablename, unsigned long bucketsize, unsigned long numentries, diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 2acdd046df2d..224d27363cca 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -2,7 +2,6 @@ #define _LINUX_MEMBLOCK_H #ifdef __KERNEL__ -#ifdef CONFIG_HAVE_MEMBLOCK /* * Logical memory blocks. * @@ -440,12 +439,6 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) { } #endif -#else -static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) -{ - return 0; -} -#endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 1e52b8fd1685..fcf9cc9d535f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2163,7 +2163,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn, struct mminit_pfnnid_cache *state); #endif -#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP) +#if !defined(CONFIG_FLAT_NODE_MEM_MAP) void zero_resv_unavail(void); #else static inline void zero_resv_unavail(void) {} diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e0ba05e6f6bd..1af29b8224fd 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1292,7 +1292,7 @@ config DEBUG_KOBJECT depends on DEBUG_KERNEL help If you say Y here, some extra kobject debugging messages will be sent - to the syslog. + to the syslog. config DEBUG_KOBJECT_RELEASE bool "kobject release debugging" @@ -1980,7 +1980,6 @@ endif # RUNTIME_TESTING_MENU config MEMTEST bool "Memtest" - depends on HAVE_MEMBLOCK ---help--- This option adds a kernel parameter 'memtest', which allows memtest to be set. diff --git a/mm/Kconfig b/mm/Kconfig index f7cb608f9ab2..d85e39da47ae 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -127,9 +127,6 @@ config SPARSEMEM_VMEMMAP pfn_to_page and page_to_pfn operations. This is the most efficient option when sufficient kernel resources are available. 
-config HAVE_MEMBLOCK - bool - config HAVE_MEMBLOCK_NODE_MAP bool @@ -478,7 +475,7 @@ config FRONTSWAP config CMA bool "Contiguous Memory Allocator" - depends on HAVE_MEMBLOCK && MMU + depends on MMU select MIGRATION select MEMORY_ISOLATION help diff --git a/mm/Makefile b/mm/Makefile index ea5333886593..ca3c844d7a56 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -43,11 +43,11 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ obj-y += init-mm.o obj-y += nobootmem.o +obj-y += memblock.o ifdef CONFIG_MMU obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o endif -obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o swap_slots.o obj-$(CONFIG_FRONTSWAP) += frontswap.o diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 439af3b765a7..d4d0cd474087 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -23,10 +23,6 @@ #include "internal.h" -#ifndef CONFIG_HAVE_MEMBLOCK -#error CONFIG_HAVE_MEMBLOCK not defined -#endif - #ifndef CONFIG_NEED_MULTIPLE_NODES struct pglist_data __refdata contig_page_data; EXPORT_SYMBOL(contig_page_data); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 863d46da6586..59d171f84445 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6508,8 +6508,7 @@ void __init free_area_init_node(int nid, unsigned long *zones_size, free_area_init_core(pgdat); } -#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP) - +#if !defined(CONFIG_FLAT_NODE_MEM_MAP) /* * Zero all valid struct pages in range [spfn, epfn), return number of struct * pages zeroed @@ -6569,7 +6568,7 @@ void __init zero_resv_unavail(void) if (pgcnt) pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt); } -#endif /* CONFIG_HAVE_MEMBLOCK && !CONFIG_FLAT_NODE_MEM_MAP */ +#endif /* !CONFIG_FLAT_NODE_MEM_MAP */ #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP -- cgit v1.2.3-71-gd317 From 355c45affca7114ab510e296a5b7012943aeea17 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:07:50 -0700 Subject: mm: remove bootmem allocator implementation. All architectures have been converted to use MEMBLOCK + NO_BOOTMEM. The bootmem allocator implementation can be removed. Link: http://lkml.kernel.org/r/1536927045-23536-5-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. 
Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 16 - mm/bootmem.c | 811 ------------------------------------------------ 2 files changed, 827 deletions(-) delete mode 100644 mm/bootmem.c (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index ee61ac355104..fce6278a1724 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -26,14 +26,6 @@ extern unsigned long max_pfn; */ extern unsigned long long max_possible_pfn; -extern unsigned long bootmem_bootmap_pages(unsigned long); - -extern unsigned long init_bootmem_node(pg_data_t *pgdat, - unsigned long freepfn, - unsigned long startpfn, - unsigned long endpfn); -extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); - extern unsigned long free_all_bootmem(void); extern void reset_node_managed_pages(pg_data_t *pgdat); extern void reset_all_zones_managed_pages(void); @@ -55,14 +47,6 @@ extern void free_bootmem_late(unsigned long physaddr, unsigned long size); #define BOOTMEM_DEFAULT 0 #define BOOTMEM_EXCLUSIVE (1<<0) -extern int reserve_bootmem(unsigned long addr, - unsigned long size, - int flags); -extern int reserve_bootmem_node(pg_data_t *pgdat, - unsigned long physaddr, - unsigned long size, - int flags); - extern void *__alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal); diff --git a/mm/bootmem.c b/mm/bootmem.c deleted file mode 100644 index 97db0e8e362b..000000000000 --- a/mm/bootmem.c +++ /dev/null @@ -1,811 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * bootmem - A boot-time physical memory allocator and configurator - * - * Copyright (C) 1999 Ingo Molnar - * 1999 Kanoj Sarcar, SGI - * 2008 Johannes Weiner - * - * Access to this subsystem has to be serialized externally (which is true - * for the boot process anyway). - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "internal.h" - -/** - * DOC: bootmem overview - * - * Bootmem is a boot-time physical memory allocator and configurator. - * - * It is used early in the boot process before the page allocator is - * set up. - * - * Bootmem is based on the most basic of allocators, a First Fit - * allocator which uses a bitmap to represent memory. If a bit is 1, - * the page is allocated and 0 if unallocated. To satisfy allocations - * of sizes smaller than a page, the allocator records the Page Frame - * Number (PFN) of the last allocation and the offset the allocation - * ended at. Subsequent small allocations are merged together and - * stored on the same page. - * - * The information used by the bootmem allocator is represented by - * :c:type:`struct bootmem_data`. An array to hold up to %MAX_NUMNODES - * such structures is statically allocated and then it is discarded - * when the system initialization completes. Each entry in this array - * corresponds to a node with memory. For UMA systems only entry 0 is - * used. - * - * The bootmem allocator is initialized during early architecture - * specific setup. 
Each architecture is required to supply a - * :c:func:`setup_arch` function which, among other tasks, is - * responsible for acquiring the necessary parameters to initialise - * the boot memory allocator. These parameters define limits of usable - * physical memory: - * - * * @min_low_pfn - the lowest PFN that is available in the system - * * @max_low_pfn - the highest PFN that may be addressed by low - * memory (%ZONE_NORMAL) - * * @max_pfn - the last PFN available to the system. - * - * After those limits are determined, the :c:func:`init_bootmem` or - * :c:func:`init_bootmem_node` function should be called to initialize - * the bootmem allocator. The UMA case should use the `init_bootmem` - * function. It will initialize ``contig_page_data`` structure that - * represents the only memory node in the system. In the NUMA case the - * `init_bootmem_node` function should be called to initialize the - * bootmem allocator for each node. - * - * Once the allocator is set up, it is possible to use either single - * node or NUMA variant of the allocation APIs. - */ - -#ifndef CONFIG_NEED_MULTIPLE_NODES -struct pglist_data __refdata contig_page_data = { - .bdata = &bootmem_node_data[0] -}; -EXPORT_SYMBOL(contig_page_data); -#endif - -unsigned long max_low_pfn; -unsigned long min_low_pfn; -unsigned long max_pfn; -unsigned long long max_possible_pfn; - -bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; - -static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); - -static int bootmem_debug; - -static int __init bootmem_debug_setup(char *buf) -{ - bootmem_debug = 1; - return 0; -} -early_param("bootmem_debug", bootmem_debug_setup); - -#define bdebug(fmt, args...) ({ \ - if (unlikely(bootmem_debug)) \ - pr_info("bootmem::%s " fmt, \ - __func__, ## args); \ -}) - -static unsigned long __init bootmap_bytes(unsigned long pages) -{ - unsigned long bytes = DIV_ROUND_UP(pages, BITS_PER_BYTE); - - return ALIGN(bytes, sizeof(long)); -} - -/** - * bootmem_bootmap_pages - calculate bitmap size in pages - * @pages: number of pages the bitmap has to represent - * - * Return: the number of pages needed to hold the bitmap. - */ -unsigned long __init bootmem_bootmap_pages(unsigned long pages) -{ - unsigned long bytes = bootmap_bytes(pages); - - return PAGE_ALIGN(bytes) >> PAGE_SHIFT; -} - -/* - * link bdata in order - */ -static void __init link_bootmem(bootmem_data_t *bdata) -{ - bootmem_data_t *ent; - - list_for_each_entry(ent, &bdata_list, list) { - if (bdata->node_min_pfn < ent->node_min_pfn) { - list_add_tail(&bdata->list, &ent->list); - return; - } - } - - list_add_tail(&bdata->list, &bdata_list); -} - -/* - * Called once to set up the allocator itself. - */ -static unsigned long __init init_bootmem_core(bootmem_data_t *bdata, - unsigned long mapstart, unsigned long start, unsigned long end) -{ - unsigned long mapsize; - - mminit_validate_memmodel_limits(&start, &end); - bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart)); - bdata->node_min_pfn = start; - bdata->node_low_pfn = end; - link_bootmem(bdata); - - /* - * Initially all pages are reserved - setup_arch() has to - * register free RAM areas explicitly. 
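A hypothetical UMA bring-up showing that register-free-RAM-explicitly sequence, using only the bootmem calls documented above; all PFN values are invented and error handling is omitted:

	/* Sketch only: values below are made up for illustration. */
	unsigned long bitmap_pfn = 0x200;	/* page where the bitmap will live */
	unsigned long end_pfn = 0x8000;		/* pages managed by bootmem */

	init_bootmem(bitmap_pfn, end_pfn);	/* every page starts out reserved */

	/* Register the RAM that is actually usable... */
	free_bootmem(PFN_PHYS(0x200), PFN_PHYS(0x7e00));

	/* ...then take back what must survive, e.g. the bitmap itself. */
	reserve_bootmem(PFN_PHYS(bitmap_pfn),
			bootmem_bootmap_pages(end_pfn) << PAGE_SHIFT,
			BOOTMEM_DEFAULT);
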
- */ - mapsize = bootmap_bytes(end - start); - memset(bdata->node_bootmem_map, 0xff, mapsize); - - bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n", - bdata - bootmem_node_data, start, mapstart, end, mapsize); - - return mapsize; -} - -/** - * init_bootmem_node - register a node as boot memory - * @pgdat: node to register - * @freepfn: pfn where the bitmap for this node is to be placed - * @startpfn: first pfn on the node - * @endpfn: first pfn after the node - * - * Return: the number of bytes needed to hold the bitmap for this node. - */ -unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, - unsigned long startpfn, unsigned long endpfn) -{ - return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn); -} - -/** - * init_bootmem - register boot memory - * @start: pfn where the bitmap is to be placed - * @pages: number of available physical pages - * - * Return: the number of bytes needed to hold the bitmap. - */ -unsigned long __init init_bootmem(unsigned long start, unsigned long pages) -{ - max_low_pfn = pages; - min_low_pfn = start; - return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); -} - -void __init free_bootmem_late(unsigned long physaddr, unsigned long size) -{ - unsigned long cursor, end; - - kmemleak_free_part_phys(physaddr, size); - - cursor = PFN_UP(physaddr); - end = PFN_DOWN(physaddr + size); - - for (; cursor < end; cursor++) { - __free_pages_bootmem(pfn_to_page(cursor), cursor, 0); - totalram_pages++; - } -} - -static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) -{ - struct page *page; - unsigned long *map, start, end, pages, cur, count = 0; - - if (!bdata->node_bootmem_map) - return 0; - - map = bdata->node_bootmem_map; - start = bdata->node_min_pfn; - end = bdata->node_low_pfn; - - bdebug("nid=%td start=%lx end=%lx\n", - bdata - bootmem_node_data, start, end); - - while (start < end) { - unsigned long idx, vec; - unsigned shift; - - idx = start - bdata->node_min_pfn; - shift = idx & (BITS_PER_LONG - 1); - /* - * vec holds at most BITS_PER_LONG map bits, - * bit 0 corresponds to start. - */ - vec = ~map[idx / BITS_PER_LONG]; - - if (shift) { - vec >>= shift; - if (end - start >= BITS_PER_LONG) - vec |= ~map[idx / BITS_PER_LONG + 1] << - (BITS_PER_LONG - shift); - } - /* - * If we have a properly aligned and fully unreserved - * BITS_PER_LONG block of pages in front of us, free - * it in one go. 
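The 'vec' assembly a few lines up is the subtle part of this loop. A worked example, with invented 8-bit words standing in for BITS_PER_LONG (map bit 1 = reserved, so ~word has bit 1 = free, and bit 0 of vec corresponds to start):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* pages 0-3 and 8-9 free; start sits 2 pages into word 0 */
	uint8_t map[2] = { 0xF0, 0xFC };
	unsigned shift = 2;
	uint8_t vec = (uint8_t)~map[0] >> shift;

	/* borrow bits from the next word so vec stays start-relative */
	vec |= (uint8_t)((uint8_t)~map[1] << (8 - shift));

	/* free at start+0,1 (pages 2,3) and start+6,7 (pages 8,9) */
	assert(vec == 0xC3);
	return 0;
}
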
- */ - if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) { - int order = ilog2(BITS_PER_LONG); - - __free_pages_bootmem(pfn_to_page(start), start, order); - count += BITS_PER_LONG; - start += BITS_PER_LONG; - } else { - cur = start; - - start = ALIGN(start + 1, BITS_PER_LONG); - while (vec && cur != start) { - if (vec & 1) { - page = pfn_to_page(cur); - __free_pages_bootmem(page, cur, 0); - count++; - } - vec >>= 1; - ++cur; - } - } - } - - cur = bdata->node_min_pfn; - page = virt_to_page(bdata->node_bootmem_map); - pages = bdata->node_low_pfn - bdata->node_min_pfn; - pages = bootmem_bootmap_pages(pages); - count += pages; - while (pages--) - __free_pages_bootmem(page++, cur++, 0); - bdata->node_bootmem_map = NULL; - - bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count); - - return count; -} - -static int reset_managed_pages_done __initdata; - -void reset_node_managed_pages(pg_data_t *pgdat) -{ - struct zone *z; - - for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) - z->managed_pages = 0; -} - -void __init reset_all_zones_managed_pages(void) -{ - struct pglist_data *pgdat; - - if (reset_managed_pages_done) - return; - - for_each_online_pgdat(pgdat) - reset_node_managed_pages(pgdat); - - reset_managed_pages_done = 1; -} - -unsigned long __init free_all_bootmem(void) -{ - unsigned long total_pages = 0; - bootmem_data_t *bdata; - - reset_all_zones_managed_pages(); - - list_for_each_entry(bdata, &bdata_list, list) - total_pages += free_all_bootmem_core(bdata); - - totalram_pages += total_pages; - - return total_pages; -} - -static void __init __free(bootmem_data_t *bdata, - unsigned long sidx, unsigned long eidx) -{ - unsigned long idx; - - bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data, - sidx + bdata->node_min_pfn, - eidx + bdata->node_min_pfn); - - if (WARN_ON(bdata->node_bootmem_map == NULL)) - return; - - if (bdata->hint_idx > sidx) - bdata->hint_idx = sidx; - - for (idx = sidx; idx < eidx; idx++) - if (!test_and_clear_bit(idx, bdata->node_bootmem_map)) - BUG(); -} - -static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx, - unsigned long eidx, int flags) -{ - unsigned long idx; - int exclusive = flags & BOOTMEM_EXCLUSIVE; - - bdebug("nid=%td start=%lx end=%lx flags=%x\n", - bdata - bootmem_node_data, - sidx + bdata->node_min_pfn, - eidx + bdata->node_min_pfn, - flags); - - if (WARN_ON(bdata->node_bootmem_map == NULL)) - return 0; - - for (idx = sidx; idx < eidx; idx++) - if (test_and_set_bit(idx, bdata->node_bootmem_map)) { - if (exclusive) { - __free(bdata, sidx, idx); - return -EBUSY; - } - bdebug("silent double reserve of PFN %lx\n", - idx + bdata->node_min_pfn); - } - return 0; -} - -static int __init mark_bootmem_node(bootmem_data_t *bdata, - unsigned long start, unsigned long end, - int reserve, int flags) -{ - unsigned long sidx, eidx; - - bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n", - bdata - bootmem_node_data, start, end, reserve, flags); - - BUG_ON(start < bdata->node_min_pfn); - BUG_ON(end > bdata->node_low_pfn); - - sidx = start - bdata->node_min_pfn; - eidx = end - bdata->node_min_pfn; - - if (reserve) - return __reserve(bdata, sidx, eidx, flags); - else - __free(bdata, sidx, eidx); - return 0; -} - -static int __init mark_bootmem(unsigned long start, unsigned long end, - int reserve, int flags) -{ - unsigned long pos; - bootmem_data_t *bdata; - - pos = start; - list_for_each_entry(bdata, &bdata_list, list) { - int err; - unsigned long max; - - if (pos < bdata->node_min_pfn || - pos >= 
bdata->node_low_pfn) { - BUG_ON(pos != start); - continue; - } - - max = min(bdata->node_low_pfn, end); - - err = mark_bootmem_node(bdata, pos, max, reserve, flags); - if (reserve && err) { - mark_bootmem(start, pos, 0, 0); - return err; - } - - if (max == end) - return 0; - pos = bdata->node_low_pfn; - } - BUG(); -} - -void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, - unsigned long size) -{ - unsigned long start, end; - - kmemleak_free_part_phys(physaddr, size); - - start = PFN_UP(physaddr); - end = PFN_DOWN(physaddr + size); - - mark_bootmem_node(pgdat->bdata, start, end, 0, 0); -} - -void __init free_bootmem(unsigned long physaddr, unsigned long size) -{ - unsigned long start, end; - - kmemleak_free_part_phys(physaddr, size); - - start = PFN_UP(physaddr); - end = PFN_DOWN(physaddr + size); - - mark_bootmem(start, end, 0, 0); -} - -/** - * reserve_bootmem_node - mark a page range as reserved - * @pgdat: node the range resides on - * @physaddr: starting address of the range - * @size: size of the range in bytes - * @flags: reservation flags (see linux/bootmem.h) - * - * Partial pages will be reserved. - * - * The range must reside completely on the specified node. - * - * Return: 0 on success, -errno on failure. - */ -int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, - unsigned long size, int flags) -{ - unsigned long start, end; - - start = PFN_DOWN(physaddr); - end = PFN_UP(physaddr + size); - - return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); -} - -/** - * reserve_bootmem - mark a page range as reserved - * @addr: starting address of the range - * @size: size of the range in bytes - * @flags: reservation flags (see linux/bootmem.h) - * - * Partial pages will be reserved. - * - * The range must be contiguous but may span node boundaries. - * - * Return: 0 on success, -errno on failure. - */ -int __init reserve_bootmem(unsigned long addr, unsigned long size, - int flags) -{ - unsigned long start, end; - - start = PFN_DOWN(addr); - end = PFN_UP(addr + size); - - return mark_bootmem(start, end, 1, flags); -} - -static unsigned long __init align_idx(struct bootmem_data *bdata, - unsigned long idx, unsigned long step) -{ - unsigned long base = bdata->node_min_pfn; - - /* - * Align the index with respect to the node start so that the - * combination of both satisfies the requested alignment. 
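A numeric check of that claim, using the ALIGN(base + idx, step) - base formula that follows; the values are invented (node starts at PFN 5, 4-page alignment, candidate index 2):

#include <assert.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))	/* a is a power of two */

int main(void)
{
	unsigned long base = 5, idx = 2, step = 4;	/* invented values */
	unsigned long aligned = ALIGN(base + idx, step) - base;

	assert(aligned == 3);			/* index 3 == absolute PFN 8 */
	assert((base + aligned) % step == 0);	/* absolute PFN is aligned */
	return 0;
}
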
- */ - - return ALIGN(base + idx, step) - base; -} - -static unsigned long __init align_off(struct bootmem_data *bdata, - unsigned long off, unsigned long align) -{ - unsigned long base = PFN_PHYS(bdata->node_min_pfn); - - /* Same as align_idx for byte offsets */ - - return ALIGN(base + off, align) - base; -} - -static void * __init alloc_bootmem_bdata(struct bootmem_data *bdata, - unsigned long size, unsigned long align, - unsigned long goal, unsigned long limit) -{ - unsigned long fallback = 0; - unsigned long min, max, start, sidx, midx, step; - - bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n", - bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT, - align, goal, limit); - - BUG_ON(!size); - BUG_ON(align & (align - 1)); - BUG_ON(limit && goal + size > limit); - - if (!bdata->node_bootmem_map) - return NULL; - - min = bdata->node_min_pfn; - max = bdata->node_low_pfn; - - goal >>= PAGE_SHIFT; - limit >>= PAGE_SHIFT; - - if (limit && max > limit) - max = limit; - if (max <= min) - return NULL; - - step = max(align >> PAGE_SHIFT, 1UL); - - if (goal && min < goal && goal < max) - start = ALIGN(goal, step); - else - start = ALIGN(min, step); - - sidx = start - bdata->node_min_pfn; - midx = max - bdata->node_min_pfn; - - if (bdata->hint_idx > sidx) { - /* - * Handle the valid case of sidx being zero and still - * catch the fallback below. - */ - fallback = sidx + 1; - sidx = align_idx(bdata, bdata->hint_idx, step); - } - - while (1) { - int merge; - void *region; - unsigned long eidx, i, start_off, end_off; -find_block: - sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx); - sidx = align_idx(bdata, sidx, step); - eidx = sidx + PFN_UP(size); - - if (sidx >= midx || eidx > midx) - break; - - for (i = sidx; i < eidx; i++) - if (test_bit(i, bdata->node_bootmem_map)) { - sidx = align_idx(bdata, i, step); - if (sidx == i) - sidx += step; - goto find_block; - } - - if (bdata->last_end_off & (PAGE_SIZE - 1) && - PFN_DOWN(bdata->last_end_off) + 1 == sidx) - start_off = align_off(bdata, bdata->last_end_off, align); - else - start_off = PFN_PHYS(sidx); - - merge = PFN_DOWN(start_off) < sidx; - end_off = start_off + size; - - bdata->last_end_off = end_off; - bdata->hint_idx = PFN_UP(end_off); - - /* - * Reserve the area now: - */ - if (__reserve(bdata, PFN_DOWN(start_off) + merge, - PFN_UP(end_off), BOOTMEM_EXCLUSIVE)) - BUG(); - - region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + - start_off); - memset(region, 0, size); - /* - * The min_count is set to 0 so that bootmem allocated blocks - * are never reported as leaks. 
- */ - kmemleak_alloc(region, size, 0, 0); - return region; - } - - if (fallback) { - sidx = align_idx(bdata, fallback - 1, step); - fallback = 0; - goto find_block; - } - - return NULL; -} - -static void * __init alloc_bootmem_core(unsigned long size, - unsigned long align, - unsigned long goal, - unsigned long limit) -{ - bootmem_data_t *bdata; - void *region; - - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc(size, GFP_NOWAIT); - - list_for_each_entry(bdata, &bdata_list, list) { - if (goal && bdata->node_low_pfn <= PFN_DOWN(goal)) - continue; - if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) - break; - - region = alloc_bootmem_bdata(bdata, size, align, goal, limit); - if (region) - return region; - } - - return NULL; -} - -static void * __init ___alloc_bootmem_nopanic(unsigned long size, - unsigned long align, - unsigned long goal, - unsigned long limit) -{ - void *ptr; - -restart: - ptr = alloc_bootmem_core(size, align, goal, limit); - if (ptr) - return ptr; - if (goal) { - goal = 0; - goto restart; - } - - return NULL; -} - -void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, - unsigned long goal) -{ - unsigned long limit = 0; - - return ___alloc_bootmem_nopanic(size, align, goal, limit); -} - -static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, - unsigned long goal, unsigned long limit) -{ - void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit); - - if (mem) - return mem; - /* - * Whoops, we cannot satisfy the allocation request. - */ - pr_alert("bootmem alloc of %lu bytes failed!\n", size); - panic("Out of memory"); - return NULL; -} - -void * __init __alloc_bootmem(unsigned long size, unsigned long align, - unsigned long goal) -{ - unsigned long limit = 0; - - return ___alloc_bootmem(size, align, goal, limit); -} - -void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, - unsigned long size, unsigned long align, - unsigned long goal, unsigned long limit) -{ - void *ptr; - - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); -again: - - /* do not panic in alloc_bootmem_bdata() */ - if (limit && goal + size > limit) - limit = 0; - - ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit); - if (ptr) - return ptr; - - ptr = alloc_bootmem_core(size, align, goal, limit); - if (ptr) - return ptr; - - if (goal) { - goal = 0; - goto again; - } - - return NULL; -} - -void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal) -{ - return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0); -} - -void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal, - unsigned long limit) -{ - void *ptr; - - ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0); - if (ptr) - return ptr; - - pr_alert("bootmem alloc of %lu bytes failed!\n", size); - panic("Out of memory"); - return NULL; -} - -void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal) -{ - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - - return ___alloc_bootmem_node(pgdat, size, align, goal, 0); -} - -void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal) -{ -#ifdef MAX_DMA32_PFN - unsigned long end_pfn; - - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, 
pgdat->node_id); - - /* update goal according ...MAX_DMA32_PFN */ - end_pfn = pgdat_end_pfn(pgdat); - - if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) && - (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) { - void *ptr; - unsigned long new_goal; - - new_goal = MAX_DMA32_PFN << PAGE_SHIFT; - ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, - new_goal, 0); - if (ptr) - return ptr; - } -#endif - - return __alloc_bootmem_node(pgdat, size, align, goal); - -} - -void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, - unsigned long goal) -{ - return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); -} - -void * __init __alloc_bootmem_low_nopanic(unsigned long size, - unsigned long align, - unsigned long goal) -{ - return ___alloc_bootmem_nopanic(size, align, goal, - ARCH_LOW_ADDRESS_LIMIT); -} - -void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal) -{ - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - - return ___alloc_bootmem_node(pgdat, size, align, - goal, ARCH_LOW_ADDRESS_LIMIT); -} -- cgit v1.2.3-71-gd317 From b146ada221c178a384fee2a8e2e5b2e8a04476b1 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:07:54 -0700 Subject: mm: nobootmem: remove dead code Several bootmem functions and macros are not used. Remove them. Link: http://lkml.kernel.org/r/1536927045-23536-6-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 26 -------------------------- mm/nobootmem.c | 35 ----------------------------------- 2 files changed, 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index fce6278a1724..b74bafd110b9 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -36,17 +36,6 @@ extern void free_bootmem_node(pg_data_t *pgdat, extern void free_bootmem(unsigned long physaddr, unsigned long size); extern void free_bootmem_late(unsigned long physaddr, unsigned long size); -/* - * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE, - * the architecture-specific code should honor this). - * - * If flags is BOOTMEM_DEFAULT, then the return value is always 0 (success). - * If flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the memory - * already was reserved. 
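In practice this let a caller turn double-booking into a detectable error rather than a silent no-op; crash-kernel reservation was a typical user of the exclusive mode. A minimal sketch, kernel context assumed and the region invented:

	/* Illustrative only: the base and size are made up. */
	unsigned long crash_base = 0x2000000, crash_size = 0x1000000;

	if (reserve_bootmem(crash_base, crash_size, BOOTMEM_EXCLUSIVE) < 0) {
		pr_warn("crashkernel: memory is already in use\n");
		return;
	}
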
- */ -#define BOOTMEM_DEFAULT 0 -#define BOOTMEM_EXCLUSIVE (1<<0) - extern void *__alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal); @@ -73,13 +62,6 @@ void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat, extern void *__alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) __malloc; -void *__alloc_bootmem_low_nopanic(unsigned long size, - unsigned long align, - unsigned long goal) __malloc; -extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal) __malloc; /* We are using top down, so it is safe to use 0 here */ #define BOOTMEM_LOW_LIMIT 0 @@ -92,8 +74,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_align(x, align) \ __alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT) -#define alloc_bootmem_nopanic(x) \ - __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_pages(x) \ __alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_pages_nopanic(x) \ @@ -104,17 +84,11 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) -#define alloc_bootmem_pages_node_nopanic(pgdat, x) \ - __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) #define alloc_bootmem_low(x) \ __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) -#define alloc_bootmem_low_pages_nopanic(x) \ - __alloc_bootmem_low_nopanic(x, PAGE_SIZE, 0) #define alloc_bootmem_low_pages(x) \ __alloc_bootmem_low(x, PAGE_SIZE, 0) -#define alloc_bootmem_low_pages_node(pgdat, x) \ - __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) /* FIXME: use MEMBLOCK_ALLOC_* variants here */ #define BOOTMEM_ALLOC_ACCESSIBLE 0 diff --git a/mm/nobootmem.c b/mm/nobootmem.c index d4d0cd474087..44ce7de1be8f 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -404,38 +404,3 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, { return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); } - -void * __init __alloc_bootmem_low_nopanic(unsigned long size, - unsigned long align, - unsigned long goal) -{ - return ___alloc_bootmem_nopanic(size, align, goal, - ARCH_LOW_ADDRESS_LIMIT); -} - -/** - * __alloc_bootmem_low_node - allocate low boot memory from a specific node - * @pgdat: node to allocate from - * @size: size of the request in bytes - * @align: alignment of the region - * @goal: preferred starting address of the region - * - * The goal is dropped if it can not be satisfied and the allocation will - * fall back to memory below @goal. - * - * Allocation may fall back to any node in the system if the specified node - * can not hold the requested memory. - * - * The function panics if the request can not be satisfied. - * - * Return: address of the allocated region. 
- */ -void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal) -{ - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - - return ___alloc_bootmem_node(pgdat, size, align, - goal, ARCH_LOW_ADDRESS_LIMIT); -} -- cgit v1.2.3-71-gd317 From 9a8dd708d547268c899f1cb443c49bd4d8c84eb3 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:07:59 -0700 Subject: memblock: rename memblock_alloc{_nid,_try_nid} to memblock_phys_alloc* Make it explicit that the caller gets a physical address rather than a virtual one. This will also allow using memblock_alloc prefix for memblock allocations returning virtual address, which is done in the following patches. The conversion is done using the following semantic patch: @@ expression e1, e2, e3; @@ ( - memblock_alloc(e1, e2) + memblock_phys_alloc(e1, e2) | - memblock_alloc_nid(e1, e2, e3) + memblock_phys_alloc_nid(e1, e2, e3) | - memblock_alloc_try_nid(e1, e2, e3) + memblock_phys_alloc_try_nid(e1, e2, e3) ) Link: http://lkml.kernel.org/r/1536927045-23536-7-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/mmu.c | 2 +- arch/arm64/mm/mmu.c | 2 +- arch/arm64/mm/numa.c | 2 +- arch/c6x/mm/dma-coherent.c | 4 ++-- arch/nds32/mm/init.c | 8 ++++---- arch/openrisc/mm/init.c | 2 +- arch/openrisc/mm/ioremap.c | 2 +- arch/powerpc/kernel/dt_cpu_ftrs.c | 4 +--- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/kernel/prom.c | 2 +- arch/powerpc/kernel/setup-common.c | 3 +-- arch/powerpc/kernel/setup_32.c | 10 +++++----- arch/powerpc/mm/numa.c | 2 +- arch/powerpc/mm/pgtable_32.c | 2 +- arch/powerpc/mm/ppc_mmu_32.c | 2 +- arch/powerpc/platforms/pasemi/iommu.c | 2 +- arch/powerpc/platforms/powernv/opal.c | 2 +- arch/powerpc/sysdev/dart_iommu.c | 2 +- arch/s390/kernel/crash_dump.c | 2 +- arch/s390/kernel/setup.c | 3 ++- arch/s390/mm/vmem.c | 4 ++-- arch/s390/numa/numa.c | 2 +- arch/sparc/kernel/mdesc.c | 2 +- arch/sparc/kernel/prom_64.c | 2 +- arch/sparc/mm/init_64.c | 11 ++++++----- arch/unicore32/mm/mmu.c | 2 +- arch/x86/mm/numa.c | 2 +- drivers/firmware/efi/memmap.c | 2 +- include/linux/memblock.h | 6 +++--- mm/memblock.c | 8 ++++---- 30 files changed, 50 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e46a6a446cdd..f5cc1ccfea3d 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -721,7 +721,7 @@ EXPORT_SYMBOL(phys_mem_access_prot); static void __init *early_alloc_aligned(unsigned long sz, unsigned long align) { - void *ptr = __va(memblock_alloc(sz, align)); + void *ptr = __va(memblock_phys_alloc(sz, align)); memset(ptr, 0, sz); return ptr; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9498c15b847b..394b8d554def 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -101,7 +101,7 @@ static phys_addr_t __init early_pgtable_alloc(void)
phys_addr_t phys; void *ptr; - phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); /* * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index d7b66fc5e1c5..c7fb34efd23e 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -237,7 +237,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) if (start_pfn >= end_pfn) pr_info("Initmem setup node %d []\n", nid); - nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); + nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); nd = __va(nd_pa); /* report and initialize */ diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c index d0a8e0c4b27e..01305c787201 100644 --- a/arch/c6x/mm/dma-coherent.c +++ b/arch/c6x/mm/dma-coherent.c @@ -135,8 +135,8 @@ void __init coherent_mem_init(phys_addr_t start, u32 size) if (dma_size & (PAGE_SIZE - 1)) ++dma_pages; - bitmap_phys = memblock_alloc(BITS_TO_LONGS(dma_pages) * sizeof(long), - sizeof(long)); + bitmap_phys = memblock_phys_alloc(BITS_TO_LONGS(dma_pages) * sizeof(long), + sizeof(long)); dma_bitmap = phys_to_virt(bitmap_phys); memset(dma_bitmap, 0, dma_pages * PAGE_SIZE); diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c index c713d2ad55dc..5af81b866aa5 100644 --- a/arch/nds32/mm/init.c +++ b/arch/nds32/mm/init.c @@ -81,7 +81,7 @@ static void __init map_ram(void) } /* Alloc one page for holding PTE's... */ - pte = (pte_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); + pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); memset(pte, 0, PAGE_SIZE); set_pmd(pme, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE)); @@ -114,7 +114,7 @@ static void __init fixedrange_init(void) pgd = swapper_pg_dir + pgd_index(vaddr); pud = pud_offset(pgd, vaddr); pmd = pmd_offset(pud, vaddr); - fixmap_pmd_p = (pmd_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); + fixmap_pmd_p = (pmd_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); memset(fixmap_pmd_p, 0, PAGE_SIZE); set_pmd(pmd, __pmd(__pa(fixmap_pmd_p) + _PAGE_KERNEL_TABLE)); @@ -127,7 +127,7 @@ static void __init fixedrange_init(void) pgd = swapper_pg_dir + pgd_index(vaddr); pud = pud_offset(pgd, vaddr); pmd = pmd_offset(pud, vaddr); - pte = (pte_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); + pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); memset(pte, 0, PAGE_SIZE); set_pmd(pmd, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE)); pkmap_page_table = pte; @@ -153,7 +153,7 @@ void __init paging_init(void) fixedrange_init(); /* allocate space for empty_zero_page */ - zero_page = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); + zero_page = __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); memset(zero_page, 0, PAGE_SIZE); zone_sizes_init(); diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index 6972d5d6f23f..b7670de26c11 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -106,7 +106,7 @@ static void __init map_ram(void) } /* Alloc one page for holding PTE's... 
*/ - pte = (pte_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); + pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); set_pmd(pme, __pmd(_KERNPG_TABLE + __pa(pte))); /* Fill the newly allocated page with PTE'S */ diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 2175e4bfd9fc..c9697529b3f0 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -126,7 +126,7 @@ pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm, if (likely(mem_init_done)) { pte = (pte_t *) __get_free_page(GFP_KERNEL); } else { - pte = (pte_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); + pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); } if (pte) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index f432054234a4..8be3721d9302 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -1008,9 +1008,7 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char /* Count and allocate space for cpu features */ of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes, &nr_dt_cpu_features); - dt_cpu_features = __va( - memblock_alloc(sizeof(struct dt_cpu_feature)* - nr_dt_cpu_features, PAGE_SIZE)); + dt_cpu_features = __va(memblock_phys_alloc(sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, PAGE_SIZE)); cpufeatures_setup_start(isa); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0ee3e6d50f28..f331a0054b3a 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -198,7 +198,7 @@ void __init allocate_paca_ptrs(void) paca_nr_cpu_ids = nr_cpu_ids; paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; - paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0)); + paca_ptrs = __va(memblock_phys_alloc(paca_ptrs_size, 0)); memset(paca_ptrs, 0x88, paca_ptrs_size); } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index c4d7078e5295..fe758cedb93f 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -126,7 +126,7 @@ static void __init move_device_tree(void) if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) || overlaps_crashkernel(start, size) || overlaps_initrd(start, size)) { - p = __va(memblock_alloc(size, PAGE_SIZE)); + p = __va(memblock_phys_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); initial_boot_params = p; DBG("Moved device tree to 0x%p\n", p); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9ca9db707bcb..2b56d1f30387 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -460,8 +460,7 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); - cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32), - __alignof__(u32))); + cpu_to_phys_id = __va(memblock_phys_alloc(nr_cpu_ids * sizeof(u32), __alignof__(u32))); memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32)); for_each_node_by_type(dn, "cpu") { diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 8c507be12c3c..81909600013a 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -206,9 +206,9 @@ void __init irqstack_early_init(void) * as the memblock is limited to lowmem by default */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) - __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE)); hardirq_ctx[i] = (struct thread_info *) - __va(memblock_alloc(THREAD_SIZE, 
THREAD_SIZE)); + __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE)); } } @@ -227,12 +227,12 @@ void __init exc_lvl_early_init(void) #endif critirq_ctx[hw_cpu] = (struct thread_info *) - __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE)); #ifdef CONFIG_BOOKE dbgirq_ctx[hw_cpu] = (struct thread_info *) - __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE)); mcheckirq_ctx[hw_cpu] = (struct thread_info *) - __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); + __va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE)); #endif } } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 693ae1c1acba..f04f15f9d232 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -788,7 +788,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) void *nd; int tnid; - nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); + nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); nd = __va(nd_pa); /* report and initialize */ diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 5877f5aa8f5d..bda3c6f1bd32 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -50,7 +50,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) if (slab_is_available()) { pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); } else { - pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); + pte = __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) clear_page(pte); } diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 38a793bfca37..f6f575bae3bc 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -224,7 +224,7 @@ void __init MMU_init_hw(void) * Find some memory for the hash table. */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(memblock_alloc(Hash_size, Hash_size)); + Hash = __va(memblock_phys_alloc(Hash_size, Hash_size)); memset(Hash, 0, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index f06c83f321e6..f2971522fb4a 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -213,7 +213,7 @@ static int __init iob_init(struct device_node *dn) pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base); /* Allocate a spare page to map all invalid IOTLB pages. */ - tmp = memblock_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE); + tmp = memblock_phys_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE); if (!tmp) panic("IOBMAP: Cannot allocate spare page!"); /* Empty l1 is marked invalid */ diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index a4641515956f..beed86f4224b 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -171,7 +171,7 @@ int __init early_init_dt_scan_recoverable_ranges(unsigned long node, /* * Allocate a buffer to hold the MC recoverable ranges. 
*/ - mc_recoverable_range =__va(memblock_alloc(size, __alignof__(u64))); + mc_recoverable_range =__va(memblock_phys_alloc(size, __alignof__(u64))); memset(mc_recoverable_range, 0, size); for (i = 0; i < mc_recoverable_range_len; i++) { diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 5ca3e22d0512..a5b40d1460f1 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -261,7 +261,7 @@ static void allocate_dart(void) * that to work around what looks like a problem with the HT bridge * prefetching into invalid pages and corrupting data */ - tmp = memblock_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE); + tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE); dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & DARTMAP_RPNMASK); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 376f6b6dfb3c..d17566a8c76f 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -61,7 +61,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu) { struct save_area *sa; - sa = (void *) memblock_alloc(sizeof(*sa), 8); + sa = (void *) memblock_phys_alloc(sizeof(*sa), 8); if (is_boot_cpu) list_add(&sa->list, &dump_save_areas); else diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a2e952b66248..204ccfa54bf3 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -967,7 +967,8 @@ static void __init setup_randomness(void) { struct sysinfo_3_2_2 *vmms; - vmms = (struct sysinfo_3_2_2 *) memblock_alloc(PAGE_SIZE, PAGE_SIZE); + vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE, + PAGE_SIZE); if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free((unsigned long) vmms, PAGE_SIZE); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index db55561c5981..04638b0b9ef1 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -36,7 +36,7 @@ static void __ref *vmem_alloc_pages(unsigned int order) if (slab_is_available()) return (void *)__get_free_pages(GFP_KERNEL, order); - return (void *) memblock_alloc(size, size); + return (void *) memblock_phys_alloc(size, size); } void *vmem_crst_alloc(unsigned long val) @@ -57,7 +57,7 @@ pte_t __ref *vmem_pte_alloc(void) if (slab_is_available()) pte = (pte_t *) page_table_alloc(&init_mm); else - pte = (pte_t *) memblock_alloc(size, size); + pte = (pte_t *) memblock_phys_alloc(size, size); if (!pte) return NULL; memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 5bd374491f94..297f5d8b0890 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -64,7 +64,7 @@ static __init pg_data_t *alloc_node_data(void) { pg_data_t *res; - res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8); + res = (pg_data_t *) memblock_phys_alloc(sizeof(pg_data_t), 8); memset(res, 0, sizeof(pg_data_t)); return res; } diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 39a2503fa3e1..59131e72ee78 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -170,7 +170,7 @@ static struct mdesc_handle * __init mdesc_memblock_alloc(unsigned int mdesc_size mdesc_size); alloc_size = PAGE_ALIGN(handle_size); - paddr = memblock_alloc(alloc_size, PAGE_SIZE); + paddr = memblock_phys_alloc(alloc_size, PAGE_SIZE); hp = NULL; if (paddr) { diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index baeaeed64993..c37955d127fe 100644 --- 
a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -34,7 +34,7 @@ void * __init prom_early_alloc(unsigned long size) { - unsigned long paddr = memblock_alloc(size, SMP_CACHE_BYTES); + unsigned long paddr = memblock_phys_alloc(size, SMP_CACHE_BYTES); void *ret; if (!paddr) { diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 39822f611c01..b338f0440f6b 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1092,7 +1092,8 @@ static void __init allocate_node_data(int nid) #ifdef CONFIG_NEED_MULTIPLE_NODES unsigned long paddr; - paddr = memblock_alloc_try_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); + paddr = memblock_phys_alloc_try_nid(sizeof(struct pglist_data), + SMP_CACHE_BYTES, nid); if (!paddr) { prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); prom_halt(); @@ -1266,8 +1267,8 @@ static int __init grab_mlgroups(struct mdesc_handle *md) if (!count) return -ENOENT; - paddr = memblock_alloc(count * sizeof(struct mdesc_mlgroup), - SMP_CACHE_BYTES); + paddr = memblock_phys_alloc(count * sizeof(struct mdesc_mlgroup), + SMP_CACHE_BYTES); if (!paddr) return -ENOMEM; @@ -1307,8 +1308,8 @@ static int __init grab_mblocks(struct mdesc_handle *md) if (!count) return -ENOENT; - paddr = memblock_alloc(count * sizeof(struct mdesc_mblock), - SMP_CACHE_BYTES); + paddr = memblock_phys_alloc(count * sizeof(struct mdesc_mblock), + SMP_CACHE_BYTES); if (!paddr) return -ENOMEM; diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c index 0c94b7b4514d..18b355a20f0b 100644 --- a/arch/unicore32/mm/mmu.c +++ b/arch/unicore32/mm/mmu.c @@ -144,7 +144,7 @@ static void __init build_mem_type_table(void) static void __init *early_alloc(unsigned long sz) { - void *ptr = __va(memblock_alloc(sz, sz)); + void *ptr = __va(memblock_phys_alloc(sz, sz)); memset(ptr, 0, sz); return ptr; } diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index fa150855647c..16e37d712ffd 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -196,7 +196,7 @@ static void __init alloc_node_data(int nid) * Allocate node data. Try node-local memory and then any node. * Never allocate in DMA zone. 
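Taken together with the _virt rename in the next patch, the convention the two commits establish is easiest to see side by side. A sketch, kernel context assumed:

	/* After both renames: _phys_ variants return a physical address... */
	phys_addr_t pa = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
	void *pgtable = __va(pa);		/* caller converts explicitly */

	/* ...while plain memblock_alloc() returns a mapped, zeroed pointer. */
	void *table = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
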
*/ - nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); + nd_pa = memblock_phys_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); if (!nd_pa) { nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES, MEMBLOCK_ALLOC_ACCESSIBLE); diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index 5fc70520e04c..ef618bceb79a 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -15,7 +15,7 @@ static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size) { - return memblock_alloc(size, 0); + return memblock_phys_alloc(size, 0); } static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 224d27363cca..9d46a7204975 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -300,10 +300,10 @@ static inline int memblock_get_region_node(const struct memblock_region *r) } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); -phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); +phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); +phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align); +phys_addr_t memblock_phys_alloc(phys_addr_t size, phys_addr_t align); /* * Set the allocation direction to bottom-up or top-down. diff --git a/mm/memblock.c b/mm/memblock.c index d6897fbe021f..20358374e8a8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1269,7 +1269,7 @@ phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags); } -phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) +phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) { enum memblock_flags flags = choose_memblock_flags(); phys_addr_t ret; @@ -1304,14 +1304,14 @@ phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys return alloc; } -phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) +phys_addr_t __init memblock_phys_alloc(phys_addr_t size, phys_addr_t align) { return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } -phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) +phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) { - phys_addr_t res = memblock_alloc_nid(size, align, nid); + phys_addr_t res = memblock_phys_alloc_nid(size, align, nid); if (res) return res; -- cgit v1.2.3-71-gd317 From eb31d559f1e8390195372cd51cfb198da8bc84b9 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:08:04 -0700 Subject: memblock: remove _virt from APIs returning virtual address The conversion is done using sed -i 's@memblock_virt_alloc@memblock_alloc@g' \ $(git grep -l memblock_virt_alloc) Link: http://lkml.kernel.org/r/1536927045-23536-8-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. 
Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/setup.c | 4 ++-- arch/arm/mach-omap2/omap_hwmod.c | 6 ++--- arch/arm64/mm/kasan_init.c | 2 +- arch/arm64/mm/numa.c | 2 +- arch/mips/kernel/setup.c | 2 +- arch/powerpc/kernel/pci_32.c | 2 +- arch/powerpc/lib/alloc.c | 2 +- arch/powerpc/mm/mmu_context_nohash.c | 6 ++--- arch/powerpc/platforms/powermac/nvram.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 6 ++--- arch/powerpc/platforms/ps3/setup.c | 2 +- arch/powerpc/sysdev/msi_bitmap.c | 2 +- arch/s390/kernel/setup.c | 8 +++---- arch/s390/kernel/smp.c | 2 +- arch/s390/kernel/topology.c | 4 ++-- arch/s390/numa/mode_emu.c | 2 +- arch/s390/numa/toptree.c | 2 +- arch/x86/mm/kasan_init_64.c | 4 ++-- arch/xtensa/mm/kasan_init.c | 2 +- drivers/clk/ti/clk.c | 2 +- drivers/firmware/memmap.c | 2 +- drivers/of/fdt.c | 2 +- drivers/of/unittest.c | 2 +- include/linux/bootmem.h | 38 +++++++++++++++---------------- init/main.c | 6 ++--- kernel/dma/swiotlb.c | 6 ++--- kernel/power/snapshot.c | 2 +- kernel/printk/printk.c | 4 ++-- lib/cpumask.c | 2 +- mm/hugetlb.c | 2 +- mm/kasan/kasan_init.c | 2 +- mm/memblock.c | 26 ++++++++++----------- mm/page_alloc.c | 8 +++---- mm/page_ext.c | 2 +- mm/percpu.c | 28 +++++++++++------------ mm/sparse-vmemmap.c | 2 +- mm/sparse.c | 12 +++++----- 37 files changed, 105 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 4c249cb261f3..39e6090d23ac 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -857,7 +857,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) */ boot_alias_start = phys_to_idmap(start); if (arm_has_idmap_alias() && boot_alias_start != IDMAP_INVALID_ADDR) { - res = memblock_virt_alloc(sizeof(*res), 0); + res = memblock_alloc(sizeof(*res), 0); res->name = "System RAM (boot alias)"; res->start = boot_alias_start; res->end = phys_to_idmap(end); @@ -865,7 +865,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) request_resource(&iomem_resource, res); } - res = memblock_virt_alloc(sizeof(*res), 0); + res = memblock_alloc(sizeof(*res), 0); res->name = "System RAM"; res->start = start; res->end = end; diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 56a1fe90d394..1f9b34a7eccd 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -726,7 +726,7 @@ static int __init _setup_clkctrl_provider(struct device_node *np) u64 size; int i; - provider = memblock_virt_alloc(sizeof(*provider), 0); + provider = memblock_alloc(sizeof(*provider), 0); if (!provider) return -ENOMEM; @@ -736,12 +736,12 @@ static int __init _setup_clkctrl_provider(struct device_node *np) of_property_count_elems_of_size(np, "reg", sizeof(u32)) / 2; provider->addr = - memblock_virt_alloc(sizeof(void *) * provider->num_addrs, 0); + memblock_alloc(sizeof(void *) * provider->num_addrs, 0); if (!provider->addr) return -ENOMEM; provider->size = - memblock_virt_alloc(sizeof(u32) * provider->num_addrs, 0); + memblock_alloc(sizeof(u32) * provider->num_addrs, 0); if (!provider->size) return 
-ENOMEM; diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index fccb1a6f8c6f..6a65a2912d36 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -38,7 +38,7 @@ static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); static phys_addr_t __init kasan_alloc_zeroed_page(int node) { - void *p = memblock_virt_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, + void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node); return __pa(p); diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index c7fb34efd23e..0bff116c07a8 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -168,7 +168,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, { int nid = early_cpu_to_node(cpu); - return memblock_virt_alloc_try_nid(size, align, + return memblock_alloc_try_nid(size, align, __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid); } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 01a5ff4c41ff..0c997645e8f0 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -859,7 +859,7 @@ static void __init arch_mem_init(char **cmdline_p) * Prevent memblock from allocating high memory. * This cannot be done before max_low_pfn is detected, so up * to this point is possible to only reserve physical memory - * with memblock_reserve; memblock_virt_alloc* can be used + * with memblock_reserve; memblock_alloc* can be used * only after this point */ memblock_set_current_limit(PFN_PHYS(max_low_pfn)); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 4da8ed576229..d39ec3a4550a 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -203,7 +203,7 @@ pci_create_OF_bus_map(void) struct property* of_prop; struct device_node *dn; - of_prop = memblock_virt_alloc(sizeof(struct property) + 256, 0); + of_prop = memblock_alloc(sizeof(struct property) + 256, 0); dn = of_find_node_by_path("/"); if (dn) { memset(of_prop, -1, sizeof(struct property) + 256); diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index 06796dec01ea..bf87d6e13369 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -14,7 +14,7 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) if (slab_is_available()) p = kzalloc(size, mask); else { - p = memblock_virt_alloc(size, 0); + p = memblock_alloc(size, 0); } return p; } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 4d80239ef83c..954f1986af4d 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -461,10 +461,10 @@ void __init mmu_context_init(void) /* * Allocate the maps used by context management */ - context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0); - context_mm = memblock_virt_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0); + context_map = memblock_alloc(CTX_MAP_SIZE, 0); + context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0); #ifdef CONFIG_SMP - stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0); + stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, 0); cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, "powerpc/mmu/ctx:prepare", diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 60b03a1703d1..f45b369177a4 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -513,7 +513,7 @@ static int __init core99_nvram_setup(struct device_node *dp, unsigned 
long addr) printk(KERN_ERR "nvram: no address\n"); return -EINVAL; } - nvram_image = memblock_virt_alloc(NVRAM_SIZE, 0); + nvram_image = memblock_alloc(NVRAM_SIZE, 0); nvram_data = ioremap(addr, NVRAM_SIZE*2); nvram_naddrs = 1; /* Make sure we get the correct case */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index cde710297a4e..23a67b545b70 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3770,7 +3770,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = memblock_virt_alloc(sizeof(*phb), 0); + phb = memblock_alloc(sizeof(*phb), 0); /* Allocate PCI controller */ phb->hose = hose = pcibios_alloc_controller(np); @@ -3816,7 +3816,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, else phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; - phb->diag_data = memblock_virt_alloc(phb->diag_data_size, 0); + phb->diag_data = memblock_alloc(phb->diag_data_size, 0); /* Parse 32-bit and IO ranges (if any) */ pci_process_bridge_OF_ranges(hose, np, !hose->global_number); @@ -3875,7 +3875,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); - aux = memblock_virt_alloc(size, 0); + aux = memblock_alloc(size, 0); phb->ioda.pe_alloc = aux; phb->ioda.m64_segmap = aux + m64map_off; phb->ioda.m32_segmap = aux + m32map_off; diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 77a37520068d..12519857a33c 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -126,7 +126,7 @@ static void __init prealloc(struct ps3_prealloc *p) if (!p->size) return; - p->address = memblock_virt_alloc(p->size, p->align); + p->address = memblock_alloc(p->size, p->align); printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size, p->address); diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index e64a411d1a00..349a9ff6ca5b 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -128,7 +128,7 @@ int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count, if (bmp->bitmap_from_slab) bmp->bitmap = kzalloc(size, GFP_KERNEL); else { - bmp->bitmap = memblock_virt_alloc(size, 0); + bmp->bitmap = memblock_alloc(size, 0); /* the bitmap won't be freed from memblock allocator */ kmemleak_not_leak(bmp->bitmap); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 204ccfa54bf3..781c1053a773 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -378,7 +378,7 @@ static void __init setup_lowcore(void) * Setup lowcore for boot cpu */ BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE); - lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc)); + lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc)); lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = (unsigned long) restart_int_handler; lc->external_new_psw.mask = PSW_KERNEL_BITS | @@ -422,7 +422,7 @@ static void __init setup_lowcore(void) * Allocate the global restart stack which is the same for * all CPUs in cast *one* of them does a PSW restart. 
*/ - restart_stack = memblock_virt_alloc(THREAD_SIZE, THREAD_SIZE); + restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE); restart_stack += STACK_INIT_OFFSET; /* @@ -488,7 +488,7 @@ static void __init setup_resources(void) bss_resource.end = (unsigned long) __bss_stop - 1; for_each_memblock(memory, reg) { - res = memblock_virt_alloc(sizeof(*res), 8); + res = memblock_alloc(sizeof(*res), 8); res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->name = "System RAM"; @@ -502,7 +502,7 @@ static void __init setup_resources(void) std_res->start > res->end) continue; if (std_res->end > res->end) { - sub_res = memblock_virt_alloc(sizeof(*sub_res), 8); + sub_res = memblock_alloc(sizeof(*sub_res), 8); *sub_res = *std_res; sub_res->end = res->end; std_res->start = res->end + 1; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 1b3188f57b58..44f9a7d6450b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -761,7 +761,7 @@ void __init smp_detect_cpus(void) u16 address; /* Get CPU information */ - info = memblock_virt_alloc(sizeof(*info), 8); + info = memblock_alloc(sizeof(*info), 8); smp_get_core_info(info, 1); /* Find boot CPU type */ if (sclp.has_core_type) { diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index e8184a15578a..799a91882a76 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -519,7 +519,7 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info, nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; nr_masks = max(nr_masks, 1); for (i = 0; i < nr_masks; i++) { - mask->next = memblock_virt_alloc(sizeof(*mask->next), 8); + mask->next = memblock_alloc(sizeof(*mask->next), 8); mask = mask->next; } } @@ -537,7 +537,7 @@ void __init topology_init_early(void) } if (!MACHINE_HAS_TOPOLOGY) goto out; - tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE); + tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE); info = tl_info; store_topology(info); pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c index 83b222c57609..5a381fc8e958 100644 --- a/arch/s390/numa/mode_emu.c +++ b/arch/s390/numa/mode_emu.c @@ -313,7 +313,7 @@ static void __ref create_core_to_node_map(void) { int i; - emu_cores = memblock_virt_alloc(sizeof(*emu_cores), 8); + emu_cores = memblock_alloc(sizeof(*emu_cores), 8); for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++) emu_cores->to_node_id[i] = NODE_ID_FREE; } diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c index 21d1e8a1546d..7f61cc3fd4d1 100644 --- a/arch/s390/numa/toptree.c +++ b/arch/s390/numa/toptree.c @@ -34,7 +34,7 @@ struct toptree __ref *toptree_alloc(int level, int id) if (slab_is_available()) res = kzalloc(sizeof(*res), GFP_KERNEL); else - res = memblock_virt_alloc(sizeof(*res), 8); + res = memblock_alloc(sizeof(*res), 8); if (!res) return res; diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index e3e77527f8df..77b857cb036f 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -28,10 +28,10 @@ static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); static __init void *early_alloc(size_t size, int nid, bool panic) { if (panic) - return memblock_virt_alloc_try_nid(size, size, + return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); else - return memblock_virt_alloc_try_nid_nopanic(size, size, + return memblock_alloc_try_nid_nopanic(size, size, 
__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); } diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c index 6b532b6bd785..1a30a258ccd0 100644 --- a/arch/xtensa/mm/kasan_init.c +++ b/arch/xtensa/mm/kasan_init.c @@ -43,7 +43,7 @@ static void __init populate(void *start, void *end) unsigned long vaddr = (unsigned long)start; pgd_t *pgd = pgd_offset_k(vaddr); pmd_t *pmd = pmd_offset(pgd, vaddr); - pte_t *pte = memblock_virt_alloc(n_pages * sizeof(pte_t), PAGE_SIZE); + pte_t *pte = memblock_alloc(n_pages * sizeof(pte_t), PAGE_SIZE); pr_debug("%s: %p - %p\n", __func__, start, end); diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 7d22e1af2247..5c54d3734daf 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -342,7 +342,7 @@ void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem) { struct clk_iomap *io; - io = memblock_virt_alloc(sizeof(*io), 0); + io = memblock_alloc(sizeof(*io), 0); io->mem = mem; diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 5de3ed29282c..03cead6d5f97 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -333,7 +333,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; - entry = memblock_virt_alloc(sizeof(struct firmware_map_entry), 0); + entry = memblock_alloc(sizeof(struct firmware_map_entry), 0); if (WARN_ON(!entry)) return -ENOMEM; diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 4f915cea6f75..ffe62a7ae19b 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1179,7 +1179,7 @@ int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base, static void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) { - return memblock_virt_alloc(size, align); + return memblock_alloc(size, align); } bool __init early_init_dt_verify(void *params) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index a3a6866765f2..01e23b85e798 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -2192,7 +2192,7 @@ static struct device_node *overlay_base_root; static void * __init dt_alloc_memory(u64 size, u64 align) { - return memblock_virt_alloc(size, align); + return memblock_alloc(size, align); } /* diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index b74bafd110b9..7d91f0f5ee44 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -95,78 +95,78 @@ extern void *__alloc_bootmem_low(unsigned long size, #define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0) /* FIXME: Move to memblock.h at a point where we remove nobootmem.c */ -void *memblock_virt_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, +void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); -void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size, +void *memblock_alloc_try_nid_nopanic(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); -void *memblock_virt_alloc_try_nid(phys_addr_t size, phys_addr_t align, +void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); void __memblock_free_early(phys_addr_t base, phys_addr_t size); void __memblock_free_late(phys_addr_t base, phys_addr_t size); -static inline void * __init memblock_virt_alloc( +static inline void * __init memblock_alloc( phys_addr_t size, phys_addr_t align) { - return memblock_virt_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, + return memblock_alloc_try_nid(size, 
align, BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_raw( +static inline void * __init memblock_alloc_raw( phys_addr_t size, phys_addr_t align) { - return memblock_virt_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT, + return memblock_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_nopanic( +static inline void * __init memblock_alloc_nopanic( phys_addr_t size, phys_addr_t align) { - return memblock_virt_alloc_try_nid_nopanic(size, align, + return memblock_alloc_try_nid_nopanic(size, align, BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_low( +static inline void * __init memblock_alloc_low( phys_addr_t size, phys_addr_t align) { - return memblock_virt_alloc_try_nid(size, align, + return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_low_nopanic( +static inline void * __init memblock_alloc_low_nopanic( phys_addr_t size, phys_addr_t align) { - return memblock_virt_alloc_try_nid_nopanic(size, align, + return memblock_alloc_try_nid_nopanic(size, align, BOOTMEM_LOW_LIMIT, ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_from_nopanic( +static inline void * __init memblock_alloc_from_nopanic( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) { - return memblock_virt_alloc_try_nid_nopanic(size, align, min_addr, + return memblock_alloc_try_nid_nopanic(size, align, min_addr, BOOTMEM_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } -static inline void * __init memblock_virt_alloc_node( +static inline void * __init memblock_alloc_node( phys_addr_t size, int nid) { - return memblock_virt_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT, + return memblock_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE, nid); } -static inline void * __init memblock_virt_alloc_node_nopanic( +static inline void * __init memblock_alloc_node_nopanic( phys_addr_t size, int nid) { - return memblock_virt_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT, + return memblock_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE, nid); } diff --git a/init/main.c b/init/main.c index 1c3f90264280..86b59cf3bec7 100644 --- a/init/main.c +++ b/init/main.c @@ -375,10 +375,10 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } static void __init setup_command_line(char *command_line) { saved_command_line = - memblock_virt_alloc(strlen(boot_command_line) + 1, 0); + memblock_alloc(strlen(boot_command_line) + 1, 0); initcall_command_line = - memblock_virt_alloc(strlen(boot_command_line) + 1, 0); - static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0); + memblock_alloc(strlen(boot_command_line) + 1, 0); + static_command_line = memblock_alloc(strlen(command_line) + 1, 0); strcpy(saved_command_line, boot_command_line); strcpy(static_command_line, command_line); } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index ebecaf255ea2..801da67e957b 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -204,10 +204,10 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE * between io_tlb_start and io_tlb_end. 
*/ - io_tlb_list = memblock_virt_alloc( + io_tlb_list = memblock_alloc( PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), PAGE_SIZE); - io_tlb_orig_addr = memblock_virt_alloc( + io_tlb_orig_addr = memblock_alloc( PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), PAGE_SIZE); for (i = 0; i < io_tlb_nslabs; i++) { @@ -242,7 +242,7 @@ swiotlb_init(int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; /* Get IO TLB memory from the low pages */ - vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); + vstart = memblock_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 3d37c279c090..34116a6097be 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -963,7 +963,7 @@ void __init __register_nosave_region(unsigned long start_pfn, BUG_ON(!region); } else { /* This allocation cannot fail */ - region = memblock_virt_alloc(sizeof(struct nosave_region), 0); + region = memblock_alloc(sizeof(struct nosave_region), 0); } region->start_pfn = start_pfn; region->end_pfn = end_pfn; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b77150ad1965..429e4a3833ca 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1111,9 +1111,9 @@ void __init setup_log_buf(int early) if (early) { new_log_buf = - memblock_virt_alloc(new_log_buf_len, LOG_ALIGN); + memblock_alloc(new_log_buf_len, LOG_ALIGN); } else { - new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len, + new_log_buf = memblock_alloc_nopanic(new_log_buf_len, LOG_ALIGN); } diff --git a/lib/cpumask.c b/lib/cpumask.c index beca6244671a..1405cb22e6bc 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -163,7 +163,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var); */ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { - *mask = memblock_virt_alloc(cpumask_size(), 0); + *mask = memblock_alloc(cpumask_size(), 0); } /** diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7b5c0ad9a6bd..51e9f17dbd5c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2100,7 +2100,7 @@ int __alloc_bootmem_huge_page(struct hstate *h) for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) { void *addr; - addr = memblock_virt_alloc_try_nid_raw( + addr = memblock_alloc_try_nid_raw( huge_page_size(h), huge_page_size(h), 0, BOOTMEM_ALLOC_ACCESSIBLE, node); if (addr) { diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 7a2a2f13f86f..24d734bdff6b 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -83,7 +83,7 @@ static inline bool kasan_zero_page_entry(pte_t pte) static __init void *early_alloc(size_t size, int node) { - return memblock_virt_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), + return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, node); } diff --git a/mm/memblock.c b/mm/memblock.c index 20358374e8a8..58340de3ebc6 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1319,7 +1319,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali } /** - * memblock_virt_alloc_internal - allocate boot memory block + * memblock_alloc_internal - allocate boot memory block * @size: size of memory block to be allocated in bytes * @align: alignment of the region and block's size * @min_addr: the lower bound of the memory region to allocate (phys address) @@ -1345,7 +1345,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali * Return: * Virtual address of allocated memory block 
on success, NULL on failure. */ -static void * __init memblock_virt_alloc_internal( +static void * __init memblock_alloc_internal( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid) @@ -1412,7 +1412,7 @@ done: } /** - * memblock_virt_alloc_try_nid_raw - allocate boot memory block without zeroing + * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing * memory and without panicking * @size: size of memory block to be allocated in bytes * @align: alignment of the region and block's size @@ -1430,7 +1430,7 @@ done: * Return: * Virtual address of allocated memory block on success, NULL on failure. */ -void * __init memblock_virt_alloc_try_nid_raw( +void * __init memblock_alloc_try_nid_raw( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid) @@ -1441,7 +1441,7 @@ void * __init memblock_virt_alloc_try_nid_raw( __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr, (void *)_RET_IP_); - ptr = memblock_virt_alloc_internal(size, align, + ptr = memblock_alloc_internal(size, align, min_addr, max_addr, nid); if (ptr && size > 0) page_init_poison(ptr, size); @@ -1450,7 +1450,7 @@ void * __init memblock_virt_alloc_try_nid_raw( } /** - * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block + * memblock_alloc_try_nid_nopanic - allocate boot memory block * @size: size of memory block to be allocated in bytes * @align: alignment of the region and block's size * @min_addr: the lower bound of the memory region from where the allocation @@ -1466,7 +1466,7 @@ void * __init memblock_virt_alloc_try_nid_raw( * Return: * Virtual address of allocated memory block on success, NULL on failure. */ -void * __init memblock_virt_alloc_try_nid_nopanic( +void * __init memblock_alloc_try_nid_nopanic( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid) @@ -1477,7 +1477,7 @@ void * __init memblock_virt_alloc_try_nid_nopanic( __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr, (void *)_RET_IP_); - ptr = memblock_virt_alloc_internal(size, align, + ptr = memblock_alloc_internal(size, align, min_addr, max_addr, nid); if (ptr) memset(ptr, 0, size); @@ -1485,7 +1485,7 @@ void * __init memblock_virt_alloc_try_nid_nopanic( } /** - * memblock_virt_alloc_try_nid - allocate boot memory block with panicking + * memblock_alloc_try_nid - allocate boot memory block with panicking * @size: size of memory block to be allocated in bytes * @align: alignment of the region and block's size * @min_addr: the lower bound of the memory region from where the allocation @@ -1495,14 +1495,14 @@ void * __init memblock_virt_alloc_try_nid_nopanic( * allocate only from memory limited by memblock.current_limit value * @nid: nid of the free area to find, %NUMA_NO_NODE for any node * - * Public panicking version of memblock_virt_alloc_try_nid_nopanic() + * Public panicking version of memblock_alloc_try_nid_nopanic() * which provides debug information (including caller info), if enabled, * and panics if the request can not be satisfied. * * Return: * Virtual address of allocated memory block on success, NULL on failure. 
*/ -void * __init memblock_virt_alloc_try_nid( +void * __init memblock_alloc_try_nid( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid) @@ -1512,7 +1512,7 @@ void * __init memblock_virt_alloc_try_nid( memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n", __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr, (void *)_RET_IP_); - ptr = memblock_virt_alloc_internal(size, align, + ptr = memblock_alloc_internal(size, align, min_addr, max_addr, nid); if (ptr) { memset(ptr, 0, size); @@ -1529,7 +1529,7 @@ void * __init memblock_virt_alloc_try_nid( * @base: phys starting address of the boot memory block * @size: size of the boot memory block in bytes * - * Free boot memory block previously allocated by memblock_virt_alloc_xx() API. + * Free boot memory block previously allocated by memblock_alloc_xx() API. * The freeing memory will not be released to the buddy allocator. */ void __init __memblock_free_early(phys_addr_t base, phys_addr_t size) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 59d171f84445..8ca6954fdcdc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6209,7 +6209,7 @@ static void __ref setup_usemap(struct pglist_data *pgdat, zone->pageblock_flags = NULL; if (usemapsize) zone->pageblock_flags = - memblock_virt_alloc_node_nopanic(usemapsize, + memblock_alloc_node_nopanic(usemapsize, pgdat->node_id); } #else @@ -6439,7 +6439,7 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) end = pgdat_end_pfn(pgdat); end = ALIGN(end, MAX_ORDER_NR_PAGES); size = (end - start) * sizeof(struct page); - map = memblock_virt_alloc_node_nopanic(size, pgdat->node_id); + map = memblock_alloc_node_nopanic(size, pgdat->node_id); pgdat->node_mem_map = map + offset; } pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", @@ -7711,9 +7711,9 @@ void *__init alloc_large_system_hash(const char *tablename, size = bucketsize << log2qty; if (flags & HASH_EARLY) { if (flags & HASH_ZERO) - table = memblock_virt_alloc_nopanic(size, 0); + table = memblock_alloc_nopanic(size, 0); else - table = memblock_virt_alloc_raw(size, 0); + table = memblock_alloc_raw(size, 0); } else if (hashdist) { table = __vmalloc(size, gfp_flags, PAGE_KERNEL); } else { diff --git a/mm/page_ext.c b/mm/page_ext.c index a9826da84ccb..e77c0f031dd0 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -161,7 +161,7 @@ static int __init alloc_node_page_ext(int nid) table_size = get_entry_size() * nr_pages; - base = memblock_virt_alloc_try_nid_nopanic( + base = memblock_alloc_try_nid_nopanic( table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); if (!base) diff --git a/mm/percpu.c b/mm/percpu.c index 4b90682623e9..3050c1d37d37 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1101,7 +1101,7 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, region_size = ALIGN(start_offset + map_size, lcm_align); /* allocate chunk */ - chunk = memblock_virt_alloc(sizeof(struct pcpu_chunk) + + chunk = memblock_alloc(sizeof(struct pcpu_chunk) + BITS_TO_LONGS(region_size >> PAGE_SHIFT), 0); @@ -1114,11 +1114,11 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, chunk->nr_pages = region_size >> PAGE_SHIFT; region_bits = pcpu_chunk_map_bits(chunk); - chunk->alloc_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits) * + chunk->alloc_map = memblock_alloc(BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]), 0); - chunk->bound_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits + 1) * + 
chunk->bound_map = memblock_alloc(BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]), 0); - chunk->md_blocks = memblock_virt_alloc(pcpu_chunk_nr_blocks(chunk) * + chunk->md_blocks = memblock_alloc(pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]), 0); pcpu_init_md_blocks(chunk); @@ -1888,7 +1888,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, __alignof__(ai->groups[0].cpu_map[0])); ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); - ptr = memblock_virt_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE); + ptr = memblock_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE); if (!ptr) return NULL; ai = ptr; @@ -2075,12 +2075,12 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0); /* process group information and build config tables accordingly */ - group_offsets = memblock_virt_alloc(ai->nr_groups * + group_offsets = memblock_alloc(ai->nr_groups * sizeof(group_offsets[0]), 0); - group_sizes = memblock_virt_alloc(ai->nr_groups * + group_sizes = memblock_alloc(ai->nr_groups * sizeof(group_sizes[0]), 0); - unit_map = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0); - unit_off = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0); + unit_map = memblock_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0); + unit_off = memblock_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0); for (cpu = 0; cpu < nr_cpu_ids; cpu++) unit_map[cpu] = UINT_MAX; @@ -2144,7 +2144,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, * empty chunks. */ pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; - pcpu_slot = memblock_virt_alloc( + pcpu_slot = memblock_alloc( pcpu_nr_slots * sizeof(pcpu_slot[0]), 0); for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_slot[i]); @@ -2458,7 +2458,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); - areas = memblock_virt_alloc_nopanic(areas_size, 0); + areas = memblock_alloc_nopanic(areas_size, 0); if (!areas) { rc = -ENOMEM; goto out_free; @@ -2599,7 +2599,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, /* unaligned allocations can't be freed, round up to page size */ pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * sizeof(pages[0])); - pages = memblock_virt_alloc(pages_size, 0); + pages = memblock_alloc(pages_size, 0); /* allocate pages */ j = 0; @@ -2688,7 +2688,7 @@ EXPORT_SYMBOL(__per_cpu_offset); static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return memblock_virt_alloc_from_nopanic( + return memblock_alloc_from_nopanic( size, align, __pa(MAX_DMA_ADDRESS)); } @@ -2737,7 +2737,7 @@ void __init setup_per_cpu_areas(void) void *fc; ai = pcpu_alloc_alloc_info(1, 1); - fc = memblock_virt_alloc_from_nopanic(unit_size, + fc = memblock_alloc_from_nopanic(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!ai || !fc) diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 8301293331a2..91c2c3d25827 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -42,7 +42,7 @@ static void * __ref __earlyonly_bootmem_alloc(int node, unsigned long align, unsigned long goal) { - return memblock_virt_alloc_try_nid_raw(size, align, goal, + return memblock_alloc_try_nid_raw(size, align, goal, BOOTMEM_ALLOC_ACCESSIBLE, node); } diff --git a/mm/sparse.c b/mm/sparse.c index 67ad061f7fb8..cb900dda7fd2 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ 
-68,7 +68,7 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid) if (slab_is_available()) section = kzalloc_node(array_size, GFP_KERNEL, nid); else - section = memblock_virt_alloc_node(array_size, nid); + section = memblock_alloc_node(array_size, nid); return section; } @@ -216,7 +216,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) size = sizeof(struct mem_section*) * NR_SECTION_ROOTS; align = 1 << (INTERNODE_CACHE_SHIFT); - mem_section = memblock_virt_alloc(size, align); + mem_section = memblock_alloc(size, align); } #endif @@ -306,7 +306,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, limit = goal + (1UL << PA_SECTION_SHIFT); nid = early_pfn_to_nid(goal >> PAGE_SHIFT); again: - p = memblock_virt_alloc_try_nid_nopanic(size, + p = memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, goal, limit, nid); if (!p && limit) { @@ -362,7 +362,7 @@ static unsigned long * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsigned long size) { - return memblock_virt_alloc_node_nopanic(size, pgdat->node_id); + return memblock_alloc_node_nopanic(size, pgdat->node_id); } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) @@ -391,7 +391,7 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, if (map) return map; - map = memblock_virt_alloc_try_nid(size, + map = memblock_alloc_try_nid(size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); return map; @@ -405,7 +405,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid) { WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */ sparsemap_buf = - memblock_virt_alloc_try_nid_raw(size, PAGE_SIZE, + memblock_alloc_try_nid_raw(size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); sparsemap_buf_end = sparsemap_buf + size; -- cgit v1.2.3-71-gd317 From 3913c8f9f96bb75a062ad16ea10a1cdad48bb716 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:08:36 -0700 Subject: memblock: add align parameter to memblock_alloc_node() With the align parameter memblock_alloc_node() can be used as a drop-in replacement for alloc_bootmem_pages_node() and __alloc_bootmem_node(), which is done in the following patches. Link: http://lkml.kernel.org/r/1536927045-23536-15-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. 
Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Hocko Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 4 ++-- mm/sparse.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 7d91f0f5ee44..3896af205303 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -157,9 +157,9 @@ static inline void * __init memblock_alloc_from_nopanic( } static inline void * __init memblock_alloc_node( - phys_addr_t size, int nid) + phys_addr_t size, phys_addr_t align, int nid) { - return memblock_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT, + return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, BOOTMEM_ALLOC_ACCESSIBLE, nid); } diff --git a/mm/sparse.c b/mm/sparse.c index cb900dda7fd2..d1296610562b 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -68,7 +68,7 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid) if (slab_is_available()) section = kzalloc_node(array_size, GFP_KERNEL, nid); else - section = memblock_alloc_node(array_size, nid); + section = memblock_alloc_node(array_size, 0, nid); return section; } -- cgit v1.2.3-71-gd317 From 4fc4a09e4cc1126c4e8a86c293425cffa2a2eb3c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:09:03 -0700 Subject: memblock: replace __alloc_bootmem with memblock_alloc_from The functions are equivalent, just the latter does not require the nobootmem translation layer. The conversion is done using the following semantic patch: @@ expression size, align, goal; @@ - __alloc_bootmem(size, align, goal) + memblock_alloc_from(size, align, goal) Link: http://lkml.kernel.org/r/1536927045-23536-21-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/core_cia.c | 2 +- arch/alpha/kernel/pci_iommu.c | 4 ++-- arch/alpha/kernel/setup.c | 2 +- arch/ia64/kernel/mca.c | 4 ++-- arch/ia64/mm/contig.c | 5 +++-- arch/mips/kernel/traps.c | 2 +- arch/sparc/kernel/prom_32.c | 2 +- arch/sparc/kernel/smp_64.c | 10 +++++----- arch/sparc/mm/init_32.c | 2 +- arch/sparc/mm/init_64.c | 9 ++++++--- arch/sparc/mm/srmmu.c | 10 +++++----- include/linux/bootmem.h | 8 ++++++++ 12 files changed, 36 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c index 4b38386f6e62..026ee955fd10 100644 --- a/arch/alpha/kernel/core_cia.c +++ b/arch/alpha/kernel/core_cia.c @@ -331,7 +331,7 @@ cia_prepare_tbia_workaround(int window) long i; /* Use minimal 1K map. 
*/ - ppte = __alloc_bootmem(CIA_BROKEN_TBIA_SIZE, 32768, 0); + ppte = memblock_alloc_from(CIA_BROKEN_TBIA_SIZE, 32768, 0); pte = (virt_to_phys(ppte) >> (PAGE_SHIFT - 1)) | 1; for (i = 0; i < CIA_BROKEN_TBIA_SIZE / sizeof(unsigned long); ++i) diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index b52d76fd534e..0c05493e6495 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -87,13 +87,13 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base, printk("%s: couldn't allocate arena ptes from node %d\n" " falling back to system-wide allocation\n", __func__, nid); - arena->ptes = __alloc_bootmem(mem_size, align, 0); + arena->ptes = memblock_alloc_from(mem_size, align, 0); } #else /* CONFIG_DISCONTIGMEM */ arena = alloc_bootmem(sizeof(*arena)); - arena->ptes = __alloc_bootmem(mem_size, align, 0); + arena->ptes = memblock_alloc_from(mem_size, align, 0); #endif /* CONFIG_DISCONTIGMEM */ diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 4f0d94471bc9..64c06a0adf3d 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -294,7 +294,7 @@ move_initrd(unsigned long mem_limit) unsigned long size; size = initrd_end - initrd_start; - start = __alloc_bootmem(PAGE_ALIGN(size), PAGE_SIZE, 0); + start = memblock_alloc_from(PAGE_ALIGN(size), PAGE_SIZE, 0); if (!start || __pa(start) + size > mem_limit) { initrd_start = initrd_end = 0; return NULL; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 6115464d5f03..5586926dd85d 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1835,8 +1835,8 @@ format_mca_init_stack(void *mca_data, unsigned long offset, /* Caller prevents this from being called after init */ static void * __ref mca_bootmem(void) { - return __alloc_bootmem(sizeof(struct ia64_mca_cpu), - KERNEL_STACK_SIZE, 0); + return memblock_alloc_from(sizeof(struct ia64_mca_cpu), + KERNEL_STACK_SIZE, 0); } /* Do per-CPU MCA-related initialization. 
*/ diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index e2e40bbd391c..9e5c23a6b8b4 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -85,8 +85,9 @@ skip: static inline void alloc_per_cpu_data(void) { - cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * num_possible_cpus(), - PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + cpu_data = memblock_alloc_from(PERCPU_PAGE_SIZE * num_possible_cpus(), + PERCPU_PAGE_SIZE, + __pa(MAX_DMA_ADDRESS)); } /** diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 5feef28deac8..623dc18f7f2f 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2263,7 +2263,7 @@ void __init trap_init(void) memblock_set_bottom_up(true); ebase = (unsigned long) - __alloc_bootmem(size, 1 << fls(size), 0); + memblock_alloc_from(size, 1 << fls(size), 0); memblock_set_bottom_up(false); /* diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index b51cbb9e87dc..4389944735c6 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -32,7 +32,7 @@ void * __init prom_early_alloc(unsigned long size) { void *ret; - ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL); + ret = memblock_alloc_from(size, SMP_CACHE_BYTES, 0UL); if (ret != NULL) memset(ret, 0, size); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 83ff88df6bdf..337febdf94b8 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1588,7 +1588,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, void *ptr; if (!node_online(node) || !NODE_DATA(node)) { - ptr = __alloc_bootmem(size, align, goal); + ptr = memblock_alloc_from(size, align, goal); pr_info("cpu %d has no node %d or node-local memory\n", cpu, node); pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", @@ -1601,7 +1601,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, } return ptr; #else - return __alloc_bootmem(size, align, goal); + return memblock_alloc_from(size, align, goal); #endif } @@ -1627,7 +1627,7 @@ static void __init pcpu_populate_pte(unsigned long addr) if (pgd_none(*pgd)) { pud_t *new; - new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); pgd_populate(&init_mm, pgd, new); } @@ -1635,7 +1635,7 @@ static void __init pcpu_populate_pte(unsigned long addr) if (pud_none(*pud)) { pmd_t *new; - new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); pud_populate(&init_mm, pud, new); } @@ -1643,7 +1643,7 @@ static void __init pcpu_populate_pte(unsigned long addr) if (!pmd_present(*pmd)) { pte_t *new; - new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); pmd_populate_kernel(&init_mm, pmd, new); } } diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 92634d4e440c..885dd3881874 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -265,7 +265,7 @@ void __init mem_init(void) i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5); i += 1; sparc_valid_addr_bitmap = (unsigned long *) - __alloc_bootmem(i << 2, SMP_CACHE_BYTES, 0UL); + memblock_alloc_from(i << 2, SMP_CACHE_BYTES, 0UL); if (sparc_valid_addr_bitmap == NULL) { prom_printf("mem_init: Cannot alloc valid_addr_bitmap.\n"); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index b338f0440f6b..6f965e6d01cc 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1811,7 +1811,8 @@ static 
unsigned long __ref kernel_map_range(unsigned long pstart, if (pgd_none(*pgd)) { pud_t *new; - new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, + PAGE_SIZE); alloc_bytes += PAGE_SIZE; pgd_populate(&init_mm, pgd, new); } @@ -1823,7 +1824,8 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, vstart = kernel_map_hugepud(vstart, vend, pud); continue; } - new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, + PAGE_SIZE); alloc_bytes += PAGE_SIZE; pud_populate(&init_mm, pud, new); } @@ -1836,7 +1838,8 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, vstart = kernel_map_hugepmd(vstart, vend, pmd); continue; } - new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, + PAGE_SIZE); alloc_bytes += PAGE_SIZE; pmd_populate_kernel(&init_mm, pmd, new); } diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index be9cb0065179..b48fea5ad9ef 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -303,13 +303,13 @@ static void __init srmmu_nocache_init(void) bitmap_bits = srmmu_nocache_size >> SRMMU_NOCACHE_BITMAP_SHIFT; - srmmu_nocache_pool = __alloc_bootmem(srmmu_nocache_size, - SRMMU_NOCACHE_ALIGN_MAX, 0UL); + srmmu_nocache_pool = memblock_alloc_from(srmmu_nocache_size, + SRMMU_NOCACHE_ALIGN_MAX, 0UL); memset(srmmu_nocache_pool, 0, srmmu_nocache_size); srmmu_nocache_bitmap = - __alloc_bootmem(BITS_TO_LONGS(bitmap_bits) * sizeof(long), - SMP_CACHE_BYTES, 0UL); + memblock_alloc_from(BITS_TO_LONGS(bitmap_bits) * sizeof(long), + SMP_CACHE_BYTES, 0UL); bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits); srmmu_swapper_pg_dir = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); @@ -467,7 +467,7 @@ static void __init sparc_context_init(int numctx) unsigned long size; size = numctx * sizeof(struct ctx_list); - ctx_list_pool = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL); + ctx_list_pool = memblock_alloc_from(size, SMP_CACHE_BYTES, 0UL); for (ctx = 0; ctx < numctx; ctx++) { struct ctx_list *clist; diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 3896af205303..c97c105cfc67 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -122,6 +122,14 @@ static inline void * __init memblock_alloc_raw( NUMA_NO_NODE); } +static inline void * __init memblock_alloc_from( + phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) +{ + return memblock_alloc_try_nid(size, align, min_addr, + BOOTMEM_ALLOC_ACCESSIBLE, + NUMA_NO_NODE); +} + static inline void * __init memblock_alloc_nopanic( phys_addr_t size, phys_addr_t align) { -- cgit v1.2.3-71-gd317 From 6c7835f8d0d1839ca93bd3cf6faa15706f03d604 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:09:15 -0700 Subject: mm: nobootmem: remove bootmem allocation APIs The bootmem compatibility APIs are not used and can be removed. Link: http://lkml.kernel.org/r/1536927045-23536-23-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. 
Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 47 ---------- mm/nobootmem.c | 224 ------------------------------------------------ 2 files changed, 271 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index c97c105cfc67..73f1272fce20 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -36,33 +36,6 @@ extern void free_bootmem_node(pg_data_t *pgdat, extern void free_bootmem(unsigned long physaddr, unsigned long size); extern void free_bootmem_late(unsigned long physaddr, unsigned long size); -extern void *__alloc_bootmem(unsigned long size, - unsigned long align, - unsigned long goal); -extern void *__alloc_bootmem_nopanic(unsigned long size, - unsigned long align, - unsigned long goal) __malloc; -extern void *__alloc_bootmem_node(pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal) __malloc; -void *__alloc_bootmem_node_high(pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal) __malloc; -extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal) __malloc; -void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal, - unsigned long limit) __malloc; -extern void *__alloc_bootmem_low(unsigned long size, - unsigned long align, - unsigned long goal) __malloc; - /* We are using top down, so it is safe to use 0 here */ #define BOOTMEM_LOW_LIMIT 0 @@ -70,26 +43,6 @@ extern void *__alloc_bootmem_low(unsigned long size, #define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL #endif -#define alloc_bootmem(x) \ - __alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) -#define alloc_bootmem_align(x, align) \ - __alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT) -#define alloc_bootmem_pages(x) \ - __alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) -#define alloc_bootmem_pages_nopanic(x) \ - __alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) -#define alloc_bootmem_node(pgdat, x) \ - __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) -#define alloc_bootmem_node_nopanic(pgdat, x) \ - __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT) -#define alloc_bootmem_pages_node(pgdat, x) \ - __alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT) - -#define alloc_bootmem_low(x) \ - __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) -#define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_low(x, PAGE_SIZE, 0) - /* FIXME: use MEMBLOCK_ALLOC_* variants here */ #define BOOTMEM_ALLOC_ACCESSIBLE 0 #define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0) diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 44ce7de1be8f..bc38e5673d31 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -33,41 +33,6 @@ unsigned long min_low_pfn; unsigned long max_pfn; unsigned long long max_possible_pfn; -static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, - u64 goal, u64 limit) -{ - void *ptr; - u64 addr; - enum memblock_flags flags = choose_memblock_flags(); - - if (limit > memblock.current_limit) - limit = memblock.current_limit; - -again: - addr = 
memblock_find_in_range_node(size, align, goal, limit, nid, - flags); - if (!addr && (flags & MEMBLOCK_MIRROR)) { - flags &= ~MEMBLOCK_MIRROR; - pr_warn("Could not allocate %pap bytes of mirrored memory\n", - &size); - goto again; - } - if (!addr) - return NULL; - - if (memblock_reserve(addr, size)) - return NULL; - - ptr = phys_to_virt(addr); - memset(ptr, 0, size); - /* - * The min_count is set to 0 so that bootmem allocated blocks - * are never reported as leaks. - */ - kmemleak_alloc(ptr, size, 0, 0); - return ptr; -} - /** * free_bootmem_late - free bootmem pages directly to page allocator * @addr: starting address of the range @@ -215,192 +180,3 @@ void __init free_bootmem(unsigned long addr, unsigned long size) { memblock_free(addr, size); } - -static void * __init ___alloc_bootmem_nopanic(unsigned long size, - unsigned long align, - unsigned long goal, - unsigned long limit) -{ - void *ptr; - - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc(size, GFP_NOWAIT); - -restart: - - ptr = __alloc_memory_core_early(NUMA_NO_NODE, size, align, goal, limit); - - if (ptr) - return ptr; - - if (goal != 0) { - goal = 0; - goto restart; - } - - return NULL; -} - -/** - * __alloc_bootmem_nopanic - allocate boot memory without panicking - * @size: size of the request in bytes - * @align: alignment of the region - * @goal: preferred starting address of the region - * - * The goal is dropped if it can not be satisfied and the allocation will - * fall back to memory below @goal. - * - * Allocation may happen on any node in the system. - * - * Return: address of the allocated region or %NULL on failure. - */ -void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, - unsigned long goal) -{ - unsigned long limit = -1UL; - - return ___alloc_bootmem_nopanic(size, align, goal, limit); -} - -static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, - unsigned long goal, unsigned long limit) -{ - void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit); - - if (mem) - return mem; - /* - * Whoops, we cannot satisfy the allocation request. - */ - pr_alert("bootmem alloc of %lu bytes failed!\n", size); - panic("Out of memory"); - return NULL; -} - -/** - * __alloc_bootmem - allocate boot memory - * @size: size of the request in bytes - * @align: alignment of the region - * @goal: preferred starting address of the region - * - * The goal is dropped if it can not be satisfied and the allocation will - * fall back to memory below @goal. - * - * Allocation may happen on any node in the system. - * - * The function panics if the request can not be satisfied. - * - * Return: address of the allocated region. 
- */ -void * __init __alloc_bootmem(unsigned long size, unsigned long align, - unsigned long goal) -{ - unsigned long limit = -1UL; - - return ___alloc_bootmem(size, align, goal, limit); -} - -void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal, - unsigned long limit) -{ - void *ptr; - -again: - ptr = __alloc_memory_core_early(pgdat->node_id, size, align, - goal, limit); - if (ptr) - return ptr; - - ptr = __alloc_memory_core_early(NUMA_NO_NODE, size, align, - goal, limit); - if (ptr) - return ptr; - - if (goal) { - goal = 0; - goto again; - } - - return NULL; -} - -void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal) -{ - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - - return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0); -} - -static void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal, - unsigned long limit) -{ - void *ptr; - - ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, limit); - if (ptr) - return ptr; - - pr_alert("bootmem alloc of %lu bytes failed!\n", size); - panic("Out of memory"); - return NULL; -} - -/** - * __alloc_bootmem_node - allocate boot memory from a specific node - * @pgdat: node to allocate from - * @size: size of the request in bytes - * @align: alignment of the region - * @goal: preferred starting address of the region - * - * The goal is dropped if it can not be satisfied and the allocation will - * fall back to memory below @goal. - * - * Allocation may fall back to any node in the system if the specified node - * can not hold the requested memory. - * - * The function panics if the request can not be satisfied. - * - * Return: address of the allocated region. - */ -void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal) -{ - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - - return ___alloc_bootmem_node(pgdat, size, align, goal, 0); -} - -void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal) -{ - return __alloc_bootmem_node(pgdat, size, align, goal); -} - - -/** - * __alloc_bootmem_low - allocate low boot memory - * @size: size of the request in bytes - * @align: alignment of the region - * @goal: preferred starting address of the region - * - * The goal is dropped if it can not be satisfied and the allocation will - * fall back to memory below @goal. - * - * Allocation may happen on any node in the system. - * - * The function panics if the request can not be satisfied. - * - * Return: address of the allocated region. - */ -void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, - unsigned long goal) -{ - return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); -} -- cgit v1.2.3-71-gd317 From 2013288f723887837d2f1cebef5fcf663b2319de Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:09:21 -0700 Subject: memblock: replace free_bootmem{_node} with memblock_free The free_bootmem and free_bootmem_node are merely wrappers for memblock_free. 
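For reference, the wrappers this patch deletes from mm/nobootmem.c make the equivalence explicit (a condensed sketch of the removed definitions, which appear in full in the hunks below):

void __init free_bootmem(unsigned long addr, unsigned long size)
{
	memblock_free(addr, size);
}

void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
			      unsigned long size)
{
	memblock_free(physaddr, size);	/* the pgdat argument is unused */
}

A call site therefore converts mechanically: for example, free_bootmem_node(NODE_DATA(0), __pa(initrd_start), PAGE_ALIGN(size)) becomes memblock_free(__pa(initrd_start), PAGE_ALIGN(size)), as in the alpha core_irongate hunk below.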
Replace their usage with a call to memblock_free using the following semantic patch: @@ expression e1, e2, e3; @@ ( - free_bootmem(e1, e2) + memblock_free(e1, e2) | - free_bootmem_node(e1, e2, e3) + memblock_free(e2, e3) ) Link: http://lkml.kernel.org/r/1536927045-23536-24-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/core_irongate.c | 3 +-- arch/arm64/mm/init.c | 2 +- arch/mips/kernel/setup.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/sparc/kernel/smp_64.c | 2 +- arch/um/kernel/mem.c | 3 ++- arch/unicore32/mm/init.c | 2 +- arch/x86/kernel/setup_percpu.c | 3 ++- arch/x86/kernel/tce_64.c | 3 ++- arch/x86/xen/p2m.c | 3 ++- drivers/macintosh/smu.c | 2 +- drivers/usb/early/xhci-dbc.c | 11 ++++++----- drivers/xen/swiotlb-xen.c | 4 +++- include/linux/bootmem.h | 4 ---- mm/nobootmem.c | 30 ------------------------------ 15 files changed, 24 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c index f70986683fc6..35572be9deb5 100644 --- a/arch/alpha/kernel/core_irongate.c +++ b/arch/alpha/kernel/core_irongate.c @@ -234,8 +234,7 @@ albacore_init_arch(void) unsigned long size; size = initrd_end - initrd_start; - free_bootmem_node(NODE_DATA(0), __pa(initrd_start), - PAGE_ALIGN(size)); + memblock_free(__pa(initrd_start), PAGE_ALIGN(size)); if (!move_initrd(pci_mem)) printk("irongate_init_arch: initrd too big " "(%ldK)\ndisabling initrd\n", diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 3cf87341859f..2ddb1c5e988d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -536,7 +536,7 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) * memmap array. 
*/ if (pg < pgend) - free_bootmem(pg, pgend - pg); + memblock_free(pg, pgend - pg); } /* diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index c1f95359d298..31522d3bc8bf 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -561,7 +561,7 @@ static void __init bootmem_init(void) extern void show_kernel_relocation(const char *level); offset = __pa_symbol(_text) - __pa_symbol(VMLINUX_LOAD_ADDRESS); - free_bootmem(__pa_symbol(VMLINUX_LOAD_ADDRESS), offset); + memblock_free(__pa_symbol(VMLINUX_LOAD_ADDRESS), offset); #if defined(CONFIG_DEBUG_KERNEL) && defined(CONFIG_DEBUG_INFO) /* diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 26d7c49a157b..f90ab3ea9af3 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -771,7 +771,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) static void __init pcpu_fc_free(void *ptr, size_t size) { - free_bootmem(__pa(ptr), size); + memblock_free(__pa(ptr), size); } static int pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 337febdf94b8..a087a6a25f06 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1607,7 +1607,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, static void __init pcpu_free_bootmem(void *ptr, size_t size) { - free_bootmem(__pa(ptr), size); + memblock_free(__pa(ptr), size); } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 185f6bb79269..3555c139389c 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,7 @@ void __init mem_init(void) */ brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); - free_bootmem(__pa(brk_end), uml_reserved - brk_end); + memblock_free(__pa(brk_end), uml_reserved - brk_end); uml_reserved = brk_end; /* this will put all low memory onto the freelists */ diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 8f8699e62bd5..4ba51991c7de 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -238,7 +238,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn) * free the section of the memmap array. 
*/ if (pg < pgend) - free_bootmem(pg, pgend - pg); + memblock_free(pg, pgend - pg); } /* diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 041663abc028..a006f1ba4c39 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -135,7 +136,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) static void __init pcpu_fc_free(void *ptr, size_t size) { - free_bootmem(__pa(ptr), size); + memblock_free(__pa(ptr), size); } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c index 54c9b5a696b1..75730ce01f8d 100644 --- a/arch/x86/kernel/tce_64.c +++ b/arch/x86/kernel/tce_64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -186,5 +187,5 @@ void __init free_tce_table(void *tbl) size = table_size_to_number_of_entries(specified_table_size); size *= TCE_ENTRY_SIZE; - free_bootmem(__pa(tbl), size); + memblock_free(__pa(tbl), size); } diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 5de761b4cec8..b3e11afed25b 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -190,7 +191,7 @@ static void * __ref alloc_p2m_page(void) static void __ref free_p2m_page(void *p) { if (unlikely(!slab_is_available())) { - free_bootmem((unsigned long)p, PAGE_SIZE); + memblock_free((unsigned long)p, PAGE_SIZE); return; } diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 332fcca30944..0069f9084f9f 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -569,7 +569,7 @@ fail_msg_node: fail_db_node: of_node_put(smu->db_node); fail_bootmem: - free_bootmem(__pa(smu), sizeof(struct smu_device)); + memblock_free(__pa(smu), sizeof(struct smu_device)); smu = NULL; fail_np: of_node_put(np); diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index 21494c8973b6..ddc5fa88f268 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -191,7 +192,7 @@ static void __init xdbc_free_ring(struct xdbc_ring *ring) if (!seg) return; - free_bootmem(seg->dma, PAGE_SIZE); + memblock_free(seg->dma, PAGE_SIZE); ring->segment = NULL; } @@ -675,10 +676,10 @@ int __init early_xdbc_setup_hardware(void) xdbc_free_ring(&xdbc.in_ring); if (xdbc.table_dma) - free_bootmem(xdbc.table_dma, PAGE_SIZE); + memblock_free(xdbc.table_dma, PAGE_SIZE); if (xdbc.out_dma) - free_bootmem(xdbc.out_dma, PAGE_SIZE); + memblock_free(xdbc.out_dma, PAGE_SIZE); xdbc.table_base = NULL; xdbc.out_buf = NULL; @@ -997,8 +998,8 @@ free_and_quit: xdbc_free_ring(&xdbc.evt_ring); xdbc_free_ring(&xdbc.out_ring); xdbc_free_ring(&xdbc.in_ring); - free_bootmem(xdbc.table_dma, PAGE_SIZE); - free_bootmem(xdbc.out_dma, PAGE_SIZE); + memblock_free(xdbc.table_dma, PAGE_SIZE); + memblock_free(xdbc.out_dma, PAGE_SIZE); writel(0, &xdbc.xdbc_reg->control); early_iounmap(xdbc.xhci_base, xdbc.xhci_length); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 91a6208ec1a5..c5f26a87d238 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -36,6 +36,7 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -248,7 +249,8 @@ retry: xen_io_tlb_nslabs); if (rc) { if (early) - 
free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes)); + memblock_free(__pa(xen_io_tlb_start), + PAGE_ALIGN(bytes)); else { free_pages((unsigned long)xen_io_tlb_start, order); xen_io_tlb_start = NULL; diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 73f1272fce20..706cf8ef6678 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -30,10 +30,6 @@ extern unsigned long free_all_bootmem(void); extern void reset_node_managed_pages(pg_data_t *pgdat); extern void reset_all_zones_managed_pages(void); -extern void free_bootmem_node(pg_data_t *pgdat, - unsigned long addr, - unsigned long size); -extern void free_bootmem(unsigned long physaddr, unsigned long size); extern void free_bootmem_late(unsigned long physaddr, unsigned long size); /* We are using top down, so it is safe to use 0 here */ diff --git a/mm/nobootmem.c b/mm/nobootmem.c index bc38e5673d31..85e1822ce918 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -150,33 +150,3 @@ unsigned long __init free_all_bootmem(void) return pages; } - -/** - * free_bootmem_node - mark a page range as usable - * @pgdat: node the range resides on - * @physaddr: starting physical address of the range - * @size: size of the range in bytes - * - * Partial pages will be considered reserved and left as they are. - * - * The range must reside completely on the specified node. - */ -void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, - unsigned long size) -{ - memblock_free(physaddr, size); -} - -/** - * free_bootmem - mark a page range as usable - * @addr: starting physical address of the range - * @size: size of the range in bytes - * - * Partial pages will be considered reserved and left as they are. - * - * The range must be contiguous but may span node boundaries. - */ -void __init free_bootmem(unsigned long addr, unsigned long size) -{ - memblock_free(addr, size); -} -- cgit v1.2.3-71-gd317 From 53ab85ebfd27cdf16c8ddc72781c072a63bef3cb Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:09:25 -0700 Subject: memblock: replace free_bootmem_late with memblock_free_late The free_bootmem_late and memblock_free_late do exactly the same thing: they iterate over a range and give pages to the page allocator. Replace calls to free_bootmem_late with calls to memblock_free_late and remove the bootmem variant. Link: http://lkml.kernel.org/r/1536927045-23536-25-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. 
Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/kernel/mdesc.c | 3 ++- arch/x86/platform/efi/quirks.c | 6 +++--- drivers/firmware/efi/apple-properties.c | 2 +- include/linux/bootmem.h | 2 -- mm/nobootmem.c | 24 ------------------------ 5 files changed, 6 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 59131e72ee78..a41526bd91e2 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -190,7 +191,7 @@ static void __init mdesc_memblock_free(struct mdesc_handle *hp) alloc_size = PAGE_ALIGN(hp->handle_size); start = __pa(hp); - free_bootmem_late(start, alloc_size); + memblock_free_late(start, alloc_size); } static struct mdesc_mem_ops memblock_mdesc_ops = { diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 669babcaf245..4b70d0f5a803 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -333,7 +333,7 @@ void __init efi_reserve_boot_services(void) /* * Because the following memblock_reserve() is paired - * with free_bootmem_late() for this region in + * with memblock_free_late() for this region in * efi_free_boot_services(), we must be extremely * careful not to reserve, and subsequently free, * critical regions of memory (like the kernel image) or @@ -364,7 +364,7 @@ void __init efi_reserve_boot_services(void) * doesn't make sense as far as the firmware is * concerned, but it does provide us with a way to tag * those regions that must not be paired with - * free_bootmem_late(). + * memblock_free_late(). 
*/ md->attribute |= EFI_MEMORY_RUNTIME; } @@ -414,7 +414,7 @@ void __init efi_free_boot_services(void) size -= rm_size; } - free_bootmem_late(start, size); + memblock_free_late(start, size); } if (!num_entries) diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c index 60a95719ecb8..2b675f788b61 100644 --- a/drivers/firmware/efi/apple-properties.c +++ b/drivers/firmware/efi/apple-properties.c @@ -235,7 +235,7 @@ static int __init map_properties(void) */ data->len = 0; memunmap(data); - free_bootmem_late(pa_data + sizeof(*data), data_len); + memblock_free_late(pa_data + sizeof(*data), data_len); return ret; } diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 706cf8ef6678..bcc7e2fcb6a6 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -30,8 +30,6 @@ extern unsigned long free_all_bootmem(void); extern void reset_node_managed_pages(pg_data_t *pgdat); extern void reset_all_zones_managed_pages(void); -extern void free_bootmem_late(unsigned long physaddr, unsigned long size); - /* We are using top down, so it is safe to use 0 here */ #define BOOTMEM_LOW_LIMIT 0 diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 85e1822ce918..ee0f7fc37fd1 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -33,30 +33,6 @@ unsigned long min_low_pfn; unsigned long max_pfn; unsigned long long max_possible_pfn; -/** - * free_bootmem_late - free bootmem pages directly to page allocator - * @addr: starting address of the range - * @size: size of the range in bytes - * - * This is only useful when the bootmem allocator has already been torn - * down, but we are still initializing the system. Pages are given directly - * to the page allocator, no bootmem metadata is updated because it is gone. - */ -void __init free_bootmem_late(unsigned long addr, unsigned long size) -{ - unsigned long cursor, end; - - kmemleak_free_part_phys(addr, size); - - cursor = PFN_UP(addr); - end = PFN_DOWN(addr + size); - - for (; cursor < end; cursor++) { - __free_pages_bootmem(pfn_to_page(cursor), cursor, 0); - totalram_pages++; - } -} - static void __init __free_pages_memory(unsigned long start, unsigned long end) { int order; -- cgit v1.2.3-71-gd317 From c6ffc5ca8fb311a89cb6de5c31b6511308ddac8d Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:09:30 -0700 Subject: memblock: rename free_all_bootmem to memblock_free_all The conversion is done using sed -i 's@free_all_bootmem@memblock_free_all@' \ $(git grep -l free_all_bootmem) Link: http://lkml.kernel.org/r/1536927045-23536-26-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. 
Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/mm/init.c | 2 +- arch/arc/mm/init.c | 2 +- arch/arm/mm/init.c | 2 +- arch/arm64/mm/init.c | 2 +- arch/c6x/mm/init.c | 2 +- arch/h8300/mm/init.c | 2 +- arch/hexagon/mm/init.c | 2 +- arch/ia64/mm/init.c | 2 +- arch/m68k/mm/init.c | 2 +- arch/microblaze/mm/init.c | 2 +- arch/mips/loongson64/loongson-3/numa.c | 2 +- arch/mips/mm/init.c | 2 +- arch/mips/sgi-ip27/ip27-memory.c | 2 +- arch/nds32/mm/init.c | 2 +- arch/nios2/mm/init.c | 2 +- arch/openrisc/mm/init.c | 2 +- arch/parisc/mm/init.c | 2 +- arch/powerpc/mm/mem.c | 2 +- arch/riscv/mm/init.c | 2 +- arch/s390/mm/init.c | 2 +- arch/sh/mm/init.c | 2 +- arch/sparc/mm/init_32.c | 2 +- arch/sparc/mm/init_64.c | 4 ++-- arch/um/kernel/mem.c | 2 +- arch/unicore32/mm/init.c | 2 +- arch/x86/mm/highmem_32.c | 2 +- arch/x86/mm/init_32.c | 4 ++-- arch/x86/mm/init_64.c | 4 ++-- arch/x86/xen/mmu_pv.c | 2 +- arch/xtensa/mm/init.c | 2 +- include/linux/bootmem.h | 2 +- mm/memblock.c | 2 +- mm/nobootmem.c | 4 ++-- mm/page_alloc.c | 2 +- mm/page_poison.c | 2 +- 35 files changed, 39 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 9d74520298ab..853d15344934 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -282,7 +282,7 @@ mem_init(void) { set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); } diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index ba145065c579..0f29c6548779 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -218,7 +218,7 @@ void __init mem_init(void) free_highmem_page(pfn_to_page(tmp)); #endif - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); } diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 0cc8e04295a4..d421a10c93a8 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -508,7 +508,7 @@ void __init mem_init(void) /* this will put all unused low memory onto the freelists */ free_unused_memmap(); - free_all_bootmem(); + memblock_free_all(); #ifdef CONFIG_SA1111 /* now that our DMA memory is actually so designated, we can free it */ diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 2ddb1c5e988d..d8d73073835f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -599,7 +599,7 @@ void __init mem_init(void) free_unused_memmap(); #endif /* this will put all unused low memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); kexec_reserve_crashkres_pages(); diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index dc369ad8b0ba..3383df8b3508 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -62,7 +62,7 @@ void __init mem_init(void) high_memory = (void *)(memory_end & PAGE_MASK); /* this will put all memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); } diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index 5d31ac9d7a8d..f2bf4487aabd 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -96,7 +96,7 @@ void __init mem_init(void) max_mapnr = MAP_NR(high_memory); /* this will put all low 
memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); } diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index d789b9cc0189..88643faf3981 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -68,7 +68,7 @@ unsigned long long kmap_generation; void __init mem_init(void) { /* No idea where this is actually declared. Seems to evade LXR. */ - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); /* diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 2169ca52bdf4..43ea4a47163d 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -627,7 +627,7 @@ mem_init (void) set_max_mapnr(max_low_pfn); high_memory = __va(max_low_pfn * PAGE_SIZE); - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); /* diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 977363eda125..ae49ae4d3049 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -140,7 +140,7 @@ static inline void init_pointer_tables(void) void __init mem_init(void) { /* this will put all memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); init_pointer_tables(); mem_init_print_info(NULL); } diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 8c7f074ec20f..9989740d397a 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -204,7 +204,7 @@ void __init mem_init(void) high_memory = (void *)__va(memory_start + lowmem_size - 1); /* this will put all memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); #ifdef CONFIG_HIGHMEM highmem_setup(); #endif diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index c1e6ec52c614..703ad4536fe0 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -272,7 +272,7 @@ void __init paging_init(void) void __init mem_init(void) { high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); - free_all_bootmem(); + memblock_free_all(); setup_zero_pages(); /* This comes from node 0 */ mem_init_print_info(NULL); } diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 842a49ef9909..0893b6136498 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -463,7 +463,7 @@ void __init mem_init(void) high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); maar_init(); - free_all_bootmem(); + memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. 
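
Across the architecture hunks in this patch the change is the same one-line rename inside mem_init(); a minimal composite sketch, assuming the common flat-memory setup and not matching any single architecture exactly:

#include <linux/memblock.h>
#include <linux/mm.h>

void __init mem_init(void)
{
	high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT);

	/* Release all free memblock ranges to the buddy allocator;
	 * this is the renamed free_all_bootmem(). */
	memblock_free_all();

	mem_init_print_info(NULL);
}
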
*/ mem_init_free_highmem(); mem_init_print_info(NULL); diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 6f7bef052b7f..cb1f1a6a166d 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -475,7 +475,7 @@ void __init paging_init(void) void __init mem_init(void) { high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT); - free_all_bootmem(); + memblock_free_all(); setup_zero_pages(); /* This comes from node 0 */ mem_init_print_info(NULL); } diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c index 5af81b866aa5..66d3e9cf498d 100644 --- a/arch/nds32/mm/init.c +++ b/arch/nds32/mm/init.c @@ -192,7 +192,7 @@ void __init mem_init(void) free_highmem(); /* this will put all low memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); pr_info("virtual kernel memory layout:\n" diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index c92fe4234009..12923501d94f 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -73,7 +73,7 @@ void __init mem_init(void) high_memory = __va(end_mem); /* this will put all memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); } diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index b7670de26c11..91a6a9ab7598 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -213,7 +213,7 @@ void __init mem_init(void) memset((void *)empty_zero_page, 0, PAGE_SIZE); /* this will put all low memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index f88a52b8531c..7e7a3126c5e9 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -621,7 +621,7 @@ void __init mem_init(void) high_memory = __va((max_pfn << PAGE_SHIFT)); set_max_mapnr(page_to_pfn(virt_to_page(high_memory - 1)) + 1); - free_all_bootmem(); + memblock_free_all(); #ifdef CONFIG_PA11 if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) { diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index dd949d6649a2..b3fe79064a69 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -349,7 +349,7 @@ void __init mem_init(void) high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); set_max_mapnr(max_pfn); - free_all_bootmem(); + memblock_free_all(); #ifdef CONFIG_HIGHMEM { diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 58a522f9bcc3..d58c111099b3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -55,7 +55,7 @@ void __init mem_init(void) #endif /* CONFIG_FLATMEM */ high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 92d7a153e72a..873f6ee1c46d 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -139,7 +139,7 @@ void __init mem_init(void) cmma_init(); /* this will put all low memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); setup_zero_pages(); /* Setup zeroed pages. 
*/ cmma_init_nodat(); diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index c884b760e52f..21447f866415 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -350,7 +350,7 @@ void __init mem_init(void) high_memory = max_t(void *, high_memory, __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT)); - free_all_bootmem(); + memblock_free_all(); /* Set this up early, so we can take care of the zero page */ cpu_cache_init(); diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 885dd3881874..880714565c40 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -277,7 +277,7 @@ void __init mem_init(void) max_mapnr = last_valid_pfn - pfn_base; high_memory = __va(max_low_pfn << PAGE_SHIFT); - free_all_bootmem(); + memblock_free_all(); for (i = 0; sp_banks[i].num_bytes != 0; i++) { unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 6f965e6d01cc..a8c3453195e6 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2545,12 +2545,12 @@ void __init mem_init(void) { high_memory = __va(last_valid_pfn << PAGE_SHIFT); - free_all_bootmem(); + memblock_free_all(); /* * Must be done after boot memory is put on freelist, because here we * might set fields in deferred struct pages that have not yet been - * initialized, and free_all_bootmem() initializes all the reserved + * initialized, and memblock_free_all() initializes all the reserved * deferred pages for us. */ register_page_bootmem_info(); diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 3555c139389c..2c672a8f4571 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -51,7 +51,7 @@ void __init mem_init(void) uml_reserved = brk_end; /* this will put all low memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); max_low_pfn = totalram_pages; max_pfn = totalram_pages; mem_init_print_info(NULL); diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 4ba51991c7de..44fd0e8fbe87 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -286,7 +286,7 @@ void __init mem_init(void) free_unused_memmap(&meminfo); /* this will put all unused low memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); printk(KERN_NOTICE "Virtual kernel memory layout:\n" diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 6d18b70ed5a9..62915a5e0fa2 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -111,7 +111,7 @@ void __init set_highmem_pages_init(void) /* * Explicitly reset zone->managed_pages because set_highmem_pages_init() - * is invoked before free_all_bootmem() + * is invoked before memblock_free_all() */ reset_all_zones_managed_pages(); for_each_zone(zone) { diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 142c7d9f89cc..3bbe5f58a67d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -771,7 +771,7 @@ void __init mem_init(void) #endif /* * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to - * be done before free_all_bootmem(). Memblock use free low memory for + * be done before memblock_free_all(). 
Memblock use free low memory for * temporary data (see find_range_array()) and for this purpose can use * pages that was already passed to the buddy allocator, hence marked as * not accessible in the page tables when compiled with @@ -781,7 +781,7 @@ void __init mem_init(void) set_highmem_pages_init(); /* this will put all low memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); after_bootmem = 1; x86_init.hyper.init_after_bootmem(); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index f39b51244fe2..bfb0bedc21d3 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1188,14 +1188,14 @@ void __init mem_init(void) /* clear_bss() already clear the empty_zero_page */ /* this will put all memory onto the freelists */ - free_all_bootmem(); + memblock_free_all(); after_bootmem = 1; x86_init.hyper.init_after_bootmem(); /* * Must be done after boot memory is put on freelist, because here we * might set fields in deferred struct pages that have not yet been - * initialized, and free_all_bootmem() initializes all the reserved + * initialized, and memblock_free_all() initializes all the reserved * deferred pages for us. */ register_page_bootmem_info(); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 70ea598a37d2..0d7b3ae4960b 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -864,7 +864,7 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, * The init_mm pagetable is really pinned as soon as its created, but * that's before we have page structures to store the bits. So do all * the book-keeping now once struct pages for allocated pages are - * initialized. This happens only after free_all_bootmem() is called. + * initialized. This happens only after memblock_free_all() is called. 
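
The ordering rule these comments repeat can be reduced to two calls; in this sketch only memblock_free_all() and register_page_bootmem_info() are real symbols, and the wrapper function is illustrative:

void __init late_boot_mm_setup(void)
{
	/* Initializes the deferred struct pages backing reserved memory... */
	memblock_free_all();

	/* ...so fields in those struct pages may only be written afterwards. */
	register_page_bootmem_info();
}
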
*/ static void __init xen_after_bootmem(void) { diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 34aead7dcb48..f7fbe6334939 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -152,7 +152,7 @@ void __init mem_init(void) max_mapnr = max_pfn - ARCH_PFN_OFFSET; high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT); - free_all_bootmem(); + memblock_free_all(); mem_init_print_info(NULL); pr_info("virtual kernel memory layout:\n" diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index bcc7e2fcb6a6..b58873a567b2 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -26,7 +26,7 @@ extern unsigned long max_pfn; */ extern unsigned long long max_possible_pfn; -extern unsigned long free_all_bootmem(void); +extern unsigned long memblock_free_all(void); extern void reset_node_managed_pages(pg_data_t *pgdat); extern void reset_all_zones_managed_pages(void); diff --git a/mm/memblock.c b/mm/memblock.c index 58340de3ebc6..e2f397174734 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1360,7 +1360,7 @@ static void * __init memblock_alloc_internal( /* * Detect any accidental use of these APIs after slab is ready, as at * this moment memblock may be deinitialized already and its - * internal data may be destroyed (after execution of free_all_bootmem) + * internal data may be destroyed (after execution of memblock_free_all) */ if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, nid); diff --git a/mm/nobootmem.c b/mm/nobootmem.c index ee0f7fc37fd1..bb64b09ca4d2 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -111,11 +111,11 @@ void __init reset_all_zones_managed_pages(void) } /** - * free_all_bootmem - release free pages to the buddy allocator + * memblock_free_all - release free pages to the buddy allocator * * Return: the number of pages actually released. */ -unsigned long __init free_all_bootmem(void) +unsigned long __init memblock_free_all(void) { unsigned long pages; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8ca6954fdcdc..6e9b8387a706 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5476,7 +5476,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) /* * Initially all pages are reserved - free ones are freed - * up by free_all_bootmem() once the early boot process is + * up by memblock_free_all() once the early boot process is * done. Non-atomic initialization, single-pass. */ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, diff --git a/mm/page_poison.c b/mm/page_poison.c index aa2b3d34e8ea..f7e2a676365a 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -21,7 +21,7 @@ bool page_poisoning_enabled(void) { /* * Assumes that debug_pagealloc_enabled is set before - * free_all_bootmem. + * memblock_free_all. * Page poisoning is debug page alloc for some arches. If * either of those options are enabled, enable poisoning. */ -- cgit v1.2.3-71-gd317 From 57c8a661d95dff48dd9c2f2496139082bbaf241a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 30 Oct 2018 15:09:49 -0700 Subject: mm: remove include/linux/bootmem.h Move remaining definitions and declarations from include/linux/bootmem.h into include/linux/memblock.h and remove the redundant header. 
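
The header removal itself is mechanical. A Coccinelle rule of roughly the following shape performs the include swap described next; this is a reconstruction for illustration, and the semantic patch actually submitted with the series may differ:

// Illustrative reconstruction, not the literal semantic patch.
@@
@@
- #include <linux/bootmem.h>
+ #include <linux/memblock.h>
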
The includes were replaced with the semantic patch below and then semi-automated removal of duplicated '#include @@ @@ - #include + #include [sfr@canb.auug.org.au: dma-direct: fix up for the removal of linux/bootmem.h] Link: http://lkml.kernel.org/r/20181002185342.133d1680@canb.auug.org.au [sfr@canb.auug.org.au: powerpc: fix up for removal of linux/bootmem.h] Link: http://lkml.kernel.org/r/20181005161406.73ef8727@canb.auug.org.au [sfr@canb.auug.org.au: x86/kaslr, ACPI/NUMA: fix for linux/bootmem.h removal] Link: http://lkml.kernel.org/r/20181008190341.5e396491@canb.auug.org.au Link: http://lkml.kernel.org/r/1536927045-23536-30-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Signed-off-by: Stephen Rothwell Acked-by: Michal Hocko Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Guan Xuetao Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Jonas Bonn Cc: Jonathan Corbet Cc: Ley Foon Tan Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Turner Cc: Michael Ellerman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Richard Kuo Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Serge Semin Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/core_cia.c | 2 +- arch/alpha/kernel/core_irongate.c | 1 - arch/alpha/kernel/core_marvel.c | 2 +- arch/alpha/kernel/core_titan.c | 2 +- arch/alpha/kernel/core_tsunami.c | 2 +- arch/alpha/kernel/pci-noop.c | 2 +- arch/alpha/kernel/pci.c | 2 +- arch/alpha/kernel/pci_iommu.c | 2 +- arch/alpha/kernel/setup.c | 1 - arch/alpha/kernel/sys_nautilus.c | 2 +- arch/alpha/mm/init.c | 2 +- arch/alpha/mm/numa.c | 1 - arch/arc/kernel/unwind.c | 2 +- arch/arc/mm/highmem.c | 2 +- arch/arc/mm/init.c | 1 - arch/arm/kernel/devtree.c | 1 - arch/arm/kernel/setup.c | 1 - arch/arm/mach-omap2/omap_hwmod.c | 2 +- arch/arm/mm/dma-mapping.c | 1 - arch/arm/mm/init.c | 1 - arch/arm/xen/mm.c | 1 - arch/arm/xen/p2m.c | 2 +- arch/arm64/kernel/acpi.c | 1 - arch/arm64/kernel/acpi_numa.c | 1 - arch/arm64/kernel/setup.c | 1 - arch/arm64/mm/dma-mapping.c | 2 +- arch/arm64/mm/init.c | 1 - arch/arm64/mm/kasan_init.c | 1 - arch/arm64/mm/numa.c | 1 - arch/c6x/kernel/setup.c | 1 - arch/c6x/mm/init.c | 2 +- arch/h8300/kernel/setup.c | 1 - arch/h8300/mm/init.c | 2 +- arch/hexagon/kernel/dma.c | 2 +- arch/hexagon/kernel/setup.c | 2 +- arch/hexagon/mm/init.c | 1 - arch/ia64/kernel/crash.c | 2 +- arch/ia64/kernel/efi.c | 2 +- arch/ia64/kernel/ia64_ksyms.c | 2 +- arch/ia64/kernel/iosapic.c | 2 +- arch/ia64/kernel/mca.c | 2 +- arch/ia64/kernel/mca_drv.c | 2 +- arch/ia64/kernel/setup.c | 1 - arch/ia64/kernel/smpboot.c | 2 +- arch/ia64/kernel/topology.c | 2 +- arch/ia64/kernel/unwind.c | 2 +- arch/ia64/mm/contig.c | 1 - arch/ia64/mm/discontig.c | 1 - arch/ia64/mm/init.c | 1 - arch/ia64/mm/numa.c | 2 +- arch/ia64/mm/tlb.c | 2 +- arch/ia64/pci/pci.c | 2 +- arch/ia64/sn/kernel/bte.c | 2 +- arch/ia64/sn/kernel/io_common.c | 2 +- arch/ia64/sn/kernel/setup.c | 2 +- arch/m68k/atari/stram.c | 2 +- arch/m68k/coldfire/m54xx.c | 2 +- arch/m68k/kernel/setup_mm.c | 1 - arch/m68k/kernel/setup_no.c | 1 - arch/m68k/kernel/uboot.c | 2 +- arch/m68k/mm/init.c | 2 +- arch/m68k/mm/mcfmmu.c | 1 - arch/m68k/mm/motorola.c | 1 - arch/m68k/mm/sun3mmu.c | 2 +- arch/m68k/sun3/config.c | 2 +- arch/m68k/sun3/dvma.c | 2 +- arch/m68k/sun3/mmu_emu.c | 2 +- arch/m68k/sun3/sun3dvma.c | 2 +- arch/m68k/sun3x/dvma.c | 2 +- 
arch/microblaze/mm/consistent.c | 2 +- arch/microblaze/mm/init.c | 3 +- arch/microblaze/pci/pci-common.c | 2 +- arch/mips/ar7/memory.c | 2 +- arch/mips/ath79/setup.c | 2 +- arch/mips/bcm63xx/prom.c | 2 +- arch/mips/bcm63xx/setup.c | 2 +- arch/mips/bmips/setup.c | 2 +- arch/mips/cavium-octeon/dma-octeon.c | 2 +- arch/mips/dec/prom/memory.c | 2 +- arch/mips/emma/common/prom.c | 2 +- arch/mips/fw/arc/memory.c | 2 +- arch/mips/jazz/jazzdma.c | 2 +- arch/mips/kernel/crash.c | 2 +- arch/mips/kernel/crash_dump.c | 2 +- arch/mips/kernel/prom.c | 2 +- arch/mips/kernel/setup.c | 1 - arch/mips/kernel/traps.c | 1 - arch/mips/kernel/vpe.c | 2 +- arch/mips/kvm/commpage.c | 2 +- arch/mips/kvm/dyntrans.c | 2 +- arch/mips/kvm/emulate.c | 2 +- arch/mips/kvm/interrupt.c | 2 +- arch/mips/kvm/mips.c | 2 +- arch/mips/lantiq/prom.c | 2 +- arch/mips/lasat/prom.c | 2 +- arch/mips/loongson64/common/init.c | 2 +- arch/mips/loongson64/loongson-3/numa.c | 1 - arch/mips/mm/init.c | 2 +- arch/mips/mm/pgtable-32.c | 2 +- arch/mips/mti-malta/malta-memory.c | 2 +- arch/mips/netlogic/xlp/dt.c | 2 +- arch/mips/pci/pci-legacy.c | 2 +- arch/mips/pci/pci.c | 2 +- arch/mips/ralink/of.c | 2 +- arch/mips/rb532/prom.c | 2 +- arch/mips/sgi-ip27/ip27-memory.c | 1 - arch/mips/sibyte/common/cfe.c | 2 +- arch/mips/sibyte/swarm/setup.c | 2 +- arch/mips/txx9/rbtx4938/prom.c | 2 +- arch/nds32/kernel/setup.c | 3 +- arch/nds32/mm/highmem.c | 2 +- arch/nds32/mm/init.c | 3 +- arch/nios2/kernel/prom.c | 2 +- arch/nios2/kernel/setup.c | 1 - arch/nios2/mm/init.c | 2 +- arch/openrisc/kernel/setup.c | 3 +- arch/openrisc/mm/init.c | 3 +- arch/parisc/mm/init.c | 1 - arch/powerpc/kernel/pci_32.c | 2 +- arch/powerpc/kernel/setup-common.c | 1 - arch/powerpc/kernel/setup_64.c | 3 +- arch/powerpc/lib/alloc.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 1 - arch/powerpc/mm/mem.c | 3 +- arch/powerpc/mm/mmu_context_nohash.c | 2 +- arch/powerpc/mm/numa.c | 3 +- arch/powerpc/platforms/powermac/nvram.c | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 3 +- arch/powerpc/platforms/ps3/setup.c | 2 +- arch/powerpc/sysdev/msi_bitmap.c | 2 +- arch/riscv/mm/init.c | 3 +- arch/s390/kernel/crash_dump.c | 3 +- arch/s390/kernel/setup.c | 1 - arch/s390/kernel/smp.c | 3 +- arch/s390/kernel/topology.c | 2 +- arch/s390/kernel/vdso.c | 2 +- arch/s390/mm/extmem.c | 2 +- arch/s390/mm/init.c | 3 +- arch/s390/mm/vmem.c | 3 +- arch/s390/numa/mode_emu.c | 1 - arch/s390/numa/numa.c | 1 - arch/s390/numa/toptree.c | 2 +- arch/sh/mm/init.c | 3 +- arch/sh/mm/ioremap_fixed.c | 2 +- arch/sparc/kernel/mdesc.c | 2 - arch/sparc/kernel/prom_32.c | 2 +- arch/sparc/kernel/setup_64.c | 2 +- arch/sparc/kernel/smp_64.c | 2 +- arch/sparc/mm/init_32.c | 1 - arch/sparc/mm/init_64.c | 3 +- arch/sparc/mm/srmmu.c | 2 +- arch/um/drivers/net_kern.c | 2 +- arch/um/drivers/vector_kern.c | 2 +- arch/um/kernel/initrd.c | 2 +- arch/um/kernel/mem.c | 1 - arch/um/kernel/physmem.c | 1 - arch/unicore32/kernel/hibernate.c | 2 +- arch/unicore32/kernel/setup.c | 3 +- arch/unicore32/mm/init.c | 3 +- arch/unicore32/mm/mmu.c | 1 - arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/acpi/sleep.c | 1 - arch/x86/kernel/apic/apic.c | 2 +- arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/e820.c | 3 +- arch/x86/kernel/mpparse.c | 1 - arch/x86/kernel/pci-dma.c | 2 +- arch/x86/kernel/pci-swiotlb.c | 2 +- arch/x86/kernel/pvclock.c | 2 +- arch/x86/kernel/setup.c | 1 - arch/x86/kernel/setup_percpu.c | 1 - arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tce_64.c | 1 - arch/x86/mm/amdtopology.c | 1 
- arch/x86/mm/fault.c | 2 +- arch/x86/mm/highmem_32.c | 2 +- arch/x86/mm/init.c | 1 - arch/x86/mm/init_32.c | 1 - arch/x86/mm/init_64.c | 1 - arch/x86/mm/ioremap.c | 2 +- arch/x86/mm/kasan_init_64.c | 3 +- arch/x86/mm/kaslr.c | 1 + arch/x86/mm/numa.c | 1 - arch/x86/mm/numa_32.c | 1 - arch/x86/mm/numa_64.c | 2 +- arch/x86/mm/numa_emulation.c | 1 - arch/x86/mm/pageattr-test.c | 2 +- arch/x86/mm/pageattr.c | 2 +- arch/x86/mm/pat.c | 2 +- arch/x86/mm/physaddr.c | 2 +- arch/x86/pci/i386.c | 2 +- arch/x86/platform/efi/efi.c | 3 +- arch/x86/platform/efi/efi_64.c | 2 +- arch/x86/platform/efi/quirks.c | 1 - arch/x86/platform/olpc/olpc_dt.c | 2 +- arch/x86/power/hibernate_32.c | 2 +- arch/x86/xen/enlighten.c | 2 +- arch/x86/xen/enlighten_pv.c | 3 +- arch/x86/xen/p2m.c | 1 - arch/xtensa/kernel/pci.c | 2 +- arch/xtensa/mm/cache.c | 2 +- arch/xtensa/mm/init.c | 2 +- arch/xtensa/mm/kasan_init.c | 3 +- arch/xtensa/mm/mmu.c | 2 +- arch/xtensa/platforms/iss/network.c | 2 +- arch/xtensa/platforms/iss/setup.c | 2 +- block/blk-settings.c | 2 +- block/bounce.c | 2 +- drivers/acpi/numa.c | 1 - drivers/acpi/tables.c | 3 +- drivers/base/platform.c | 2 +- drivers/clk/ti/clk.c | 2 +- drivers/firmware/dmi_scan.c | 2 +- drivers/firmware/efi/apple-properties.c | 2 +- drivers/firmware/iscsi_ibft_find.c | 2 +- drivers/firmware/memmap.c | 2 +- drivers/iommu/mtk_iommu.c | 2 +- drivers/iommu/mtk_iommu_v1.c | 2 +- drivers/macintosh/smu.c | 3 +- drivers/mtd/ar7part.c | 2 +- drivers/net/arcnet/arc-rimi.c | 2 +- drivers/net/arcnet/com20020-isa.c | 2 +- drivers/net/arcnet/com90io.c | 2 +- drivers/of/fdt.c | 1 - drivers/of/unittest.c | 2 +- drivers/s390/char/fs3270.c | 2 +- drivers/s390/char/tty3270.c | 2 +- drivers/s390/cio/cmf.c | 2 +- drivers/s390/virtio/virtio_ccw.c | 2 +- drivers/sfi/sfi_core.c | 2 +- drivers/tty/serial/cpm_uart/cpm_uart_core.c | 2 +- drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c | 2 +- drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c | 2 +- drivers/usb/early/xhci-dbc.c | 1 - drivers/xen/balloon.c | 2 +- drivers/xen/events/events_base.c | 2 +- drivers/xen/grant-table.c | 2 +- drivers/xen/swiotlb-xen.c | 1 - drivers/xen/xen-selfballoon.c | 2 +- fs/dcache.c | 2 +- fs/inode.c | 2 +- fs/namespace.c | 2 +- fs/proc/kcore.c | 2 +- fs/proc/page.c | 2 +- fs/proc/vmcore.c | 2 +- include/linux/bootmem.h | 173 ---------------------------- include/linux/memblock.h | 151 +++++++++++++++++++++++- init/main.c | 2 +- kernel/dma/direct.c | 2 +- kernel/dma/swiotlb.c | 2 +- kernel/futex.c | 2 +- kernel/locking/qspinlock_paravirt.h | 2 +- kernel/pid.c | 2 +- kernel/power/snapshot.c | 2 +- kernel/printk/printk.c | 1 - kernel/profile.c | 2 +- lib/cpumask.c | 2 +- mm/hugetlb.c | 1 - mm/kasan/kasan_init.c | 3 +- mm/kmemleak.c | 2 +- mm/memblock.c | 1 - mm/memory_hotplug.c | 1 - mm/page_alloc.c | 1 - mm/page_ext.c | 2 +- mm/page_idle.c | 2 +- mm/page_owner.c | 2 +- mm/percpu.c | 2 +- mm/sparse-vmemmap.c | 1 - mm/sparse.c | 1 - net/ipv4/inet_hashtables.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/udp.c | 2 +- net/sctp/protocol.c | 2 +- net/xfrm/xfrm_hash.c | 2 +- 275 files changed, 353 insertions(+), 476 deletions(-) delete mode 100644 include/linux/bootmem.h (limited to 'include/linux') diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c index 026ee955fd10..867e8730b0c5 100644 --- a/arch/alpha/kernel/core_cia.c +++ b/arch/alpha/kernel/core_cia.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c index 
35572be9deb5..a9fd133a7fb2 100644 --- a/arch/alpha/kernel/core_irongate.c +++ b/arch/alpha/kernel/core_irongate.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 1f00c9433b10..8a568c4d8e81 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 132b06bdf903..97551597581b 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c index e7c956ea46b6..f334b8928d72 100644 --- a/arch/alpha/kernel/core_tsunami.c +++ b/arch/alpha/kernel/core_tsunami.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index 59cbfc2bf2c5..a9378ee0c2f1 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 4cc3eb92f55b..13937e72d875 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 5d178c7ba5b2..82cf950bda2a 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 64c06a0adf3d..a37fd990bd55 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index ff4f54b86c7f..cd9a112d67ff 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 853d15344934..a42fc5c4db89 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -19,7 +19,7 @@ #include #include #include -#include /* max_low_pfn */ +#include /* max_low_pfn */ #include #include diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 26cd925d19b1..74846553e3f1 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 2a01dd1005f4..d34f69eb1a95 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c index f582dc8944c9..48e700151810 100644 --- a/arch/arc/mm/highmem.c +++ b/arch/arc/mm/highmem.c @@ -7,7 +7,7 @@ * */ -#include +#include #include #include #include diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 0f29c6548779..f8fe5668b30f 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -8,7 +8,6 @@ #include #include -#include #include #ifdef CONFIG_BLK_DEV_INITRD 
#include diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 13bcd3b867cb..e3057c1b55b9 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 39e6090d23ac..840a4adc69fc 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 1f9b34a7eccd..cd5732ab0cdf 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -141,7 +141,7 @@ #include #include #include -#include +#include #include diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 66566472c153..661fe48ab78d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -9,7 +9,6 @@ * * DMA uncached mapping support. */ -#include #include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d421a10c93a8..32e4845af2b6 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 785d2a562a23..cb44aa290e73 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 0641ba54ab62..e70a49fc8dcd 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index ed46dc188b22..44e3c351e1ea 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -16,7 +16,6 @@ #define pr_fmt(fmt) "ACPI: " fmt #include -#include #include #include #include diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index 4f4f1815e047..eac1d0cc595c 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3428427f6c93..7ce7306f1d75 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index d190612b8f33..3a703e5d4e32 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d8d73073835f..9d9582cac6c4 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 6a65a2912d36..63527e585aac 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -11,7 +11,6 @@ */ #define pr_fmt(fmt) "kasan: " fmt -#include #include #include #include diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 0bff116c07a8..27a31efd9e8e 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -20,7 +20,6 @@ #define pr_fmt(fmt) "NUMA: " fmt #include -#include #include #include #include diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c index 
05d96a9541b5..2e1c0ea22eb0 100644 --- a/arch/c6x/kernel/setup.c +++ b/arch/c6x/kernel/setup.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index 3383df8b3508..af5ada0520be 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #ifdef CONFIG_BLK_DEV_RAM #include #endif diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index 34e2df5c0d6d..b32bfa1fe99e 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index f2bf4487aabd..6519252ac4db 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 706699374444..38eaa7b703e7 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c index dc8c7e75b5d1..b3c3e04d4e57 100644 --- a/arch/hexagon/kernel/setup.c +++ b/arch/hexagon/kernel/setup.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 88643faf3981..1719ede9e9bd 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 39f4433a6f0e..bec762a9b418 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index f77d80edddfe..8f106638913c 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -23,7 +23,7 @@ * Skip non-WB memory and ignore empty memory ranges. 
*/ #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 6b51c88e3578..b49fe6f618ed 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -6,7 +6,7 @@ #ifdef CONFIG_VIRTUAL_MEM_MAP #include #include -#include +#include EXPORT_SYMBOL(min_low_pfn); /* defined by bootmem.c, but not exported by generic code */ EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */ #endif diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 550243a94b5d..fe6e4946672e 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -90,7 +90,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 71209766c47f..9a6603f8e409 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -77,7 +77,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index dfe40cbdf3b3..45f956ad715a 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 0e6c2d9fb498..583a3746d70b 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -27,7 +27,6 @@ #include #include -#include #include #include #include diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 74fe317477e6..51ec944b036c 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 9b820f7a6a98..e311ee13e61d 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index e04efa088902..7601fe0622d2 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -28,7 +28,7 @@ * acquired, then the read-write lock must be acquired first. */ #include -#include +#include #include #include #include diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 9e5c23a6b8b4..6e447234205c 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -14,7 +14,6 @@ * Routines used by ia64 machines with contiguous (or virtually contiguous) * memory. 
*/ -#include #include #include #include diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 70609f823960..8a965784340c 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 43ea4a47163d..d5e12ff1d73c 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -8,7 +8,6 @@ #include #include -#include #include #include #include diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index aa19b7ac8222..3861d6e32d5f 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 5554863b4c9b..ab545daff7c3 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 5d71800df431..196a0dd7ff97 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 9146192b86f5..9900e6d4add6 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index 8b05d5581615..98f55220c67d 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -6,7 +6,7 @@ * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved. */ -#include +#include #include #include #include diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index ab2564f95199..71ad6b0ccab4 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c index 1089d67df315..6ffc204eb07d 100644 --- a/arch/m68k/atari/stram.c +++ b/arch/m68k/atari/stram.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/coldfire/m54xx.c b/arch/m68k/coldfire/m54xx.c index adad03ca6e11..360c723c0ae6 100644 --- a/arch/m68k/coldfire/m54xx.c +++ b/arch/m68k/coldfire/m54xx.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 5d3596c180f9..a1a3eaeaf58c 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index cfd5475bfc31..3c5def10d486 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/kernel/uboot.c b/arch/m68k/kernel/uboot.c index 107082877064..1b4c562753da 100644 --- a/arch/m68k/kernel/uboot.c +++ b/arch/m68k/kernel/uboot.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index ae49ae4d3049..933c33e76a48 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -17,7 +17,7 @@ #include #include #include 
-#include +#include #include #include diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 38a1d92dd555..0de4999a3810 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 2113eec8dbf9..7497cf30bf1c 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c index 19c05ab9824d..f736db48a2e1 100644 --- a/arch/m68k/mm/sun3mmu.c +++ b/arch/m68k/mm/sun3mmu.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c index 79a2bb857906..542c4404861c 100644 --- a/arch/m68k/sun3/config.c +++ b/arch/m68k/sun3/config.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/m68k/sun3/dvma.c b/arch/m68k/sun3/dvma.c index 5f92c72b05c3..a2c1c9304895 100644 --- a/arch/m68k/sun3/dvma.c +++ b/arch/m68k/sun3/dvma.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c index d30da12a1702..582a1284059a 100644 --- a/arch/m68k/sun3/mmu_emu.c +++ b/arch/m68k/sun3/mmu_emu.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c index 72d94585b52e..8be8b750c629 100644 --- a/arch/m68k/sun3/sun3dvma.c +++ b/arch/m68k/sun3/sun3dvma.c @@ -7,7 +7,7 @@ * Contains common routines for sun3/sun3x DVMA management. */ -#include +#include #include #include #include diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c index b2acbc862f60..89e630e66555 100644 --- a/arch/m68k/sun3x/dvma.c +++ b/arch/m68k/sun3x/dvma.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index d801cc5f5b95..45e0a1aa9357 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 9989740d397a..8c14988f52f2 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -7,10 +7,9 @@ * for more details. 
*/ -#include +#include #include #include -#include #include /* mem_init */ #include #include diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 2ffd171af8b6..6b89a66ec1a5 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/ar7/memory.c b/arch/mips/ar7/memory.c index 0332f0514d05..80390a9ec264 100644 --- a/arch/mips/ar7/memory.c +++ b/arch/mips/ar7/memory.c @@ -16,7 +16,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include +#include #include #include #include diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 4c7a93f4039a..9728abcb18fa 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c index 7019e2967009..77a836e661c9 100644 --- a/arch/mips/bcm63xx/prom.c +++ b/arch/mips/bcm63xx/prom.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c index 2be9caaa2085..e28ee9a7cc7e 100644 --- a/arch/mips/bcm63xx/setup.c +++ b/arch/mips/bcm63xx/setup.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index 6329c5f780d6..1738a06396f9 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index c44c1a654471..e8eb60ed99f2 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -11,7 +11,7 @@ * Copyright (C) 2010 Cavium Networks, Inc. 
*/ #include -#include +#include #include #include #include diff --git a/arch/mips/dec/prom/memory.c b/arch/mips/dec/prom/memory.c index a2acc6454cf3..5073d2ed78bb 100644 --- a/arch/mips/dec/prom/memory.c +++ b/arch/mips/dec/prom/memory.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/emma/common/prom.c b/arch/mips/emma/common/prom.c index cae42259d6da..675337b8a4a0 100644 --- a/arch/mips/emma/common/prom.c +++ b/arch/mips/emma/common/prom.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c index dd9496f26e6a..429b7f8d2aeb 100644 --- a/arch/mips/fw/arc/memory.c +++ b/arch/mips/fw/arc/memory.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 0a0aaf39fd16..4c41ed0a637e 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index 2c7288041a99..81845ba04835 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/crash_dump.c b/arch/mips/kernel/crash_dump.c index a8657d29c62e..01b2bd95ba1f 100644 --- a/arch/mips/kernel/crash_dump.c +++ b/arch/mips/kernel/crash_dump.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 89950b7bf536..93b8e0b4332f 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 31522d3bc8bf..41c1683761bb 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 623dc18f7f2f..0f852e1b5891 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 0bef238d2c0c..6176b9acba95 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kvm/commpage.c b/arch/mips/kvm/commpage.c index f43629979a0e..5812e6145801 100644 --- a/arch/mips/kvm/commpage.c +++ b/arch/mips/kvm/commpage.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kvm/dyntrans.c b/arch/mips/kvm/dyntrans.c index f8e772564d74..d77b61b3d6ee 100644 --- a/arch/mips/kvm/dyntrans.c +++ b/arch/mips/kvm/dyntrans.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include "commpage.h" diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 4144bfaef137..ec9ed23bca7f 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c index aa0a1a00faf6..7257e8b6f5a9 100644 --- 
a/arch/mips/kvm/interrupt.c +++ b/arch/mips/kvm/interrupt.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index f7ea8e21656b..1fcc4d149054 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index d984bd5c2ec5..14d4c5e2b42f 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/lasat/prom.c b/arch/mips/lasat/prom.c index 37b8fc5b9ac9..5ce1407de2d5 100644 --- a/arch/mips/lasat/prom.c +++ b/arch/mips/lasat/prom.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/loongson64/common/init.c b/arch/mips/loongson64/common/init.c index 6ef17120722f..c073fbcb9805 100644 --- a/arch/mips/loongson64/common/init.c +++ b/arch/mips/loongson64/common/init.c @@ -8,7 +8,7 @@ * option) any later version. */ -#include +#include #include #include #include diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 703ad4536fe0..622761878cd1 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 0893b6136498..b521d8e2d359 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index b19a3c506b1e..e2a33adc0f29 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/mips/mti-malta/malta-memory.c b/arch/mips/mti-malta/malta-memory.c index a47556723b85..868921adef1d 100644 --- a/arch/mips/mti-malta/malta-memory.c +++ b/arch/mips/mti-malta/malta-memory.c @@ -12,7 +12,7 @@ * Steven J. 
Hill */ #include -#include +#include #include #include diff --git a/arch/mips/netlogic/xlp/dt.c b/arch/mips/netlogic/xlp/dt.c index b5ba83f4c646..c856f2a3ea42 100644 --- a/arch/mips/netlogic/xlp/dt.c +++ b/arch/mips/netlogic/xlp/dt.c @@ -33,7 +33,7 @@ */ #include -#include +#include #include #include diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 3c3b1e6abb53..687513880fbf 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index c2e94cf5ecda..e68b44b27c0d 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 1ada8492733b..d544e7b07f7a 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c index 6484e4a4597b..361a690facbf 100644 --- a/arch/mips/rb532/prom.c +++ b/arch/mips/rb532/prom.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index cb1f1a6a166d..d8b8444d6795 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/sibyte/common/cfe.c b/arch/mips/sibyte/common/cfe.c index 092fb2a6ec4a..12a780f251e1 100644 --- a/arch/mips/sibyte/common/cfe.c +++ b/arch/mips/sibyte/common/cfe.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index 152ca71cc2d7..3b034b7178d6 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/txx9/rbtx4938/prom.c b/arch/mips/txx9/rbtx4938/prom.c index bcb469247e8c..2b36a2ee744c 100644 --- a/arch/mips/txx9/rbtx4938/prom.c +++ b/arch/mips/txx9/rbtx4938/prom.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index 63a1a5ef5219..eacc79024879 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -2,9 +2,8 @@ // Copyright (C) 2005-2017 Andes Technology Corporation #include -#include -#include #include +#include #include #include #include diff --git a/arch/nds32/mm/highmem.c b/arch/nds32/mm/highmem.c index e17cb8a69315..022779af6148 100644 --- a/arch/nds32/mm/highmem.c +++ b/arch/nds32/mm/highmem.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c index 66d3e9cf498d..131104bd2538 100644 --- a/arch/nds32/mm/init.c +++ b/arch/nds32/mm/init.c @@ -7,12 +7,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #include diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c index a6d4f7530247..232a36b511aa 100644 --- a/arch/nios2/kernel/prom.c +++ b/arch/nios2/kernel/prom.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index 
2d0011ddd4d5..6bbd4ae2beb0 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index 12923501d94f..16cea5776b87 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index e17fcd83120f..c605bdad1746 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -30,13 +30,12 @@ #include #include #include -#include +#include #include #include #include #include #include -#include #include #include diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index 91a6a9ab7598..d157310eb377 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -26,12 +26,11 @@ #include #include #include -#include +#include #include #include #include /* for initrd_* */ #include -#include #include #include diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 7e7a3126c5e9..2d7cffcaa476 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index d39ec3a4550a..274bd1442dd9 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2b56d1f30387..93ee3703b42f 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 9216c3a7fcfc..2a51e4cc8246 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -29,10 +29,9 @@ #include #include #include -#include +#include #include #include -#include #include #include diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index bf87d6e13369..5b61704447c1 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a7226ed9cae6..8cf035e68378 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index b3fe79064a69..0a64fffabee1 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -27,12 +27,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 954f1986af4d..67b9d7b669a1 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f04f15f9d232..3a048e98a132 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -11,7 +11,7 @@ #define pr_fmt(fmt) "numa: " fmt #include -#include +#include #include #include #include @@ -19,7 +19,6 @@ 
#include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index f45b369177a4..f3391be7c762 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 23a67b545b70..aba81cbf0b36 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -17,11 +17,10 @@ #include #include #include -#include +#include #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 12519857a33c..658bfab3350b 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 349a9ff6ca5b..2444feda831f 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index d58c111099b3..1d9bfaff60bc 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -13,9 +13,8 @@ #include #include -#include -#include #include +#include #include #include diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index d17566a8c76f..97eae3871868 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -13,10 +13,9 @@ #include #include #include -#include +#include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 781c1053a773..72dd23ef771b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 44f9a7d6450b..f82b3d3c36e2 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -20,7 +20,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include -#include +#include #include #include #include @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 799a91882a76..8992b04c0ade 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -8,7 +8,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index ec31b48a42a5..ebe748a9f472 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 84111a43ea29..eba2def3414d 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 873f6ee1c46d..76d0708438e9 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -29,7 +29,6 @@ #include #include 
#include -#include #include #include #include diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 04638b0b9ef1..0472e27febdf 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -4,14 +4,13 @@ * Author(s): Heiko Carstens */ -#include +#include #include #include #include #include #include #include -#include #include #include #include diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c index 5a381fc8e958..bfba273c32c0 100644 --- a/arch/s390/numa/mode_emu.c +++ b/arch/s390/numa/mode_emu.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 297f5d8b0890..ae0d9e889534 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c index 7f61cc3fd4d1..71a608cd4f61 100644 --- a/arch/s390/numa/toptree.c +++ b/arch/s390/numa/toptree.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 21447f866415..c8c13c777162 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -11,12 +11,11 @@ #include #include #include -#include +#include #include #include #include #include -#include #include #include #include diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c index 927a1294c465..07e744d75fa0 100644 --- a/arch/sh/mm/ioremap_fixed.c +++ b/arch/sh/mm/ioremap_fixed.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index a41526bd91e2..9a26b442f820 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -5,13 +5,11 @@ */ #include #include -#include #include #include #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index 4389944735c6..d41e2a749c5d 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index de7c87b153fe..cd2825cb8420 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 6cc80d0f4b9f..4792e08ad36b 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 880714565c40..d900952bfc5f 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index a8c3453195e6..3c8aac21f426 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index b48fea5ad9ef..a6142c5abf61 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git 
a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index ef19a391214f..673816880cce 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -6,7 +6,7 @@ * Licensed under the GPL. */ -#include +#include #include #include #include diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 20442d20bd09..2b4dded11a7a 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 844056cf313e..3678f5b05e42 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -4,7 +4,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 2c672a8f4571..1067469ba2ea 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 296a91a04598..5bf56af4d5b9 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -4,7 +4,6 @@ */ #include -#include #include #include #include diff --git a/arch/unicore32/kernel/hibernate.c b/arch/unicore32/kernel/hibernate.c index 9969ec374abb..29b71c68eb7c 100644 --- a/arch/unicore32/kernel/hibernate.c +++ b/arch/unicore32/kernel/hibernate.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c index 9f163f976315..b2c38b32ea57 100644 --- a/arch/unicore32/kernel/setup.c +++ b/arch/unicore32/kernel/setup.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -27,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 44fd0e8fbe87..cf4eb9481fd6 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -11,13 +11,12 @@ #include #include #include -#include +#include #include #include #include #include #include -#include #include #include #include diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c index 18b355a20f0b..040a8c279761 100644 --- a/arch/unicore32/mm/mmu.c +++ b/arch/unicore32/mm/mmu.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f5ea6415b778..7f5d212551d4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index f1915b744052..ca13851f0570 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -7,7 +7,6 @@ */ #include -#include #include #include #include diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ab731ab09f06..32b2b7a41ef5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8c7450900e0e..5fbc57e4b0b9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -47,7 +47,7 @@ #include #include /* time_after() */ #include -#include +#include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 
660d0b22e962..cbbd57ae06ee 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,7 +1,7 @@ /* cpu_feature_enabled() cannot be used this early */ #define USE_EARLY_PGTABLE_L5 -#include +#include #include #include #include diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a0ec4c37265a..68ff62bffbab 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -9,11 +9,10 @@ * allocation code routines via a platform independent interface (memblock, etc.). */ #include -#include +#include #include #include #include -#include #include #include diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f1c5eb99d445..3482460d984d 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 7ba73fe0d917..f4562fcec681 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 71c0b01d93b1..bd08b9e1c9e2 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 637982efecd8..9b158b4716d2 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 7005f89bf3b2..b74e7bfed6ab 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 483412fb8a24..e8796fcd7e5a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5369d7fac797..a9134d1910b9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c index 75730ce01f8d..285aaa62d153 100644 --- a/arch/x86/kernel/tce_64.c +++ b/arch/x86/kernel/tce_64.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c index 048c761d97b0..058b2f36b3a6 100644 --- a/arch/x86/mm/amdtopology.c +++ b/arch/x86/mm/amdtopology.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b24eb4eb9984..71d4b9d4d43f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -8,7 +8,7 @@ #include /* task_stack_*(), ... */ #include /* oops_begin/end, ... */ #include /* search_exception_tables */ -#include /* max_low_pfn */ +#include /* max_low_pfn */ #include /* NOKPROBE_SYMBOL, ... */ #include /* kmmio_handler, ... 
*/ #include /* perf_sw_event */ diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 62915a5e0fa2..0d4bdcb84da5 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -1,7 +1,7 @@ #include #include #include /* for totalram_pages */ -#include +#include void *kmap(struct page *page) { diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index faca978ebf9d..ef99f3892e1f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -3,7 +3,6 @@ #include #include #include -#include /* for max_low_pfn */ #include #include diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 3bbe5f58a67d..49ecf5ecf6d3 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bfb0bedc21d3..5fab264948c2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 24e0920a9b25..5378d10f1d31 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -6,7 +6,7 @@ * (C) Copyright 1995 1996 Linus Torvalds */ -#include +#include #include #include #include diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 8f87499124b8..04a9cf6b034f 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -5,10 +5,9 @@ /* cpu_feature_enabled() cannot be used this early */ #define USE_EARLY_PGTABLE_L5 -#include +#include #include #include -#include #include #include #include diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 61db77b0eda9..3f452ffed7e9 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 16e37d712ffd..1308f5408bf7 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index e8a4a09e20f1..f2bd3d61e16b 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -22,7 +22,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include #include #include diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 066f3511d5f1..59d80160fa5a 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -3,7 +3,7 @@ * Generic VM initialization for x86-64 NUMA setups. * Copyright 2002,2003 Andi Kleen, SuSE Labs. */ -#include +#include #include "numa_internal.h" diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index b54d52a2d00a..a80fdd7fb40f 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include "numa_internal.h" diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index a25588ad75ef..08f8f76a4852 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -5,7 +5,7 @@ * Clears the a test pte bit on random pages in the direct mapping, * then reverts and compares page tables forwards and afterwards. */ -#include +#include #include #include #include diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 62bb30b4bd2a..f799076e3d57 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -3,7 +3,7 @@ * Thanks to Ben LaHaise for precious feedback. 
*/ #include -#include +#include #include #include #include diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 3d0c83ef6aab..08013524fba1 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index 7f9acb68324c..bdc98150d4db 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include #include #include diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index ed4ac215305d..8cd66152cdb0 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 9061babfbc83..7ae939e353cd 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -36,9 +36,8 @@ #include #include #include -#include -#include #include +#include #include #include #include diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index e8da7f492970..cf0347f61b21 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 4b70d0f5a803..95e77a667ba5 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index 140cd76ee897..115c8e4173bb 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 15695e30f982..be15bdcb20df 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 67b2f31a1265..e996e8e744cb 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -#include +#include #endif #include #include diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ec7a4209f310..2f6787fc7106 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b3e11afed25b..b06731705529 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -67,7 +67,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index 21f13e9aabe1..5ca440a74316 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 9220dcde7520..b27359e2a464 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/xtensa/mm/init.c 
b/arch/xtensa/mm/init.c index f7fbe6334939..9750a48f491b 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c index 1a30a258ccd0..6b95ca43aec0 100644 --- a/arch/xtensa/mm/kasan_init.c +++ b/arch/xtensa/mm/kasan_init.c @@ -8,11 +8,10 @@ * Copyright (C) 2017 Cadence Design Systems Inc. */ -#include +#include #include #include #include -#include #include #include #include diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index f33a1ff2662a..a4dcfd39bc5c 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -4,7 +4,7 @@ * * Extracted from init.c */ -#include +#include #include #include #include diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 206b9d4591e8..190846dddc67 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c index 58709e89a8ed..c14cc673976c 100644 --- a/arch/xtensa/platforms/iss/setup.c +++ b/arch/xtensa/platforms/iss/setup.c @@ -16,7 +16,7 @@ * option) any later version. * */ -#include +#include #include #include #include diff --git a/block/blk-settings.c b/block/blk-settings.c index ffd459969689..696c04c1ab6c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -6,7 +6,7 @@ #include #include #include -#include /* for max_pfn/max_low_pfn */ +#include /* for max_pfn/max_low_pfn */ #include #include #include diff --git a/block/bounce.c b/block/bounce.c index ec0d99995f5f..cf49fe02f65c 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 85167603b9c9..274699463b4f 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index a3d012b08fc5..61203eebf3a1 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -31,9 +31,8 @@ #include #include #include -#include -#include #include +#include #include #include "internal.h" diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 23cf4427f425..41b91af95afb 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 5c54d3734daf..5b2867a33b98 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include "clock.h" diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index f2483548cde9..099d83e4e910 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c index 2b675f788b61..ac1654f74dc7 100644 --- a/drivers/firmware/efi/apple-properties.c +++ b/drivers/firmware/efi/apple-properties.c @@ -20,7 +20,7 @@ #define pr_fmt(fmt) "apple-properties: " fmt -#include +#include #include #include #include diff --git a/drivers/firmware/iscsi_ibft_find.c 
b/drivers/firmware/iscsi_ibft_find.c index 2224f1dc074b..72d9ea18270b 100644 --- a/drivers/firmware/iscsi_ibft_find.c +++ b/drivers/firmware/iscsi_ibft_find.c @@ -18,7 +18,7 @@ * GNU General Public License for more details. */ -#include +#include #include #include #include diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 03cead6d5f97..2a23453f005a 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index f9f69f7111a9..44bd5b9166bb 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -11,7 +11,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ -#include +#include #include #include #include diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 676c029494e4..0e780848f59b 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -13,7 +13,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. */ -#include +#include #include #include #include diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 0069f9084f9f..880a81c82b7a 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -38,7 +38,6 @@ #include #include #include -#include #include #include diff --git a/drivers/mtd/ar7part.c b/drivers/mtd/ar7part.c index fc15ec58230a..0d33cf0842ad 100644 --- a/drivers/mtd/ar7part.c +++ b/drivers/mtd/ar7part.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index a07e24970be4..11c5bad95226 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index 38fa60ddaf2e..28510e33924f 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include "arcdevice.h" diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index 4e56aaf2b984..2c546013a980 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index ffe62a7ae19b..bb532aae0d92 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 01e23b85e798..49ae2aa744d6 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -5,7 +5,7 @@ #define pr_fmt(fmt) "### dt-test ### " fmt -#include +#include #include #include #include diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 16a4e8528bbc..8f3a2eeb28dc 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -8,7 +8,7 @@ * Copyright IBM Corp. 
2003, 2009 */ -#include +#include #include #include #include diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 5b8af2782282..2b0c36c2c568 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 8af4948dae80..72dd2471ec1e 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -13,7 +13,7 @@ #define KMSG_COMPONENT "cio" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include +#include #include #include #include diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 8f5c1d7f751a..97b6f197f007 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/sfi/sfi_core.c b/drivers/sfi/sfi_core.c index 153b3f3cc795..a5136901dd8a 100644 --- a/drivers/sfi/sfi_core.c +++ b/drivers/sfi/sfi_core.c @@ -59,7 +59,7 @@ #define KMSG_COMPONENT "SFI" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#include +#include #include #include #include diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c index 79ad30d34949..b929c7ae3a27 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c b/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c index 4eba17f3d293..56fc527015cb 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c b/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c index e3bff068dc3c..6a1cd03bfe39 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index ddc5fa88f268..d2652dccc699 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index e12bb256036f..a3f5cbfcd4a1 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index e6c1934734b7..93194f3e7540 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 84575baceebc..f15f89df1f36 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -33,7 +33,7 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt -#include +#include #include #include #include diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index c5f26a87d238..2a7f545bd0b5 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -35,7 +35,6 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt -#include #include #include #include diff --git 
a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 55988b8418ee..5165aa82bf7d 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -68,7 +68,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/fs/dcache.c b/fs/dcache.c index c2e443fb76ae..2593153471cf 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/inode.c b/fs/inode.c index 9b808986d440..9e198f00b64c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/namespace.c b/fs/namespace.c index d86830c86ce8..98d27da43304 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d297fe4472a9..bbcc185062bb 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/proc/page.c b/fs/proc/page.c index 792c78a49174..6c517b11acf8 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include #include #include diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 0377a104495b..3fe90443c1bb 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h deleted file mode 100644 index b58873a567b2..000000000000 --- a/include/linux/bootmem.h +++ /dev/null @@ -1,173 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 - */ -#ifndef _LINUX_BOOTMEM_H -#define _LINUX_BOOTMEM_H - -#include -#include -#include -#include - -/* - * simple boot-time physical memory area allocator. 
- */ - -extern unsigned long max_low_pfn; -extern unsigned long min_low_pfn; - -/* - * highest page - */ -extern unsigned long max_pfn; -/* - * highest possible page - */ -extern unsigned long long max_possible_pfn; - -extern unsigned long memblock_free_all(void); -extern void reset_node_managed_pages(pg_data_t *pgdat); -extern void reset_all_zones_managed_pages(void); - -/* We are using top down, so it is safe to use 0 here */ -#define BOOTMEM_LOW_LIMIT 0 - -#ifndef ARCH_LOW_ADDRESS_LIMIT -#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL -#endif - -/* FIXME: use MEMBLOCK_ALLOC_* variants here */ -#define BOOTMEM_ALLOC_ACCESSIBLE 0 -#define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0) - -/* FIXME: Move to memblock.h at a point where we remove nobootmem.c */ -void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, - phys_addr_t max_addr, int nid); -void *memblock_alloc_try_nid_nopanic(phys_addr_t size, - phys_addr_t align, phys_addr_t min_addr, - phys_addr_t max_addr, int nid); -void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, - phys_addr_t min_addr, phys_addr_t max_addr, int nid); -void __memblock_free_early(phys_addr_t base, phys_addr_t size); -void __memblock_free_late(phys_addr_t base, phys_addr_t size); - -static inline void * __init memblock_alloc( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_raw( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_from( - phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) -{ - return memblock_alloc_try_nid(size, align, min_addr, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_nopanic( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid_nopanic(size, align, - BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_low( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid(size, align, - BOOTMEM_LOW_LIMIT, - ARCH_LOW_ADDRESS_LIMIT, - NUMA_NO_NODE); -} -static inline void * __init memblock_alloc_low_nopanic( - phys_addr_t size, phys_addr_t align) -{ - return memblock_alloc_try_nid_nopanic(size, align, - BOOTMEM_LOW_LIMIT, - ARCH_LOW_ADDRESS_LIMIT, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_from_nopanic( - phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) -{ - return memblock_alloc_try_nid_nopanic(size, align, min_addr, - BOOTMEM_ALLOC_ACCESSIBLE, - NUMA_NO_NODE); -} - -static inline void * __init memblock_alloc_node( - phys_addr_t size, phys_addr_t align, int nid) -{ - return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, nid); -} - -static inline void * __init memblock_alloc_node_nopanic( - phys_addr_t size, int nid) -{ - return memblock_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT, - BOOTMEM_ALLOC_ACCESSIBLE, - nid); -} - -static inline void __init memblock_free_early( - phys_addr_t base, phys_addr_t size) -{ - __memblock_free_early(base, size); -} - -static inline void __init memblock_free_early_nid( - phys_addr_t base, phys_addr_t size, int nid) -{ - __memblock_free_early(base, size); -} - -static inline void __init memblock_free_late( - phys_addr_t base, phys_addr_t size) -{ - 
__memblock_free_late(base, size); -} - -extern void *alloc_large_system_hash(const char *tablename, - unsigned long bucketsize, - unsigned long numentries, - int scale, - int flags, - unsigned int *_hash_shift, - unsigned int *_hash_mask, - unsigned long low_limit, - unsigned long high_limit); - -#define HASH_EARLY 0x00000001 /* Allocating during early boot? */ -#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min - * shift passed via *_hash_shift */ -#define HASH_ZERO 0x00000004 /* Zero allocated hash table */ - -/* Only NUMA needs hash distribution. 64bit NUMA architectures have - * sufficient vmalloc space. - */ -#ifdef CONFIG_NUMA -#define HASHDIST_DEFAULT IS_ENABLED(CONFIG_64BIT) -extern int hashdist; /* Distribute hashes across NUMA nodes? */ -#else -#define hashdist (0) -#endif - - -#endif /* _LINUX_BOOTMEM_H */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 9d46a7204975..1b4d85879cbe 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -15,6 +15,19 @@ #include #include +#include + +extern unsigned long max_low_pfn; +extern unsigned long min_low_pfn; + +/* + * highest page + */ +extern unsigned long max_pfn; +/* + * highest possible page + */ +extern unsigned long long max_possible_pfn; #define INIT_MEMBLOCK_REGIONS 128 #define INIT_PHYSMEM_REGIONS 4 @@ -119,6 +132,10 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); enum memblock_flags choose_memblock_flags(void); +unsigned long memblock_free_all(void); +void reset_node_managed_pages(pg_data_t *pgdat); +void reset_all_zones_managed_pages(void); + /* Low level functions */ int memblock_add_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size, @@ -300,11 +317,116 @@ static inline int memblock_get_region_node(const struct memblock_region *r) } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +/* Flags for memblock allocation APIs */ +#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) +#define MEMBLOCK_ALLOC_ACCESSIBLE 0 + +/* We are using top down, so it is safe to use 0 here */ +#define MEMBLOCK_LOW_LIMIT 0 + +#ifndef ARCH_LOW_ADDRESS_LIMIT +#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL +#endif + phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_phys_alloc(phys_addr_t size, phys_addr_t align); +void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid); +void *memblock_alloc_try_nid_nopanic(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid); +void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid); + +static inline void * __init memblock_alloc(phys_addr_t size, phys_addr_t align) +{ + return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_raw(phys_addr_t size, + phys_addr_t align) +{ + return memblock_alloc_try_nid_raw(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_from(phys_addr_t size, + phys_addr_t align, + phys_addr_t min_addr) +{ + return memblock_alloc_try_nid(size, align, min_addr, + MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_nopanic(phys_addr_t size, 
+ phys_addr_t align) +{ + return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_low(phys_addr_t size, + phys_addr_t align) +{ + return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, + ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE); +} +static inline void * __init memblock_alloc_low_nopanic(phys_addr_t size, + phys_addr_t align) +{ + return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT, + ARCH_LOW_ADDRESS_LIMIT, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_from_nopanic(phys_addr_t size, + phys_addr_t align, + phys_addr_t min_addr) +{ + return memblock_alloc_try_nid_nopanic(size, align, min_addr, + MEMBLOCK_ALLOC_ACCESSIBLE, + NUMA_NO_NODE); +} + +static inline void * __init memblock_alloc_node(phys_addr_t size, + phys_addr_t align, int nid) +{ + return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +static inline void * __init memblock_alloc_node_nopanic(phys_addr_t size, + int nid) +{ + return memblock_alloc_try_nid_nopanic(size, 0, MEMBLOCK_LOW_LIMIT, + MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +static inline void __init memblock_free_early(phys_addr_t base, + phys_addr_t size) +{ + __memblock_free_early(base, size); +} + +static inline void __init memblock_free_early_nid(phys_addr_t base, + phys_addr_t size, int nid) +{ + __memblock_free_early(base, size); +} + +static inline void __init memblock_free_late(phys_addr_t base, phys_addr_t size) +{ + __memblock_free_late(base, size); +} + /* * Set the allocation direction to bottom-up or top-down. */ @@ -323,10 +445,6 @@ static inline bool memblock_bottom_up(void) return memblock.bottom_up; } -/* Flags for memblock_alloc_base() amd __memblock_alloc_base() */ -#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) -#define MEMBLOCK_ALLOC_ACCESSIBLE 0 - phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, enum memblock_flags flags); @@ -432,6 +550,31 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo i < memblock_type->cnt; \ i++, rgn = &memblock_type->regions[i]) +extern void *alloc_large_system_hash(const char *tablename, + unsigned long bucketsize, + unsigned long numentries, + int scale, + int flags, + unsigned int *_hash_shift, + unsigned int *_hash_mask, + unsigned long low_limit, + unsigned long high_limit); + +#define HASH_EARLY 0x00000001 /* Allocating during early boot? */ +#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min + * shift passed via *_hash_shift */ +#define HASH_ZERO 0x00000004 /* Zero allocated hash table */ + +/* Only NUMA needs hash distribution. 64bit NUMA architectures have + * sufficient vmalloc space. + */ +#ifdef CONFIG_NUMA +#define HASHDIST_DEFAULT IS_ENABLED(CONFIG_64BIT) +extern int hashdist; /* Distribute hashes across NUMA nodes? */ +#else +#define hashdist (0) +#endif + #ifdef CONFIG_MEMTEST extern void early_memtest(phys_addr_t start, phys_addr_t end); #else diff --git a/init/main.c b/init/main.c index 8cef69c61389..51b8e7b8ae5b 100644 --- a/init/main.c +++ b/init/main.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index f14c376937e5..22a12ab5a5e9 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -4,7 +4,7 @@ * * DMA operations that map physical memory directly without using an IOMMU. 
*/ -#include /* for max_pfn */ +#include /* for max_pfn */ #include #include #include diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 801da67e957b..5731daa09a32 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include #define CREATE_TRACE_POINTS diff --git a/kernel/futex.c b/kernel/futex.c index 3e2de8fc1891..f423f9b6577e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -65,7 +65,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 0130e488ebfe..8f36c27c1794 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -4,7 +4,7 @@ #endif #include -#include +#include #include /* diff --git a/kernel/pid.c b/kernel/pid.c index cdf63e53a014..b2f6c506035d 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 34116a6097be..3c9e365438ad 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 429e4a3833ca..1b2a029360b7 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/profile.c b/kernel/profile.c index 9aa2a4445b0d..9c08a2c7cb1d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include diff --git a/lib/cpumask.c b/lib/cpumask.c index 1405cb22e6bc..75b5e7672c4c 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include /** * cpumask_next - get the next cpu in a cpumask diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e35d99844612..c007fb5fb8d5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 785a9707786b..c7550eb65922 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -10,11 +10,10 @@ * */ -#include +#include #include #include #include -#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 4f7e4b5a2f08..877de4fa0720 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -92,7 +92,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/memblock.c b/mm/memblock.c index 2ed73245b5da..c655342569f8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 7e6509a53d79..41e326472ef9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5f3291601945..ef289fadec0e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/mm/page_ext.c b/mm/page_ext.c index 5323c2ade686..ae44f7adbe07 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include +#include #include #include #include diff --git a/mm/page_idle.c 
b/mm/page_idle.c index 6302bc62c27d..b9e4b42b33ab 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include <linux/bootmem.h> +#include <linux/memblock.h> #include #include #include diff --git a/mm/page_owner.c b/mm/page_owner.c index d80adfe702d3..87bc0dfdb52b 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -3,7 +3,7 @@ #include #include #include -#include <linux/bootmem.h> +#include <linux/memblock.h> #include #include #include diff --git a/mm/percpu.c b/mm/percpu.c index 3050c1d37d37..61cdbb3b3736 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -65,7 +65,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include <linux/bootmem.h> +#include <linux/memblock.h> #include #include #include diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 7408cabed61a..7fec05796796 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -20,7 +20,6 @@ */ #include #include -#include <linux/bootmem.h> #include #include #include diff --git a/mm/sparse.c b/mm/sparse.c index b139fbc61d10..ab2ac45e0440 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -5,7 +5,6 @@ #include #include #include -#include <linux/bootmem.h> #include #include #include diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index f5c9ef2586de..411dd7a90046 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -19,7 +19,7 @@ #include #include #include -#include <linux/bootmem.h> +#include <linux/memblock.h> #include #include diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1834818ed07b..9e6bc4d6daa7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -262,7 +262,7 @@ #include #include #include -#include <linux/bootmem.h> +#include <linux/memblock.h> #include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ca3ed931f2a9..1976fddb9e00 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -81,7 +81,7 @@ #include #include -#include <linux/bootmem.h> +#include <linux/memblock.h> #include #include #include diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e948db29ab53..9b277bd36d1a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -46,7 +46,7 @@ #include #include #include -#include <linux/bootmem.h> +#include <linux/memblock.h> #include #include #include diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c index 2ad33ce1ea17..eca8d84d99bf 100644 --- a/net/xfrm/xfrm_hash.c +++ b/net/xfrm/xfrm_hash.c @@ -6,7 +6,7 @@ #include #include -#include <linux/bootmem.h> +#include <linux/memblock.h> #include #include #include -- cgit v1.2.3-71-gd317

From 7e1c4e27928e5f87b9b1eaf06dc31773b2f1e7f1 Mon Sep 17 00:00:00 2001
From: Mike Rapoport
Date: Tue, 30 Oct 2018 15:09:57 -0700
Subject: memblock: stop using implicit alignment to SMP_CACHE_BYTES

When memblock allocation APIs are called with align = 0, the alignment
is implicitly set to SMP_CACHE_BYTES. This implicit alignment is done
deep in the memblock allocator and it can come as a surprise. Such an
alignment is not wrong, but it is better to be explicit for the sake
of clarity and the principle of least surprise.

Replace all such uses of memblock APIs with the 'align' parameter
explicitly set to SMP_CACHE_BYTES and stop the implicit alignment
assignment in the memblock internal allocation functions.

For the cases where memblock APIs are used via helper functions, e.g.
iommu_arena_new_node() on Alpha, the helper functions were detected
with Coccinelle's help and then manually examined and updated where
appropriate.
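As a concrete illustration, this is what the conversion means at a
direct call site. The sketch below is not taken from the patch itself;
the function alloc_foo_table() and its arguments are hypothetical, but
memblock_alloc() and SMP_CACHE_BYTES are the kernel interfaces this
series actually touches:

	/* Sketch only: a hypothetical early-boot table allocation. */
	#include <linux/init.h>
	#include <linux/cache.h>	/* SMP_CACHE_BYTES */
	#include <linux/memblock.h>

	static void * __init alloc_foo_table(unsigned long nentries,
					     size_t entry_size)
	{
		/*
		 * Before this patch, passing align == 0 here was
		 * silently rounded up to SMP_CACHE_BYTES inside the
		 * memblock allocator:
		 *
		 *	return memblock_alloc(nentries * entry_size, 0);
		 *
		 * After this patch the same alignment is requested
		 * explicitly at the call site:
		 */
		return memblock_alloc(nentries * entry_size,
				      SMP_CACHE_BYTES);
	}

The memory returned is identical before and after; the only change is
that the call site now states the alignment it has always been getting.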
The direct users of the memblock APIs were updated using the semantic
patch below:

@@
expression size, min_addr, max_addr, nid;
@@
(
|
- memblock_alloc_try_nid_raw(size, 0, min_addr, max_addr, nid)
+ memblock_alloc_try_nid_raw(size, SMP_CACHE_BYTES, min_addr, max_addr, nid)
|
- memblock_alloc_try_nid_nopanic(size, 0, min_addr, max_addr, nid)
+ memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, min_addr, max_addr, nid)
|
- memblock_alloc_try_nid(size, 0, min_addr, max_addr, nid)
+ memblock_alloc_try_nid(size, SMP_CACHE_BYTES, min_addr, max_addr, nid)
|
- memblock_alloc(size, 0)
+ memblock_alloc(size, SMP_CACHE_BYTES)
|
- memblock_alloc_raw(size, 0)
+ memblock_alloc_raw(size, SMP_CACHE_BYTES)
|
- memblock_alloc_from(size, 0, min_addr)
+ memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr)
|
- memblock_alloc_nopanic(size, 0)
+ memblock_alloc_nopanic(size, SMP_CACHE_BYTES)
|
- memblock_alloc_low(size, 0)
+ memblock_alloc_low(size, SMP_CACHE_BYTES)
|
- memblock_alloc_low_nopanic(size, 0)
+ memblock_alloc_low_nopanic(size, SMP_CACHE_BYTES)
|
- memblock_alloc_from_nopanic(size, 0, min_addr)
+ memblock_alloc_from_nopanic(size, SMP_CACHE_BYTES, min_addr)
|
- memblock_alloc_node(size, 0, nid)
+ memblock_alloc_node(size, SMP_CACHE_BYTES, nid)
)

[mhocko@suse.com: changelog update]
[akpm@linux-foundation.org: coding-style fixes]
[rppt@linux.ibm.com: fix missed uses of implicit alignment]
Link: http://lkml.kernel.org/r/20181016133656.GA10925@rapoport-lnx
Link: http://lkml.kernel.org/r/1538687224-17535-1-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport
Suggested-by: Michal Hocko
Acked-by: Paul Burton [MIPS]
Acked-by: Michael Ellerman [powerpc]
Acked-by: Michal Hocko
Cc: Catalin Marinas
Cc: Chris Zankel
Cc: Geert Uytterhoeven
Cc: Guan Xuetao
Cc: Ingo Molnar
Cc: Matt Turner
Cc: Michal Simek
Cc: Richard Weinberger
Cc: Russell King
Cc: Thomas Gleixner
Cc: Tony Luck
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/alpha/kernel/core_apecs.c | 3 ++-
 arch/alpha/kernel/core_lca.c | 3 ++-
 arch/alpha/kernel/core_marvel.c | 4 ++--
 arch/alpha/kernel/core_mcpcia.c | 6 +++--
 arch/alpha/kernel/core_t2.c | 2 +-
 arch/alpha/kernel/core_titan.c | 6 +++--
 arch/alpha/kernel/core_tsunami.c | 6 +++--
 arch/alpha/kernel/core_wildfire.c | 6 +++--
 arch/alpha/kernel/pci-noop.c | 4 ++--
 arch/alpha/kernel/pci.c | 4 ++--
 arch/alpha/kernel/pci_iommu.c | 4 ++--
 arch/arm/kernel/setup.c | 4 ++--
 arch/arm/mach-omap2/omap_hwmod.c | 8 ++++---
 arch/arm64/kernel/setup.c | 2 +-
 arch/ia64/kernel/mca.c | 4 ++--
 arch/ia64/mm/tlb.c | 6 +++--
 arch/ia64/sn/kernel/io_common.c | 4 +++-
 arch/ia64/sn/kernel/setup.c | 5 ++--
 arch/m68k/sun3/sun3dvma.c | 2 +-
 arch/microblaze/mm/init.c | 2 +-
 arch/mips/kernel/setup.c | 2 +-
 arch/powerpc/kernel/paca.c | 2 +-
 arch/powerpc/kernel/pci_32.c | 3 ++-
 arch/powerpc/lib/alloc.c | 2 +-
 arch/powerpc/mm/mmu_context_nohash.c | 7 +++---
 arch/powerpc/platforms/powermac/nvram.c | 2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c | 6 ++---
 arch/powerpc/sysdev/msi_bitmap.c | 2 +-
 arch/um/drivers/net_kern.c | 2 +-
 arch/um/drivers/vector_kern.c | 2 +-
 arch/um/kernel/initrd.c | 2 +-
 arch/unicore32/kernel/setup.c | 2 +-
 arch/x86/kernel/acpi/boot.c | 2 +-
 arch/x86/kernel/apic/io_apic.c | 2 +-
 arch/x86/kernel/e820.c | 3 ++-
 arch/x86/platform/olpc/olpc_dt.c | 2 +-
 arch/xtensa/platforms/iss/network.c | 2 +-
 drivers/clk/ti/clk.c | 2 +-
 drivers/firmware/efi/memmap.c | 2 +-
 drivers/firmware/memmap.c | 3 ++-
 drivers/macintosh/smu.c | 2 +-
 drivers/of/of_reserved_mem.c | 1 +
 include/linux/memblock.h | 3 ++-
init/main.c | 13 +++++++---- kernel/power/snapshot.c | 3 ++- lib/cpumask.c | 2 +- mm/memblock.c | 8 ------- mm/page_alloc.c | 6 +++-- mm/percpu.c | 38 ++++++++++++++++--------------- mm/sparse.c | 3 ++- 50 files changed, 120 insertions(+), 96 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/core_apecs.c b/arch/alpha/kernel/core_apecs.c index 1bf3eef34c22..6df765ff2b10 100644 --- a/arch/alpha/kernel/core_apecs.c +++ b/arch/alpha/kernel/core_apecs.c @@ -346,7 +346,8 @@ apecs_init_arch(void) * Window 1 is direct access 1GB at 1GB * Window 2 is scatter-gather 8MB at 8MB (for isa) */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); hose->sg_pci = NULL; __direct_map_base = 0x40000000; __direct_map_size = 0x40000000; diff --git a/arch/alpha/kernel/core_lca.c b/arch/alpha/kernel/core_lca.c index 81c0c43635b0..57e0750419f2 100644 --- a/arch/alpha/kernel/core_lca.c +++ b/arch/alpha/kernel/core_lca.c @@ -275,7 +275,8 @@ lca_init_arch(void) * Note that we do not try to save any of the DMA window CSRs * before setting them, since we cannot read those CSRs on LCA. */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); hose->sg_pci = NULL; __direct_map_base = 0x40000000; __direct_map_size = 0x40000000; diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 8a568c4d8e81..c1d0c18c71ca 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -82,7 +82,7 @@ mk_resource_name(int pe, int port, char *str) char *name; sprintf(tmp, "PCI %s PE %d PORT %d", str, pe, port); - name = memblock_alloc(strlen(tmp) + 1, 0); + name = memblock_alloc(strlen(tmp) + 1, SMP_CACHE_BYTES); strcpy(name, tmp); return name; @@ -117,7 +117,7 @@ alloc_io7(unsigned int pe) return NULL; } - io7 = memblock_alloc(sizeof(*io7), 0); + io7 = memblock_alloc(sizeof(*io7), SMP_CACHE_BYTES); io7->pe = pe; raw_spin_lock_init(&io7->irq_lock); diff --git a/arch/alpha/kernel/core_mcpcia.c b/arch/alpha/kernel/core_mcpcia.c index b1549db54260..74b1d018124c 100644 --- a/arch/alpha/kernel/core_mcpcia.c +++ b/arch/alpha/kernel/core_mcpcia.c @@ -364,9 +364,11 @@ mcpcia_startup_hose(struct pci_controller *hose) * Window 1 is scatter-gather (up to) 1GB at 1GB (for pci) * Window 2 is direct access 2GB at 2GB */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); hose->sg_pci = iommu_arena_new(hose, 0x40000000, - size_for_memory(0x40000000), 0); + size_for_memory(0x40000000), + SMP_CACHE_BYTES); __direct_map_base = 0x80000000; __direct_map_size = 0x80000000; diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c index 2c00b61ca379..98d5b6ff8a76 100644 --- a/arch/alpha/kernel/core_t2.c +++ b/arch/alpha/kernel/core_t2.c @@ -351,7 +351,7 @@ t2_sg_map_window2(struct pci_controller *hose, /* Note we can only do 1 SG window, as the other is for direct, so do an ISA SG area, especially for the floppy. 
*/ - hose->sg_isa = iommu_arena_new(hose, base, length, 0); + hose->sg_isa = iommu_arena_new(hose, base, length, SMP_CACHE_BYTES); hose->sg_pci = NULL; temp = (base & 0xfff00000UL) | ((base + length - 1) >> 20); diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 97551597581b..2a2820fb1be6 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -316,10 +316,12 @@ titan_init_one_pachip_port(titan_pachip_port *port, int index) * Window 1 is direct access 1GB at 2GB * Window 2 is scatter-gather 1GB at 3GB */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); hose->sg_isa->align_entry = 8; /* 64KB for ISA */ - hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x40000000, 0); + hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x40000000, + SMP_CACHE_BYTES); hose->sg_pci->align_entry = 4; /* Titan caches 4 PTEs at a time */ port->wsba[0].csr = hose->sg_isa->dma_base | 3; diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c index f334b8928d72..fc1ab73f23de 100644 --- a/arch/alpha/kernel/core_tsunami.c +++ b/arch/alpha/kernel/core_tsunami.c @@ -319,12 +319,14 @@ tsunami_init_one_pchip(tsunami_pchip *pchip, int index) * NOTE: we need the align_entry settings for Acer devices on ES40, * specifically floppy and IDE when memory is larger than 2GB. */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); /* Initially set for 4 PTEs, but will be overridden to 64K for ISA. */ hose->sg_isa->align_entry = 4; hose->sg_pci = iommu_arena_new(hose, 0x40000000, - size_for_memory(0x40000000), 0); + size_for_memory(0x40000000), + SMP_CACHE_BYTES); hose->sg_pci->align_entry = 4; /* Tsunami caches 4 PTEs at a time */ __direct_map_base = 0x80000000; diff --git a/arch/alpha/kernel/core_wildfire.c b/arch/alpha/kernel/core_wildfire.c index cad36fc6ed7d..353c03d15442 100644 --- a/arch/alpha/kernel/core_wildfire.c +++ b/arch/alpha/kernel/core_wildfire.c @@ -111,8 +111,10 @@ wildfire_init_hose(int qbbno, int hoseno) * ??? We ought to scale window 3 memory. 
* */ - hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0); - hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000, 0); + hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, + SMP_CACHE_BYTES); + hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000, + SMP_CACHE_BYTES); pci = WILDFIRE_pci(qbbno, hoseno); diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index a9378ee0c2f1..091cff3c68fd 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -33,7 +33,7 @@ alloc_pci_controller(void) { struct pci_controller *hose; - hose = memblock_alloc(sizeof(*hose), 0); + hose = memblock_alloc(sizeof(*hose), SMP_CACHE_BYTES); *hose_tail = hose; hose_tail = &hose->next; @@ -44,7 +44,7 @@ alloc_pci_controller(void) struct resource * __init alloc_resource(void) { - return memblock_alloc(sizeof(struct resource), 0); + return memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); } SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus, diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 13937e72d875..97098127df83 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -392,7 +392,7 @@ alloc_pci_controller(void) { struct pci_controller *hose; - hose = memblock_alloc(sizeof(*hose), 0); + hose = memblock_alloc(sizeof(*hose), SMP_CACHE_BYTES); *hose_tail = hose; hose_tail = &hose->next; @@ -403,7 +403,7 @@ alloc_pci_controller(void) struct resource * __init alloc_resource(void) { - return memblock_alloc(sizeof(struct resource), 0); + return memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); } diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 82cf950bda2a..46e08e0d9181 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -79,7 +79,7 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base, printk("%s: couldn't allocate arena from node %d\n" " falling back to system-wide allocation\n", __func__, nid); - arena = memblock_alloc(sizeof(*arena), 0); + arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES); } arena->ptes = memblock_alloc_node(sizeof(*arena), align, nid); @@ -92,7 +92,7 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base, #else /* CONFIG_DISCONTIGMEM */ - arena = memblock_alloc(sizeof(*arena), 0); + arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES); arena->ptes = memblock_alloc_from(mem_size, align, 0); #endif /* CONFIG_DISCONTIGMEM */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 840a4adc69fc..ac7e08886863 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -856,7 +856,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) */ boot_alias_start = phys_to_idmap(start); if (arm_has_idmap_alias() && boot_alias_start != IDMAP_INVALID_ADDR) { - res = memblock_alloc(sizeof(*res), 0); + res = memblock_alloc(sizeof(*res), SMP_CACHE_BYTES); res->name = "System RAM (boot alias)"; res->start = boot_alias_start; res->end = phys_to_idmap(end); @@ -864,7 +864,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) request_resource(&iomem_resource, res); } - res = memblock_alloc(sizeof(*res), 0); + res = memblock_alloc(sizeof(*res), SMP_CACHE_BYTES); res->name = "System RAM"; res->start = start; res->end = end; diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index cd5732ab0cdf..083dcd9942ce 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ 
b/arch/arm/mach-omap2/omap_hwmod.c @@ -726,7 +726,7 @@ static int __init _setup_clkctrl_provider(struct device_node *np) u64 size; int i; - provider = memblock_alloc(sizeof(*provider), 0); + provider = memblock_alloc(sizeof(*provider), SMP_CACHE_BYTES); if (!provider) return -ENOMEM; @@ -736,12 +736,14 @@ static int __init _setup_clkctrl_provider(struct device_node *np) of_property_count_elems_of_size(np, "reg", sizeof(u32)) / 2; provider->addr = - memblock_alloc(sizeof(void *) * provider->num_addrs, 0); + memblock_alloc(sizeof(void *) * provider->num_addrs, + SMP_CACHE_BYTES); if (!provider->addr) return -ENOMEM; provider->size = - memblock_alloc(sizeof(u32) * provider->num_addrs, 0); + memblock_alloc(sizeof(u32) * provider->num_addrs, + SMP_CACHE_BYTES); if (!provider->size) return -ENOMEM; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 7ce7306f1d75..953e316521fc 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -218,7 +218,7 @@ static void __init request_standard_resources(void) num_standard_resources = memblock.memory.cnt; standard_resources = memblock_alloc_low(num_standard_resources * sizeof(*standard_resources), - 0); + SMP_CACHE_BYTES); for_each_memblock(memory, region) { res = &standard_resources[i++]; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 9a6603f8e409..91bd1e129379 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -361,9 +361,9 @@ static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; #define IA64_LOG_ALLOCATE(it, size) \ {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \ - (ia64_err_rec_t *)memblock_alloc(size, 0); \ + (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES); \ ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \ - (ia64_err_rec_t *)memblock_alloc(size, 0);} + (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES);} #define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock) #define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s) #define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s) diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index ab545daff7c3..9340bcb4f29c 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -59,8 +59,10 @@ struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; void __init mmu_context_init (void) { - ia64_ctx.bitmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, 0); - ia64_ctx.flushmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, 0); + ia64_ctx.bitmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, + SMP_CACHE_BYTES); + ia64_ctx.flushmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, + SMP_CACHE_BYTES); } /* diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index 98f55220c67d..8df13d0d96fa 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -391,7 +391,9 @@ void __init hubdev_init_node(nodepda_t * npda, cnodeid_t node) if (node >= num_online_nodes()) /* Headless/memless IO nodes */ node = 0; - hubdev_info = (struct hubdev_info *)memblock_alloc_node(size, 0, node); + hubdev_info = (struct hubdev_info *)memblock_alloc_node(size, + SMP_CACHE_BYTES, + node); npda->pdinfo = (void *)hubdev_info; } diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 71ad6b0ccab4..a6d40a2c5bff 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -511,7 +511,8 @@ static void __init sn_init_pdas(char **cmdline_p) */ for_each_online_node(cnode) { nodepdaindr[cnode] = - 
memblock_alloc_node(sizeof(nodepda_t), 0, cnode); + memblock_alloc_node(sizeof(nodepda_t), SMP_CACHE_BYTES, + cnode); memset(nodepdaindr[cnode]->phys_cpuid, -1, sizeof(nodepdaindr[cnode]->phys_cpuid)); spin_lock_init(&nodepdaindr[cnode]->ptc_lock); @@ -522,7 +523,7 @@ static void __init sn_init_pdas(char **cmdline_p) */ for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) nodepdaindr[cnode] = - memblock_alloc_node(sizeof(nodepda_t), 0, 0); + memblock_alloc_node(sizeof(nodepda_t), SMP_CACHE_BYTES, 0); /* * Now copy the array of nodepda pointers to each nodepda. diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c index 8be8b750c629..4d64711d3d47 100644 --- a/arch/m68k/sun3/sun3dvma.c +++ b/arch/m68k/sun3/sun3dvma.c @@ -268,7 +268,7 @@ void __init dvma_init(void) list_add(&(hole->list), &hole_list); iommu_use = memblock_alloc(IOMMU_TOTAL_ENTRIES * sizeof(unsigned long), - 0); + SMP_CACHE_BYTES); dvma_unmap_iommu(DVMA_START, DVMA_SIZE); diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 8c14988f52f2..b17fd8aafd64 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -376,7 +376,7 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) if (mem_init_done) p = kzalloc(size, mask); else { - p = memblock_alloc(size, 0); + p = memblock_alloc(size, SMP_CACHE_BYTES); if (p) memset(p, 0, size); } diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 41c1683761bb..ea09ed6a80a9 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -916,7 +916,7 @@ static void __init resource_init(void) if (end >= HIGHMEM_START) end = HIGHMEM_START - 1; - res = memblock_alloc(sizeof(struct resource), 0); + res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); res->start = start; res->end = end; diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index f331a0054b3a..913bfca09c4f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -198,7 +198,7 @@ void __init allocate_paca_ptrs(void) paca_nr_cpu_ids = nr_cpu_ids; paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids; - paca_ptrs = __va(memblock_phys_alloc(paca_ptrs_size, 0)); + paca_ptrs = __va(memblock_phys_alloc(paca_ptrs_size, SMP_CACHE_BYTES)); memset(paca_ptrs, 0x88, paca_ptrs_size); } diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 274bd1442dd9..d3f04f2d8249 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -203,7 +203,8 @@ pci_create_OF_bus_map(void) struct property* of_prop; struct device_node *dn; - of_prop = memblock_alloc(sizeof(struct property) + 256, 0); + of_prop = memblock_alloc(sizeof(struct property) + 256, + SMP_CACHE_BYTES); dn = of_find_node_by_path("/"); if (dn) { memset(of_prop, -1, sizeof(struct property) + 256); diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index 5b61704447c1..dedf88a76f58 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -14,7 +14,7 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) if (slab_is_available()) p = kzalloc(size, mask); else { - p = memblock_alloc(size, 0); + p = memblock_alloc(size, SMP_CACHE_BYTES); } return p; } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 67b9d7b669a1..2faca46ad720 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -461,10 +461,11 @@ void __init mmu_context_init(void) /* * Allocate the maps used by context management */ - context_map = 
memblock_alloc(CTX_MAP_SIZE, 0); - context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0); + context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); + context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), + SMP_CACHE_BYTES); #ifdef CONFIG_SMP - stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, 0); + stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, "powerpc/mmu/ctx:prepare", diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index f3391be7c762..ae54d7fe68f3 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -513,7 +513,7 @@ static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr) printk(KERN_ERR "nvram: no address\n"); return -EINVAL; } - nvram_image = memblock_alloc(NVRAM_SIZE, 0); + nvram_image = memblock_alloc(NVRAM_SIZE, SMP_CACHE_BYTES); nvram_data = ioremap(addr, NVRAM_SIZE*2); nvram_naddrs = 1; /* Make sure we get the correct case */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index aba81cbf0b36..dd807446801e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3769,7 +3769,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb_id = be64_to_cpup(prop64); pr_debug(" PHB-ID : 0x%016llx\n", phb_id); - phb = memblock_alloc(sizeof(*phb), 0); + phb = memblock_alloc(sizeof(*phb), SMP_CACHE_BYTES); /* Allocate PCI controller */ phb->hose = hose = pcibios_alloc_controller(np); @@ -3815,7 +3815,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, else phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE; - phb->diag_data = memblock_alloc(phb->diag_data_size, 0); + phb->diag_data = memblock_alloc(phb->diag_data_size, SMP_CACHE_BYTES); /* Parse 32-bit and IO ranges (if any) */ pci_process_bridge_OF_ranges(hose, np, !hose->global_number); @@ -3874,7 +3874,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, } pemap_off = size; size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe); - aux = memblock_alloc(size, 0); + aux = memblock_alloc(size, SMP_CACHE_BYTES); phb->ioda.pe_alloc = aux; phb->ioda.m64_segmap = aux + m64map_off; phb->ioda.m32_segmap = aux + m32map_off; diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 2444feda831f..d45450f6666a 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -128,7 +128,7 @@ int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count, if (bmp->bitmap_from_slab) bmp->bitmap = kzalloc(size, GFP_KERNEL); else { - bmp->bitmap = memblock_alloc(size, 0); + bmp->bitmap = memblock_alloc(size, SMP_CACHE_BYTES); /* the bitmap won't be freed from memblock allocator */ kmemleak_not_leak(bmp->bitmap); } diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 673816880cce..624cb47cc9cd 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -650,7 +650,7 @@ static int __init eth_setup(char *str) return 1; } - new = memblock_alloc(sizeof(*new), 0); + new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); INIT_LIST_HEAD(&new->list); new->index = n; diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 2b4dded11a7a..10d8d20eb9ec 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1580,7 +1580,7 @@ static int __init 
vector_setup(char *str) str, error); return 1; } - new = memblock_alloc(sizeof(*new), 0); + new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); INIT_LIST_HEAD(&new->list); new->unit = n; new->arguments = str; diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 3678f5b05e42..ce169ea87e61 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -36,7 +36,7 @@ int __init read_initrd(void) return 0; } - area = memblock_alloc(size, 0); + area = memblock_alloc(size, SMP_CACHE_BYTES); if (load_initrd(initrd, area, size) == -1) return 0; diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c index b2c38b32ea57..4b0cb68c355a 100644 --- a/arch/unicore32/kernel/setup.c +++ b/arch/unicore32/kernel/setup.c @@ -206,7 +206,7 @@ request_standard_resources(struct meminfo *mi) if (mi->bank[i].size == 0) continue; - res = memblock_alloc_low(sizeof(*res), 0); + res = memblock_alloc_low(sizeof(*res), SMP_CACHE_BYTES); res->name = "System RAM"; res->start = mi->bank[i].start; res->end = mi->bank[i].start + mi->bank[i].size - 1; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7f5d212551d4..92c76bf97ad8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -934,7 +934,7 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) */ #define HPET_RESOURCE_NAME_SIZE 9 hpet_res = memblock_alloc(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE, - 0); + SMP_CACHE_BYTES); hpet_res->name = (void *)&hpet_res[1]; hpet_res->flags = IORESOURCE_MEM; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5fbc57e4b0b9..2953bbf05c08 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2578,7 +2578,7 @@ static struct resource * __init ioapic_setup_resources(void) n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); n *= nr_ioapics; - mem = memblock_alloc(n, 0); + mem = memblock_alloc(n, SMP_CACHE_BYTES); res = (void *)mem; mem += sizeof(struct resource) * nr_ioapics; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 68ff62bffbab..50895c2f937d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1093,7 +1093,8 @@ void __init e820__reserve_resources(void) struct resource *res; u64 end; - res = memblock_alloc(sizeof(*res) * e820_table->nr_entries, 0); + res = memblock_alloc(sizeof(*res) * e820_table->nr_entries, + SMP_CACHE_BYTES); e820_res = res; for (i = 0; i < e820_table->nr_entries; i++) { diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index 115c8e4173bb..24d2175a9480 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -141,7 +141,7 @@ void * __init prom_early_alloc(unsigned long size) * fast enough on the platforms we care about while minimizing * wasted bootmem) and hand off chunks of it to callers. 
*/ - res = memblock_alloc(chunk_size, 0); + res = memblock_alloc(chunk_size, SMP_CACHE_BYTES); BUG_ON(!res); prom_early_allocated += chunk_size; memset(res, 0, chunk_size); diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 190846dddc67..d052712373b6 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -646,7 +646,7 @@ static int __init iss_net_setup(char *str) return 1; } - new = memblock_alloc(sizeof(*new), 0); + new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES); if (new == NULL) { pr_err("Alloc_bootmem failed\n"); return 1; diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 5b2867a33b98..e205af814582 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -342,7 +342,7 @@ void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem) { struct clk_iomap *io; - io = memblock_alloc(sizeof(*io), 0); + io = memblock_alloc(sizeof(*io), SMP_CACHE_BYTES); io->mem = mem; diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index ef618bceb79a..fa2904fb841f 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -15,7 +15,7 @@ static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size) { - return memblock_phys_alloc(size, 0); + return memblock_phys_alloc(size, SMP_CACHE_BYTES); } static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size) diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 2a23453f005a..d168c87c7d30 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -333,7 +333,8 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; - entry = memblock_alloc(sizeof(struct firmware_map_entry), 0); + entry = memblock_alloc(sizeof(struct firmware_map_entry), + SMP_CACHE_BYTES); if (WARN_ON(!entry)) return -ENOMEM; diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 880a81c82b7a..0a0b8e1f4236 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -492,7 +492,7 @@ int __init smu_init (void) goto fail_np; } - smu = memblock_alloc(sizeof(struct smu_device), 0); + smu = memblock_alloc(sizeof(struct smu_device), SMP_CACHE_BYTES); spin_lock_init(&smu->lock); INIT_LIST_HEAD(&smu->cmd_list); diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index d6255c276a41..1977ee0adcb1 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -36,6 +36,7 @@ int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, * panic()s on allocation failure. */ end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end; + align = !align ? 
SMP_CACHE_BYTES : align; base = __memblock_alloc_base(size, align, end); if (!base) return -ENOMEM; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 1b4d85879cbe..aee299a6aa76 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -406,7 +406,8 @@ static inline void * __init memblock_alloc_node(phys_addr_t size, static inline void * __init memblock_alloc_node_nopanic(phys_addr_t size, int nid) { - return memblock_alloc_try_nid_nopanic(size, 0, MEMBLOCK_LOW_LIMIT, + return memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, + MEMBLOCK_LOW_LIMIT, MEMBLOCK_ALLOC_ACCESSIBLE, nid); } diff --git a/init/main.c b/init/main.c index 51b8e7b8ae5b..ee147103ba1b 100644 --- a/init/main.c +++ b/init/main.c @@ -375,10 +375,11 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { } static void __init setup_command_line(char *command_line) { saved_command_line = - memblock_alloc(strlen(boot_command_line) + 1, 0); + memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES); initcall_command_line = - memblock_alloc(strlen(boot_command_line) + 1, 0); - static_command_line = memblock_alloc(strlen(command_line) + 1, 0); + memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES); + static_command_line = memblock_alloc(strlen(command_line) + 1, + SMP_CACHE_BYTES); strcpy(saved_command_line, boot_command_line); strcpy(static_command_line, command_line); } @@ -773,8 +774,10 @@ static int __init initcall_blacklist(char *str) str_entry = strsep(&str, ","); if (str_entry) { pr_debug("blacklisting initcall %s\n", str_entry); - entry = memblock_alloc(sizeof(*entry), 0); - entry->buf = memblock_alloc(strlen(str_entry) + 1, 0); + entry = memblock_alloc(sizeof(*entry), + SMP_CACHE_BYTES); + entry->buf = memblock_alloc(strlen(str_entry) + 1, + SMP_CACHE_BYTES); strcpy(entry->buf, str_entry); list_add(&entry->next, &blacklisted_initcalls); } diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 3c9e365438ad..b0308a2c6000 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -963,7 +963,8 @@ void __init __register_nosave_region(unsigned long start_pfn, BUG_ON(!region); } else { /* This allocation cannot fail */ - region = memblock_alloc(sizeof(struct nosave_region), 0); + region = memblock_alloc(sizeof(struct nosave_region), + SMP_CACHE_BYTES); } region->start_pfn = start_pfn; region->end_pfn = end_pfn; diff --git a/lib/cpumask.c b/lib/cpumask.c index 75b5e7672c4c..8d666ab84b5c 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -163,7 +163,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var); */ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { - *mask = memblock_alloc(cpumask_size(), 0); + *mask = memblock_alloc(cpumask_size(), SMP_CACHE_BYTES); } /** diff --git a/mm/memblock.c b/mm/memblock.c index c655342569f8..839531133816 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1247,9 +1247,6 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, { phys_addr_t found; - if (!align) - align = SMP_CACHE_BYTES; - found = memblock_find_in_range_node(size, align, start, end, nid, flags); if (found && !memblock_reserve(found, size)) { @@ -1343,8 +1340,6 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali * The allocation is performed from memory region limited by * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE. * - * The memory block is aligned on %SMP_CACHE_BYTES if @align == 0. 
- * * The phys address of allocated boot memory block is converted to virtual and * allocated memory is reset to 0. * @@ -1374,9 +1369,6 @@ static void * __init memblock_alloc_internal( if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, nid); - if (!align) - align = SMP_CACHE_BYTES; - if (max_addr > memblock.current_limit) max_addr = memblock.current_limit; again: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ef289fadec0e..a919ba5cb3c8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7710,9 +7710,11 @@ void *__init alloc_large_system_hash(const char *tablename, size = bucketsize << log2qty; if (flags & HASH_EARLY) { if (flags & HASH_ZERO) - table = memblock_alloc_nopanic(size, 0); + table = memblock_alloc_nopanic(size, + SMP_CACHE_BYTES); else - table = memblock_alloc_raw(size, 0); + table = memblock_alloc_raw(size, + SMP_CACHE_BYTES); } else if (hashdist) { table = __vmalloc(size, gfp_flags, PAGE_KERNEL); } else { diff --git a/mm/percpu.c b/mm/percpu.c index 61cdbb3b3736..a6b74c6fe0be 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1102,8 +1102,8 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, /* allocate chunk */ chunk = memblock_alloc(sizeof(struct pcpu_chunk) + - BITS_TO_LONGS(region_size >> PAGE_SHIFT), - 0); + BITS_TO_LONGS(region_size >> PAGE_SHIFT), + SMP_CACHE_BYTES); INIT_LIST_HEAD(&chunk->list); @@ -1114,12 +1114,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, chunk->nr_pages = region_size >> PAGE_SHIFT; region_bits = pcpu_chunk_map_bits(chunk); - chunk->alloc_map = memblock_alloc(BITS_TO_LONGS(region_bits) * - sizeof(chunk->alloc_map[0]), 0); - chunk->bound_map = memblock_alloc(BITS_TO_LONGS(region_bits + 1) * - sizeof(chunk->bound_map[0]), 0); - chunk->md_blocks = memblock_alloc(pcpu_chunk_nr_blocks(chunk) * - sizeof(chunk->md_blocks[0]), 0); + chunk->alloc_map = memblock_alloc(BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]), + SMP_CACHE_BYTES); + chunk->bound_map = memblock_alloc(BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]), + SMP_CACHE_BYTES); + chunk->md_blocks = memblock_alloc(pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]), + SMP_CACHE_BYTES); pcpu_init_md_blocks(chunk); /* manage populated page bitmap */ @@ -2075,12 +2075,14 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0); /* process group information and build config tables accordingly */ - group_offsets = memblock_alloc(ai->nr_groups * - sizeof(group_offsets[0]), 0); - group_sizes = memblock_alloc(ai->nr_groups * - sizeof(group_sizes[0]), 0); - unit_map = memblock_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0); - unit_off = memblock_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0); + group_offsets = memblock_alloc(ai->nr_groups * sizeof(group_offsets[0]), + SMP_CACHE_BYTES); + group_sizes = memblock_alloc(ai->nr_groups * sizeof(group_sizes[0]), + SMP_CACHE_BYTES); + unit_map = memblock_alloc(nr_cpu_ids * sizeof(unit_map[0]), + SMP_CACHE_BYTES); + unit_off = memblock_alloc(nr_cpu_ids * sizeof(unit_off[0]), + SMP_CACHE_BYTES); for (cpu = 0; cpu < nr_cpu_ids; cpu++) unit_map[cpu] = UINT_MAX; @@ -2144,8 +2146,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, * empty chunks. 
*/ pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; - pcpu_slot = memblock_alloc( - pcpu_nr_slots * sizeof(pcpu_slot[0]), 0); + pcpu_slot = memblock_alloc(pcpu_nr_slots * sizeof(pcpu_slot[0]), + SMP_CACHE_BYTES); for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_slot[i]); @@ -2458,7 +2460,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); - areas = memblock_alloc_nopanic(areas_size, 0); + areas = memblock_alloc_nopanic(areas_size, SMP_CACHE_BYTES); if (!areas) { rc = -ENOMEM; goto out_free; @@ -2599,7 +2601,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, /* unaligned allocations can't be freed, round up to page size */ pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * sizeof(pages[0])); - pages = memblock_alloc(pages_size, 0); + pages = memblock_alloc(pages_size, SMP_CACHE_BYTES); /* allocate pages */ j = 0; diff --git a/mm/sparse.c b/mm/sparse.c index ab2ac45e0440..33307fc05c4d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -68,7 +68,8 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid) if (slab_is_available()) section = kzalloc_node(array_size, GFP_KERNEL, nid); else - section = memblock_alloc_node(array_size, 0, nid); + section = memblock_alloc_node(array_size, SMP_CACHE_BYTES, + nid); return section; } -- cgit v1.2.3-71-gd317 From d15e59260f62bd5e0f625cf5f5240f6ffac78ab6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 30 Oct 2018 15:10:18 -0700 Subject: mm/memory_hotplug: make remove_memory() take the device_hotplug_lock Patch series "mm: online/offline_pages called w.o. mem_hotplug_lock", v3. Reading through the code and studying how mem_hotplug_lock is to be used, I noticed that there are two places where we can end up calling device_online()/device_offline() - and thus online_pages()/offline_pages() - without the mem_hotplug_lock. And there are other places where we call device_online()/device_offline() without the device_hotplug_lock. While e.g. echo "online" > /sys/devices/system/memory/memory9/state is fine, e.g. echo 1 > /sys/devices/system/memory/memory9/online will not take the mem_hotplug_lock, only the device_lock() and the device_hotplug_lock. E.g. via memory_probe_store(), we can end up calling add_memory()->online_pages() without the device_hotplug_lock. So we can have concurrent callers in online_pages(). In online_pages() we then touch, e.g., zone->present_pages essentially unprotected. Looks like there is a longer history to that (see Patch #2 for details), and fixing it to work the way it was intended is not really possible. We would e.g. have to take the mem_hotplug_lock in drivers/base/core.c, which sounds wrong. Summary: We had a lock inversion on mem_hotplug_lock and device_lock(). More details can be found in patch 3 and patch 6. I propose the general rules (documentation added in patch 6): 1. add_memory/add_memory_resource() must only be called with the device_hotplug_lock held. 2. remove_memory() must only be called with the device_hotplug_lock held. This is already documented and holds for all callers. 3. device_online()/device_offline() must only be called with the device_hotplug_lock held. This is already documented and true for now in core code. Other callers (related to memory hotplug) have to be fixed up. 4. mem_hotplug_lock is taken inside of add_memory/remove_memory/ online_pages/offline_pages. To me, this looks way cleaner than what we have right now (and easier to verify).
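As a sketch of the calling convention rules 1-4 imply (the helper below is hypothetical; lock_device_hotplug(), unlock_device_hotplug() and device_online() are the existing driver-core APIs):

#include <linux/device.h>
#include <linux/memory.h>

/* Hypothetical helper, not part of the series. */
static int example_online_memory_block(struct memory_block *mem)
{
	int rc;

	lock_device_hotplug();			/* rule 3 */
	rc = device_online(&mem->dev);		/* ends up in online_pages(),
						 * which per rule 4 takes
						 * mem_hotplug_lock internally */
	unlock_device_hotplug();

	return rc;
}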
And looking at the documentation of remove_memory, using lock_device_hotplug also for add_memory() feels natural. This patch (of 6): remove_memory() is exported right now but requires the device_hotplug_lock, which is not exported. So let's provide a variant that takes the lock and only export that one. The lock is already held in arch/powerpc/platforms/pseries/hotplug-memory.c drivers/acpi/acpi_memhotplug.c arch/powerpc/platforms/powernv/memtrace.c Apart from that, there are no other users in the tree. Link: http://lkml.kernel.org/r/20180925091457.28651-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Pavel Tatashin Reviewed-by: Rafael J. Wysocki Reviewed-by: Rashmica Gupta Reviewed-by: Oscar Salvador Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Rashmica Gupta Cc: Michael Neuling Cc: Balbir Singh Cc: Nathan Fontenot Cc: John Allen Cc: Michal Hocko Cc: Dan Williams Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: Greg Kroah-Hartman Cc: YASUAKI ISHIMATSU Cc: Mathieu Malaterre Cc: Boris Ostrovsky Cc: Haiyang Zhang Cc: Heiko Carstens Cc: Jonathan Corbet Cc: Juergen Gross Cc: Kate Stewart Cc: "K. Y. Srinivasan" Cc: Martin Schwidefsky Cc: Philippe Ombredanne Cc: Stephen Hemminger Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/powernv/memtrace.c | 2 +- arch/powerpc/platforms/pseries/hotplug-memory.c | 6 +++--- drivers/acpi/acpi_memhotplug.c | 2 +- include/linux/memory_hotplug.h | 3 ++- mm/memory_hotplug.c | 9 ++++++++- 5 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index a29fdf8a2e56..773623f6bfb1 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -121,7 +121,7 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) lock_device_hotplug(); end_pfn = base_pfn + nr_pages; for (pfn = base_pfn; pfn < end_pfn; pfn += bytes >> PAGE_SHIFT) { - remove_memory(nid, pfn << PAGE_SHIFT, bytes); + __remove_memory(nid, pfn << PAGE_SHIFT, bytes); } unlock_device_hotplug(); return base_pfn << PAGE_SHIFT; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 2b796da822c2..d79b31e7a6cf 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -300,7 +300,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz nid = memory_add_physaddr_to_nid(base); for (i = 0; i < sections_per_block; i++) { - remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); + __remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); base += MIN_MEMORY_BLOCK_SIZE; } @@ -389,7 +389,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) block_sz = pseries_memory_block_size(); nid = memory_add_physaddr_to_nid(lmb->base_addr); - remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(nid, lmb->base_addr, block_sz); /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, block_sz); @@ -676,7 +676,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(nid, lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 6b0d3ef7309c..811148415993
100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -282,7 +282,7 @@ static void acpi_memory_remove_memory(struct acpi_memory_device *mem_device) nid = memory_add_physaddr_to_nid(info->start_addr); acpi_unbind_memory_blocks(info); - remove_memory(nid, info->start_addr, info->length); + __remove_memory(nid, info->start_addr, info->length); list_del(&info->list); kfree(info); } diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 34a28227068d..1f096852f479 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -301,6 +301,7 @@ extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); extern void try_offline_node(int nid); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern void remove_memory(int nid, u64 start, u64 size); +extern void __remove_memory(int nid, u64 start, u64 size); #else static inline bool is_mem_section_removable(unsigned long pfn, @@ -317,6 +318,7 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages) } static inline void remove_memory(int nid, u64 start, u64 size) {} +static inline void __remove_memory(int nid, u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ extern void __ref free_area_init_core_hotplug(int nid); @@ -330,7 +332,6 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); -extern void remove_memory(int nid, u64 start, u64 size); extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn, struct vmem_altmap *altmap); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 41e326472ef9..8f38e689da25 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1806,7 +1806,7 @@ EXPORT_SYMBOL(try_offline_node); * and online/offline operations before this call, as required by * try_offline_node(). */ -void __ref remove_memory(int nid, u64 start, u64 size) +void __ref __remove_memory(int nid, u64 start, u64 size) { int ret; @@ -1835,5 +1835,12 @@ void __ref remove_memory(int nid, u64 start, u64 size) mem_hotplug_done(); } + +void remove_memory(int nid, u64 start, u64 size) +{ + lock_device_hotplug(); + __remove_memory(nid, start, size); + unlock_device_hotplug(); +} EXPORT_SYMBOL_GPL(remove_memory); #endif /* CONFIG_MEMORY_HOTREMOVE */ -- cgit v1.2.3-71-gd317 From 8df1d0e4a265f25dc1e7e7624ccdbcb4a6630c89 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 30 Oct 2018 15:10:24 -0700 Subject: mm/memory_hotplug: make add_memory() take the device_hotplug_lock add_memory() currently does not take the device_hotplug_lock, however it is already called under the lock from arch/powerpc/platforms/pseries/hotplug-memory.c drivers/acpi/acpi_memhotplug.c to synchronize against CPU hot-remove and similar. In general, we should hold the device_hotplug_lock when adding memory to synchronize against online/offline requests (e.g. from user space) - which already resulted in lock inversions due to device_lock() and mem_hotplug_lock - see 30467e0b3be ("mm, hotplug: fix concurrent memory hot-add deadlock"). add_memory()/add_memory_resource() will create memory block devices, so this really feels like the right thing to do.
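The resulting pattern, condensed from the mm/memory_hotplug.c hunks of this patch and the previous one, is a __-prefixed worker that expects the device_hotplug_lock to be held plus a self-locking exported wrapper:

/* Condensed sketch of the pattern; the real definitions are in the
 * diffs of these two patches. */
int __add_memory(int nid, u64 start, u64 size);		/* caller holds device_hotplug_lock */
void __remove_memory(int nid, u64 start, u64 size);	/* caller holds device_hotplug_lock */

int add_memory(int nid, u64 start, u64 size)		/* exported, takes the lock itself */
{
	int rc;

	lock_device_hotplug();
	rc = __add_memory(nid, start, size);
	unlock_device_hotplug();

	return rc;
}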
Holding the device_hotplug_lock makes sure that a memory block device can really only be accessed (e.g. via .online/.state) from user space, once the memory has been fully added to the system. The lock is not held yet in drivers/xen/balloon.c arch/powerpc/platforms/powernv/memtrace.c drivers/s390/char/sclp_cmd.c drivers/hv/hv_balloon.c So, let's either use the locked variants or take the lock. Don't export add_memory_resource(), as it once was exported to be used by XEN, which is never built as a module. If somebody requires it, we also have to export a locked variant (as device_hotplug_lock is never exported). Link: http://lkml.kernel.org/r/20180925091457.28651-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Pavel Tatashin Reviewed-by: Rafael J. Wysocki Reviewed-by: Rashmica Gupta Reviewed-by: Oscar Salvador Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Greg Kroah-Hartman Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Nathan Fontenot Cc: John Allen Cc: Michal Hocko Cc: Dan Williams Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: Mathieu Malaterre Cc: Pavel Tatashin Cc: YASUAKI ISHIMATSU Cc: Balbir Singh Cc: Haiyang Zhang Cc: Heiko Carstens Cc: Jonathan Corbet Cc: Kate Stewart Cc: "K. Y. Srinivasan" Cc: Martin Schwidefsky Cc: Michael Neuling Cc: Philippe Ombredanne Cc: Stephen Hemminger Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/pseries/hotplug-memory.c | 2 +- drivers/acpi/acpi_memhotplug.c | 2 +- drivers/base/memory.c | 9 +++++++-- drivers/xen/balloon.c | 3 +++ include/linux/memory_hotplug.h | 1 + mm/memory_hotplug.c | 22 +++++++++++++++++++--- 6 files changed, 32 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index d79b31e7a6cf..2a983b5a52e1 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -668,7 +668,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) nid = memory_add_physaddr_to_nid(lmb->base_addr); /* Add the memory */ - rc = add_memory(nid, lmb->base_addr, block_sz); + rc = __add_memory(nid, lmb->base_addr, block_sz); if (rc) { invalidate_lmb_associativity_index(lmb); return rc; diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 811148415993..8fe0960ea572 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -228,7 +228,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) if (node < 0) node = memory_add_physaddr_to_nid(info->start_addr); - result = add_memory(node, info->start_addr, info->length); + result = __add_memory(node, info->start_addr, info->length); /* * If the memory block has been used by the kernel, add_memory() diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 817320c7c4c1..40cac122ec73 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -519,15 +519,20 @@ memory_probe_store(struct device *dev, struct device_attribute *attr, if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) return -EINVAL; + ret = lock_device_hotplug_sysfs(); + if (ret) + goto out; + nid = memory_add_physaddr_to_nid(phys_addr); - ret = add_memory(nid, phys_addr, - MIN_MEMORY_BLOCK_SIZE * sections_per_block); + ret = __add_memory(nid, phys_addr, + MIN_MEMORY_BLOCK_SIZE * sections_per_block); if (ret) goto out; ret = count; out: + unlock_device_hotplug(); return ret; 
} diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index a3f5cbfcd4a1..fdfc64f5acea 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -395,7 +395,10 @@ static enum bp_state reserve_additional_memory(void) * callers drop the mutex before trying again. */ mutex_unlock(&balloon_mutex); + /* add_memory_resource() requires the device_hotplug lock */ + lock_device_hotplug(); rc = add_memory_resource(nid, resource, memhp_auto_online); + unlock_device_hotplug(); mutex_lock(&balloon_mutex); if (rc) { diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 1f096852f479..ffd9cd10fcf3 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -324,6 +324,7 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {} extern void __ref free_area_init_core_hotplug(int nid); extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); +extern int __add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource, bool online); extern int arch_add_memory(int nid, u64 start, u64 size, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 8f38e689da25..39cc887bbdcc 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1068,7 +1068,12 @@ static int online_memory_block(struct memory_block *mem, void *arg) return device_online(&mem->dev); } -/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ +/* + * NOTE: The caller must call lock_device_hotplug() to serialize hotplug + * and online/offline operations (triggered e.g. by sysfs). + * + * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG + */ int __ref add_memory_resource(int nid, struct resource *res, bool online) { u64 start, size; @@ -1137,9 +1142,9 @@ out: mem_hotplug_done(); return ret; } -EXPORT_SYMBOL_GPL(add_memory_resource); -int __ref add_memory(int nid, u64 start, u64 size) +/* requires device_hotplug_lock, see add_memory_resource() */ +int __ref __add_memory(int nid, u64 start, u64 size) { struct resource *res; int ret; @@ -1153,6 +1158,17 @@ int __ref add_memory(int nid, u64 start, u64 size) release_memory_resource(res); return ret; } + +int add_memory(int nid, u64 start, u64 size) +{ + int rc; + + lock_device_hotplug(); + rc = __add_memory(nid, start, size); + unlock_device_hotplug(); + + return rc; +} EXPORT_SYMBOL_GPL(add_memory); #ifdef CONFIG_MEMORY_HOTREMOVE -- cgit v1.2.3-71-gd317
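Taken together, the two patches leave callers with a simple rule of thumb. A hedged usage sketch (the surrounding driver context and the values nid/start/size are placeholders):

	/* Core code that already holds device_hotplug_lock (e.g. an ACPI
	 * or pseries hotplug path) calls the __ variants directly: */
	lock_device_hotplug();
	rc = __add_memory(nid, start, size);
	/* ... on failure, possibly __remove_memory() to roll back ... */
	unlock_device_hotplug();

	/* Everything else - in particular modules, since device_hotplug_lock
	 * is not exported - uses the self-locking wrappers: */
	rc = add_memory(nid, start, size);
	/* ... */
	remove_memory(nid, start, size);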