From f5895943d91b41b0368830cdb6eaffb8eda0f4c8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Mar 2014 17:44:49 +0000
Subject: KEYS: Move the flags representing required permission to linux/key.h

Move the flags representing required permission to linux/key.h as the perm
parameter of security_key_permission() is in terms of them - and not the
permissions mask flags used in key->perm.

Whilst we're at it:

 (1) Rename them to be KEY_NEED_xxx rather than KEY_xxx to avoid collisions
     with symbols in uapi/linux/input.h.

 (2) Don't use key_perm_t for a mask of required permissions, but rather limit
     it to the permissions mask attached to the key and arguments related
     directly to that.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Dmitry Kasatkin <d.kasatkin@samsung.com>
---
 include/linux/key.h      | 11 +++++++++++
 include/linux/security.h |  6 +++---
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/key.h b/include/linux/key.h
index 80d677483e31..cd0abb8c9c33 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -309,6 +309,17 @@ static inline key_serial_t key_serial(const struct key *key)
 
 extern void key_set_timeout(struct key *, unsigned);
 
+/*
+ * The permissions required on a key that we're looking up.
+ */
+#define	KEY_NEED_VIEW	0x01	/* Require permission to view attributes */
+#define	KEY_NEED_READ	0x02	/* Require permission to read content */
+#define	KEY_NEED_WRITE	0x04	/* Require permission to update / modify */
+#define	KEY_NEED_SEARCH	0x08	/* Require permission to search (keyring) or find (key) */
+#define	KEY_NEED_LINK	0x10	/* Require permission to link */
+#define	KEY_NEED_SETATTR 0x20	/* Require permission to change attributes */
+#define	KEY_NEED_ALL	0x3f	/* All the above permissions */
+
 /**
  * key_is_instantiated - Determine if a key has been positively instantiated
  * @key: The key to check.
diff --git a/include/linux/security.h b/include/linux/security.h
index 5623a7f965b7..3a5ed0cd2751 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1707,7 +1707,7 @@ struct security_operations {
 	void (*key_free) (struct key *key);
 	int (*key_permission) (key_ref_t key_ref,
 			       const struct cred *cred,
-			       key_perm_t perm);
+			       unsigned perm);
 	int (*key_getsecurity)(struct key *key, char **_buffer);
 #endif	/* CONFIG_KEYS */
 
@@ -3026,7 +3026,7 @@ static inline int security_path_chroot(struct path *path)
 int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags);
 void security_key_free(struct key *key);
 int security_key_permission(key_ref_t key_ref,
-			    const struct cred *cred, key_perm_t perm);
+			    const struct cred *cred, unsigned perm);
 int security_key_getsecurity(struct key *key, char **_buffer);
 
 #else
@@ -3044,7 +3044,7 @@ static inline void security_key_free(struct key *key)
 
 static inline int security_key_permission(key_ref_t key_ref,
 					  const struct cred *cred,
-					  key_perm_t perm)
+					  unsigned perm)
 {
 	return 0;
 }
-- 
cgit v1.2.3-71-gd317


From 5d2e9fadf43e87e690bfbe607313bf9be47867e4 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 25 Mar 2014 10:30:47 +0200
Subject: Bluetooth: Add scan_rsp parameter to mgmt_device_found()

In preparation for being able to merge ADV_IND/ADV_SCAN_IND and SCAN_RSP
together into a single device found event add a second parameter to the
mgmt_device_found function. For now all callers pass NULL as this
parameters since we don't yet have storing of the last received
advertising report.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  5 +++--
 net/bluetooth/hci_event.c        | 10 +++++-----
 net/bluetooth/mgmt.c             | 16 +++++++++++-----
 3 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 5f8bc05694ac..a1b8eab8a47d 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1264,8 +1264,9 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192,
 				       u8 *randomizer192, u8 *hash256,
 				       u8 *randomizer256, u8 status);
 void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
-		       u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name,
-		       u8 ssp, u8 *eir, u16 eir_len);
+		       u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8
+		       ssp, u8 *eir, u16 eir_len, u8 *scan_rsp,
+		       u8 scan_rsp_len);
 void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		      u8 addr_type, s8 rssi, u8 *name, u8 name_len);
 void mgmt_discovering(struct hci_dev *hdev, u8 discovering);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 2388f2c09887..4a2c919d5908 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1827,7 +1827,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		name_known = hci_inquiry_cache_update(hdev, &data, false, &ssp);
 		mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
 				  info->dev_class, 0, !name_known, ssp, NULL,
-				  0);
+				  0, NULL, 0);
 	}
 
 	hci_dev_unlock(hdev);
@@ -3102,7 +3102,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
 							      false, &ssp);
 			mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
 					  info->dev_class, info->rssi,
-					  !name_known, ssp, NULL, 0);
+					  !name_known, ssp, NULL, 0, NULL, 0);
 		}
 	} else {
 		struct inquiry_info_with_rssi *info = (void *) (skb->data + 1);
@@ -3120,7 +3120,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev,
 							      false, &ssp);
 			mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
 					  info->dev_class, info->rssi,
-					  !name_known, ssp, NULL, 0);
+					  !name_known, ssp, NULL, 0, NULL, 0);
 		}
 	}
 
@@ -3309,7 +3309,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
 		eir_len = eir_get_length(info->data, sizeof(info->data));
 		mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00,
 				  info->dev_class, info->rssi, !name_known,
-				  ssp, info->data, eir_len);
+				  ssp, info->data, eir_len, NULL, 0);
 	}
 
 	hci_dev_unlock(hdev);
@@ -3972,7 +3972,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 	}
 
 	mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, rssi, 0, 1,
-			  data, len);
+			  data, len, NULL, 0);
 }
 
 static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d2d4e0d5aed0..a0ef5c076880 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -5669,7 +5669,8 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192,
 
 void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		       u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8
-		       ssp, u8 *eir, u16 eir_len)
+		       ssp, u8 *eir, u16 eir_len, u8 *scan_rsp,
+		       u8 scan_rsp_len)
 {
 	char buf[512];
 	struct mgmt_ev_device_found *ev = (void *) buf;
@@ -5679,8 +5680,10 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 	if (!hci_discovery_active(hdev))
 		return;
 
-	/* Leave 5 bytes for a potential CoD field */
-	if (sizeof(*ev) + eir_len + 5 > sizeof(buf))
+	/* Make sure that the buffer is big enough. The 5 extra bytes
+	 * are for the potential CoD field.
+	 */
+	if (sizeof(*ev) + eir_len + scan_rsp_len + 5 > sizeof(buf))
 		return;
 
 	memset(buf, 0, sizeof(buf));
@@ -5707,8 +5710,11 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV,
 					  dev_class, 3);
 
-	ev->eir_len = cpu_to_le16(eir_len);
-	ev_size = sizeof(*ev) + eir_len;
+	if (scan_rsp_len > 0)
+		memcpy(ev->eir + eir_len, scan_rsp, scan_rsp_len);
+
+	ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len);
+	ev_size = sizeof(*ev) + eir_len + scan_rsp_len;
 
 	mgmt_event(MGMT_EV_DEVICE_FOUND, hdev, ev, ev_size, NULL);
 }
-- 
cgit v1.2.3-71-gd317


From 3c857757ef6e5a4e472bd3e5c934709c2eb482af Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 25 Mar 2014 10:30:49 +0200
Subject: Bluetooth: Add directed advertising support through connect()

When we're in peripheral mode (HCI_ADVERTISING flag is set) the most
natural mapping of connect() is to perform directed advertising to the
peer device.

This patch does the necessary changes to enable directed advertising and
keeps the hci_conn state as BT_CONNECT in a similar way as is done for
central or BR/EDR connection initiation.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h |  1 +
 net/bluetooth/hci_conn.c    | 77 ++++++++++++++++++++++++++++++++++++++++-----
 net/bluetooth/hci_event.c   | 19 +++++++++--
 3 files changed, 87 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index be150cf8cd43..4261a67682c0 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -367,6 +367,7 @@ enum {
 #define HCI_ERROR_REMOTE_POWER_OFF	0x15
 #define HCI_ERROR_LOCAL_HOST_TERM	0x16
 #define HCI_ERROR_PAIRING_NOT_ALLOWED	0x18
+#define HCI_ERROR_ADVERTISING_TIMEOUT	0x3c
 
 /* Flow control modes */
 #define HCI_FLOW_CTL_MODE_PACKET_BASED	0x00
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 129c22a85ccf..55a174317925 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -367,9 +367,23 @@ static void le_conn_timeout(struct work_struct *work)
 {
 	struct hci_conn *conn = container_of(work, struct hci_conn,
 					     le_conn_timeout.work);
+	struct hci_dev *hdev = conn->hdev;
 
 	BT_DBG("");
 
+	/* We could end up here due to having done directed advertising,
+	 * so clean up the state if necessary. This should however only
+	 * happen with broken hardware or if low duty cycle was used
+	 * (which doesn't have a timeout of its own).
+	 */
+	if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) {
+		u8 enable = 0x00;
+		hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable),
+			     &enable);
+		hci_le_conn_failed(conn, HCI_ERROR_ADVERTISING_TIMEOUT);
+		return;
+	}
+
 	hci_le_create_connection_cancel(conn);
 }
 
@@ -549,6 +563,11 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
 	 * favor of connection establishment, we should restart it.
 	 */
 	hci_update_background_scan(hdev);
+
+	/* Re-enable advertising in case this was a failed connection
+	 * attempt as a peripheral.
+	 */
+	mgmt_reenable_advertising(hdev);
 }
 
 static void create_le_conn_complete(struct hci_dev *hdev, u8 status)
@@ -609,6 +628,45 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
 	conn->state = BT_CONNECT;
 }
 
+static void hci_req_directed_advertising(struct hci_request *req,
+					 struct hci_conn *conn)
+{
+	struct hci_dev *hdev = req->hdev;
+	struct hci_cp_le_set_adv_param cp;
+	u8 own_addr_type;
+	u8 enable;
+
+	enable = 0x00;
+	hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+
+	/* Clear the HCI_ADVERTISING bit temporarily so that the
+	 * hci_update_random_address knows that it's safe to go ahead
+	 * and write a new random address. The flag will be set back on
+	 * as soon as the SET_ADV_ENABLE HCI command completes.
+	 */
+	clear_bit(HCI_ADVERTISING, &hdev->dev_flags);
+
+	/* Set require_privacy to false so that the remote device has a
+	 * chance of identifying us.
+	 */
+	if (hci_update_random_address(req, false, &own_addr_type) < 0)
+		return;
+
+	memset(&cp, 0, sizeof(cp));
+	cp.type = LE_ADV_DIRECT_IND;
+	cp.own_address_type = own_addr_type;
+	cp.direct_addr_type = conn->dst_type;
+	bacpy(&cp.direct_addr, &conn->dst);
+	cp.channel_map = hdev->le_adv_channel_map;
+
+	hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp);
+
+	enable = 0x01;
+	hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+
+	conn->state = BT_CONNECT;
+}
+
 struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 				u8 dst_type, u8 sec_level, u8 auth_type)
 {
@@ -618,9 +676,6 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 	struct hci_request req;
 	int err;
 
-	if (test_bit(HCI_ADVERTISING, &hdev->flags))
-		return ERR_PTR(-ENOTSUPP);
-
 	/* Some devices send ATT messages as soon as the physical link is
 	 * established. To be able to handle these ATT messages, the user-
 	 * space first establishes the connection and then starts the pairing
@@ -668,13 +723,20 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 		return ERR_PTR(-ENOMEM);
 
 	conn->dst_type = dst_type;
-
-	conn->out = true;
-	conn->link_mode |= HCI_LM_MASTER;
 	conn->sec_level = BT_SECURITY_LOW;
 	conn->pending_sec_level = sec_level;
 	conn->auth_type = auth_type;
 
+	hci_req_init(&req, hdev);
+
+	if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) {
+		hci_req_directed_advertising(&req, conn);
+		goto create_conn;
+	}
+
+	conn->out = true;
+	conn->link_mode |= HCI_LM_MASTER;
+
 	params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type);
 	if (params) {
 		conn->le_conn_min_interval = params->conn_min_interval;
@@ -684,8 +746,6 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 		conn->le_conn_max_interval = hdev->le_conn_max_interval;
 	}
 
-	hci_req_init(&req, hdev);
-
 	/* If controller is scanning, we stop it since some controllers are
 	 * not able to scan and connect at the same time. Also set the
 	 * HCI_LE_SCAN_INTERRUPTED flag so that the command complete
@@ -699,6 +759,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 
 	hci_req_add_le_create_conn(&req, conn);
 
+create_conn:
 	err = hci_req_run(&req, create_le_conn_complete);
 	if (err) {
 		hci_conn_del(conn);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4a2c919d5908..81e5236a0119 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -991,10 +991,25 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
 	if (!sent)
 		return;
 
+	if (status)
+		return;
+
 	hci_dev_lock(hdev);
 
-	if (!status)
-		mgmt_advertising(hdev, *sent);
+	/* If we're doing connection initation as peripheral. Set a
+	 * timeout in case something goes wrong.
+	 */
+	if (*sent) {
+		struct hci_conn *conn;
+
+		conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+		if (conn)
+			queue_delayed_work(hdev->workqueue,
+					   &conn->le_conn_timeout,
+					   HCI_LE_CONN_TIMEOUT);
+	}
+
+	mgmt_advertising(hdev, *sent);
 
 	hci_dev_unlock(hdev);
 }
-- 
cgit v1.2.3-71-gd317


From b9a6328f2a7f15490de7e45eabb025f8b74a81af Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 25 Mar 2014 10:51:52 +0200
Subject: Bluetooth: Merge ADV_IND/ADV_SCAN_IND and SCAN_RSP together

To avoid too many events being sent to user space and to help parsing of
all available remote device data it makes sense for us to wait for the
scan response and send a single merged Device Found event to user space.

This patch adds a few new variables to hci_dev to track the last
received ADV_IND/ADV_SCAN_IND, i.e. those which will cause a SCAN_REQ to
be send in the case of active scanning. When the SCAN_RSP is received
the pending data is passed together with the SCAN_RSP to the
mgmt_device_found function which takes care of merging them into a
single Device Found event.

We also need a bit of extra logic to handle situations where we don't
receive a SCAN_RSP after caching some data. In such a scenario we simply
have to send out the pending data as it is and then operate on the new
report as if there was no pending data.

We also need to send out any pending data when scanning stops as
well as ensure that the storage is empty at the start of a new active
scanning session. These both cases are covered by the update to the
hci_cc_le_set_scan_enable function in this patch.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |   4 ++
 net/bluetooth/hci_event.c        | 103 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 105 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a1b8eab8a47d..59b112397d39 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -68,6 +68,10 @@ struct discovery_state {
 	struct list_head	unknown;	/* Name state not known */
 	struct list_head	resolve;	/* Name needs to be resolved */
 	__u32			timestamp;
+	bdaddr_t		last_adv_addr;
+	u8			last_adv_addr_type;
+	u8			last_adv_data[HCI_MAX_AD_LENGTH];
+	u8			last_adv_data_len;
 };
 
 struct hci_conn_hash {
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 81e5236a0119..5816a13c5342 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1033,6 +1033,32 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_unlock(hdev);
 }
 
+static bool has_pending_adv_report(struct hci_dev *hdev)
+{
+	struct discovery_state *d = &hdev->discovery;
+
+	return bacmp(&d->last_adv_addr, BDADDR_ANY);
+}
+
+static void clear_pending_adv_report(struct hci_dev *hdev)
+{
+	struct discovery_state *d = &hdev->discovery;
+
+	bacpy(&d->last_adv_addr, BDADDR_ANY);
+	d->last_adv_data_len = 0;
+}
+
+static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
+				     u8 bdaddr_type, u8 *data, u8 len)
+{
+	struct discovery_state *d = &hdev->discovery;
+
+	bacpy(&d->last_adv_addr, bdaddr);
+	d->last_adv_addr_type = bdaddr_type;
+	memcpy(d->last_adv_data, data, len);
+	d->last_adv_data_len = len;
+}
+
 static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 				      struct sk_buff *skb)
 {
@@ -1051,9 +1077,24 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 	switch (cp->enable) {
 	case LE_SCAN_ENABLE:
 		set_bit(HCI_LE_SCAN, &hdev->dev_flags);
+		if (hdev->le_scan_type == LE_SCAN_ACTIVE)
+			clear_pending_adv_report(hdev);
 		break;
 
 	case LE_SCAN_DISABLE:
+		/* We do this here instead of when setting DISCOVERY_STOPPED
+		 * since the latter would potentially require waiting for
+		 * inquiry to stop too.
+		 */
+		if (has_pending_adv_report(hdev)) {
+			struct discovery_state *d = &hdev->discovery;
+
+			mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
+					  d->last_adv_addr_type, NULL, 0, 0,
+					  1, d->last_adv_data,
+					  d->last_adv_data_len, NULL, 0);
+		}
+
 		/* Cancel this timer so that we don't try to disable scanning
 		 * when it's already disabled.
 		 */
@@ -3979,6 +4020,8 @@ static void check_pending_le_conn(struct hci_dev *hdev, bdaddr_t *addr,
 static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 			       u8 bdaddr_type, s8 rssi, u8 *data, u8 len)
 {
+	struct discovery_state *d = &hdev->discovery;
+
 	/* Passive scanning shouldn't trigger any device found events */
 	if (hdev->le_scan_type == LE_SCAN_PASSIVE) {
 		if (type == LE_ADV_IND || type == LE_ADV_DIRECT_IND)
@@ -3986,8 +4029,64 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 		return;
 	}
 
-	mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, rssi, 0, 1,
-			  data, len, NULL, 0);
+	/* If there's nothing pending either store the data from this
+	 * event or send an immediate device found event if the data
+	 * should not be stored for later.
+	 */
+	if (!has_pending_adv_report(hdev)) {
+		/* If the report will trigger a SCAN_REQ store it for
+		 * later merging.
+		 */
+		if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
+			store_pending_adv_report(hdev, bdaddr, bdaddr_type,
+						 data, len);
+			return;
+		}
+
+		mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL,
+				  rssi, 0, 1, data, len, NULL, 0);
+		return;
+	}
+
+	/* If the pending data doesn't match this report or this isn't a
+	 * scan response (e.g. we got a duplicate ADV_IND) then force
+	 * sending of the pending data.
+	 */
+	if (type != LE_ADV_SCAN_RSP || bacmp(bdaddr, &d->last_adv_addr) ||
+	    bdaddr_type != d->last_adv_addr_type) {
+		/* Send out whatever is in the cache */
+		mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
+				  d->last_adv_addr_type, NULL, 0, 0, 1,
+				  d->last_adv_data, d->last_adv_data_len,
+				  NULL, 0);
+
+		/* If the new report will trigger a SCAN_REQ store it for
+		 * later merging.
+		 */
+		if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
+			store_pending_adv_report(hdev, bdaddr, bdaddr_type,
+						 data, len);
+			return;
+		}
+
+		/* The advertising reports cannot be merged, so clear
+		 * the pending report and send out a device found event.
+		 */
+		clear_pending_adv_report(hdev);
+		mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
+				  d->last_adv_addr_type, NULL, rssi, 0, 1,
+				  data, len, NULL, 0);
+		return;
+	}
+
+	/* If we get here we've got a pending ADV_IND or ADV_SCAN_IND and
+	 * the new event is a SCAN_RSP. We can therefore proceed with
+	 * sending a merged device found event.
+	 */
+	mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
+			  d->last_adv_addr_type, NULL, rssi, 0, 1, data, len,
+			  d->last_adv_data, d->last_adv_data_len);
+	clear_pending_adv_report(hdev);
 }
 
 static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
-- 
cgit v1.2.3-71-gd317


From 73cf71d9865ad83c2ab7d09bc71be129088e4ded Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 25 Mar 2014 12:06:19 +0200
Subject: Bluetooth: Fix line splitting of mgmt_device_found parameters

The line was incorrectly split between the variable type and its name.
This patch fixes the issue.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 4 ++--
 net/bluetooth/mgmt.c             | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 59b112397d39..0ba7617ceb27 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1268,8 +1268,8 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192,
 				       u8 *randomizer192, u8 *hash256,
 				       u8 *randomizer256, u8 status);
 void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
-		       u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8
-		       ssp, u8 *eir, u16 eir_len, u8 *scan_rsp,
+		       u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name,
+		       u8 ssp, u8 *eir, u16 eir_len, u8 *scan_rsp,
 		       u8 scan_rsp_len);
 void mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		      u8 addr_type, s8 rssi, u8 *name, u8 name_len);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index a0ef5c076880..37706e89af50 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -5668,8 +5668,8 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192,
 }
 
 void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
-		       u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8
-		       ssp, u8 *eir, u16 eir_len, u8 *scan_rsp,
+		       u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name,
+		       u8 ssp, u8 *eir, u16 eir_len, u8 *scan_rsp,
 		       u8 scan_rsp_len)
 {
 	char buf[512];
-- 
cgit v1.2.3-71-gd317


From ff5cd29f5cb8de0f0bc9016874ddde467d4b0c85 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Tue, 25 Mar 2014 14:40:52 +0200
Subject: Bluetooth: Store also RSSI for pending advertising reports

Especially in crowded environments it can become frequent that we have
to send out whatever pending event there is stored. Since user space
has its own filtering of small RSSI changes sending a 0 value will
essentially force user space to wake up the higher layers (e.g. over
D-Bus) even though the RSSI didn't actually change more than the
threshold value.

This patch adds storing also of the RSSI for pending advertising reports
so that we report an as accurate RSSI as possible when we have to send
out the stored information to user space.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_event.c        | 12 +++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 0ba7617ceb27..c2a419c2c5c7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -70,6 +70,7 @@ struct discovery_state {
 	__u32			timestamp;
 	bdaddr_t		last_adv_addr;
 	u8			last_adv_addr_type;
+	s8			last_adv_rssi;
 	u8			last_adv_data[HCI_MAX_AD_LENGTH];
 	u8			last_adv_data_len;
 };
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 1b2221d62007..2ecb34f2e2ad 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1049,12 +1049,13 @@ static void clear_pending_adv_report(struct hci_dev *hdev)
 }
 
 static void store_pending_adv_report(struct hci_dev *hdev, bdaddr_t *bdaddr,
-				     u8 bdaddr_type, u8 *data, u8 len)
+				     u8 bdaddr_type, s8 rssi, u8 *data, u8 len)
 {
 	struct discovery_state *d = &hdev->discovery;
 
 	bacpy(&d->last_adv_addr, bdaddr);
 	d->last_adv_addr_type = bdaddr_type;
+	d->last_adv_rssi = rssi;
 	memcpy(d->last_adv_data, data, len);
 	d->last_adv_data_len = len;
 }
@@ -4040,7 +4041,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 		 */
 		if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
 			store_pending_adv_report(hdev, bdaddr, bdaddr_type,
-						 data, len);
+						 rssi, data, len);
 			return;
 		}
 
@@ -4061,8 +4062,9 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 		/* Send out whatever is in the cache, but skip duplicates */
 		if (!match)
 			mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK,
-					  d->last_adv_addr_type, NULL, 0,
-					  0, 1, d->last_adv_data,
+					  d->last_adv_addr_type, NULL,
+					  d->last_adv_rssi, 0, 1,
+					  d->last_adv_data,
 					  d->last_adv_data_len, NULL, 0);
 
 		/* If the new report will trigger a SCAN_REQ store it for
@@ -4070,7 +4072,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 		 */
 		if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) {
 			store_pending_adv_report(hdev, bdaddr, bdaddr_type,
-						 data, len);
+						 rssi, data, len);
 			return;
 		}
 
-- 
cgit v1.2.3-71-gd317


From ae55f5982a8bc6adbafb337e0b781d30d5617782 Mon Sep 17 00:00:00 2001
From: Lukasz Rymanowski <lukasz.rymanowski@tieto.com>
Date: Thu, 27 Mar 2014 20:55:19 +0100
Subject: Bluetooth: Keep msec in DISCOV_INTERLEAVED_TIMEOUT

Keep msec instead of jiffies in this define. This is needed by following
patch where we want this timeout to be exposed in debugfs.

Note: Value of this timeout comes from recommendation in BT Core Spec.4.0,
Vol 3, Part C, chapter 13.2.1.

Signed-off-by: Lukasz Rymanowski <lukasz.rymanowski@tieto.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  2 +-
 net/bluetooth/mgmt.c             | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index c2a419c2c5c7..08a1d44eeab0 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1210,7 +1210,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event);
 #define DISCOV_LE_SCAN_WIN		0x12
 #define DISCOV_LE_SCAN_INT		0x12
 #define DISCOV_LE_TIMEOUT		msecs_to_jiffies(10240)
-#define DISCOV_INTERLEAVED_TIMEOUT	msecs_to_jiffies(5120)
+#define DISCOV_INTERLEAVED_TIMEOUT	5120	/* msec */
 #define DISCOV_INTERLEAVED_INQUIRY_LEN	0x04
 #define DISCOV_BREDR_INQUIRY_LEN	0x08
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 37706e89af50..944c4fc87905 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3351,6 +3351,8 @@ static int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status)
 
 static void start_discovery_complete(struct hci_dev *hdev, u8 status)
 {
+	unsigned long timeout = 0;
+
 	BT_DBG("status %d", status);
 
 	if (status) {
@@ -3366,13 +3368,11 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status)
 
 	switch (hdev->discovery.type) {
 	case DISCOV_TYPE_LE:
-		queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable,
-				   DISCOV_LE_TIMEOUT);
+		timeout = DISCOV_LE_TIMEOUT;
 		break;
 
 	case DISCOV_TYPE_INTERLEAVED:
-		queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable,
-				   DISCOV_INTERLEAVED_TIMEOUT);
+		timeout = msecs_to_jiffies(DISCOV_INTERLEAVED_TIMEOUT);
 		break;
 
 	case DISCOV_TYPE_BREDR:
@@ -3381,6 +3381,11 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status)
 	default:
 		BT_ERR("Invalid discovery type %d", hdev->discovery.type);
 	}
+
+	if (!timeout)
+		return;
+
+	queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, timeout);
 }
 
 static int start_discovery(struct sock *sk, struct hci_dev *hdev,
-- 
cgit v1.2.3-71-gd317


From b9a7a61e5c3e2f6316c2aedf4ca171bdee7a4804 Mon Sep 17 00:00:00 2001
From: Lukasz Rymanowski <lukasz.rymanowski@tieto.com>
Date: Thu, 27 Mar 2014 20:55:20 +0100
Subject: Bluetooth: Add new debugfs parameter

With this patch it is possible to control discovery interleaved
timeout value from debugfs.

It is for fine tuning of this timeout.

Signed-off-by: Lukasz Rymanowski <lukasz.rymanowski@tieto.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 net/bluetooth/hci_core.c         | 4 ++++
 net/bluetooth/mgmt.c             | 2 +-
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 08a1d44eeab0..e0c26bc144e5 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -199,6 +199,7 @@ struct hci_dev {
 	__u16		le_scan_window;
 	__u16		le_conn_min_interval;
 	__u16		le_conn_max_interval;
+	__u16		discov_interleaved_timeout;
 	__u8		ssp_debug_mode;
 
 	__u16		devid_source;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 7cf511cb1171..d31f144860d1 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1823,6 +1823,9 @@ static int __hci_init(struct hci_dev *hdev)
 				    &lowpan_debugfs_fops);
 		debugfs_create_file("le_auto_conn", 0644, hdev->debugfs, hdev,
 				    &le_auto_conn_fops);
+		debugfs_create_u16("discov_interleaved_timeout", 0644,
+				   hdev->debugfs,
+				   &hdev->discov_interleaved_timeout);
 	}
 
 	return 0;
@@ -3785,6 +3788,7 @@ struct hci_dev *hci_alloc_dev(void)
 	hdev->le_conn_max_interval = 0x0038;
 
 	hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT;
+	hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT;
 
 	mutex_init(&hdev->lock);
 	mutex_init(&hdev->req_lock);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 944c4fc87905..b34e13aed51c 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3372,7 +3372,7 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status)
 		break;
 
 	case DISCOV_TYPE_INTERLEAVED:
-		timeout = msecs_to_jiffies(DISCOV_INTERLEAVED_TIMEOUT);
+		timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout);
 		break;
 
 	case DISCOV_TYPE_BREDR:
-- 
cgit v1.2.3-71-gd317


From 3d5a76f08bbac55305da87f4c810279189f64297 Mon Sep 17 00:00:00 2001
From: Lukasz Rymanowski <lukasz.rymanowski@tieto.com>
Date: Thu, 27 Mar 2014 20:55:21 +0100
Subject: Bluetooth: Keep msec in DISCOV_LE_TIMEOUT

To be consistent, lets use msec for this timeout as well.

Note: This define value is a minimum scan time taken from BT Core spec 4.0,
Vol 3, Part C, chapter 9.2.6

Signed-off-by: Lukasz Rymanowski <lukasz.rymanowski@tieto.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 +-
 net/bluetooth/mgmt.c             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index e0c26bc144e5..d73f41855ada 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1210,7 +1210,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event);
  */
 #define DISCOV_LE_SCAN_WIN		0x12
 #define DISCOV_LE_SCAN_INT		0x12
-#define DISCOV_LE_TIMEOUT		msecs_to_jiffies(10240)
+#define DISCOV_LE_TIMEOUT		10240	/* msec */
 #define DISCOV_INTERLEAVED_TIMEOUT	5120	/* msec */
 #define DISCOV_INTERLEAVED_INQUIRY_LEN	0x04
 #define DISCOV_BREDR_INQUIRY_LEN	0x08
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index b34e13aed51c..11cb00a2befb 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3368,7 +3368,7 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status)
 
 	switch (hdev->discovery.type) {
 	case DISCOV_TYPE_LE:
-		timeout = DISCOV_LE_TIMEOUT;
+		timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT);
 		break;
 
 	case DISCOV_TYPE_INTERLEAVED:
-- 
cgit v1.2.3-71-gd317


From d728c8ef8bea6e81f44933c0237531cda499577e Mon Sep 17 00:00:00 2001
From: Brad Volkin <bradley.d.volkin@intel.com>
Date: Tue, 18 Feb 2014 10:15:56 -0800
Subject: drm/i915: Add a CMD_PARSER_VERSION getparam

So userspace can query the kernel for command parser support.

v2: Add i915_cmd_parser_get_version(), history log, and kerneldoc

OTC-Tracker: AXIA-4631
Change-Id: I58af650db9f6753c2dcac9c54ab432fd31db302f
Signed-off-by: Brad Volkin <bradley.d.volkin@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 19 +++++++++++++++++++
 drivers/gpu/drm/i915/i915_dma.c        |  3 +++
 drivers/gpu/drm/i915/i915_drv.h        |  1 +
 include/uapi/drm/i915_drm.h            |  1 +
 4 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index a8f00dbc0dde..bae7c2f33692 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -896,3 +896,22 @@ int i915_parse_cmds(struct intel_ring_buffer *ring,
 
 	return ret;
 }
+
+/**
+ * i915_cmd_parser_get_version() - get the cmd parser version number
+ *
+ * The cmd parser maintains a simple increasing integer version number suitable
+ * for passing to userspace clients to determine what operations are permitted.
+ *
+ * Return: the current version number of the cmd parser
+ */
+int i915_cmd_parser_get_version(void)
+{
+	/*
+	 * Command parser version history
+	 *
+	 * 1. Initial version. Checks batches and reports violations, but leaves
+	 *    hardware parsing enabled (so does not allow new use cases).
+	 */
+	return 1;
+}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 96177eec0a0e..0b38f88c35f0 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1017,6 +1017,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
 		value = 1;
 		break;
+	case I915_PARAM_CMD_PARSER_VERSION:
+		value = i915_cmd_parser_get_version();
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8e1576cf6d63..0801e157a7ff 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2385,6 +2385,7 @@ void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(int type);
 
 /* i915_cmd_parser.c */
+int i915_cmd_parser_get_version(void);
 void i915_cmd_parser_init_ring(struct intel_ring_buffer *ring);
 bool i915_needs_cmd_parser(struct intel_ring_buffer *ring);
 int i915_parse_cmds(struct intel_ring_buffer *ring,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 126bfaa8bb6b..8a3e4ef00c3d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_NO_RELOC	 25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT     	 	 27
+#define I915_PARAM_CMD_PARSER_VERSION	 28
 
 typedef struct drm_i915_getparam {
 	int param;
-- 
cgit v1.2.3-71-gd317


From c2d15359c4b5476c181d8a51bdeba4cead15c120 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Tue, 1 Apr 2014 12:59:08 +0300
Subject: drm: Make drm_clflush_virt_range() void*
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently drm_cflush_virt_rage() takes a char* so the caller probably
has to do pointless casting to avoid compiler warnings. Make the
argument void* instead to avoid such issues.

v2: Use void* arithmetic (Chris)

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_cache.c | 4 ++--
 include/drm/drmP.h          | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index bb8f58012189..4bfad5f5a843 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -125,11 +125,11 @@ drm_clflush_sg(struct sg_table *st)
 EXPORT_SYMBOL(drm_clflush_sg);
 
 void
-drm_clflush_virt_range(char *addr, unsigned long length)
+drm_clflush_virt_range(void *addr, unsigned long length)
 {
 #if defined(CONFIG_X86)
 	if (cpu_has_clflush) {
-		char *end = addr + length;
+		void *end = addr + length;
 		mb();
 		for (; addr < end; addr += boot_cpu_data.x86_clflush_size)
 			clflush(addr);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index daac00a93126..150780bdd66b 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1322,7 +1322,7 @@ extern int drm_remove_magic(struct drm_master *master, drm_magic_t magic);
 /* Cache management (drm_cache.c) */
 void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
 void drm_clflush_sg(struct sg_table *st);
-void drm_clflush_virt_range(char *addr, unsigned long length);
+void drm_clflush_virt_range(void *addr, unsigned long length);
 
 				/* Locking IOCTL support (drm_lock.h) */
 extern int drm_lock(struct drm_device *dev, void *data,
-- 
cgit v1.2.3-71-gd317


From c50b960ccc5981627628302701e93e6aceccdb1c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 28 Mar 2014 10:19:47 +0000
Subject: netfilter: nf_tables: implement proper set selection

The current set selection simply choses the first set type that provides
the requested features, which always results in the rbtree being chosen
by virtue of being the first set in the list.

What we actually want to do is choose the implementation that can provide
the requested features and is optimal from either a performance or memory
perspective depending on the characteristics of the elements and the
preferences specified by the user.

The elements are not known when creating a set. Even if we would provide
them for anonymous (literal) sets, we'd still have standalone sets where
the elements are not known in advance. We therefore need an abstract
description of the data charcteristics.

The kernel already knows the size of the key, this patch starts by
introducing a nested set description which so far contains only the maximum
amount of elements. Based on this the set implementations are changed to
provide an estimate of the required amount of memory and the lookup
complexity class.

The set ops have a new callback ->estimate() that is invoked during set
selection. It receives a structure containing the attributes known to the
kernel and is supposed to populate a struct nft_set_estimate with the
complexity class and, in case the size is known, the complete amount of
memory required, or the amount of memory required per element otherwise.

Based on the policy specified by the user (performance/memory, defaulting
to performance) the kernel will then select the best suited implementation.

Even if the set implementation would allow to add more than the specified
maximum amount of elements, they are enforced since new implementations
might not be able to add more than maximum based on which they were
selected.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  46 ++++++++++++
 include/uapi/linux/netfilter/nf_tables.h |  27 +++++++
 net/netfilter/nf_tables_api.c            | 121 +++++++++++++++++++++++++++----
 net/netfilter/nft_hash.c                 |  45 +++++++++++-
 net/netfilter/nft_rbtree.c               |  21 ++++++
 5 files changed, 242 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e6bc14d8fa9a..29ff1dc41ef3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -145,6 +145,44 @@ struct nft_set_iter {
 			      const struct nft_set_elem *elem);
 };
 
+/**
+ *	struct nft_set_desc - description of set elements
+ *
+ *	@klen: key length
+ *	@dlen: data length
+ *	@size: number of set elements
+ */
+struct nft_set_desc {
+	unsigned int		klen;
+	unsigned int		dlen;
+	unsigned int		size;
+};
+
+/**
+ *	enum nft_set_class - performance class
+ *
+ *	@NFT_LOOKUP_O_1: constant, O(1)
+ *	@NFT_LOOKUP_O_LOG_N: logarithmic, O(log N)
+ *	@NFT_LOOKUP_O_N: linear, O(N)
+ */
+enum nft_set_class {
+	NFT_SET_CLASS_O_1,
+	NFT_SET_CLASS_O_LOG_N,
+	NFT_SET_CLASS_O_N,
+};
+
+/**
+ *	struct nft_set_estimate - estimation of memory and performance
+ *				  characteristics
+ *
+ *	@size: required memory
+ *	@class: lookup performance class
+ */
+struct nft_set_estimate {
+	unsigned int		size;
+	enum nft_set_class	class;
+};
+
 /**
  *	struct nft_set_ops - nf_tables set operations
  *
@@ -174,7 +212,11 @@ struct nft_set_ops {
 						struct nft_set_iter *iter);
 
 	unsigned int			(*privsize)(const struct nlattr * const nla[]);
+	bool				(*estimate)(const struct nft_set_desc *desc,
+						    u32 features,
+						    struct nft_set_estimate *est);
 	int				(*init)(const struct nft_set *set,
+						const struct nft_set_desc *desc,
 						const struct nlattr * const nla[]);
 	void				(*destroy)(const struct nft_set *set);
 
@@ -194,6 +236,8 @@ void nft_unregister_set(struct nft_set_ops *ops);
  * 	@name: name of the set
  * 	@ktype: key type (numeric type defined by userspace, not used in the kernel)
  * 	@dtype: data type (verdict or numeric type defined by userspace)
+ * 	@size: maximum set size
+ * 	@nelems: number of elements
  * 	@ops: set ops
  * 	@flags: set flags
  * 	@klen: key length
@@ -206,6 +250,8 @@ struct nft_set {
 	char				name[IFNAMSIZ];
 	u32				ktype;
 	u32				dtype;
+	u32				size;
+	u32				nelems;
 	/* runtime data below here */
 	const struct nft_set_ops	*ops ____cacheline_aligned;
 	u16				flags;
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c88ccbfda5f1..160159274cab 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -211,6 +211,29 @@ enum nft_set_flags {
 	NFT_SET_MAP			= 0x8,
 };
 
+/**
+ * enum nft_set_policies - set selection policy
+ *
+ * @NFT_SET_POL_PERFORMANCE: prefer high performance over low memory use
+ * @NFT_SET_POL_MEMORY: prefer low memory use over high performance
+ */
+enum nft_set_policies {
+	NFT_SET_POL_PERFORMANCE,
+	NFT_SET_POL_MEMORY,
+};
+
+/**
+ * enum nft_set_desc_attributes - set element description
+ *
+ * @NFTA_SET_DESC_SIZE: number of elements in set (NLA_U32)
+ */
+enum nft_set_desc_attributes {
+	NFTA_SET_DESC_UNSPEC,
+	NFTA_SET_DESC_SIZE,
+	__NFTA_SET_DESC_MAX
+};
+#define NFTA_SET_DESC_MAX	(__NFTA_SET_DESC_MAX - 1)
+
 /**
  * enum nft_set_attributes - nf_tables set netlink attributes
  *
@@ -221,6 +244,8 @@ enum nft_set_flags {
  * @NFTA_SET_KEY_LEN: key data length (NLA_U32)
  * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
  * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
+ * @NFTA_SET_POLICY: selection policy (NLA_U32)
+ * @NFTA_SET_DESC: set description (NLA_NESTED)
  */
 enum nft_set_attributes {
 	NFTA_SET_UNSPEC,
@@ -231,6 +256,8 @@ enum nft_set_attributes {
 	NFTA_SET_KEY_LEN,
 	NFTA_SET_DATA_TYPE,
 	NFTA_SET_DATA_LEN,
+	NFTA_SET_POLICY,
+	NFTA_SET_DESC,
 	__NFTA_SET_MAX
 };
 #define NFTA_SET_MAX		(__NFTA_SET_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 33045a562297..bd3381e16084 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1912,9 +1912,18 @@ void nft_unregister_set(struct nft_set_ops *ops)
 }
 EXPORT_SYMBOL_GPL(nft_unregister_set);
 
-static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[])
+/*
+ * Select a set implementation based on the data characteristics and the
+ * given policy. The total memory use might not be known if no size is
+ * given, in that case the amount of memory per element is used.
+ */
+static const struct nft_set_ops *
+nft_select_set_ops(const struct nlattr * const nla[],
+		   const struct nft_set_desc *desc,
+		   enum nft_set_policies policy)
 {
-	const struct nft_set_ops *ops;
+	const struct nft_set_ops *ops, *bops;
+	struct nft_set_estimate est, best;
 	u32 features;
 
 #ifdef CONFIG_MODULES
@@ -1932,15 +1941,45 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const
 		features &= NFT_SET_INTERVAL | NFT_SET_MAP;
 	}
 
-	// FIXME: implement selection properly
+	bops	   = NULL;
+	best.size  = ~0;
+	best.class = ~0;
+
 	list_for_each_entry(ops, &nf_tables_set_ops, list) {
 		if ((ops->features & features) != features)
 			continue;
+		if (!ops->estimate(desc, features, &est))
+			continue;
+
+		switch (policy) {
+		case NFT_SET_POL_PERFORMANCE:
+			if (est.class < best.class)
+				break;
+			if (est.class == best.class && est.size < best.size)
+				break;
+			continue;
+		case NFT_SET_POL_MEMORY:
+			if (est.size < best.size)
+				break;
+			if (est.size == best.size && est.class < best.class)
+				break;
+			continue;
+		default:
+			break;
+		}
+
 		if (!try_module_get(ops->owner))
 			continue;
-		return ops;
+		if (bops != NULL)
+			module_put(bops->owner);
+
+		bops = ops;
+		best = est;
 	}
 
+	if (bops != NULL)
+		return bops;
+
 	return ERR_PTR(-EOPNOTSUPP);
 }
 
@@ -1952,6 +1991,12 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
 	[NFTA_SET_KEY_LEN]		= { .type = NLA_U32 },
 	[NFTA_SET_DATA_TYPE]		= { .type = NLA_U32 },
 	[NFTA_SET_DATA_LEN]		= { .type = NLA_U32 },
+	[NFTA_SET_POLICY]		= { .type = NLA_U32 },
+	[NFTA_SET_DESC]			= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
+	[NFTA_SET_DESC_SIZE]		= { .type = NLA_U32 },
 };
 
 static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
@@ -2043,6 +2088,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 {
 	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
+	struct nlattr *desc;
 	u32 portid = NETLINK_CB(ctx->skb).portid;
 	u32 seq = ctx->nlh->nlmsg_seq;
 
@@ -2076,6 +2122,14 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 			goto nla_put_failure;
 	}
 
+	desc = nla_nest_start(skb, NFTA_SET_DESC);
+	if (desc == NULL)
+		goto nla_put_failure;
+	if (set->size &&
+	    nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
+		goto nla_put_failure;
+	nla_nest_end(skb, desc);
+
 	return nlmsg_end(skb, nlh);
 
 nla_put_failure:
@@ -2304,6 +2358,23 @@ err:
 	return err;
 }
 
+static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
+				    struct nft_set_desc *desc,
+				    const struct nlattr *nla)
+{
+	struct nlattr *da[NFTA_SET_DESC_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy);
+	if (err < 0)
+		return err;
+
+	if (da[NFTA_SET_DESC_SIZE] != NULL)
+		desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE]));
+
+	return 0;
+}
+
 static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 			    const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[])
@@ -2318,7 +2389,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	char name[IFNAMSIZ];
 	unsigned int size;
 	bool create;
-	u32 ktype, klen, dlen, dtype, flags;
+	u32 ktype, dtype, flags, policy;
+	struct nft_set_desc desc;
 	int err;
 
 	if (nla[NFTA_SET_TABLE] == NULL ||
@@ -2326,6 +2398,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	    nla[NFTA_SET_KEY_LEN] == NULL)
 		return -EINVAL;
 
+	memset(&desc, 0, sizeof(desc));
+
 	ktype = NFT_DATA_VALUE;
 	if (nla[NFTA_SET_KEY_TYPE] != NULL) {
 		ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
@@ -2333,8 +2407,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 			return -EINVAL;
 	}
 
-	klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
-	if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
+	desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+	if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
 		return -EINVAL;
 
 	flags = 0;
@@ -2346,7 +2420,6 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	}
 
 	dtype = 0;
-	dlen  = 0;
 	if (nla[NFTA_SET_DATA_TYPE] != NULL) {
 		if (!(flags & NFT_SET_MAP))
 			return -EINVAL;
@@ -2359,15 +2432,25 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 		if (dtype != NFT_DATA_VERDICT) {
 			if (nla[NFTA_SET_DATA_LEN] == NULL)
 				return -EINVAL;
-			dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
-			if (dlen == 0 ||
-			    dlen > FIELD_SIZEOF(struct nft_data, data))
+			desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+			if (desc.dlen == 0 ||
+			    desc.dlen > FIELD_SIZEOF(struct nft_data, data))
 				return -EINVAL;
 		} else
-			dlen = sizeof(struct nft_data);
+			desc.dlen = sizeof(struct nft_data);
 	} else if (flags & NFT_SET_MAP)
 		return -EINVAL;
 
+	policy = NFT_SET_POL_PERFORMANCE;
+	if (nla[NFTA_SET_POLICY] != NULL)
+		policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+
+	if (nla[NFTA_SET_DESC] != NULL) {
+		err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]);
+		if (err < 0)
+			return err;
+	}
+
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
 	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
@@ -2398,7 +2481,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
 		return -ENOENT;
 
-	ops = nft_select_set_ops(nla);
+	ops = nft_select_set_ops(nla, &desc, policy);
 	if (IS_ERR(ops))
 		return PTR_ERR(ops);
 
@@ -2419,12 +2502,13 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	INIT_LIST_HEAD(&set->bindings);
 	set->ops   = ops;
 	set->ktype = ktype;
-	set->klen  = klen;
+	set->klen  = desc.klen;
 	set->dtype = dtype;
-	set->dlen  = dlen;
+	set->dlen  = desc.dlen;
 	set->flags = flags;
+	set->size  = desc.size;
 
-	err = ops->init(set, nla);
+	err = ops->init(set, &desc, nla);
 	if (err < 0)
 		goto err2;
 
@@ -2733,6 +2817,9 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
 	enum nft_registers dreg;
 	int err;
 
+	if (set->size && set->nelems == set->size)
+		return -ENFILE;
+
 	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
 			       nft_set_elem_policy);
 	if (err < 0)
@@ -2798,6 +2885,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
 	err = set->ops->insert(set, &elem);
 	if (err < 0)
 		goto err3;
+	set->nelems++;
 
 	return 0;
 
@@ -2867,6 +2955,7 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
 		goto err2;
 
 	set->ops->remove(set, &elem);
+	set->nelems--;
 
 	nft_data_uninit(&elem.key, NFT_DATA_VALUE);
 	if (set->flags & NFT_SET_MAP)
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 3b1ad876d6b0..01884b315c4a 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/list.h>
+#include <linux/log2.h>
 #include <linux/jhash.h>
 #include <linux/netlink.h>
 #include <linux/vmalloc.h>
@@ -19,7 +20,7 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 
-#define NFT_HASH_MIN_SIZE	4
+#define NFT_HASH_MIN_SIZE	4UL
 
 struct nft_hash {
 	struct nft_hash_table __rcu	*tbl;
@@ -82,6 +83,11 @@ static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
 		kfree(tbl);
 }
 
+static unsigned int nft_hash_tbl_size(unsigned int nelem)
+{
+	return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE);
+}
+
 static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
 {
 	struct nft_hash_table *tbl;
@@ -335,17 +341,23 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
 }
 
 static int nft_hash_init(const struct nft_set *set,
+			 const struct nft_set_desc *desc,
 			 const struct nlattr * const tb[])
 {
 	struct nft_hash *priv = nft_set_priv(set);
 	struct nft_hash_table *tbl;
+	unsigned int size;
 
 	if (unlikely(!nft_hash_rnd_initted)) {
 		get_random_bytes(&nft_hash_rnd, 4);
 		nft_hash_rnd_initted = true;
 	}
 
-	tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE);
+	size = NFT_HASH_MIN_SIZE;
+	if (desc->size)
+		size = nft_hash_tbl_size(desc->size);
+
+	tbl = nft_hash_tbl_alloc(size);
 	if (tbl == NULL)
 		return -ENOMEM;
 	RCU_INIT_POINTER(priv->tbl, tbl);
@@ -369,8 +381,37 @@ static void nft_hash_destroy(const struct nft_set *set)
 	kfree(tbl);
 }
 
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+			      struct nft_set_estimate *est)
+{
+	unsigned int esize;
+
+	esize = sizeof(struct nft_hash_elem);
+	if (features & NFT_SET_MAP)
+		esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
+
+	if (desc->size) {
+		est->size = sizeof(struct nft_hash) +
+			    nft_hash_tbl_size(desc->size) *
+			    sizeof(struct nft_hash_elem *) +
+			    desc->size * esize;
+	} else {
+		/* Resizing happens when the load drops below 30% or goes
+		 * above 75%. The average of 52.5% load (approximated by 50%)
+		 * is used for the size estimation of the hash buckets,
+		 * meaning we calculate two buckets per element.
+		 */
+		est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+	}
+
+	est->class = NFT_SET_CLASS_O_1;
+
+	return true;
+}
+
 static struct nft_set_ops nft_hash_ops __read_mostly = {
 	.privsize       = nft_hash_privsize,
+	.estimate	= nft_hash_estimate,
 	.init		= nft_hash_init,
 	.destroy	= nft_hash_destroy,
 	.get		= nft_hash_get,
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index e21d69d13506..072e611e9f71 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -201,6 +201,7 @@ static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
 }
 
 static int nft_rbtree_init(const struct nft_set *set,
+			   const struct nft_set_desc *desc,
 			   const struct nlattr * const nla[])
 {
 	struct nft_rbtree *priv = nft_set_priv(set);
@@ -222,8 +223,28 @@ static void nft_rbtree_destroy(const struct nft_set *set)
 	}
 }
 
+static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
+				struct nft_set_estimate *est)
+{
+	unsigned int nsize;
+
+	nsize = sizeof(struct nft_rbtree_elem);
+	if (features & NFT_SET_MAP)
+		nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
+
+	if (desc->size)
+		est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
+	else
+		est->size = nsize;
+
+	est->class = NFT_SET_CLASS_O_LOG_N;
+
+	return true;
+}
+
 static struct nft_set_ops nft_rbtree_ops __read_mostly = {
 	.privsize	= nft_rbtree_privsize,
+	.estimate	= nft_rbtree_estimate,
 	.init		= nft_rbtree_init,
 	.destroy	= nft_rbtree_destroy,
 	.insert		= nft_rbtree_insert,
-- 
cgit v1.2.3-71-gd317


From 78f22b6a3a9254460d23060530b48ae02a9394e3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 24 Mar 2014 17:57:27 +0100
Subject: cfg80211: allow userspace to take ownership of interfaces

When dynamically creating interfaces from userspace, e.g. for P2P usage,
such interfaces are usually owned by the process that created them, i.e.
wpa_supplicant. Should wpa_supplicant crash, such interfaces will often
cease operating properly and cause problems on restarting the process.

To avoid this problem, introduce an ownership concept for interfaces. If
an interface is owned by a netlink socket, then it will be destroyed if
the netlink socket is closed for any reason, including if the process it
belongs to crashed. This gives us a race-free way to get rid of any such
interfaces.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  3 +++
 include/uapi/linux/nl80211.h |  6 ++++++
 net/wireless/core.c          | 44 ++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/core.h          | 11 +++++++++++
 net/wireless/nl80211.c       | 28 +++++++++++++++++++++++++++-
 5 files changed, 91 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f3539a15c411..6510ccf53a54 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3194,6 +3194,7 @@ struct cfg80211_cached_keys;
  * @ibss_dfs_possible: (private) IBSS may change to a DFS channel
  * @event_list: (private) list for internal event processing
  * @event_lock: (private) lock for event list
+ * @owner_nlportid: (private) owner socket port ID
  */
 struct wireless_dev {
 	struct wiphy *wiphy;
@@ -3241,6 +3242,8 @@ struct wireless_dev {
 	unsigned long cac_start_time;
 	unsigned int cac_time_ms;
 
+	u32 owner_nlportid;
+
 #ifdef CONFIG_CFG80211_WEXT
 	/* wext data */
 	struct {
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 1ba9d626aa83..5e405fd55a71 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1579,6 +1579,10 @@ enum nl80211_commands {
  * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32.
  *	As specified in the &enum nl80211_tdls_peer_capability.
  *
+ * @NL80211_ATTR_IFACE_SOCKET_OWNER: flag attribute, if set during interface
+ *	creation then the new interface will be owned by the netlink socket
+ *	that created it and will be destroyed when the socket is closed
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1914,6 +1918,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_TDLS_PEER_CAPABILITY,
 
+	NL80211_ATTR_IFACE_SOCKET_OWNER,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 086cddd03ba6..5a63c3cbda2e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -260,6 +260,45 @@ static void cfg80211_event_work(struct work_struct *work)
 	rtnl_unlock();
 }
 
+void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev)
+{
+	struct cfg80211_iface_destroy *item;
+
+	ASSERT_RTNL();
+
+	spin_lock_irq(&rdev->destroy_list_lock);
+	while ((item = list_first_entry_or_null(&rdev->destroy_list,
+						struct cfg80211_iface_destroy,
+						list))) {
+		struct wireless_dev *wdev, *tmp;
+		u32 nlportid = item->nlportid;
+
+		list_del(&item->list);
+		kfree(item);
+		spin_unlock_irq(&rdev->destroy_list_lock);
+
+		list_for_each_entry_safe(wdev, tmp, &rdev->wdev_list, list) {
+			if (nlportid == wdev->owner_nlportid)
+				rdev_del_virtual_intf(rdev, wdev);
+		}
+
+		spin_lock_irq(&rdev->destroy_list_lock);
+	}
+	spin_unlock_irq(&rdev->destroy_list_lock);
+}
+
+static void cfg80211_destroy_iface_wk(struct work_struct *work)
+{
+	struct cfg80211_registered_device *rdev;
+
+	rdev = container_of(work, struct cfg80211_registered_device,
+			    destroy_work);
+
+	rtnl_lock();
+	cfg80211_destroy_ifaces(rdev);
+	rtnl_unlock();
+}
+
 /* exported functions */
 
 struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
@@ -318,6 +357,10 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
 	rdev->wiphy.dev.class = &ieee80211_class;
 	rdev->wiphy.dev.platform_data = rdev;
 
+	INIT_LIST_HEAD(&rdev->destroy_list);
+	spin_lock_init(&rdev->destroy_list_lock);
+	INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk);
+
 #ifdef CONFIG_CFG80211_DEFAULT_PS
 	rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
 #endif
@@ -675,6 +718,7 @@ void wiphy_unregister(struct wiphy *wiphy)
 	cancel_work_sync(&rdev->conn_work);
 	flush_work(&rdev->event_work);
 	cancel_delayed_work_sync(&rdev->dfs_update_channels_wk);
+	flush_work(&rdev->destroy_work);
 
 #ifdef CONFIG_PM
 	if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 5b1fdcadd469..6f6f75609852 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -80,6 +80,10 @@ struct cfg80211_registered_device {
 
 	struct cfg80211_coalesce *coalesce;
 
+	spinlock_t destroy_list_lock;
+	struct list_head destroy_list;
+	struct work_struct destroy_work;
+
 	/* must be last because of the way we do wiphy_priv(),
 	 * and it should at least be aligned to NETDEV_ALIGN */
 	struct wiphy wiphy __aligned(NETDEV_ALIGN);
@@ -232,6 +236,13 @@ struct cfg80211_beacon_registration {
 	u32 nlportid;
 };
 
+struct cfg80211_iface_destroy {
+	struct list_head list;
+	u32 nlportid;
+};
+
+void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
+
 /* free object */
 void cfg80211_dev_free(struct cfg80211_registered_device *rdev);
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 052c1bf8ffac..b25b5ce4076d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -385,6 +385,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_MAC_HINT] = { .len = ETH_ALEN },
 	[NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 },
 	[NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
+	[NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -2514,6 +2515,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 	enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
 	u32 flags;
 
+	/* to avoid failing a new interface creation due to pending removal */
+	cfg80211_destroy_ifaces(rdev);
+
 	memset(&params, 0, sizeof(params));
 
 	if (!info->attrs[NL80211_ATTR_IFNAME])
@@ -2563,6 +2567,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 		return PTR_ERR(wdev);
 	}
 
+	if (info->attrs[NL80211_ATTR_IFACE_SOCKET_OWNER])
+		wdev->owner_nlportid = info->snd_portid;
+
 	switch (type) {
 	case NL80211_IFTYPE_MESH_POINT:
 		if (!info->attrs[NL80211_ATTR_MESH_ID])
@@ -11649,9 +11656,15 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
-		list_for_each_entry_rcu(wdev, &rdev->wdev_list, list)
+		bool schedule_destroy_work = false;
+
+		list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) {
 			cfg80211_mlme_unregister_socket(wdev, notify->portid);
 
+			if (wdev->owner_nlportid == notify->portid)
+				schedule_destroy_work = true;
+		}
+
 		spin_lock_bh(&rdev->beacon_registrations_lock);
 		list_for_each_entry_safe(reg, tmp, &rdev->beacon_registrations,
 					 list) {
@@ -11662,6 +11675,19 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 			}
 		}
 		spin_unlock_bh(&rdev->beacon_registrations_lock);
+
+		if (schedule_destroy_work) {
+			struct cfg80211_iface_destroy *destroy;
+
+			destroy = kzalloc(sizeof(*destroy), GFP_ATOMIC);
+			if (destroy) {
+				destroy->nlportid = notify->portid;
+				spin_lock(&rdev->destroy_list_lock);
+				list_add(&destroy->list, &rdev->destroy_list);
+				spin_unlock(&rdev->destroy_list_lock);
+				schedule_work(&rdev->destroy_work);
+			}
+		}
 	}
 
 	rcu_read_unlock();
-- 
cgit v1.2.3-71-gd317


From 77be2c54c5bd26279abc13807398771d80cda37a Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 27 Mar 2014 11:30:29 +0200
Subject: mac80211: add vif to flush call

This will allow the low level driver to make decision based
on the vif such as queues etc...
Since the vif might be NULL, we can't add it to the tracing
functions.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
[fix staging rtl8821ae driver]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ar5523/ar5523.c              |  3 ++-
 drivers/net/wireless/ath/ath10k/mac.c                 |  3 ++-
 drivers/net/wireless/ath/ath9k/main.c                 |  3 ++-
 drivers/net/wireless/ath/carl9170/main.c              |  4 +++-
 drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c |  3 ++-
 drivers/net/wireless/cw1200/sta.c                     |  3 ++-
 drivers/net/wireless/cw1200/sta.h                     |  3 ++-
 drivers/net/wireless/iwlegacy/common.c                |  3 ++-
 drivers/net/wireless/iwlegacy/common.h                |  3 ++-
 drivers/net/wireless/iwlwifi/dvm/mac80211.c           |  3 ++-
 drivers/net/wireless/mac80211_hwsim.c                 |  4 +++-
 drivers/net/wireless/p54/main.c                       |  3 ++-
 drivers/net/wireless/rt2x00/rt2x00.h                  |  3 ++-
 drivers/net/wireless/rt2x00/rt2x00mac.c               |  3 ++-
 drivers/net/wireless/rtlwifi/core.c                   |  3 ++-
 drivers/net/wireless/ti/wlcore/main.c                 |  3 ++-
 drivers/staging/rtl8821ae/core.c                      | 14 +++-----------
 include/net/mac80211.h                                |  4 +++-
 net/mac80211/driver-ops.h                             |  8 +++++++-
 net/mac80211/util.c                                   |  2 +-
 20 files changed, 48 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c
index 507d9a9ee69a..f92050617ae6 100644
--- a/drivers/net/wireless/ath/ar5523/ar5523.c
+++ b/drivers/net/wireless/ath/ar5523/ar5523.c
@@ -1090,7 +1090,8 @@ static int ar5523_set_rts_threshold(struct ieee80211_hw *hw, u32 value)
 	return ret;
 }
 
-static void ar5523_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void ar5523_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			 u32 queues, bool drop)
 {
 	struct ar5523 *ar = hw->priv;
 
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 511a2f81e7af..d1df99350147 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -3544,7 +3544,8 @@ static int ath10k_set_frag_threshold(struct ieee80211_hw *hw, u32 value)
 	return ret;
 }
 
-static void ath10k_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void ath10k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			 u32 queues, bool drop)
 {
 	struct ath10k *ar = hw->priv;
 	bool skip;
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index d69853b848ce..49265c6a1a7e 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -1883,7 +1883,8 @@ static bool ath9k_has_tx_pending(struct ath_softc *sc)
 	return !!npend;
 }
 
-static void ath9k_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void ath9k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			u32 queues, bool drop)
 {
 	struct ath_softc *sc = hw->priv;
 	struct ath_hw *ah = sc->sc_ah;
diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c
index 4c8cdb097b65..f8ded84b7be8 100644
--- a/drivers/net/wireless/ath/carl9170/main.c
+++ b/drivers/net/wireless/ath/carl9170/main.c
@@ -1707,7 +1707,9 @@ found:
 	return 0;
 }
 
-static void carl9170_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void carl9170_op_flush(struct ieee80211_hw *hw,
+			      struct ieee80211_vif *vif,
+			      u32 queues, bool drop)
 {
 	struct ar9170 *ar = hw->priv;
 	unsigned int vid;
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
index 8c5fa4e58139..43c71bfaa474 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
@@ -897,7 +897,8 @@ static bool brcms_tx_flush_completed(struct brcms_info *wl)
 	return result;
 }
 
-static void brcms_ops_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void brcms_ops_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			    u32 queues, bool drop)
 {
 	struct brcms_info *wl = hw->priv;
 	int ret;
diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c
index 103f7bce8932..cd0cad7f7759 100644
--- a/drivers/net/wireless/cw1200/sta.c
+++ b/drivers/net/wireless/cw1200/sta.c
@@ -936,7 +936,8 @@ static int __cw1200_flush(struct cw1200_common *priv, bool drop)
 	return ret;
 }
 
-void cw1200_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+void cw1200_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		  u32 queues, bool drop)
 {
 	struct cw1200_common *priv = hw->priv;
 
diff --git a/drivers/net/wireless/cw1200/sta.h b/drivers/net/wireless/cw1200/sta.h
index 35babb62cc6a..b7e386b7662b 100644
--- a/drivers/net/wireless/cw1200/sta.h
+++ b/drivers/net/wireless/cw1200/sta.h
@@ -40,7 +40,8 @@ int cw1200_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd,
 
 int cw1200_set_rts_threshold(struct ieee80211_hw *hw, u32 value);
 
-void cw1200_flush(struct ieee80211_hw *hw, u32 queues, bool drop);
+void cw1200_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		  u32 queues, bool drop);
 
 u64 cw1200_prepare_multicast(struct ieee80211_hw *hw,
 			     struct netdev_hw_addr_list *mc_list);
diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c
index 4f42174d9994..ecc674627e6e 100644
--- a/drivers/net/wireless/iwlegacy/common.c
+++ b/drivers/net/wireless/iwlegacy/common.c
@@ -4755,7 +4755,8 @@ out:
 }
 EXPORT_SYMBOL(il_mac_change_interface);
 
-void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+void il_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		  u32 queues, bool drop)
 {
 	struct il_priv *il = hw->priv;
 	unsigned long timeout = jiffies + msecs_to_jiffies(500);
diff --git a/drivers/net/wireless/iwlegacy/common.h b/drivers/net/wireless/iwlegacy/common.h
index dfb13c70efe8..ea5c0f863c4e 100644
--- a/drivers/net/wireless/iwlegacy/common.h
+++ b/drivers/net/wireless/iwlegacy/common.h
@@ -1723,7 +1723,8 @@ void il_mac_remove_interface(struct ieee80211_hw *hw,
 			     struct ieee80211_vif *vif);
 int il_mac_change_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			    enum nl80211_iftype newtype, bool newp2p);
-void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop);
+void il_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		  u32 queues, bool drop);
 int il_alloc_txq_mem(struct il_priv *il);
 void il_free_txq_mem(struct il_priv *il);
 
diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
index dd55c9cf7ba8..d4fae9fb2ddb 100644
--- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
@@ -1091,7 +1091,8 @@ static void iwlagn_configure_filter(struct ieee80211_hw *hw,
 			FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL;
 }
 
-static void iwlagn_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void iwlagn_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			     u32 queues, bool drop)
 {
 	struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw);
 
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 9d7a52f5a410..31c7a1c9ef5f 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1676,7 +1676,9 @@ static int mac80211_hwsim_ampdu_action(struct ieee80211_hw *hw,
 	return 0;
 }
 
-static void mac80211_hwsim_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void mac80211_hwsim_flush(struct ieee80211_hw *hw,
+				 struct ieee80211_vif *vif,
+				 u32 queues, bool drop)
 {
 	/* Not implemented, queues only on kernel side */
 }
diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c
index eede90b63f84..7be3a4839640 100644
--- a/drivers/net/wireless/p54/main.c
+++ b/drivers/net/wireless/p54/main.c
@@ -669,7 +669,8 @@ static unsigned int p54_flush_count(struct p54_common *priv)
 	return total;
 }
 
-static void p54_flush(struct ieee80211_hw *dev, u32 queues, bool drop)
+static void p54_flush(struct ieee80211_hw *dev, struct ieee80211_vif *vif,
+		      u32 queues, bool drop)
 {
 	struct p54_common *priv = dev->priv;
 	unsigned int total, i;
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index e3b885d8f7db..010b76505243 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -1448,7 +1448,8 @@ int rt2x00mac_conf_tx(struct ieee80211_hw *hw,
 		      struct ieee80211_vif *vif, u16 queue,
 		      const struct ieee80211_tx_queue_params *params);
 void rt2x00mac_rfkill_poll(struct ieee80211_hw *hw);
-void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop);
+void rt2x00mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		     u32 queues, bool drop);
 int rt2x00mac_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant);
 int rt2x00mac_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant);
 void rt2x00mac_get_ringparam(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index ddeb5a709aa3..30a2367ba8d6 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -747,7 +747,8 @@ void rt2x00mac_rfkill_poll(struct ieee80211_hw *hw)
 }
 EXPORT_SYMBOL_GPL(rt2x00mac_rfkill_poll);
 
-void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+void rt2x00mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		     u32 queues, bool drop)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
 	struct data_queue *queue;
diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c
index 4ec424f26672..b1ed6d0796f6 100644
--- a/drivers/net/wireless/rtlwifi/core.c
+++ b/drivers/net/wireless/rtlwifi/core.c
@@ -1387,7 +1387,8 @@ static void rtl_op_rfkill_poll(struct ieee80211_hw *hw)
  * before switch channel or power save, or tx buffer packet
  * maybe send after offchannel or rf sleep, this may cause
  * dis-association by AP */
-static void rtl_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void rtl_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			 u32 queues, bool drop)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index ed88d3913483..077eb5b9cd74 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -5184,7 +5184,8 @@ out:
 	mutex_unlock(&wl->mutex);
 }
 
-static void wlcore_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void wlcore_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			    u32 queues, bool drop)
 {
 	struct wl1271 *wl = hw->priv;
 
diff --git a/drivers/staging/rtl8821ae/core.c b/drivers/staging/rtl8821ae/core.c
index ff3139b6da65..63ae2d1997d3 100644
--- a/drivers/staging/rtl8821ae/core.c
+++ b/drivers/staging/rtl8821ae/core.c
@@ -1414,23 +1414,15 @@ static void rtl_op_rfkill_poll(struct ieee80211_hw *hw)
  * before switch channel or power save, or tx buffer packet
  * maybe send after offchannel or rf sleep, this may cause
  * dis-association by AP */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0))
-static void rtl_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void rtl_op_flush(struct ieee80211_hw *hw,
+			 struct ieee80211_vif *vif,
+			 u32 queues, bool drop)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 
 	if (rtlpriv->intf_ops->flush)
 		rtlpriv->intf_ops->flush(hw, queues, drop);
 }
-#else
-static void rtl_op_flush(struct ieee80211_hw *hw, bool drop)
-{
-	struct rtl_priv *rtlpriv = rtl_priv(hw);
-
-	if (rtlpriv->intf_ops->flush)
-		rtlpriv->intf_ops->flush(hw, drop);
-}
-#endif
 
 const struct ieee80211_ops rtl_ops = {
 	.start = rtl_op_start,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8248e3909fdf..faa7b9cf9cc7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2609,6 +2609,7 @@ enum ieee80211_roc_type {
  *	of queues to flush, which is useful if different virtual interfaces
  *	use different hardware queues; it may also indicate all queues.
  *	If the parameter @drop is set to %true, pending frames may be dropped.
+ *	Note that vif can be NULL.
  *	The callback can sleep.
  *
  * @channel_switch: Drivers that need (or want) to offload the channel
@@ -2871,7 +2872,8 @@ struct ieee80211_ops {
 			     struct netlink_callback *cb,
 			     void *data, int len);
 #endif
-	void (*flush)(struct ieee80211_hw *hw, u32 queues, bool drop);
+	void (*flush)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		      u32 queues, bool drop);
 	void (*channel_switch)(struct ieee80211_hw *hw,
 			       struct ieee80211_channel_switch *ch_switch);
 	int (*set_antenna)(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index fc689f5d971e..5331582a2c81 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -726,13 +726,19 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local)
 }
 
 static inline void drv_flush(struct ieee80211_local *local,
+			     struct ieee80211_sub_if_data *sdata,
 			     u32 queues, bool drop)
 {
+	struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL;
+
 	might_sleep();
 
+	if (sdata)
+		check_sdata_in_driver(sdata);
+
 	trace_drv_flush(local, queues, drop);
 	if (local->ops->flush)
-		local->ops->flush(&local->hw, queues, drop);
+		local->ops->flush(&local->hw, vif, queues, drop);
 	trace_drv_return_void(local);
 }
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 275c94f995f7..5cf62ec74c14 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -554,7 +554,7 @@ void ieee80211_flush_queues(struct ieee80211_local *local,
 	ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_FLUSH);
 
-	drv_flush(local, queues, false);
+	drv_flush(local, sdata, queues, false);
 
 	ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_FLUSH);
-- 
cgit v1.2.3-71-gd317


From 570dbde137d4604e4e682a5855b4425233344c19 Mon Sep 17 00:00:00 2001
From: David Spinadel <david.spinadel@intel.com>
Date: Sun, 23 Feb 2014 09:12:59 +0200
Subject: cfg80211: Add indoor only and GO concurrent channel attributes

The FCC are clarifying some soft configuration requirements,
which among other include the following:

1. Indoor operation, where a device can use channels requiring indoor
   operation, subject to that it can guarantee indoor operation,
   i.e., the device is connected to AC Power or the device is under
   the control of a local master that is acting as an AP and is
   connected to AC Power.
2. Concurrent GO operation, where devices may instantiate a P2P GO
   while they are under the guidance of an authorized master. For example,
   on a channel on which a BSS is connected to an authorized master, i.e.,
   with DFS and radar detection capability in the UNII band.

See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122

Add support for advertising Indoor-only and GO-Concurrent channel
properties.

Signed-off-by: David Spinadel <david.spinadel@intel.com>
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  5 +++++
 include/uapi/linux/nl80211.h | 23 +++++++++++++++++++++++
 net/wireless/nl80211.c       |  6 ++++++
 net/wireless/reg.c           |  2 ++
 4 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6510ccf53a54..14d8d3417735 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -109,6 +109,9 @@ enum ieee80211_band {
  *	channel as the control or any of the secondary channels.
  *	This may be due to the driver or due to regulatory bandwidth
  *	restrictions.
+ * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY
+ * @IEEE80211_CHAN_GO_CONCURRENT: see %NL80211_FREQUENCY_ATTR_GO_CONCURRENT
+ *
  */
 enum ieee80211_channel_flags {
 	IEEE80211_CHAN_DISABLED		= 1<<0,
@@ -120,6 +123,8 @@ enum ieee80211_channel_flags {
 	IEEE80211_CHAN_NO_OFDM		= 1<<6,
 	IEEE80211_CHAN_NO_80MHZ		= 1<<7,
 	IEEE80211_CHAN_NO_160MHZ	= 1<<8,
+	IEEE80211_CHAN_INDOOR_ONLY	= 1<<9,
+	IEEE80211_CHAN_GO_CONCURRENT	= 1<<10,
 };
 
 #define IEEE80211_CHAN_NO_HT40 \
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 5e405fd55a71..ac5b2d25f0fc 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2342,9 +2342,30 @@ enum nl80211_band_attr {
  *	using this channel as the primary or any of the secondary channels
  *	isn't possible
  * @NL80211_FREQUENCY_ATTR_DFS_CAC_TIME: DFS CAC time in milliseconds.
+ * @NL80211_FREQUENCY_ATTR_INDOOR_ONLY: Only indoor use is permitted on this
+ *	channel. A channel that has the INDOOR_ONLY attribute can only be
+ *	used when there is a clear assessment that the device is operating in
+ *	an indoor surroundings, i.e., it is connected to AC power (and not
+ *	through portable DC inverters) or is under the control of a master
+ *	that is acting as an AP and is connected to AC power.
+ * @NL80211_FREQUENCY_ATTR_GO_CONCURRENT: GO operation is allowed on this
+ *	channel if it's connected concurrently to a BSS on the same channel on
+ *	the 2 GHz band or to a channel in the same UNII band (on the 5 GHz
+ *	band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO on a
+ *	channel that has the GO_CONCURRENT attribute set can be done when there
+ *	is a clear assessment that the device is operating under the guidance of
+ *	an authorized master, i.e., setting up a GO while the device is also
+ *	connected to an AP with DFS and radar detection on the UNII band (it is
+ *	up to user-space, i.e., wpa_supplicant to perform the required
+ *	verifications)
  * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
  *	currently defined
  * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
+ *
+ * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122
+ * for more information on the FCC description of the relaxations allowed
+ * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and
+ * NL80211_FREQUENCY_ATTR_GO_CONCURRENT.
  */
 enum nl80211_frequency_attr {
 	__NL80211_FREQUENCY_ATTR_INVALID,
@@ -2361,6 +2382,8 @@ enum nl80211_frequency_attr {
 	NL80211_FREQUENCY_ATTR_NO_80MHZ,
 	NL80211_FREQUENCY_ATTR_NO_160MHZ,
 	NL80211_FREQUENCY_ATTR_DFS_CAC_TIME,
+	NL80211_FREQUENCY_ATTR_INDOOR_ONLY,
+	NL80211_FREQUENCY_ATTR_GO_CONCURRENT,
 
 	/* keep last */
 	__NL80211_FREQUENCY_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b25b5ce4076d..c5ead18ad3ab 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -614,6 +614,12 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
 		if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) &&
 		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ))
 			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT))
+			goto nla_put_failure;
 	}
 
 	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index e0a746d19061..1e9dc1cba335 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -873,6 +873,8 @@ static u32 map_regdom_flags(u32 rd_flags)
 		channel_flags |= IEEE80211_CHAN_RADAR;
 	if (rd_flags & NL80211_RRF_NO_OFDM)
 		channel_flags |= IEEE80211_CHAN_NO_OFDM;
+	if (rd_flags & NL80211_RRF_NO_OUTDOOR)
+		channel_flags |= IEEE80211_CHAN_INDOOR_ONLY;
 	return channel_flags;
 }
 
-- 
cgit v1.2.3-71-gd317


From 174e0cd28af0fe3c6c634c3e4d9e042c683bd7f7 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Sun, 23 Feb 2014 09:13:01 +0200
Subject: cfg80211: Enable GO operation on additional channels

Allow GO operation on a channel marked with IEEE80211_CHAN_GO_CONCURRENT
iff there is an active station interface that is associated to
an AP operating on the same channel in the 2 GHz band or the same UNII band
(in the 5 GHz band). This relaxation is not allowed if the channel is
marked with IEEE80211_CHAN_RADAR.

Note that this is a permissive approach to the FCC definitions,
that require a clear assessment that the device operating the AP is
an authorized master, i.e., with radar detection and DFS capabilities.

It is assumed that such restrictions are enforced by user space.
Furthermore, it is assumed, that if the conditions that allowed for
the operation of the GO on such a channel change, i.e., the station
interface disconnected from the AP, it is the responsibility of user
space to evacuate the GO from the channel.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h   |  4 ++-
 include/net/regulatory.h |  6 ++++
 net/mac80211/ibss.c      |  9 ++++--
 net/wireless/Kconfig     | 24 +++++++++++++++
 net/wireless/chan.c      | 76 ++++++++++++++++++++++++++++++++++++++++++++++--
 net/wireless/mesh.c      |  3 +-
 net/wireless/nl80211.c   | 11 ++++---
 net/wireless/reg.c       | 29 ++++++++++++++++++
 net/wireless/reg.h       | 12 ++++++++
 net/wireless/trace.h     | 11 ++++---
 10 files changed, 169 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 14d8d3417735..5640dc028bfa 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4539,12 +4539,14 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
  * cfg80211_reg_can_beacon - check if beaconing is allowed
  * @wiphy: the wiphy
  * @chandef: the channel definition
+ * @iftype: interface type
  *
  * Return: %true if there is no secondary channel or the secondary channel(s)
  * can be used for beaconing (i.e. is not a radar channel etc.)
  */
 bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
-			     struct cfg80211_chan_def *chandef);
+			     struct cfg80211_chan_def *chandef,
+			     enum nl80211_iftype iftype);
 
 /*
  * cfg80211_ch_switch_notify - update wdev channel and notify userspace
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index 75fc1f5a948d..259992444e80 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -131,6 +131,11 @@ struct regulatory_request {
  * 	all country IE information processed by the regulatory core. This will
  * 	override %REGULATORY_COUNTRY_IE_FOLLOW_POWER as all country IEs will
  * 	be ignored.
+ * @REGULATORY_ENABLE_RELAX_NO_IR: for devices that wish to allow the
+ *      NO_IR relaxation, which enables transmissions on channels on which
+ *      otherwise initiating radiation is not allowed. This will enable the
+ *      relaxations enabled under the CFG80211_REG_RELAX_NO_IR configuration
+ *      option
  */
 enum ieee80211_regulatory_flags {
 	REGULATORY_CUSTOM_REG			= BIT(0),
@@ -138,6 +143,7 @@ enum ieee80211_regulatory_flags {
 	REGULATORY_DISABLE_BEACON_HINTS		= BIT(2),
 	REGULATORY_COUNTRY_IE_FOLLOW_POWER	= BIT(3),
 	REGULATORY_COUNTRY_IE_IGNORE		= BIT(4),
+	REGULATORY_ENABLE_RELAX_NO_IR           = BIT(5),
 };
 
 struct ieee80211_freq_range {
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 35e4f94d7ba1..741445e498b0 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -262,7 +262,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	/* make a copy of the chandef, it could be modified below. */
 	chandef = *req_chandef;
 	chan = chandef.chan;
-	if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
+	if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+				     NL80211_IFTYPE_ADHOC)) {
 		if (chandef.width == NL80211_CHAN_WIDTH_5 ||
 		    chandef.width == NL80211_CHAN_WIDTH_10 ||
 		    chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
@@ -274,7 +275,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 		chandef.width = NL80211_CHAN_WIDTH_20;
 		chandef.center_freq1 = chan->center_freq;
 		/* check again for downgraded chandef */
-		if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
+		if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef,
+					     NL80211_IFTYPE_ADHOC)) {
 			sdata_info(sdata,
 				   "Failed to join IBSS, beacons forbidden\n");
 			return;
@@ -861,7 +863,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		goto disconnect;
 	}
 
-	if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef)) {
+	if (!cfg80211_reg_can_beacon(sdata->local->hw.wiphy, &params.chandef,
+				     NL80211_IFTYPE_ADHOC)) {
 		sdata_info(sdata,
 			   "IBSS %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
 			   ifibss->bssid,
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 2a891bc6315c..405f3c4cf70c 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -108,6 +108,30 @@ config CFG80211_REG_CELLULAR_HINTS
 	  feature if you have tested and validated this feature on your
 	  systems.
 
+config CFG80211_REG_RELAX_NO_IR
+	bool "cfg80211 support for NO_IR relaxation"
+	depends on CFG80211_CERTIFICATION_ONUS
+	---help---
+	 This option enables support for relaxation of the NO_IR flag for
+	 situations that certain regulatory bodies have provided clarifications
+	 on how relaxation can occur. This feature has an inherent dependency on
+	 userspace features which must have been properly tested and as such is
+	 not enabled by default.
+
+	 A relaxation feature example is allowing the operation of a P2P group
+	 owner (GO) on channels marked with NO_IR if there is an additional BSS
+	 interface which associated to an AP which userspace assumes or confirms
+	 to be an authorized master, i.e., with radar detection support and DFS
+	 capabilities. However, note that in order to not create daisy chain
+	 scenarios, this relaxation is not allowed in cases that the BSS client
+	 is associated to P2P GO and in addition the P2P GO instantiated on
+	 a channel due to this relaxation should not allow connection from
+	 non P2P clients.
+
+	 The regulatory core will apply these relaxations only for drivers that
+	 support this feature by declaring the appropriate channel flags and
+	 capabilities in their registration flow.
+
 config CFG80211_DEFAULT_PS
 	bool "enable powersave by default"
 	depends on CFG80211
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 9c9501a35fb5..50202af7fba3 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -661,15 +661,85 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 }
 EXPORT_SYMBOL(cfg80211_chandef_usable);
 
+/*
+ * For GO only, check if the channel can be used under permissive conditions
+ * mandated by the some regulatory bodies, i.e., the channel is marked with
+ * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface
+ * associated to an AP on the same channel or on the same UNII band
+ * (assuming that the AP is an authorized master).
+ */
+static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev,
+					struct ieee80211_channel *chan)
+{
+	struct wireless_dev *wdev_iter;
+	struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx);
+
+	ASSERT_RTNL();
+
+	if (!config_enabled(CONFIG_CFG80211_REG_RELAX_NO_IR) ||
+	    !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR) ||
+	    !(chan->flags & IEEE80211_CHAN_GO_CONCURRENT))
+		return false;
+
+	/*
+	 * Generally, it is possible to rely on another device/driver to allow
+	 * the GO concurrent relaxation, however, since the device can further
+	 * enforce the relaxation (by doing a similar verifications as this),
+	 * and thus fail the GO instantiation, consider only the interfaces of
+	 * the current registered device.
+	 */
+	list_for_each_entry(wdev_iter, &rdev->wdev_list, list) {
+		struct ieee80211_channel *other_chan = NULL;
+		int r1, r2;
+
+		if (wdev_iter->iftype != NL80211_IFTYPE_STATION ||
+		    !netif_running(wdev_iter->netdev))
+			continue;
+
+		wdev_lock(wdev_iter);
+		if (wdev_iter->current_bss)
+			other_chan = wdev_iter->current_bss->pub.channel;
+		wdev_unlock(wdev_iter);
+
+		if (!other_chan)
+			continue;
+
+		if (chan == other_chan)
+			return true;
+
+		if (chan->band != IEEE80211_BAND_5GHZ)
+			continue;
+
+		r1 = cfg80211_get_unii(chan->center_freq);
+		r2 = cfg80211_get_unii(other_chan->center_freq);
+
+		if (r1 != -EINVAL && r1 == r2)
+			return true;
+	}
+
+	return false;
+}
+
 bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
-			     struct cfg80211_chan_def *chandef)
+			     struct cfg80211_chan_def *chandef,
+			     enum nl80211_iftype iftype)
 {
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
 	bool res;
 	u32 prohibited_flags = IEEE80211_CHAN_DISABLED |
-			       IEEE80211_CHAN_NO_IR |
 			       IEEE80211_CHAN_RADAR;
 
-	trace_cfg80211_reg_can_beacon(wiphy, chandef);
+	trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype);
+
+	/*
+	 * Under certain conditions suggested by the some regulatory bodies
+	 * a GO can operate on channels marked with IEEE80211_NO_IR
+	 * so set this flag only if such relaxations are not enabled and
+	 * the conditions are not met.
+	 */
+	if (iftype != NL80211_IFTYPE_P2P_GO ||
+	    !cfg80211_go_permissive_chan(rdev, chandef->chan))
+		prohibited_flags |= IEEE80211_CHAN_NO_IR;
 
 	if (cfg80211_chandef_dfs_required(wiphy, chandef) > 0 &&
 	    cfg80211_chandef_dfs_available(wiphy, chandef)) {
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 5af5cc6b2c4c..7031ee0afad8 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -175,7 +175,8 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 							       scan_width);
 	}
 
-	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef))
+	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef,
+				     NL80211_IFTYPE_MESH_POINT))
 		return -EINVAL;
 
 	err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c5ead18ad3ab..b8d81e41b0f7 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1939,7 +1939,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
 			result = -EBUSY;
 			break;
 		}
-		if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef)) {
+		if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) {
 			result = -EINVAL;
 			break;
 		}
@@ -3271,7 +3271,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 	} else if (!nl80211_get_ap_channel(rdev, &params))
 		return -EINVAL;
 
-	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef))
+	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef,
+				     wdev->iftype))
 		return -EINVAL;
 
 	err = cfg80211_chandef_dfs_required(wdev->wiphy, &params.chandef);
@@ -5941,7 +5942,8 @@ skip_beacons:
 	if (err)
 		return err;
 
-	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef))
+	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef,
+				     wdev->iftype))
 		return -EINVAL;
 
 	switch (dev->ieee80211_ptr->iftype) {
@@ -6717,7 +6719,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		return err;
 
-	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef))
+	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef,
+				     NL80211_IFTYPE_ADHOC))
 		return -EINVAL;
 
 	switch (ibss.chandef.width) {
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2b8c1000c1be..58f48b8f42ae 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2616,6 +2616,35 @@ static void reg_timeout_work(struct work_struct *work)
 	rtnl_unlock();
 }
 
+/*
+ * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for
+ * UNII band definitions
+ */
+int cfg80211_get_unii(int freq)
+{
+	/* UNII-1 */
+	if (freq >= 5150 && freq <= 5250)
+		return 0;
+
+	/* UNII-2A */
+	if (freq > 5250 && freq <= 5350)
+		return 1;
+
+	/* UNII-2B */
+	if (freq > 5350 && freq <= 5470)
+		return 2;
+
+	/* UNII-2C */
+	if (freq > 5470 && freq <= 5725)
+		return 3;
+
+	/* UNII-3 */
+	if (freq > 5725 && freq <= 5825)
+		return 4;
+
+	return -EINVAL;
+}
+
 int __init regulatory_init(void)
 {
 	int err = 0;
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 37c180df34b7..334a53af0fc8 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -104,4 +104,16 @@ void regulatory_hint_country_ie(struct wiphy *wiphy,
  */
 void regulatory_hint_disconnect(void);
 
+/**
+ * cfg80211_get_unii - get the U-NII band for the frequency
+ * @freq: the frequency for which we want to get the UNII band.
+
+ * Get a value specifying the U-NII band frequency belongs to.
+ * U-NII bands are defined by the FCC in C.F.R 47 part 15.
+ *
+ * Returns -EINVAL if freq is invalid, 0 for UNII-1, 1 for UNII-2A,
+ * 2 for UNII-2B, 3 for UNII-2C and 4 for UNII-3.
+ */
+int cfg80211_get_unii(int freq);
+
 #endif  /* __NET_WIRELESS_REG_H */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index aabccf13e07b..47b499f8f54d 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2193,18 +2193,21 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify,
 );
 
 TRACE_EVENT(cfg80211_reg_can_beacon,
-	TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef),
-	TP_ARGS(wiphy, chandef),
+	TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef,
+		 enum nl80211_iftype iftype),
+	TP_ARGS(wiphy, chandef, iftype),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		CHAN_DEF_ENTRY
+		__field(enum nl80211_iftype, iftype)
 	),
 	TP_fast_assign(
 		WIPHY_ASSIGN;
 		CHAN_DEF_ASSIGN(chandef);
+		__entry->iftype = iftype;
 	),
-	TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT,
-		  WIPHY_PR_ARG, CHAN_DEF_PR_ARG)
+	TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d",
+		  WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype)
 );
 
 TRACE_EVENT(cfg80211_chandef_dfs_required,
-- 
cgit v1.2.3-71-gd317


From 52616f2b446eaad8eb2cd78bbd052f0066069757 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Tue, 25 Feb 2014 16:26:00 +0200
Subject: cfg80211: Add an option to hint indoor operation

Add the option to hint the wireless core that it is operating in an indoor
environment.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  3 +++
 net/wireless/nl80211.c       | 18 ++++++--------
 net/wireless/reg.c           | 58 +++++++++++++++++++++++++++++++++++++++++++-
 net/wireless/reg.h           |  1 +
 4 files changed, 68 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ac5b2d25f0fc..513bfd7b2e5f 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2602,10 +2602,13 @@ enum nl80211_dfs_regions {
  *	present has been registered with the wireless core that
  *	has listed NL80211_FEATURE_CELL_BASE_REG_HINTS as a
  *	supported feature.
+ * @NL80211_USER_REG_HINT_INDOOR: a user sent an hint indicating that the
+ *	platform is operating in an indoor environment.
  */
 enum nl80211_user_reg_hint_type {
 	NL80211_USER_REG_HINT_USER	= 0,
 	NL80211_USER_REG_HINT_CELL_BASE = 1,
+	NL80211_USER_REG_HINT_INDOOR    = 2,
 };
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b8d81e41b0f7..85bc830fd7e3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4677,7 +4677,6 @@ static int parse_reg_rule(struct nlattr *tb[],
 
 static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 {
-	int r;
 	char *data = NULL;
 	enum nl80211_user_reg_hint_type user_reg_hint_type;
 
@@ -4690,11 +4689,6 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 	if (unlikely(!rcu_access_pointer(cfg80211_regdomain)))
 		return -EINPROGRESS;
 
-	if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
-		return -EINVAL;
-
-	data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
-
 	if (info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE])
 		user_reg_hint_type =
 		  nla_get_u32(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE]);
@@ -4704,14 +4698,16 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 	switch (user_reg_hint_type) {
 	case NL80211_USER_REG_HINT_USER:
 	case NL80211_USER_REG_HINT_CELL_BASE:
-		break;
+		if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
+			return -EINVAL;
+
+		data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
+		return regulatory_hint_user(data, user_reg_hint_type);
+	case NL80211_USER_REG_HINT_INDOOR:
+		return regulatory_hint_indoor_user();
 	default:
 		return -EINVAL;
 	}
-
-	r = regulatory_hint_user(data, user_reg_hint_type);
-
-	return r;
 }
 
 static int nl80211_get_mesh_config(struct sk_buff *skb,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 58f48b8f42ae..55d68c31ad72 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -65,11 +65,26 @@
 #define REG_DBG_PRINT(args...)
 #endif
 
+/**
+ * enum reg_request_treatment - regulatory request treatment
+ *
+ * @REG_REQ_OK: continue processing the regulatory request
+ * @REG_REQ_IGNORE: ignore the regulatory request
+ * @REG_REQ_INTERSECT: the regulatory domain resulting from this request should
+ *	be intersected with the current one.
+ * @REG_REQ_ALREADY_SET: the regulatory request will not change the current
+ *	regulatory settings, and no further processing is required.
+ * @REG_REQ_USER_HINT_HANDLED: a non alpha2  user hint was handled and no
+ *	further processing is required, i.e., not need to update last_request
+ *	etc. This should be used for user hints that do not provide an alpha2
+ *	but some other type of regulatory hint, i.e., indoor operation.
+ */
 enum reg_request_treatment {
 	REG_REQ_OK,
 	REG_REQ_IGNORE,
 	REG_REQ_INTERSECT,
 	REG_REQ_ALREADY_SET,
+	REG_REQ_USER_HINT_HANDLED,
 };
 
 static struct regulatory_request core_request_world = {
@@ -106,6 +121,14 @@ const struct ieee80211_regdomain __rcu *cfg80211_regdomain;
  */
 static int reg_num_devs_support_basehint;
 
+/*
+ * State variable indicating if the platform on which the devices
+ * are attached is operating in an indoor environment. The state variable
+ * is relevant for all registered devices.
+ * (protected by RTNL)
+ */
+static bool reg_is_indoor;
+
 static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
 {
 	return rtnl_dereference(cfg80211_regdomain);
@@ -1128,6 +1151,13 @@ static bool reg_request_cell_base(struct regulatory_request *request)
 	return request->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE;
 }
 
+static bool reg_request_indoor(struct regulatory_request *request)
+{
+	if (request->initiator != NL80211_REGDOM_SET_BY_USER)
+		return false;
+	return request->user_reg_hint_type == NL80211_USER_REG_HINT_INDOOR;
+}
+
 bool reg_last_request_cell_base(void)
 {
 	return reg_request_cell_base(get_last_request());
@@ -1570,6 +1600,11 @@ __reg_process_hint_user(struct regulatory_request *user_request)
 {
 	struct regulatory_request *lr = get_last_request();
 
+	if (reg_request_indoor(user_request)) {
+		reg_is_indoor = true;
+		return REG_REQ_USER_HINT_HANDLED;
+	}
+
 	if (reg_request_cell_base(user_request))
 		return reg_ignore_cell_hint(user_request);
 
@@ -1617,7 +1652,8 @@ reg_process_hint_user(struct regulatory_request *user_request)
 
 	treatment = __reg_process_hint_user(user_request);
 	if (treatment == REG_REQ_IGNORE ||
-	    treatment == REG_REQ_ALREADY_SET) {
+	    treatment == REG_REQ_ALREADY_SET ||
+	    treatment == REG_REQ_USER_HINT_HANDLED) {
 		kfree(user_request);
 		return treatment;
 	}
@@ -1678,6 +1714,7 @@ reg_process_hint_driver(struct wiphy *wiphy,
 	case REG_REQ_OK:
 		break;
 	case REG_REQ_IGNORE:
+	case REG_REQ_USER_HINT_HANDLED:
 		kfree(driver_request);
 		return treatment;
 	case REG_REQ_INTERSECT:
@@ -1777,6 +1814,7 @@ reg_process_hint_country_ie(struct wiphy *wiphy,
 	case REG_REQ_OK:
 		break;
 	case REG_REQ_IGNORE:
+	case REG_REQ_USER_HINT_HANDLED:
 		/* fall through */
 	case REG_REQ_ALREADY_SET:
 		kfree(country_ie_request);
@@ -1969,6 +2007,22 @@ int regulatory_hint_user(const char *alpha2,
 	return 0;
 }
 
+int regulatory_hint_indoor_user(void)
+{
+	struct regulatory_request *request;
+
+	request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
+	if (!request)
+		return -ENOMEM;
+
+	request->wiphy_idx = WIPHY_IDX_INVALID;
+	request->initiator = NL80211_REGDOM_SET_BY_USER;
+	request->user_reg_hint_type = NL80211_USER_REG_HINT_INDOOR;
+	queue_regulatory_request(request);
+
+	return 0;
+}
+
 /* Driver hints */
 int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
 {
@@ -2136,6 +2190,8 @@ static void restore_regulatory_settings(bool reset_user)
 
 	ASSERT_RTNL();
 
+	reg_is_indoor = false;
+
 	reset_regdomains(true, &world_regdom);
 	restore_alpha2(alpha2, reset_user);
 
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 334a53af0fc8..2a3842828f6d 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -25,6 +25,7 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy);
 
 int regulatory_hint_user(const char *alpha2,
 			 enum nl80211_user_reg_hint_type user_reg_hint_type);
+int regulatory_hint_indoor_user(void);
 
 void wiphy_regulatory_register(struct wiphy *wiphy);
 void wiphy_regulatory_deregister(struct wiphy *wiphy);
-- 
cgit v1.2.3-71-gd317


From cb2d956dd329caa11b5ece454dc52253aa038e73 Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Mon, 17 Feb 2014 16:52:35 +0200
Subject: cfg80211: refactor cfg80211_can_use_iftype_chan()

Separate the code that counts the interface types and channels from
the code that check the interface combinations.  The new function that
checks for combinations is exported so it can be called by the
drivers.

This is done in preparation for moving the interface combinations
checks out of cfg80211.

Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 Documentation/DocBook/80211.tmpl |   1 +
 include/net/cfg80211.h           |  22 +++++++
 net/wireless/util.c              | 129 ++++++++++++++++++++++-----------------
 3 files changed, 96 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl
index 044b76436e83..d9b9416c989f 100644
--- a/Documentation/DocBook/80211.tmpl
+++ b/Documentation/DocBook/80211.tmpl
@@ -100,6 +100,7 @@
 !Finclude/net/cfg80211.h wdev_priv
 !Finclude/net/cfg80211.h ieee80211_iface_limit
 !Finclude/net/cfg80211.h ieee80211_iface_combination
+!Finclude/net/cfg80211.h cfg80211_check_combinations
       </chapter>
       <chapter>
       <title>Actions and configuration</title>
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5640dc028bfa..4653e9f75d0d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4692,6 +4692,28 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp);
  */
 unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy);
 
+/**
+ * cfg80211_check_combinations - check interface combinations
+ *
+ * @wiphy: the wiphy
+ * @num_different_channels: the number of different channels we want
+ *	to use for verification
+ * @radar_detect: a bitmap where each bit corresponds to a channel
+ *	width where radar detection is needed, as in the definition of
+ *	&struct ieee80211_iface_combination.@radar_detect_widths
+ * @iftype_num: array with the numbers of interfaces of each interface
+ *	type.  The index is the interface type as specified in &enum
+ *	nl80211_iftype.
+ *
+ * This function can be called by the driver to check whether a
+ * combination of interfaces and their types are allowed according to
+ * the interface combinations.
+ */
+int cfg80211_check_combinations(struct wiphy *wiphy,
+				const int num_different_channels,
+				const u8 radar_detect,
+				const int iftype_num[NUM_NL80211_IFTYPES]);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/util.c b/net/wireless/util.c
index e5872ff2c27c..46f404df35e3 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1268,6 +1268,76 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
 	return res;
 }
 
+int cfg80211_check_combinations(struct wiphy *wiphy,
+				const int num_different_channels,
+				const u8 radar_detect,
+				const int iftype_num[NUM_NL80211_IFTYPES])
+{
+	int i, j, iftype;
+	int num_interfaces = 0;
+	u32 used_iftypes = 0;
+
+	for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
+		num_interfaces += iftype_num[iftype];
+		if (iftype_num[iftype] > 0 &&
+		    !(wiphy->software_iftypes & BIT(iftype)))
+			used_iftypes |= BIT(iftype);
+	}
+
+	for (i = 0; i < wiphy->n_iface_combinations; i++) {
+		const struct ieee80211_iface_combination *c;
+		struct ieee80211_iface_limit *limits;
+		u32 all_iftypes = 0;
+
+		c = &wiphy->iface_combinations[i];
+
+		if (num_interfaces > c->max_interfaces)
+			continue;
+		if (num_different_channels > c->num_different_channels)
+			continue;
+
+		limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
+				 GFP_KERNEL);
+		if (!limits)
+			return -ENOMEM;
+
+		for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
+			if (wiphy->software_iftypes & BIT(iftype))
+				continue;
+			for (j = 0; j < c->n_limits; j++) {
+				all_iftypes |= limits[j].types;
+				if (!(limits[j].types & BIT(iftype)))
+					continue;
+				if (limits[j].max < iftype_num[iftype])
+					goto cont;
+				limits[j].max -= iftype_num[iftype];
+			}
+		}
+
+		if (radar_detect && !(c->radar_detect_widths & radar_detect))
+			goto cont;
+
+		/* Finally check that all iftypes that we're currently
+		 * using are actually part of this combination. If they
+		 * aren't then we can't use this combination and have
+		 * to continue to the next.
+		 */
+		if ((all_iftypes & used_iftypes) != used_iftypes)
+			goto cont;
+
+		/* This combination covered all interface types and
+		 * supported the requested numbers, so we're good.
+		 */
+		kfree(limits);
+		return 0;
+ cont:
+		kfree(limits);
+	}
+
+	return -EBUSY;
+}
+EXPORT_SYMBOL(cfg80211_check_combinations);
+
 int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
 				 struct wireless_dev *wdev,
 				 enum nl80211_iftype iftype,
@@ -1276,7 +1346,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
 				 u8 radar_detect)
 {
 	struct wireless_dev *wdev_iter;
-	u32 used_iftypes = BIT(iftype);
 	int num[NUM_NL80211_IFTYPES];
 	struct ieee80211_channel
 			*used_channels[CFG80211_MAX_NUM_DIFFERENT_CHANNELS];
@@ -1284,7 +1353,7 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
 	enum cfg80211_chan_mode chmode;
 	int num_different_channels = 0;
 	int total = 1;
-	int i, j;
+	int i;
 
 	ASSERT_RTNL();
 
@@ -1369,65 +1438,13 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
 
 		num[wdev_iter->iftype]++;
 		total++;
-		used_iftypes |= BIT(wdev_iter->iftype);
 	}
 
 	if (total == 1 && !radar_detect)
 		return 0;
 
-	for (i = 0; i < rdev->wiphy.n_iface_combinations; i++) {
-		const struct ieee80211_iface_combination *c;
-		struct ieee80211_iface_limit *limits;
-		u32 all_iftypes = 0;
-
-		c = &rdev->wiphy.iface_combinations[i];
-
-		if (total > c->max_interfaces)
-			continue;
-		if (num_different_channels > c->num_different_channels)
-			continue;
-
-		limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
-				 GFP_KERNEL);
-		if (!limits)
-			return -ENOMEM;
-
-		for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
-			if (rdev->wiphy.software_iftypes & BIT(iftype))
-				continue;
-			for (j = 0; j < c->n_limits; j++) {
-				all_iftypes |= limits[j].types;
-				if (!(limits[j].types & BIT(iftype)))
-					continue;
-				if (limits[j].max < num[iftype])
-					goto cont;
-				limits[j].max -= num[iftype];
-			}
-		}
-
-		if (radar_detect && !(c->radar_detect_widths & radar_detect))
-			goto cont;
-
-		/*
-		 * Finally check that all iftypes that we're currently
-		 * using are actually part of this combination. If they
-		 * aren't then we can't use this combination and have
-		 * to continue to the next.
-		 */
-		if ((all_iftypes & used_iftypes) != used_iftypes)
-			goto cont;
-
-		/*
-		 * This combination covered all interface types and
-		 * supported the requested numbers, so we're good.
-		 */
-		kfree(limits);
-		return 0;
- cont:
-		kfree(limits);
-	}
-
-	return -EBUSY;
+	return cfg80211_check_combinations(&rdev->wiphy, num_different_channels,
+					   radar_detect, num);
 }
 
 int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
-- 
cgit v1.2.3-71-gd317


From 2beb6dab2d799ee8934cb0801845e551ad8c70f2 Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Tue, 18 Feb 2014 11:40:36 +0200
Subject: cfg80211/mac80211: refactor cfg80211_chandef_dfs_required()

Some interface types don't require DFS (such as STATION, P2P_CLIENT
etc).  In order to centralize these decisions, make
cfg80211_chandef_dfs_required() take the iftype into consideration.

Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h |  7 +++--
 net/mac80211/ibss.c    | 32 +++++++++++-----------
 net/mac80211/mesh.c    |  9 +++---
 net/wireless/chan.c    | 74 ++++++++++++++++++++++++++++++++++++++------------
 net/wireless/mesh.c    |  7 +++--
 net/wireless/nl80211.c | 37 ++++++++++++-------------
 6 files changed, 104 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4653e9f75d0d..92a65c331cf4 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -446,10 +446,13 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
  * cfg80211_chandef_dfs_required - checks if radar detection is required
  * @wiphy: the wiphy to validate against
  * @chandef: the channel definition to check
- * Return: 1 if radar detection is required, 0 if it is not, < 0 on error
+ * @iftype: the interface type as specified in &enum nl80211_iftype
+ * Returns:
+ *	1 if radar detection is required, 0 if it is not, < 0 on error
  */
 int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
-				  const struct cfg80211_chan_def *chandef);
+				  const struct cfg80211_chan_def *chandef,
+				  enum nl80211_iftype);
 
 /**
  * ieee80211_chandef_rate_flags - returns rate flags for a channel
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 741445e498b0..1759bd661e25 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -228,7 +228,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	struct beacon_data *presp;
 	enum nl80211_bss_scan_width scan_width;
 	bool have_higher_than_11mbit;
-	bool radar_required = false;
+	bool radar_required;
 	int err;
 
 	sdata_assert_lock(sdata);
@@ -284,21 +284,20 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	}
 
 	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
-					    &chandef);
+					    &chandef, NL80211_IFTYPE_ADHOC);
 	if (err < 0) {
 		sdata_info(sdata,
 			   "Failed to join IBSS, invalid chandef\n");
 		return;
 	}
-	if (err > 0) {
-		if (!ifibss->userspace_handles_dfs) {
-			sdata_info(sdata,
-				   "Failed to join IBSS, DFS channel without control program\n");
-			return;
-		}
-		radar_required = true;
+	if (err > 0 && !ifibss->userspace_handles_dfs) {
+		sdata_info(sdata,
+			   "Failed to join IBSS, DFS channel without control program\n");
+		return;
 	}
 
+	radar_required = err;
+
 	mutex_lock(&local->mtx);
 	if (ieee80211_vif_use_channel(sdata, &chandef,
 				      ifibss->fixed_channel ?
@@ -777,7 +776,8 @@ static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata)
 	 * unavailable.
 	 */
 	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
-					    &ifibss->chandef);
+					    &ifibss->chandef,
+					    NL80211_IFTYPE_ADHOC);
 	if (err > 0)
 		cfg80211_radar_event(sdata->local->hw.wiphy, &ifibss->chandef,
 				     GFP_ATOMIC);
@@ -876,17 +876,17 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	}
 
 	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
-					    &params.chandef);
+					    &params.chandef,
+					    NL80211_IFTYPE_ADHOC);
 	if (err < 0)
 		goto disconnect;
-	if (err) {
+	if (err > 0 && !ifibss->userspace_handles_dfs) {
 		/* IBSS-DFS only allowed with a control program */
-		if (!ifibss->userspace_handles_dfs)
-			goto disconnect;
-
-		params.radar_required = true;
+		goto disconnect;
 	}
 
+	params.radar_required = err;
+
 	if (cfg80211_chandef_identical(&params.chandef,
 				       &sdata->vif.bss_conf.chandef)) {
 		ibss_dbg(sdata,
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 387f61c4557d..9d292377d3a6 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -903,14 +903,15 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
 	}
 
 	err = cfg80211_chandef_dfs_required(sdata->local->hw.wiphy,
-					    &params.chandef);
+					    &params.chandef,
+					    NL80211_IFTYPE_MESH_POINT);
 	if (err < 0)
 		return false;
-	if (err) {
-		params.radar_required = true;
+	if (err > 0)
 		/* TODO: DFS not (yet) supported */
 		return false;
-	}
+
+	params.radar_required = err;
 
 	if (cfg80211_chandef_identical(&params.chandef,
 				       &sdata->vif.bss_conf.chandef)) {
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index c3180dc03a33..c61bcdd3dfbc 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -326,28 +326,57 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy,
 
 
 int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
-				  const struct cfg80211_chan_def *chandef)
+				  const struct cfg80211_chan_def *chandef,
+				  enum nl80211_iftype iftype)
 {
 	int width;
-	int r;
+	int ret;
 
 	if (WARN_ON(!cfg80211_chandef_valid(chandef)))
 		return -EINVAL;
 
-	width = cfg80211_chandef_get_width(chandef);
-	if (width < 0)
-		return -EINVAL;
+	switch (iftype) {
+	case NL80211_IFTYPE_ADHOC:
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_P2P_GO:
+	case NL80211_IFTYPE_MESH_POINT:
+		width = cfg80211_chandef_get_width(chandef);
+		if (width < 0)
+			return -EINVAL;
 
-	r = cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq1,
-					    width);
-	if (r)
-		return r;
+		ret = cfg80211_get_chans_dfs_required(wiphy,
+						      chandef->center_freq1,
+						      width);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			return BIT(chandef->width);
 
-	if (!chandef->center_freq2)
-		return 0;
+		if (!chandef->center_freq2)
+			return 0;
+
+		ret = cfg80211_get_chans_dfs_required(wiphy,
+						      chandef->center_freq2,
+						      width);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			return BIT(chandef->width);
 
-	return cfg80211_get_chans_dfs_required(wiphy, chandef->center_freq2,
-					       width);
+		break;
+	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_P2P_CLIENT:
+	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_AP_VLAN:
+	case NL80211_IFTYPE_WDS:
+	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_UNSPECIFIED:
+		break;
+	case NUM_NL80211_IFTYPES:
+		WARN_ON(1);
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL(cfg80211_chandef_dfs_required);
 
@@ -749,7 +778,8 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
 	    !cfg80211_go_permissive_chan(rdev, chandef->chan))
 		prohibited_flags |= IEEE80211_CHAN_NO_IR;
 
-	if (cfg80211_chandef_dfs_required(wiphy, chandef) > 0 &&
+	if (cfg80211_chandef_dfs_required(wiphy, chandef,
+					  NL80211_IFTYPE_UNSPECIFIED) > 0 &&
 	    cfg80211_chandef_dfs_available(wiphy, chandef)) {
 		/* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */
 		prohibited_flags = IEEE80211_CHAN_DISABLED;
@@ -779,6 +809,8 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
 		        enum cfg80211_chan_mode *chanmode,
 		        u8 *radar_detect)
 {
+	int ret;
+
 	*chan = NULL;
 	*chanmode = CHAN_MODE_UNDEFINED;
 
@@ -821,8 +853,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
 			*chan = wdev->chandef.chan;
 			*chanmode = CHAN_MODE_SHARED;
 
-			if (cfg80211_chandef_dfs_required(wdev->wiphy,
-							  &wdev->chandef))
+			ret = cfg80211_chandef_dfs_required(wdev->wiphy,
+							    &wdev->chandef,
+							    wdev->iftype);
+			WARN_ON(ret < 0);
+			if (ret > 0)
 				*radar_detect |= BIT(wdev->chandef.width);
 		}
 		return;
@@ -831,8 +866,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
 			*chan = wdev->chandef.chan;
 			*chanmode = CHAN_MODE_SHARED;
 
-			if (cfg80211_chandef_dfs_required(wdev->wiphy,
-							  &wdev->chandef))
+			ret = cfg80211_chandef_dfs_required(wdev->wiphy,
+							    &wdev->chandef,
+							    wdev->iftype);
+			WARN_ON(ret < 0);
+			if (ret > 0)
 				*radar_detect |= BIT(wdev->chandef.width);
 		}
 		return;
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 7031ee0afad8..6ebe883653a4 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -179,10 +179,13 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 				     NL80211_IFTYPE_MESH_POINT))
 		return -EINVAL;
 
-	err = cfg80211_chandef_dfs_required(wdev->wiphy, &setup->chandef);
+	err = cfg80211_chandef_dfs_required(wdev->wiphy,
+					    &setup->chandef,
+					    NL80211_IFTYPE_MESH_POINT);
 	if (err < 0)
 		return err;
-	if (err)
+
+	if (err > 0)
 		radar_detect_width = BIT(setup->chandef.width);
 
 	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 85bc830fd7e3..4f82b9b71db1 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3275,12 +3275,15 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 				     wdev->iftype))
 		return -EINVAL;
 
-	err = cfg80211_chandef_dfs_required(wdev->wiphy, &params.chandef);
+	err = cfg80211_chandef_dfs_required(wdev->wiphy,
+					    &params.chandef,
+					    NL80211_IFTYPE_AP);
 	if (err < 0)
 		return err;
-	if (err) {
-		radar_detect_width = BIT(params.chandef.width);
+
+	if (err > 0) {
 		params.radar_required = true;
+		radar_detect_width = BIT(params.chandef.width);
 	}
 
 	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
@@ -5806,7 +5809,8 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
 	if (wdev->cac_started)
 		return -EBUSY;
 
-	err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef);
+	err = cfg80211_chandef_dfs_required(wdev->wiphy, &chandef,
+					    NL80211_IFTYPE_UNSPECIFIED);
 	if (err < 0)
 		return err;
 
@@ -5942,22 +5946,15 @@ skip_beacons:
 				     wdev->iftype))
 		return -EINVAL;
 
-	switch (dev->ieee80211_ptr->iftype) {
-	case NL80211_IFTYPE_AP:
-	case NL80211_IFTYPE_P2P_GO:
-	case NL80211_IFTYPE_ADHOC:
-	case NL80211_IFTYPE_MESH_POINT:
-		err = cfg80211_chandef_dfs_required(wdev->wiphy,
-						    &params.chandef);
-		if (err < 0)
-			return err;
-		if (err) {
-			radar_detect_width = BIT(params.chandef.width);
-			params.radar_required = true;
-		}
-		break;
-	default:
-		break;
+	err = cfg80211_chandef_dfs_required(wdev->wiphy,
+					    &params.chandef,
+					    wdev->iftype);
+	if (err < 0)
+		return err;
+
+	if (err > 0) {
+		radar_detect_width = BIT(params.chandef.width);
+		params.radar_required = true;
 	}
 
 	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
-- 
cgit v1.2.3-71-gd317


From 73de86a38962b18edad3205c2358599dd9c83e9f Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Thu, 13 Feb 2014 11:31:59 +0200
Subject: cfg80211/mac80211: move interface counting for combination check to
 mac80211

Move the counting part of the interface combination check from
cfg80211 to mac80211.

This is needed to simplify locking when the driver has to perform a
combination check by itself (eg. with channel-switch).

Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h     |  2 --
 net/mac80211/cfg.c         |  2 --
 net/mac80211/chan.c        | 17 +++++++++++
 net/mac80211/ieee80211_i.h |  4 +++
 net/mac80211/util.c        | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/core.h        | 13 ++-------
 net/wireless/ibss.c        |  4 +++
 net/wireless/mesh.c        | 28 ------------------
 net/wireless/mlme.c        | 14 +--------
 net/wireless/nl80211.c     | 29 +++----------------
 net/wireless/util.c        |  5 ++++
 11 files changed, 110 insertions(+), 80 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 92a65c331cf4..fb8afcee62b4 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -662,7 +662,6 @@ struct cfg80211_acl_data {
  * @p2p_opp_ps: P2P opportunistic PS
  * @acl: ACL configuration used by the drivers which has support for
  *	MAC address based access control
- * @radar_required: set if radar detection is required
  */
 struct cfg80211_ap_settings {
 	struct cfg80211_chan_def chandef;
@@ -680,7 +679,6 @@ struct cfg80211_ap_settings {
 	u8 p2p_ctwindow;
 	bool p2p_opp_ps;
 	const struct cfg80211_acl_data *acl;
-	bool radar_required;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 906bc3b05aae..8bf94eaa0456 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -972,7 +972,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 	sdata->needed_rx_chains = sdata->local->rx_chains;
 
 	mutex_lock(&local->mtx);
-	sdata->radar_required = params->radar_required;
 	err = ieee80211_vif_use_channel(sdata, &params->chandef,
 					IEEE80211_CHANCTX_SHARED);
 	mutex_unlock(&local->mtx);
@@ -2930,7 +2929,6 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
 	/* whatever, but channel contexts should not complain about that one */
 	sdata->smps_mode = IEEE80211_SMPS_OFF;
 	sdata->needed_rx_chains = local->rx_chains;
-	sdata->radar_required = true;
 
 	err = ieee80211_vif_use_channel(sdata, chandef,
 					IEEE80211_CHANCTX_SHARED);
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index b297bd3043a8..623b336f3efd 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -519,6 +519,7 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_chanctx *ctx;
+	u8 radar_detect_width = 0;
 	int ret;
 
 	lockdep_assert_held(&local->mtx);
@@ -526,6 +527,22 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
 	WARN_ON(sdata->dev && netif_carrier_ok(sdata->dev));
 
 	mutex_lock(&local->chanctx_mtx);
+
+	ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
+					    chandef,
+					    sdata->wdev.iftype);
+	if (ret < 0)
+		goto out;
+	if (ret > 0)
+		radar_detect_width = BIT(chandef->width);
+
+	sdata->radar_required = ret;
+
+	ret = ieee80211_check_combinations(sdata, chandef, mode,
+					   radar_detect_width);
+	if (ret < 0)
+		goto out;
+
 	__ieee80211_vif_release_channel(sdata);
 
 	ctx = ieee80211_find_chanctx(local, chandef, mode);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 222c28b75315..09213fd87f8d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1805,6 +1805,10 @@ int ieee80211_cs_headroom(struct ieee80211_local *local,
 			  enum nl80211_iftype iftype);
 void ieee80211_recalc_dtim(struct ieee80211_local *local,
 			   struct ieee80211_sub_if_data *sdata);
+int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
+				 const struct cfg80211_chan_def *chandef,
+				 enum ieee80211_chanctx_mode chanmode,
+				 u8 radar_detect);
 
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 73af7398850b..436f98870066 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2797,3 +2797,75 @@ void ieee80211_recalc_dtim(struct ieee80211_local *local,
 
 	ps->dtim_count = dtim_count;
 }
+
+int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
+				 const struct cfg80211_chan_def *chandef,
+				 enum ieee80211_chanctx_mode chanmode,
+				 u8 radar_detect)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_sub_if_data *sdata_iter;
+	enum nl80211_iftype iftype = sdata->wdev.iftype;
+	int num[NUM_NL80211_IFTYPES];
+	struct ieee80211_chanctx *ctx;
+	int num_different_channels = 1;
+	int total = 1;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+
+	if (WARN_ON(hweight32(radar_detect) > 1))
+		return -EINVAL;
+
+	if (WARN_ON(chanmode == IEEE80211_CHANCTX_SHARED && !chandef->chan))
+		return -EINVAL;
+
+	if (WARN_ON(iftype >= NUM_NL80211_IFTYPES))
+		return -EINVAL;
+
+	/* Always allow software iftypes */
+	if (local->hw.wiphy->software_iftypes & BIT(iftype)) {
+		if (radar_detect)
+			return -EINVAL;
+		return 0;
+	}
+
+	memset(num, 0, sizeof(num));
+
+	if (iftype != NL80211_IFTYPE_UNSPECIFIED)
+		num[iftype] = 1;
+
+	list_for_each_entry(ctx, &local->chanctx_list, list) {
+		if (ctx->conf.radar_enabled)
+			radar_detect |= BIT(ctx->conf.def.width);
+		if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) {
+			num_different_channels++;
+			continue;
+		}
+		if ((chanmode == IEEE80211_CHANCTX_SHARED) &&
+		    cfg80211_chandef_compatible(chandef,
+						&ctx->conf.def))
+			continue;
+		num_different_channels++;
+	}
+
+	list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) {
+		struct wireless_dev *wdev_iter;
+
+		wdev_iter = &sdata_iter->wdev;
+
+		if (sdata_iter == sdata ||
+		    rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL ||
+		    local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
+			continue;
+
+		num[wdev_iter->iftype]++;
+		total++;
+	}
+
+	if (total == 1 && !radar_detect)
+		return 0;
+
+	return cfg80211_check_combinations(local->hw.wiphy,
+					   num_different_channels,
+					   radar_detect, num);
+}
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 6f6f75609852..6684c5d965d8 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -417,6 +417,9 @@ cfg80211_can_change_interface(struct cfg80211_registered_device *rdev,
 			      struct wireless_dev *wdev,
 			      enum nl80211_iftype iftype)
 {
+	/* TODO: For this function, we'll probably need to keep some
+	 * kind of interface combination check in cfg80211...
+	 */
 	return cfg80211_can_use_iftype_chan(rdev, wdev, iftype, NULL,
 					    CHAN_MODE_UNDEFINED, 0);
 }
@@ -431,16 +434,6 @@ cfg80211_can_add_interface(struct cfg80211_registered_device *rdev,
 	return cfg80211_can_change_interface(rdev, NULL, iftype);
 }
 
-static inline int
-cfg80211_can_use_chan(struct cfg80211_registered_device *rdev,
-		      struct wireless_dev *wdev,
-		      struct ieee80211_channel *chan,
-		      enum cfg80211_chan_mode chanmode)
-{
-	return cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
-					    chan, chanmode, 0);
-}
-
 static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
 {
 	unsigned long end = jiffies;
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index a6b5bdad039c..faf4961d47d8 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -135,6 +135,10 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
 		radar_detect_width = BIT(params->chandef.width);
 	}
 
+	/* TODO: We need to check the combinations at this point, we
+	 * probably must move this call down to join_ibss() in
+	 * mac80211.
+	 */
 	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
 					   check_chan,
 					   (params->channel_fixed &&
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 6ebe883653a4..3ddfb7cd335e 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -99,7 +99,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 			 const struct mesh_config *conf)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	u8 radar_detect_width = 0;
 	int err;
 
 	BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN);
@@ -179,22 +178,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 				     NL80211_IFTYPE_MESH_POINT))
 		return -EINVAL;
 
-	err = cfg80211_chandef_dfs_required(wdev->wiphy,
-					    &setup->chandef,
-					    NL80211_IFTYPE_MESH_POINT);
-	if (err < 0)
-		return err;
-
-	if (err > 0)
-		radar_detect_width = BIT(setup->chandef.width);
-
-	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
-					   setup->chandef.chan,
-					   CHAN_MODE_SHARED,
-					   radar_detect_width);
-	if (err)
-		return err;
-
 	err = rdev_join_mesh(rdev, dev, conf, setup);
 	if (!err) {
 		memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len);
@@ -240,17 +223,6 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
 		if (!netif_running(wdev->netdev))
 			return -ENETDOWN;
 
-		/* cfg80211_can_use_chan() calls
-		 * cfg80211_can_use_iftype_chan() with no radar
-		 * detection, so if we're trying to use a radar
-		 * channel here, something is wrong.
-		 */
-		WARN_ON_ONCE(chandef->chan->flags & IEEE80211_CHAN_RADAR);
-		err = cfg80211_can_use_chan(rdev, wdev, chandef->chan,
-					    CHAN_MODE_SHARED);
-		if (err)
-			return err;
-
 		err = rdev_libertas_set_mesh_channel(rdev, wdev->netdev,
 						     chandef->chan);
 		if (!err)
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index c52ff59a3e96..4b4ba707fab9 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -233,14 +233,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 	if (!req.bss)
 		return -ENOENT;
 
-	err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel,
-				    CHAN_MODE_SHARED);
-	if (err)
-		goto out;
-
 	err = rdev_auth(rdev, dev, &req);
 
-out:
 	cfg80211_put_bss(&rdev->wiphy, req.bss);
 	return err;
 }
@@ -306,16 +300,10 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 	if (!req->bss)
 		return -ENOENT;
 
-	err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED);
-	if (err)
-		goto out;
-
 	err = rdev_assoc(rdev, dev, req);
 	if (!err)
 		cfg80211_hold_bss(bss_from_pub(req->bss));
-
-out:
-	if (err)
+	else
 		cfg80211_put_bss(&rdev->wiphy, req->bss);
 
 	return err;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4f82b9b71db1..2b99aad33ae0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3155,7 +3155,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_ap_settings params;
 	int err;
-	u8 radar_detect_width = 0;
 
 	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
 	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
@@ -3275,24 +3274,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 				     wdev->iftype))
 		return -EINVAL;
 
-	err = cfg80211_chandef_dfs_required(wdev->wiphy,
-					    &params.chandef,
-					    NL80211_IFTYPE_AP);
-	if (err < 0)
-		return err;
-
-	if (err > 0) {
-		params.radar_required = true;
-		radar_detect_width = BIT(params.chandef.width);
-	}
-
-	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
-					   params.chandef.chan,
-					   CHAN_MODE_SHARED,
-					   radar_detect_width);
-	if (err)
-		return err;
-
 	if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
 		params.acl = parse_acl_data(&rdev->wiphy, info);
 		if (IS_ERR(params.acl))
@@ -5823,12 +5804,6 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
 	if (!rdev->ops->start_radar_detection)
 		return -EOPNOTSUPP;
 
-	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
-					   chandef.chan, CHAN_MODE_SHARED,
-					   BIT(chandef.width));
-	if (err)
-		return err;
-
 	cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef);
 	if (WARN_ON(!cac_time_ms))
 		cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
@@ -5957,6 +5932,10 @@ skip_beacons:
 		params.radar_required = true;
 	}
 
+	/* TODO: I left this here for now.  With channel switch, the
+	 * verification is a bit more complicated, because we only do
+	 * it later when the channel switch really happens.
+	 */
 	err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
 					   params.chandef.chan,
 					   CHAN_MODE_SHARED,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 46f404df35e3..9bfc4c621509 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1375,6 +1375,11 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
 
 	num[iftype] = 1;
 
+	/* TODO: We'll probably not need this anymore, since this
+	 * should only be called with CHAN_MODE_UNDEFINED. There are
+	 * still a couple of pending calls where other chanmodes are
+	 * used, but we should get rid of them.
+	 */
 	switch (chanmode) {
 	case CHAN_MODE_UNDEFINED:
 		break;
-- 
cgit v1.2.3-71-gd317


From 5d52ee81101943c507f45c76368026935f6bb75a Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Thu, 27 Feb 2014 14:33:47 +0200
Subject: mac80211: allow reservation of a running chanctx

With single-channel drivers, we need to be able to change a running
chanctx if we want to use chanctx reservation.  Not all drivers may be
able to do this, so add a flag that indicates support for it.

Changing a running chanctx can also be used as an optimization in
multi-channel drivers when the context needs to be reserved for future
usage.

Introduce IEEE80211_CHANCTX_RESERVED chanctx mode to mark a channel as
reserved so nobody else can use it (since we know it's going to
change).  In the future, we may allow several vifs to use the same
reservation as long as they plan to use the chanctx on the same
future channel.

Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h |  7 +++++
 net/mac80211/chan.c    | 79 ++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 68 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index faa7b9cf9cc7..03ab3c08fb70 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1555,6 +1555,12 @@ struct ieee80211_tx_control {
  *	for a single active channel while using channel contexts. When support
  *	is not enabled the default action is to disconnect when getting the
  *	CSA frame.
+ *
+ * @IEEE80211_HW_CHANGE_RUNNING_CHANCTX: The hardware can change a
+ *	channel context on-the-fly.  This is needed for channel switch
+ *	on single-channel hardware.  It can also be used as an
+ *	optimization in certain channel switch cases with
+ *	multi-channel.
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_HAS_RATE_CONTROL			= 1<<0,
@@ -1586,6 +1592,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_TIMING_BEACON_ONLY			= 1<<26,
 	IEEE80211_HW_SUPPORTS_HT_CCK_RATES		= 1<<27,
 	IEEE80211_HW_CHANCTX_STA_CSA			= 1<<28,
+	IEEE80211_HW_CHANGE_RUNNING_CHANCTX		= 1<<29,
 };
 
 /**
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index baad75610a6a..57b8ab1bc5c1 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -168,6 +168,27 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
 	}
 }
 
+static bool ieee80211_chanctx_is_reserved(struct ieee80211_local *local,
+					  struct ieee80211_chanctx *ctx)
+{
+	struct ieee80211_sub_if_data *sdata;
+	bool ret = false;
+
+	lockdep_assert_held(&local->chanctx_mtx);
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+		if (!ieee80211_sdata_running(sdata))
+			continue;
+		if (sdata->reserved_chanctx == ctx) {
+			ret = true;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+	return ret;
+}
+
 static struct ieee80211_chanctx *
 ieee80211_find_chanctx(struct ieee80211_local *local,
 		       const struct cfg80211_chan_def *chandef,
@@ -183,7 +204,12 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
 	list_for_each_entry(ctx, &local->chanctx_list, list) {
 		const struct cfg80211_chan_def *compat;
 
-		if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE)
+		/* We don't support chanctx reservation for multiple
+		 * vifs yet, so don't allow reserved chanctxs to be
+		 * reused.
+		 */
+		if ((ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) ||
+		    ieee80211_chanctx_is_reserved(local, ctx))
 			continue;
 
 		compat = cfg80211_chandef_compatible(&ctx->conf.def, chandef);
@@ -718,11 +744,20 @@ int ieee80211_vif_reserve_chanctx(struct ieee80211_sub_if_data *sdata,
 	/* try to find another context with the chandef we want */
 	new_ctx = ieee80211_find_chanctx(local, chandef, mode);
 	if (!new_ctx) {
-		/* create a new context */
-		new_ctx = ieee80211_new_chanctx(local, chandef, mode);
-		if (IS_ERR(new_ctx)) {
-			ret = PTR_ERR(new_ctx);
-			goto out;
+		if (curr_ctx->refcount == 1 &&
+		    (local->hw.flags & IEEE80211_HW_CHANGE_RUNNING_CHANCTX)) {
+			/* if we're the only users of the chanctx and
+			 * the driver supports changing a running
+			 * context, reserve our current context
+			 */
+			new_ctx = curr_ctx;
+		} else {
+			/* create a new context and reserve it */
+			new_ctx = ieee80211_new_chanctx(local, chandef, mode);
+			if (IS_ERR(new_ctx)) {
+				ret = PTR_ERR(new_ctx);
+				goto out;
+			}
 		}
 	}
 
@@ -770,22 +805,30 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata,
 
 	sdata->vif.bss_conf.chandef = sdata->reserved_chandef;
 
-	/* unref our reservation before assigning */
+	/* unref our reservation */
 	ctx->refcount--;
 	sdata->reserved_chanctx = NULL;
 
-	ret = ieee80211_assign_vif_chanctx(sdata, ctx);
-	if (old_ctx->refcount == 0)
-		ieee80211_free_chanctx(local, old_ctx);
-	if (ret) {
-		/* if assign fails refcount stays the same */
-		if (ctx->refcount == 0)
-			ieee80211_free_chanctx(local, ctx);
-		goto out;
-	}
+	if (old_ctx == ctx) {
+		/* This is our own context, just change it */
+		ret = __ieee80211_vif_change_channel(sdata, old_ctx,
+						     &tmp_changed);
+		if (ret)
+			goto out;
+	} else {
+		ret = ieee80211_assign_vif_chanctx(sdata, ctx);
+		if (old_ctx->refcount == 0)
+			ieee80211_free_chanctx(local, old_ctx);
+		if (ret) {
+			/* if assign fails refcount stays the same */
+			if (ctx->refcount == 0)
+				ieee80211_free_chanctx(local, ctx);
+			goto out;
+		}
 
-	if (sdata->vif.type == NL80211_IFTYPE_AP)
-		__ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
+		if (sdata->vif.type == NL80211_IFTYPE_AP)
+			__ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
+	}
 
 	*changed = tmp_changed;
 
-- 
cgit v1.2.3-71-gd317


From ce26151bc35d9d893ec1b441a261ea145511c89f Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@qca.qualcomm.com>
Date: Thu, 3 Apr 2014 10:03:45 +0300
Subject: cfg80211: update comment about WIPHY_FLAG_CUSTOM_REGULATORY

Commit a2f73b6c5db3c ("cfg80211: move regulatory flags to their own variable")
renamed WIPHY_FLAG_CUSTOM_REGULATORY to REGULATORY_CUSTOM_REG, but missed to
update one comment.

Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index fb8afcee62b4..9496fe5ea6b4 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3609,7 +3609,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2);
  * default channel settings will be disregarded. If no rule is found for a
  * channel on the regulatory domain the channel will be disabled.
  * Drivers using this for a wiphy should also set the wiphy flag
- * WIPHY_FLAG_CUSTOM_REGULATORY or cfg80211 will set it for the wiphy
+ * REGULATORY_CUSTOM_REG or cfg80211 will set it for the wiphy
  * that called this helper.
  */
 void wiphy_apply_custom_regulatory(struct wiphy *wiphy,
-- 
cgit v1.2.3-71-gd317


From 041f607de17666bed0407370e3f4a56e697354a8 Mon Sep 17 00:00:00 2001
From: Rostislav Lisovy <lisovy@gmail.com>
Date: Wed, 2 Apr 2014 15:31:55 +0200
Subject: mac80211: Update conf_is_ht() to work properly with 5/10MHz channels

The channels with 5/10MHz bandwidth are not HT. We have to
reflect this in conf_is_ht() function which returns whether the
particular channel is HT or not.

Signed-off-by: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 03ab3c08fb70..a3044e124229 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4585,7 +4585,9 @@ conf_is_ht40(struct ieee80211_conf *conf)
 static inline bool
 conf_is_ht(struct ieee80211_conf *conf)
 {
-	return conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT;
+	return (conf->chandef.width != NL80211_CHAN_WIDTH_5) &&
+		(conf->chandef.width != NL80211_CHAN_WIDTH_10) &&
+		(conf->chandef.width != NL80211_CHAN_WIDTH_20_NOHT);
 }
 
 static inline enum nl80211_iftype
-- 
cgit v1.2.3-71-gd317


From 848ef58695d8c013f24633352586279cfb40e9d9 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Tue, 1 Apr 2014 17:02:53 +0300
Subject: net: rfkill: gpio: remove unused and obsolete platform parameters

After upgrading to descriptor based gpios, the gpio numbers
are not used anymore. The power_clk_name and the platform
specific setup and close hooks are not used by anybody, and
we should not encourage use of such things, so removing them.

Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/rfkill-gpio.h | 10 ----------
 net/rfkill/rfkill-gpio.c    | 15 +--------------
 2 files changed, 1 insertion(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/rfkill-gpio.h b/include/linux/rfkill-gpio.h
index 4d09f6eab359..20bcb55498cd 100644
--- a/include/linux/rfkill-gpio.h
+++ b/include/linux/rfkill-gpio.h
@@ -27,21 +27,11 @@
  * struct rfkill_gpio_platform_data - platform data for rfkill gpio device.
  * for unused gpio's, the expected value is -1.
  * @name:		name for the gpio rf kill instance
- * @reset_gpio:		GPIO which is used for reseting rfkill switch
- * @shutdown_gpio:	GPIO which is used for shutdown of rfkill switch
- * @power_clk_name:	[optional] name of clk to turn off while blocked
- * @gpio_runtime_close:	clean up platform specific gpio configuration
- * @gpio_runtime_setup:	set up platform specific gpio configuration
  */
 
 struct rfkill_gpio_platform_data {
 	char			*name;
-	int			reset_gpio;
-	int			shutdown_gpio;
-	const char		*power_clk_name;
 	enum rfkill_type	type;
-	void	(*gpio_runtime_close)(struct platform_device *);
-	int	(*gpio_runtime_setup)(struct platform_device *);
 };
 
 #endif /* __RFKILL_GPIO_H */
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index bd2a5b90400c..0adda445dfe7 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -87,7 +87,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 {
 	struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
 	struct rfkill_gpio_data *rfkill;
-	const char *clk_name = NULL;
 	struct gpio_desc *gpio;
 	int ret;
 	int len;
@@ -101,7 +100,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 		if (ret)
 			return ret;
 	} else if (pdata) {
-		clk_name = pdata->power_clk_name;
 		rfkill->name = pdata->name;
 		rfkill->type = pdata->type;
 	} else {
@@ -120,7 +118,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 	snprintf(rfkill->reset_name, len + 6 , "%s_reset", rfkill->name);
 	snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", rfkill->name);
 
-	rfkill->clk = devm_clk_get(&pdev->dev, clk_name);
+	rfkill->clk = devm_clk_get(&pdev->dev, NULL);
 
 	gpio = devm_gpiod_get_index(&pdev->dev, rfkill->reset_name, 0);
 	if (!IS_ERR(gpio)) {
@@ -146,14 +144,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	if (pdata && pdata->gpio_runtime_setup) {
-		ret = pdata->gpio_runtime_setup(pdev);
-		if (ret) {
-			dev_err(&pdev->dev, "can't set up gpio\n");
-			return ret;
-		}
-	}
-
 	rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev,
 					  rfkill->type, &rfkill_gpio_ops,
 					  rfkill);
@@ -174,10 +164,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 static int rfkill_gpio_remove(struct platform_device *pdev)
 {
 	struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev);
-	struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
 
-	if (pdata && pdata->gpio_runtime_close)
-		pdata->gpio_runtime_close(pdev);
 	rfkill_unregister(rfkill->rfkill_dev);
 	rfkill_destroy(rfkill->rfkill_dev);
 
-- 
cgit v1.2.3-71-gd317


From f39d2fa0122e6abd8505a3598f3aa535d0d5aade Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Mon, 24 Feb 2014 18:37:35 +0800
Subject: mtd: spi-nor: copy the SPI NOR commands to a new header file

This patch adds a new header :spi-nor.h,
and copies all the SPI NOR commands and relative macros into this new header.

This hearder can be used by the m25p80.c and other spi-nor controller,
such as Freescale's Quadspi.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Acked-by: Marek Vasut <marex@denx.de>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/spi-nor.h | 55 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 include/linux/mtd/spi-nor.h

(limited to 'include')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
new file mode 100644
index 000000000000..483fc2a086c4
--- /dev/null
+++ b/include/linux/mtd/spi-nor.h
@@ -0,0 +1,55 @@
+#ifndef __LINUX_MTD_SPI_NOR_H
+#define __LINUX_MTD_SPI_NOR_H
+
+/* Flash opcodes. */
+#define	OPCODE_WREN		0x06	/* Write enable */
+#define	OPCODE_RDSR		0x05	/* Read status register */
+#define	OPCODE_WRSR		0x01	/* Write status register 1 byte */
+#define	OPCODE_NORM_READ	0x03	/* Read data bytes (low frequency) */
+#define	OPCODE_FAST_READ	0x0b	/* Read data bytes (high frequency) */
+#define	OPCODE_DUAL_READ        0x3b    /* Read data bytes (Dual SPI) */
+#define	OPCODE_QUAD_READ        0x6b    /* Read data bytes (Quad SPI) */
+#define	OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
+#define	OPCODE_BE_4K		0x20	/* Erase 4KiB block */
+#define	OPCODE_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
+#define	OPCODE_BE_32K		0x52	/* Erase 32KiB block */
+#define	OPCODE_CHIP_ERASE	0xc7	/* Erase whole flash chip */
+#define	OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
+#define	OPCODE_RDID		0x9f	/* Read JEDEC ID */
+#define	OPCODE_RDCR             0x35    /* Read configuration register */
+
+/* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
+#define	OPCODE_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
+#define	OPCODE_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
+#define	OPCODE_DUAL_READ_4B	0x3c    /* Read data bytes (Dual SPI) */
+#define	OPCODE_QUAD_READ_4B	0x6c    /* Read data bytes (Quad SPI) */
+#define	OPCODE_PP_4B		0x12	/* Page program (up to 256 bytes) */
+#define	OPCODE_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
+
+/* Used for SST flashes only. */
+#define	OPCODE_BP		0x02	/* Byte program */
+#define	OPCODE_WRDI		0x04	/* Write disable */
+#define	OPCODE_AAI_WP		0xad	/* Auto address increment word program */
+
+/* Used for Macronix and Winbond flashes. */
+#define	OPCODE_EN4B		0xb7	/* Enter 4-byte mode */
+#define	OPCODE_EX4B		0xe9	/* Exit 4-byte mode */
+
+/* Used for Spansion flashes only. */
+#define	OPCODE_BRWR		0x17	/* Bank register write */
+
+/* Status Register bits. */
+#define	SR_WIP			1	/* Write in progress */
+#define	SR_WEL			2	/* Write enable latch */
+/* meaning of other SR_* bits may differ between vendors */
+#define	SR_BP0			4	/* Block protect 0 */
+#define	SR_BP1			8	/* Block protect 1 */
+#define	SR_BP2			0x10	/* Block protect 2 */
+#define	SR_SRWD			0x80	/* SR write protect */
+
+#define SR_QUAD_EN_MX           0x40    /* Macronix Quad I/O */
+
+/* Configuration Register bits. */
+#define CR_QUAD_EN_SPAN		0x2     /* Spansion Quad I/O */
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 6e602ef73334550bbbb8be1041a3ce6eecbd42f1 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Mon, 24 Feb 2014 18:37:36 +0800
Subject: mtd: spi-nor: add the basic data structures

The spi_nor{} is cloned from the m25p{}.
The spi_nor{} can be used by both the m25p80 and spi-nor controller.

We also add the spi_nor_xfer_cfg{} which can be used by the two
fundamental primitives: read_xfer/write_xfer.

 1) the hooks for spi_nor{}:
    @prepare/unpreare: used to do some work before or after the
             read/write/erase/lock/unlock.
    @read_xfer/write_xfer: We can use these two hooks to code all
             the following hooks if the driver tries to implement them
             by itself.
    @read_reg: used to read the registers, such as read status register,
             read configure register.
    @write_reg: used to write the registers, such as write enable,
             erase sector.
    @read_id: read out the ID info.
    @wait_till_ready: wait till the NOR becomes ready.
    @read: read out the data from the NOR.
    @write: write data to the NOR.
    @erase: erase a sector of the NOR.

 2) Add a new field sst_write_second for the SST NOR write.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Acked-by: Marek Vasut <marex@denx.de>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/spi-nor.h | 110 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

(limited to 'include')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 483fc2a086c4..3a3c3872c8cd 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -52,4 +52,114 @@
 /* Configuration Register bits. */
 #define CR_QUAD_EN_SPAN		0x2     /* Spansion Quad I/O */
 
+enum read_mode {
+	SPI_NOR_NORMAL = 0,
+	SPI_NOR_FAST,
+	SPI_NOR_DUAL,
+	SPI_NOR_QUAD,
+};
+
+/**
+ * struct spi_nor_xfer_cfg - Structure for defining a Serial Flash transfer
+ * @wren:		command for "Write Enable", or 0x00 for not required
+ * @cmd:		command for operation
+ * @cmd_pins:		number of pins to send @cmd (1, 2, 4)
+ * @addr:		address for operation
+ * @addr_pins:		number of pins to send @addr (1, 2, 4)
+ * @addr_width:		number of address bytes
+ *			(3,4, or 0 for address not required)
+ * @mode:		mode data
+ * @mode_pins:		number of pins to send @mode (1, 2, 4)
+ * @mode_cycles:	number of mode cycles (0 for mode not required)
+ * @dummy_cycles:	number of dummy cycles (0 for dummy not required)
+ */
+struct spi_nor_xfer_cfg {
+	u8		wren;
+	u8		cmd;
+	u8		cmd_pins;
+	u32		addr;
+	u8		addr_pins;
+	u8		addr_width;
+	u8		mode;
+	u8		mode_pins;
+	u8		mode_cycles;
+	u8		dummy_cycles;
+};
+
+#define	SPI_NOR_MAX_CMD_SIZE	8
+enum spi_nor_ops {
+	SPI_NOR_OPS_READ = 0,
+	SPI_NOR_OPS_WRITE,
+	SPI_NOR_OPS_ERASE,
+	SPI_NOR_OPS_LOCK,
+	SPI_NOR_OPS_UNLOCK,
+};
+
+/**
+ * struct spi_nor - Structure for defining a the SPI NOR layer
+ * @mtd:		point to a mtd_info structure
+ * @lock:		the lock for the read/write/erase/lock/unlock operations
+ * @dev:		point to a spi device, or a spi nor controller device.
+ * @page_size:		the page size of the SPI NOR
+ * @addr_width:		number of address bytes
+ * @erase_opcode:	the opcode for erasing a sector
+ * @read_opcode:	the read opcode
+ * @read_dummy:		the dummy needed by the read operation
+ * @program_opcode:	the program opcode
+ * @flash_read:		the mode of the read
+ * @sst_write_second:	used by the SST write operation
+ * @cfg:		used by the read_xfer/write_xfer
+ * @cmd_buf:		used by the write_reg
+ * @prepare:		[OPTIONAL] do some preparations for the
+ *			read/write/erase/lock/unlock operations
+ * @unprepare:		[OPTIONAL] do some post work after the
+ *			read/write/erase/lock/unlock operations
+ * @read_xfer:		[OPTIONAL] the read fundamental primitive
+ * @write_xfer:		[OPTIONAL] the writefundamental primitive
+ * @read_reg:		[DRIVER-SPECIFIC] read out the register
+ * @write_reg:		[DRIVER-SPECIFIC] write data to the register
+ * @read_id:		[REPLACEABLE] read out the ID data, and find
+ *			the proper spi_device_id
+ * @wait_till_ready:	[REPLACEABLE] wait till the NOR becomes ready
+ * @read:		[DRIVER-SPECIFIC] read data from the SPI NOR
+ * @write:		[DRIVER-SPECIFIC] write data to the SPI NOR
+ * @erase:		[DRIVER-SPECIFIC] erase a sector of the SPI NOR
+ *			at the offset @offs
+ * @priv:		the private data
+ */
+struct spi_nor {
+	struct mtd_info		*mtd;
+	struct mutex		lock;
+	struct device		*dev;
+	u32			page_size;
+	u8			addr_width;
+	u8			erase_opcode;
+	u8			read_opcode;
+	u8			read_dummy;
+	u8			program_opcode;
+	enum read_mode		flash_read;
+	bool			sst_write_second;
+	struct spi_nor_xfer_cfg	cfg;
+	u8			cmd_buf[SPI_NOR_MAX_CMD_SIZE];
+
+	int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops);
+	void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops);
+	int (*read_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg,
+			 u8 *buf, size_t len);
+	int (*write_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg,
+			  u8 *buf, size_t len);
+	int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len);
+	int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len,
+			int write_enable);
+	const struct spi_device_id *(*read_id)(struct spi_nor *nor);
+	int (*wait_till_ready)(struct spi_nor *nor);
+
+	int (*read)(struct spi_nor *nor, loff_t from,
+			size_t len, size_t *retlen, u_char *read_buf);
+	void (*write)(struct spi_nor *nor, loff_t to,
+			size_t len, size_t *retlen, const u_char *write_buf);
+	int (*erase)(struct spi_nor *nor, loff_t offs);
+
+	void *priv;
+};
 #endif
-- 
cgit v1.2.3-71-gd317


From b199489d37b21c5e294f95bf265acc5dde3fc3a2 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Mon, 24 Feb 2014 18:37:37 +0800
Subject: mtd: spi-nor: add the framework for SPI NOR

This patch cloned most of the m25p80.c. In theory, it adds a new spi-nor layer.

Before this patch, the layer is like:

                   MTD
         ------------------------
                  m25p80
         ------------------------
	       spi bus driver
         ------------------------
	        SPI NOR chip

After this patch, the layer is like:
                   MTD
         ------------------------
                  spi-nor
         ------------------------
                  m25p80
         ------------------------
	       spi bus driver
         ------------------------
	       SPI NOR chip

With the spi-nor controller driver(Freescale Quadspi), it looks like:
                   MTD
         ------------------------
                  spi-nor
         ------------------------
                fsl-quadspi
         ------------------------
	       SPI NOR chip

New APIs:
   spi_nor_scan: used to scan a spi-nor flash.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Acked-by: Marek Vasut <marex@denx.de>
[Brian: rebased to include additional m25p_ids[] entry]
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/Kconfig           |    2 +
 drivers/mtd/Makefile          |    1 +
 drivers/mtd/spi-nor/Kconfig   |    6 +
 drivers/mtd/spi-nor/Makefile  |    1 +
 drivers/mtd/spi-nor/spi-nor.c | 1088 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/spi-nor.h   |   20 +
 6 files changed, 1118 insertions(+)
 create mode 100644 drivers/mtd/spi-nor/Kconfig
 create mode 100644 drivers/mtd/spi-nor/Makefile
 create mode 100644 drivers/mtd/spi-nor/spi-nor.c

(limited to 'include')

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 5d49a2129618..94b821042d9d 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -321,6 +321,8 @@ source "drivers/mtd/onenand/Kconfig"
 
 source "drivers/mtd/lpddr/Kconfig"
 
+source "drivers/mtd/spi-nor/Kconfig"
+
 source "drivers/mtd/ubi/Kconfig"
 
 endif # MTD
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 4cfb31e6c966..40fd15344387 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -32,4 +32,5 @@ inftl-objs		:= inftlcore.o inftlmount.o
 
 obj-y		+= chips/ lpddr/ maps/ devices/ nand/ onenand/ tests/
 
+obj-$(CONFIG_MTD_SPI_NOR_BASE)	+= spi-nor/
 obj-$(CONFIG_MTD_UBI)		+= ubi/
diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig
new file mode 100644
index 000000000000..41591af67aca
--- /dev/null
+++ b/drivers/mtd/spi-nor/Kconfig
@@ -0,0 +1,6 @@
+config MTD_SPI_NOR_BASE
+	bool "the framework for SPI-NOR support"
+	depends on MTD
+	help
+	  This is the framework for the SPI NOR which can be used by the SPI
+	  device drivers and the SPI-NOR device driver.
diff --git a/drivers/mtd/spi-nor/Makefile b/drivers/mtd/spi-nor/Makefile
new file mode 100644
index 000000000000..7dfe1f9b6940
--- /dev/null
+++ b/drivers/mtd/spi-nor/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MTD_SPI_NOR_BASE)	+= spi-nor.o
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
new file mode 100644
index 000000000000..50b929095bdb
--- /dev/null
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -0,0 +1,1088 @@
+/*
+ * Cloned most of the code from the m25p80.c
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/math64.h>
+
+#include <linux/mtd/cfi.h>
+#include <linux/mtd/mtd.h>
+#include <linux/of_platform.h>
+#include <linux/spi/flash.h>
+#include <linux/mtd/spi-nor.h>
+
+/* Define max times to check status register before we give up. */
+#define	MAX_READY_WAIT_JIFFIES	(40 * HZ) /* M25P16 specs 40s max chip erase */
+
+#define JEDEC_MFR(_jedec_id)	((_jedec_id) >> 16)
+
+/*
+ * Read the status register, returning its value in the location
+ * Return the status register value.
+ * Returns negative if error occurred.
+ */
+static int read_sr(struct spi_nor *nor)
+{
+	int ret;
+	u8 val;
+
+	ret = nor->read_reg(nor, OPCODE_RDSR, &val, 1);
+	if (ret < 0) {
+		pr_err("error %d reading SR\n", (int) ret);
+		return ret;
+	}
+
+	return val;
+}
+
+/*
+ * Read configuration register, returning its value in the
+ * location. Return the configuration register value.
+ * Returns negative if error occured.
+ */
+static int read_cr(struct spi_nor *nor)
+{
+	int ret;
+	u8 val;
+
+	ret = nor->read_reg(nor, OPCODE_RDCR, &val, 1);
+	if (ret < 0) {
+		dev_err(nor->dev, "error %d reading CR\n", ret);
+		return ret;
+	}
+
+	return val;
+}
+
+/*
+ * Dummy Cycle calculation for different type of read.
+ * It can be used to support more commands with
+ * different dummy cycle requirements.
+ */
+static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor)
+{
+	switch (nor->flash_read) {
+	case SPI_NOR_FAST:
+	case SPI_NOR_DUAL:
+	case SPI_NOR_QUAD:
+		return 1;
+	case SPI_NOR_NORMAL:
+		return 0;
+	}
+	return 0;
+}
+
+/*
+ * Write status register 1 byte
+ * Returns negative if error occurred.
+ */
+static inline int write_sr(struct spi_nor *nor, u8 val)
+{
+	nor->cmd_buf[0] = val;
+	return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0);
+}
+
+/*
+ * Set write enable latch with Write Enable command.
+ * Returns negative if error occurred.
+ */
+static inline int write_enable(struct spi_nor *nor)
+{
+	return nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0);
+}
+
+/*
+ * Send write disble instruction to the chip.
+ */
+static inline int write_disable(struct spi_nor *nor)
+{
+	return nor->write_reg(nor, OPCODE_WRDI, NULL, 0, 0);
+}
+
+static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd)
+{
+	return mtd->priv;
+}
+
+/* Enable/disable 4-byte addressing mode. */
+static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable)
+{
+	int status;
+	bool need_wren = false;
+	u8 cmd;
+
+	switch (JEDEC_MFR(jedec_id)) {
+	case CFI_MFR_ST: /* Micron, actually */
+		/* Some Micron need WREN command; all will accept it */
+		need_wren = true;
+	case CFI_MFR_MACRONIX:
+	case 0xEF /* winbond */:
+		if (need_wren)
+			write_enable(nor);
+
+		cmd = enable ? OPCODE_EN4B : OPCODE_EX4B;
+		status = nor->write_reg(nor, cmd, NULL, 0, 0);
+		if (need_wren)
+			write_disable(nor);
+
+		return status;
+	default:
+		/* Spansion style */
+		nor->cmd_buf[0] = enable << 7;
+		return nor->write_reg(nor, OPCODE_BRWR, nor->cmd_buf, 1, 0);
+	}
+}
+
+static int spi_nor_wait_till_ready(struct spi_nor *nor)
+{
+	unsigned long deadline;
+	int sr;
+
+	deadline = jiffies + MAX_READY_WAIT_JIFFIES;
+
+	do {
+		cond_resched();
+
+		sr = read_sr(nor);
+		if (sr < 0)
+			break;
+		else if (!(sr & SR_WIP))
+			return 0;
+	} while (!time_after_eq(jiffies, deadline));
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * Service routine to read status register until ready, or timeout occurs.
+ * Returns non-zero if error.
+ */
+static int wait_till_ready(struct spi_nor *nor)
+{
+	return nor->wait_till_ready(nor);
+}
+
+/*
+ * Erase the whole flash memory
+ *
+ * Returns 0 if successful, non-zero otherwise.
+ */
+static int erase_chip(struct spi_nor *nor)
+{
+	int ret;
+
+	dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd->size >> 10));
+
+	/* Wait until finished previous write command. */
+	ret = wait_till_ready(nor);
+	if (ret)
+		return ret;
+
+	/* Send write enable, then erase commands. */
+	write_enable(nor);
+
+	return nor->write_reg(nor, OPCODE_CHIP_ERASE, NULL, 0, 0);
+}
+
+static int spi_nor_lock_and_prep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	int ret = 0;
+
+	mutex_lock(&nor->lock);
+
+	if (nor->prepare) {
+		ret = nor->prepare(nor, ops);
+		if (ret) {
+			dev_err(nor->dev, "failed in the preparation.\n");
+			mutex_unlock(&nor->lock);
+			return ret;
+		}
+	}
+	return ret;
+}
+
+static void spi_nor_unlock_and_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	if (nor->unprepare)
+		nor->unprepare(nor, ops);
+	mutex_unlock(&nor->lock);
+}
+
+/*
+ * Erase an address range on the nor chip.  The address range may extend
+ * one or more erase sectors.  Return an error is there is a problem erasing.
+ */
+static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	u32 addr, len;
+	uint32_t rem;
+	int ret;
+
+	dev_dbg(nor->dev, "at 0x%llx, len %lld\n", (long long)instr->addr,
+			(long long)instr->len);
+
+	div_u64_rem(instr->len, mtd->erasesize, &rem);
+	if (rem)
+		return -EINVAL;
+
+	addr = instr->addr;
+	len = instr->len;
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_ERASE);
+	if (ret)
+		return ret;
+
+	/* whole-chip erase? */
+	if (len == mtd->size) {
+		if (erase_chip(nor)) {
+			ret = -EIO;
+			goto erase_err;
+		}
+
+	/* REVISIT in some cases we could speed up erasing large regions
+	 * by using OPCODE_SE instead of OPCODE_BE_4K.  We may have set up
+	 * to use "small sector erase", but that's not always optimal.
+	 */
+
+	/* "sector"-at-a-time erase */
+	} else {
+		while (len) {
+			if (nor->erase(nor, addr)) {
+				ret = -EIO;
+				goto erase_err;
+			}
+
+			addr += mtd->erasesize;
+			len -= mtd->erasesize;
+		}
+	}
+
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_ERASE);
+
+	instr->state = MTD_ERASE_DONE;
+	mtd_erase_callback(instr);
+
+	return ret;
+
+erase_err:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_ERASE);
+	instr->state = MTD_ERASE_FAILED;
+	return ret;
+}
+
+static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	uint32_t offset = ofs;
+	uint8_t status_old, status_new;
+	int ret = 0;
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_LOCK);
+	if (ret)
+		return ret;
+
+	/* Wait until finished previous command */
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto err;
+
+	status_old = read_sr(nor);
+
+	if (offset < mtd->size - (mtd->size / 2))
+		status_new = status_old | SR_BP2 | SR_BP1 | SR_BP0;
+	else if (offset < mtd->size - (mtd->size / 4))
+		status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1;
+	else if (offset < mtd->size - (mtd->size / 8))
+		status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0;
+	else if (offset < mtd->size - (mtd->size / 16))
+		status_new = (status_old & ~(SR_BP0 | SR_BP1)) | SR_BP2;
+	else if (offset < mtd->size - (mtd->size / 32))
+		status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0;
+	else if (offset < mtd->size - (mtd->size / 64))
+		status_new = (status_old & ~(SR_BP2 | SR_BP0)) | SR_BP1;
+	else
+		status_new = (status_old & ~(SR_BP2 | SR_BP1)) | SR_BP0;
+
+	/* Only modify protection if it will not unlock other areas */
+	if ((status_new & (SR_BP2 | SR_BP1 | SR_BP0)) >
+				(status_old & (SR_BP2 | SR_BP1 | SR_BP0))) {
+		write_enable(nor);
+		ret = write_sr(nor, status_new);
+		if (ret)
+			goto err;
+	}
+
+err:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK);
+	return ret;
+}
+
+static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	uint32_t offset = ofs;
+	uint8_t status_old, status_new;
+	int ret = 0;
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_UNLOCK);
+	if (ret)
+		return ret;
+
+	/* Wait until finished previous command */
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto err;
+
+	status_old = read_sr(nor);
+
+	if (offset+len > mtd->size - (mtd->size / 64))
+		status_new = status_old & ~(SR_BP2 | SR_BP1 | SR_BP0);
+	else if (offset+len > mtd->size - (mtd->size / 32))
+		status_new = (status_old & ~(SR_BP2 | SR_BP1)) | SR_BP0;
+	else if (offset+len > mtd->size - (mtd->size / 16))
+		status_new = (status_old & ~(SR_BP2 | SR_BP0)) | SR_BP1;
+	else if (offset+len > mtd->size - (mtd->size / 8))
+		status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0;
+	else if (offset+len > mtd->size - (mtd->size / 4))
+		status_new = (status_old & ~(SR_BP0 | SR_BP1)) | SR_BP2;
+	else if (offset+len > mtd->size - (mtd->size / 2))
+		status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0;
+	else
+		status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1;
+
+	/* Only modify protection if it will not lock other areas */
+	if ((status_new & (SR_BP2 | SR_BP1 | SR_BP0)) <
+				(status_old & (SR_BP2 | SR_BP1 | SR_BP0))) {
+		write_enable(nor);
+		ret = write_sr(nor, status_new);
+		if (ret)
+			goto err;
+	}
+
+err:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_UNLOCK);
+	return ret;
+}
+
+struct flash_info {
+	/* JEDEC id zero means "no ID" (most older chips); otherwise it has
+	 * a high byte of zero plus three data bytes: the manufacturer id,
+	 * then a two byte device id.
+	 */
+	u32		jedec_id;
+	u16             ext_id;
+
+	/* The size listed here is what works with OPCODE_SE, which isn't
+	 * necessarily called a "sector" by the vendor.
+	 */
+	unsigned	sector_size;
+	u16		n_sectors;
+
+	u16		page_size;
+	u16		addr_width;
+
+	u16		flags;
+#define	SECT_4K			0x01	/* OPCODE_BE_4K works uniformly */
+#define	SPI_NOR_NO_ERASE	0x02	/* No erase command needed */
+#define	SST_WRITE		0x04	/* use SST byte programming */
+#define	SPI_NOR_NO_FR		0x08	/* Can't do fastread */
+#define	SECT_4K_PMC		0x10	/* OPCODE_BE_4K_PMC works uniformly */
+#define	SPI_NOR_DUAL_READ	0x20    /* Flash supports Dual Read */
+#define	SPI_NOR_QUAD_READ	0x40    /* Flash supports Quad Read */
+};
+
+#define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags)	\
+	((kernel_ulong_t)&(struct flash_info) {				\
+		.jedec_id = (_jedec_id),				\
+		.ext_id = (_ext_id),					\
+		.sector_size = (_sector_size),				\
+		.n_sectors = (_n_sectors),				\
+		.page_size = 256,					\
+		.flags = (_flags),					\
+	})
+
+#define CAT25_INFO(_sector_size, _n_sectors, _page_size, _addr_width, _flags)	\
+	((kernel_ulong_t)&(struct flash_info) {				\
+		.sector_size = (_sector_size),				\
+		.n_sectors = (_n_sectors),				\
+		.page_size = (_page_size),				\
+		.addr_width = (_addr_width),				\
+		.flags = (_flags),					\
+	})
+
+/* NOTE: double check command sets and memory organization when you add
+ * more nor chips.  This current list focusses on newer chips, which
+ * have been converging on command sets which including JEDEC ID.
+ */
+const struct spi_device_id spi_nor_ids[] = {
+	/* Atmel -- some are (confusingly) marketed as "DataFlash" */
+	{ "at25fs010",  INFO(0x1f6601, 0, 32 * 1024,   4, SECT_4K) },
+	{ "at25fs040",  INFO(0x1f6604, 0, 64 * 1024,   8, SECT_4K) },
+
+	{ "at25df041a", INFO(0x1f4401, 0, 64 * 1024,   8, SECT_4K) },
+	{ "at25df321a", INFO(0x1f4701, 0, 64 * 1024,  64, SECT_4K) },
+	{ "at25df641",  INFO(0x1f4800, 0, 64 * 1024, 128, SECT_4K) },
+
+	{ "at26f004",   INFO(0x1f0400, 0, 64 * 1024,  8, SECT_4K) },
+	{ "at26df081a", INFO(0x1f4501, 0, 64 * 1024, 16, SECT_4K) },
+	{ "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K) },
+	{ "at26df321",  INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) },
+
+	{ "at45db081d", INFO(0x1f2500, 0, 64 * 1024, 16, SECT_4K) },
+
+	/* EON -- en25xxx */
+	{ "en25f32",    INFO(0x1c3116, 0, 64 * 1024,   64, SECT_4K) },
+	{ "en25p32",    INFO(0x1c2016, 0, 64 * 1024,   64, 0) },
+	{ "en25q32b",   INFO(0x1c3016, 0, 64 * 1024,   64, 0) },
+	{ "en25p64",    INFO(0x1c2017, 0, 64 * 1024,  128, 0) },
+	{ "en25q64",    INFO(0x1c3017, 0, 64 * 1024,  128, SECT_4K) },
+	{ "en25qh256",  INFO(0x1c7019, 0, 64 * 1024,  512, 0) },
+
+	/* ESMT */
+	{ "f25l32pa", INFO(0x8c2016, 0, 64 * 1024, 64, SECT_4K) },
+
+	/* Everspin */
+	{ "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "mr25h10",  CAT25_INFO(128 * 1024, 1, 256, 3, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+
+	/* GigaDevice */
+	{ "gd25q32", INFO(0xc84016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) },
+
+	/* Intel/Numonyx -- xxxs33b */
+	{ "160s33b",  INFO(0x898911, 0, 64 * 1024,  32, 0) },
+	{ "320s33b",  INFO(0x898912, 0, 64 * 1024,  64, 0) },
+	{ "640s33b",  INFO(0x898913, 0, 64 * 1024, 128, 0) },
+
+	/* Macronix */
+	{ "mx25l2005a",  INFO(0xc22012, 0, 64 * 1024,   4, SECT_4K) },
+	{ "mx25l4005a",  INFO(0xc22013, 0, 64 * 1024,   8, SECT_4K) },
+	{ "mx25l8005",   INFO(0xc22014, 0, 64 * 1024,  16, 0) },
+	{ "mx25l1606e",  INFO(0xc22015, 0, 64 * 1024,  32, SECT_4K) },
+	{ "mx25l3205d",  INFO(0xc22016, 0, 64 * 1024,  64, 0) },
+	{ "mx25l3255e",  INFO(0xc29e16, 0, 64 * 1024,  64, SECT_4K) },
+	{ "mx25l6405d",  INFO(0xc22017, 0, 64 * 1024, 128, 0) },
+	{ "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
+	{ "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
+	{ "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) },
+	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
+	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_QUAD_READ) },
+	{ "mx66l1g55g",  INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) },
+
+	/* Micron */
+	{ "n25q064",     INFO(0x20ba17, 0, 64 * 1024,  128, 0) },
+	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, 0) },
+	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, 0) },
+	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K) },
+	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K) },
+
+	/* PMC */
+	{ "pm25lv512",   INFO(0,        0, 32 * 1024,    2, SECT_4K_PMC) },
+	{ "pm25lv010",   INFO(0,        0, 32 * 1024,    4, SECT_4K_PMC) },
+	{ "pm25lq032",   INFO(0x7f9d46, 0, 64 * 1024,   64, SECT_4K) },
+
+	/* Spansion -- single (large) sector size only, at least
+	 * for the chips listed here (without boot sectors).
+	 */
+	{ "s25sl032p",  INFO(0x010215, 0x4d00,  64 * 1024,  64, 0) },
+	{ "s25sl064p",  INFO(0x010216, 0x4d00,  64 * 1024, 128, 0) },
+	{ "s25fl256s0", INFO(0x010219, 0x4d00, 256 * 1024, 128, 0) },
+	{ "s25fl256s1", INFO(0x010219, 0x4d01,  64 * 1024, 512, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{ "s25fl512s",  INFO(0x010220, 0x4d00, 256 * 1024, 256, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{ "s70fl01gs",  INFO(0x010221, 0x4d00, 256 * 1024, 256, 0) },
+	{ "s25sl12800", INFO(0x012018, 0x0300, 256 * 1024,  64, 0) },
+	{ "s25sl12801", INFO(0x012018, 0x0301,  64 * 1024, 256, 0) },
+	{ "s25fl129p0", INFO(0x012018, 0x4d00, 256 * 1024,  64, 0) },
+	{ "s25fl129p1", INFO(0x012018, 0x4d01,  64 * 1024, 256, 0) },
+	{ "s25sl004a",  INFO(0x010212,      0,  64 * 1024,   8, 0) },
+	{ "s25sl008a",  INFO(0x010213,      0,  64 * 1024,  16, 0) },
+	{ "s25sl016a",  INFO(0x010214,      0,  64 * 1024,  32, 0) },
+	{ "s25sl032a",  INFO(0x010215,      0,  64 * 1024,  64, 0) },
+	{ "s25sl064a",  INFO(0x010216,      0,  64 * 1024, 128, 0) },
+	{ "s25fl008k",  INFO(0xef4014,      0,  64 * 1024,  16, SECT_4K) },
+	{ "s25fl016k",  INFO(0xef4015,      0,  64 * 1024,  32, SECT_4K) },
+	{ "s25fl064k",  INFO(0xef4017,      0,  64 * 1024, 128, SECT_4K) },
+
+	/* SST -- large erase sizes are "overlays", "sectors" are 4K */
+	{ "sst25vf040b", INFO(0xbf258d, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
+	{ "sst25vf080b", INFO(0xbf258e, 0, 64 * 1024, 16, SECT_4K | SST_WRITE) },
+	{ "sst25vf016b", INFO(0xbf2541, 0, 64 * 1024, 32, SECT_4K | SST_WRITE) },
+	{ "sst25vf032b", INFO(0xbf254a, 0, 64 * 1024, 64, SECT_4K | SST_WRITE) },
+	{ "sst25vf064c", INFO(0xbf254b, 0, 64 * 1024, 128, SECT_4K) },
+	{ "sst25wf512",  INFO(0xbf2501, 0, 64 * 1024,  1, SECT_4K | SST_WRITE) },
+	{ "sst25wf010",  INFO(0xbf2502, 0, 64 * 1024,  2, SECT_4K | SST_WRITE) },
+	{ "sst25wf020",  INFO(0xbf2503, 0, 64 * 1024,  4, SECT_4K | SST_WRITE) },
+	{ "sst25wf040",  INFO(0xbf2504, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
+
+	/* ST Microelectronics -- newer production may have feature updates */
+	{ "m25p05",  INFO(0x202010,  0,  32 * 1024,   2, 0) },
+	{ "m25p10",  INFO(0x202011,  0,  32 * 1024,   4, 0) },
+	{ "m25p20",  INFO(0x202012,  0,  64 * 1024,   4, 0) },
+	{ "m25p40",  INFO(0x202013,  0,  64 * 1024,   8, 0) },
+	{ "m25p80",  INFO(0x202014,  0,  64 * 1024,  16, 0) },
+	{ "m25p16",  INFO(0x202015,  0,  64 * 1024,  32, 0) },
+	{ "m25p32",  INFO(0x202016,  0,  64 * 1024,  64, 0) },
+	{ "m25p64",  INFO(0x202017,  0,  64 * 1024, 128, 0) },
+	{ "m25p128", INFO(0x202018,  0, 256 * 1024,  64, 0) },
+	{ "n25q032", INFO(0x20ba16,  0,  64 * 1024,  64, 0) },
+
+	{ "m25p05-nonjedec",  INFO(0, 0,  32 * 1024,   2, 0) },
+	{ "m25p10-nonjedec",  INFO(0, 0,  32 * 1024,   4, 0) },
+	{ "m25p20-nonjedec",  INFO(0, 0,  64 * 1024,   4, 0) },
+	{ "m25p40-nonjedec",  INFO(0, 0,  64 * 1024,   8, 0) },
+	{ "m25p80-nonjedec",  INFO(0, 0,  64 * 1024,  16, 0) },
+	{ "m25p16-nonjedec",  INFO(0, 0,  64 * 1024,  32, 0) },
+	{ "m25p32-nonjedec",  INFO(0, 0,  64 * 1024,  64, 0) },
+	{ "m25p64-nonjedec",  INFO(0, 0,  64 * 1024, 128, 0) },
+	{ "m25p128-nonjedec", INFO(0, 0, 256 * 1024,  64, 0) },
+
+	{ "m45pe10", INFO(0x204011,  0, 64 * 1024,    2, 0) },
+	{ "m45pe80", INFO(0x204014,  0, 64 * 1024,   16, 0) },
+	{ "m45pe16", INFO(0x204015,  0, 64 * 1024,   32, 0) },
+
+	{ "m25pe20", INFO(0x208012,  0, 64 * 1024,  4,       0) },
+	{ "m25pe80", INFO(0x208014,  0, 64 * 1024, 16,       0) },
+	{ "m25pe16", INFO(0x208015,  0, 64 * 1024, 32, SECT_4K) },
+
+	{ "m25px16",    INFO(0x207115,  0, 64 * 1024, 32, SECT_4K) },
+	{ "m25px32",    INFO(0x207116,  0, 64 * 1024, 64, SECT_4K) },
+	{ "m25px32-s0", INFO(0x207316,  0, 64 * 1024, 64, SECT_4K) },
+	{ "m25px32-s1", INFO(0x206316,  0, 64 * 1024, 64, SECT_4K) },
+	{ "m25px64",    INFO(0x207117,  0, 64 * 1024, 128, 0) },
+
+	/* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
+	{ "w25x10", INFO(0xef3011, 0, 64 * 1024,  2,  SECT_4K) },
+	{ "w25x20", INFO(0xef3012, 0, 64 * 1024,  4,  SECT_4K) },
+	{ "w25x40", INFO(0xef3013, 0, 64 * 1024,  8,  SECT_4K) },
+	{ "w25x80", INFO(0xef3014, 0, 64 * 1024,  16, SECT_4K) },
+	{ "w25x16", INFO(0xef3015, 0, 64 * 1024,  32, SECT_4K) },
+	{ "w25x32", INFO(0xef3016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "w25q32", INFO(0xef4016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
+	{ "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) },
+	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
+	{ "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
+	{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
+	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
+	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K) },
+
+	/* Catalyst / On Semiconductor -- non-JEDEC */
+	{ "cat25c11", CAT25_INFO(  16, 8, 16, 1, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25c03", CAT25_INFO(  32, 8, 16, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25c09", CAT25_INFO( 128, 8, 32, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25c17", CAT25_INFO( 256, 8, 32, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25128", CAT25_INFO(2048, 8, 64, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ },
+};
+
+static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor)
+{
+	int			tmp;
+	u8			id[5];
+	u32			jedec;
+	u16                     ext_jedec;
+	struct flash_info	*info;
+
+	tmp = nor->read_reg(nor, OPCODE_RDID, id, 5);
+	if (tmp < 0) {
+		dev_dbg(nor->dev, " error %d reading JEDEC ID\n", tmp);
+		return ERR_PTR(tmp);
+	}
+	jedec = id[0];
+	jedec = jedec << 8;
+	jedec |= id[1];
+	jedec = jedec << 8;
+	jedec |= id[2];
+
+	ext_jedec = id[3] << 8 | id[4];
+
+	for (tmp = 0; tmp < ARRAY_SIZE(spi_nor_ids) - 1; tmp++) {
+		info = (void *)spi_nor_ids[tmp].driver_data;
+		if (info->jedec_id == jedec) {
+			if (info->ext_id == 0 || info->ext_id == ext_jedec)
+				return &spi_nor_ids[tmp];
+		}
+	}
+	dev_err(nor->dev, "unrecognized JEDEC id %06x\n", jedec);
+	return ERR_PTR(-ENODEV);
+}
+
+static const struct spi_device_id *jedec_probe(struct spi_nor *nor)
+{
+	return nor->read_id(nor);
+}
+
+static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
+			size_t *retlen, u_char *buf)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	int ret;
+
+	dev_dbg(nor->dev, "from 0x%08x, len %zd\n", (u32)from, len);
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_READ);
+	if (ret)
+		return ret;
+
+	ret = nor->read(nor, from, len, retlen, buf);
+
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_READ);
+	return ret;
+}
+
+static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
+		size_t *retlen, const u_char *buf)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	size_t actual;
+	int ret;
+
+	dev_dbg(nor->dev, "to 0x%08x, len %zd\n", (u32)to, len);
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_WRITE);
+	if (ret)
+		return ret;
+
+	/* Wait until finished previous write command. */
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto time_out;
+
+	write_enable(nor);
+
+	nor->sst_write_second = false;
+
+	actual = to % 2;
+	/* Start write from odd address. */
+	if (actual) {
+		nor->program_opcode = OPCODE_BP;
+
+		/* write one byte. */
+		nor->write(nor, to, 1, retlen, buf);
+		ret = wait_till_ready(nor);
+		if (ret)
+			goto time_out;
+	}
+	to += actual;
+
+	/* Write out most of the data here. */
+	for (; actual < len - 1; actual += 2) {
+		nor->program_opcode = OPCODE_AAI_WP;
+
+		/* write two bytes. */
+		nor->write(nor, to, 2, retlen, buf + actual);
+		ret = wait_till_ready(nor);
+		if (ret)
+			goto time_out;
+		to += 2;
+		nor->sst_write_second = true;
+	}
+	nor->sst_write_second = false;
+
+	write_disable(nor);
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto time_out;
+
+	/* Write out trailing byte if it exists. */
+	if (actual != len) {
+		write_enable(nor);
+
+		nor->program_opcode = OPCODE_BP;
+		nor->write(nor, to, 1, retlen, buf + actual);
+
+		ret = wait_till_ready(nor);
+		if (ret)
+			goto time_out;
+		write_disable(nor);
+	}
+time_out:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE);
+	return ret;
+}
+
+/*
+ * Write an address range to the nor chip.  Data must be written in
+ * FLASH_PAGESIZE chunks.  The address range may be any size provided
+ * it is within the physical boundaries.
+ */
+static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
+	size_t *retlen, const u_char *buf)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	u32 page_offset, page_size, i;
+	int ret;
+
+	dev_dbg(nor->dev, "to 0x%08x, len %zd\n", (u32)to, len);
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_WRITE);
+	if (ret)
+		return ret;
+
+	/* Wait until finished previous write command. */
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto write_err;
+
+	write_enable(nor);
+
+	page_offset = to & (nor->page_size - 1);
+
+	/* do all the bytes fit onto one page? */
+	if (page_offset + len <= nor->page_size) {
+		nor->write(nor, to, len, retlen, buf);
+	} else {
+		/* the size of data remaining on the first page */
+		page_size = nor->page_size - page_offset;
+		nor->write(nor, to, page_size, retlen, buf);
+
+		/* write everything in nor->page_size chunks */
+		for (i = page_size; i < len; i += page_size) {
+			page_size = len - i;
+			if (page_size > nor->page_size)
+				page_size = nor->page_size;
+
+			wait_till_ready(nor);
+			write_enable(nor);
+
+			nor->write(nor, to + i, page_size, retlen, buf + i);
+		}
+	}
+
+write_err:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE);
+	return 0;
+}
+
+static int macronix_quad_enable(struct spi_nor *nor)
+{
+	int ret, val;
+
+	val = read_sr(nor);
+	write_enable(nor);
+
+	nor->cmd_buf[0] = val | SR_QUAD_EN_MX;
+	nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0);
+
+	if (wait_till_ready(nor))
+		return 1;
+
+	ret = read_sr(nor);
+	if (!(ret > 0 && (ret & SR_QUAD_EN_MX))) {
+		dev_err(nor->dev, "Macronix Quad bit not set\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Write status Register and configuration register with 2 bytes
+ * The first byte will be written to the status register, while the
+ * second byte will be written to the configuration register.
+ * Return negative if error occured.
+ */
+static int write_sr_cr(struct spi_nor *nor, u16 val)
+{
+	nor->cmd_buf[0] = val & 0xff;
+	nor->cmd_buf[1] = (val >> 8);
+
+	return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 2, 0);
+}
+
+static int spansion_quad_enable(struct spi_nor *nor)
+{
+	int ret;
+	int quad_en = CR_QUAD_EN_SPAN << 8;
+
+	write_enable(nor);
+
+	ret = write_sr_cr(nor, quad_en);
+	if (ret < 0) {
+		dev_err(nor->dev,
+			"error while writing configuration register\n");
+		return -EINVAL;
+	}
+
+	/* read back and check it */
+	ret = read_cr(nor);
+	if (!(ret > 0 && (ret & CR_QUAD_EN_SPAN))) {
+		dev_err(nor->dev, "Spansion Quad bit not set\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int set_quad_mode(struct spi_nor *nor, u32 jedec_id)
+{
+	int status;
+
+	switch (JEDEC_MFR(jedec_id)) {
+	case CFI_MFR_MACRONIX:
+		status = macronix_quad_enable(nor);
+		if (status) {
+			dev_err(nor->dev, "Macronix quad-read not enabled\n");
+			return -EINVAL;
+		}
+		return status;
+	default:
+		status = spansion_quad_enable(nor);
+		if (status) {
+			dev_err(nor->dev, "Spansion quad-read not enabled\n");
+			return -EINVAL;
+		}
+		return status;
+	}
+}
+
+static int spi_nor_check(struct spi_nor *nor)
+{
+	if (!nor->dev || !nor->read || !nor->write ||
+		!nor->read_reg || !nor->write_reg || !nor->erase) {
+		pr_err("spi-nor: please fill all the necessary fields!\n");
+		return -EINVAL;
+	}
+
+	if (!nor->read_id)
+		nor->read_id = spi_nor_read_id;
+	if (!nor->wait_till_ready)
+		nor->wait_till_ready = spi_nor_wait_till_ready;
+
+	return 0;
+}
+
+int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
+			enum read_mode mode)
+{
+	struct flash_info		*info;
+	struct flash_platform_data	*data;
+	struct device *dev = nor->dev;
+	struct mtd_info *mtd = nor->mtd;
+	struct device_node *np = dev->of_node;
+	int ret;
+	int i;
+
+	ret = spi_nor_check(nor);
+	if (ret)
+		return ret;
+
+	/* Platform data helps sort out which chip type we have, as
+	 * well as how this board partitions it.  If we don't have
+	 * a chip ID, try the JEDEC id commands; they'll work for most
+	 * newer chips, even if we don't recognize the particular chip.
+	 */
+	data = dev_get_platdata(dev);
+	if (data && data->type) {
+		const struct spi_device_id *plat_id;
+
+		for (i = 0; i < ARRAY_SIZE(spi_nor_ids) - 1; i++) {
+			plat_id = &spi_nor_ids[i];
+			if (strcmp(data->type, plat_id->name))
+				continue;
+			break;
+		}
+
+		if (i < ARRAY_SIZE(spi_nor_ids) - 1)
+			id = plat_id;
+		else
+			dev_warn(dev, "unrecognized id %s\n", data->type);
+	}
+
+	info = (void *)id->driver_data;
+
+	if (info->jedec_id) {
+		const struct spi_device_id *jid;
+
+		jid = jedec_probe(nor);
+		if (IS_ERR(jid)) {
+			return PTR_ERR(jid);
+		} else if (jid != id) {
+			/*
+			 * JEDEC knows better, so overwrite platform ID. We
+			 * can't trust partitions any longer, but we'll let
+			 * mtd apply them anyway, since some partitions may be
+			 * marked read-only, and we don't want to lose that
+			 * information, even if it's not 100% accurate.
+			 */
+			dev_warn(dev, "found %s, expected %s\n",
+				 jid->name, id->name);
+			id = jid;
+			info = (void *)jid->driver_data;
+		}
+	}
+
+	mutex_init(&nor->lock);
+
+	/*
+	 * Atmel, SST and Intel/Numonyx serial nor tend to power
+	 * up with the software protection bits set
+	 */
+
+	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ATMEL ||
+	    JEDEC_MFR(info->jedec_id) == CFI_MFR_INTEL ||
+	    JEDEC_MFR(info->jedec_id) == CFI_MFR_SST) {
+		write_enable(nor);
+		write_sr(nor, 0);
+	}
+
+	if (data && data->name)
+		mtd->name = data->name;
+	else
+		mtd->name = dev_name(dev);
+
+	mtd->type = MTD_NORFLASH;
+	mtd->writesize = 1;
+	mtd->flags = MTD_CAP_NORFLASH;
+	mtd->size = info->sector_size * info->n_sectors;
+	mtd->_erase = spi_nor_erase;
+	mtd->_read = spi_nor_read;
+
+	/* nor protection support for STmicro chips */
+	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ST) {
+		mtd->_lock = spi_nor_lock;
+		mtd->_unlock = spi_nor_unlock;
+	}
+
+	/* sst nor chips use AAI word program */
+	if (info->flags & SST_WRITE)
+		mtd->_write = sst_write;
+	else
+		mtd->_write = spi_nor_write;
+
+	/* prefer "small sector" erase if possible */
+	if (info->flags & SECT_4K) {
+		nor->erase_opcode = OPCODE_BE_4K;
+		mtd->erasesize = 4096;
+	} else if (info->flags & SECT_4K_PMC) {
+		nor->erase_opcode = OPCODE_BE_4K_PMC;
+		mtd->erasesize = 4096;
+	} else {
+		nor->erase_opcode = OPCODE_SE;
+		mtd->erasesize = info->sector_size;
+	}
+
+	if (info->flags & SPI_NOR_NO_ERASE)
+		mtd->flags |= MTD_NO_ERASE;
+
+	mtd->dev.parent = dev;
+	nor->page_size = info->page_size;
+	mtd->writebufsize = nor->page_size;
+
+	if (np) {
+		/* If we were instantiated by DT, use it */
+		if (of_property_read_bool(np, "m25p,fast-read"))
+			nor->flash_read = SPI_NOR_FAST;
+		else
+			nor->flash_read = SPI_NOR_NORMAL;
+	} else {
+		/* If we weren't instantiated by DT, default to fast-read */
+		nor->flash_read = SPI_NOR_FAST;
+	}
+
+	/* Some devices cannot do fast-read, no matter what DT tells us */
+	if (info->flags & SPI_NOR_NO_FR)
+		nor->flash_read = SPI_NOR_NORMAL;
+
+	/* Quad/Dual-read mode takes precedence over fast/normal */
+	if (mode == SPI_NOR_QUAD && info->flags & SPI_NOR_QUAD_READ) {
+		ret = set_quad_mode(nor, info->jedec_id);
+		if (ret) {
+			dev_err(dev, "quad mode not supported\n");
+			return ret;
+		}
+		nor->flash_read = SPI_NOR_QUAD;
+	} else if (mode == SPI_NOR_DUAL && info->flags & SPI_NOR_DUAL_READ) {
+		nor->flash_read = SPI_NOR_DUAL;
+	}
+
+	/* Default commands */
+	switch (nor->flash_read) {
+	case SPI_NOR_QUAD:
+		nor->read_opcode = OPCODE_QUAD_READ;
+		break;
+	case SPI_NOR_DUAL:
+		nor->read_opcode = OPCODE_DUAL_READ;
+		break;
+	case SPI_NOR_FAST:
+		nor->read_opcode = OPCODE_FAST_READ;
+		break;
+	case SPI_NOR_NORMAL:
+		nor->read_opcode = OPCODE_NORM_READ;
+		break;
+	default:
+		dev_err(dev, "No Read opcode defined\n");
+		return -EINVAL;
+	}
+
+	nor->program_opcode = OPCODE_PP;
+
+	if (info->addr_width)
+		nor->addr_width = info->addr_width;
+	else if (mtd->size > 0x1000000) {
+		/* enable 4-byte addressing if the device exceeds 16MiB */
+		nor->addr_width = 4;
+		if (JEDEC_MFR(info->jedec_id) == CFI_MFR_AMD) {
+			/* Dedicated 4-byte command set */
+			switch (nor->flash_read) {
+			case SPI_NOR_QUAD:
+				nor->read_opcode = OPCODE_QUAD_READ_4B;
+				break;
+			case SPI_NOR_DUAL:
+				nor->read_opcode = OPCODE_DUAL_READ_4B;
+				break;
+			case SPI_NOR_FAST:
+				nor->read_opcode = OPCODE_FAST_READ_4B;
+				break;
+			case SPI_NOR_NORMAL:
+				nor->read_opcode = OPCODE_NORM_READ_4B;
+				break;
+			}
+			nor->program_opcode = OPCODE_PP_4B;
+			/* No small sector erase for 4-byte command set */
+			nor->erase_opcode = OPCODE_SE_4B;
+			mtd->erasesize = info->sector_size;
+		} else
+			set_4byte(nor, info->jedec_id, 1);
+	} else {
+		nor->addr_width = 3;
+	}
+
+	nor->read_dummy = spi_nor_read_dummy_cycles(nor);
+
+	dev_info(dev, "%s (%lld Kbytes)\n", id->name,
+			(long long)mtd->size >> 10);
+
+	dev_dbg(dev,
+		"mtd .name = %s, .size = 0x%llx (%lldMiB), "
+		".erasesize = 0x%.8x (%uKiB) .numeraseregions = %d\n",
+		mtd->name, (long long)mtd->size, (long long)(mtd->size >> 20),
+		mtd->erasesize, mtd->erasesize / 1024, mtd->numeraseregions);
+
+	if (mtd->numeraseregions)
+		for (i = 0; i < mtd->numeraseregions; i++)
+			dev_dbg(dev,
+				"mtd.eraseregions[%d] = { .offset = 0x%llx, "
+				".erasesize = 0x%.8x (%uKiB), "
+				".numblocks = %d }\n",
+				i, (long long)mtd->eraseregions[i].offset,
+				mtd->eraseregions[i].erasesize,
+				mtd->eraseregions[i].erasesize / 1024,
+				mtd->eraseregions[i].numblocks);
+	return 0;
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Huang Shijie <shijie8@gmail.com>");
+MODULE_AUTHOR("Mike Lavender");
+MODULE_DESCRIPTION("framework for SPI NOR");
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 3a3c3872c8cd..16d8409abcdc 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -162,4 +162,24 @@ struct spi_nor {
 
 	void *priv;
 };
+
+/**
+ * spi_nor_scan() - scan the SPI NOR
+ * @nor:	the spi_nor structure
+ * @id:		the spi_device_id provided by the driver
+ * @mode:	the read mode supported by the driver
+ *
+ * The drivers can use this fuction to scan the SPI NOR.
+ * In the scanning, it will try to get all the necessary information to
+ * fill the mtd_info{} and the spi_nor{}.
+ *
+ * The board may assigns a spi_device_id with @id which be used to compared with
+ * the spi_device_id detected by the scanning.
+ *
+ * Return: 0 for success, others for failure.
+ */
+int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
+			enum read_mode mode);
+extern const struct spi_device_id spi_nor_ids[];
+
 #endif
-- 
cgit v1.2.3-71-gd317


From 0d8c11c01274bde227d368daa8954911dd324a9f Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Mon, 24 Feb 2014 18:37:40 +0800
Subject: mtd: spi-nor: add a helper to find the spi_device_id

Add the spi_nor_match_id() to find the proper spi_device_id with the
NOR flash's name in the spi_nor_ids table.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Acked-by: Marek Vasut <marex@denx.de>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 12 ++++++++++++
 include/linux/mtd/spi-nor.h   | 12 ++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 50b929095bdb..f7c9e638623b 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1082,6 +1082,18 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 	return 0;
 }
 
+const struct spi_device_id *spi_nor_match_id(char *name)
+{
+	const struct spi_device_id *id = spi_nor_ids;
+
+	while (id->name[0]) {
+		if (!strcmp(name, id->name))
+			return id;
+		id++;
+	}
+	return NULL;
+}
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Huang Shijie <shijie8@gmail.com>");
 MODULE_AUTHOR("Mike Lavender");
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 16d8409abcdc..41dae78fbd1d 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -182,4 +182,16 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 			enum read_mode mode);
 extern const struct spi_device_id spi_nor_ids[];
 
+/**
+ * spi_nor_match_id() - find the spi_device_id by the name
+ * @name:	the name of the spi_device_id
+ *
+ * The drivers use this function to find the spi_device_id
+ * specified by the @name.
+ *
+ * Return: returns the right spi_device_id pointer on success,
+ *         and returns NULL on failure.
+ */
+const struct spi_device_id *spi_nor_match_id(char *name);
+
 #endif
-- 
cgit v1.2.3-71-gd317


From 8eabdd1ec122cf6b77cf73e798f134fbace1b8d1 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Thu, 10 Apr 2014 16:27:28 +0800
Subject: mtd: spi-nor: add the copyright information

Add the copyright information for spi-nor.c and spi-nor.h.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 6 +++++-
 include/linux/mtd/spi-nor.h   | 9 +++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 5cd86eb2a5f0..6c64ab95dee2 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1,5 +1,9 @@
 /*
- * Cloned most of the code from the m25p80.c
+ * Based on m25p80.c, by Mike Lavender (mike@steroidmicros.com), with
+ * influence from lart.c (Abraham Van Der Merwe) and mtd_dataflash.c
+ *
+ * Copyright (C) 2005, Intec Automation Inc.
+ * Copyright (C) 2014, Freescale Semiconductor, Inc.
  *
  * This code is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 41dae78fbd1d..9428d285489b 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -1,3 +1,12 @@
+/*
+ * Copyright (C) 2014 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
 #ifndef __LINUX_MTD_SPI_NOR_H
 #define __LINUX_MTD_SPI_NOR_H
 
-- 
cgit v1.2.3-71-gd317


From becd0cb8666de4bfaaf6eb3042f69066c8fb8677 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 8 Apr 2014 18:10:23 -0700
Subject: mtd: spi-nor: drop \t after #define

Spacing is a little non-standard here. Fix up tabs vs. spaces.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Acked-by: Huang Shijie <b32955@freescale.com>
Reviewed-by: Marek Vasut <marex@denx.de>
---
 include/linux/mtd/spi-nor.h | 72 ++++++++++++++++++++++-----------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 9428d285489b..a6e87190ead1 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -11,55 +11,55 @@
 #define __LINUX_MTD_SPI_NOR_H
 
 /* Flash opcodes. */
-#define	OPCODE_WREN		0x06	/* Write enable */
-#define	OPCODE_RDSR		0x05	/* Read status register */
-#define	OPCODE_WRSR		0x01	/* Write status register 1 byte */
-#define	OPCODE_NORM_READ	0x03	/* Read data bytes (low frequency) */
-#define	OPCODE_FAST_READ	0x0b	/* Read data bytes (high frequency) */
-#define	OPCODE_DUAL_READ        0x3b    /* Read data bytes (Dual SPI) */
-#define	OPCODE_QUAD_READ        0x6b    /* Read data bytes (Quad SPI) */
-#define	OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
-#define	OPCODE_BE_4K		0x20	/* Erase 4KiB block */
-#define	OPCODE_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
-#define	OPCODE_BE_32K		0x52	/* Erase 32KiB block */
-#define	OPCODE_CHIP_ERASE	0xc7	/* Erase whole flash chip */
-#define	OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
-#define	OPCODE_RDID		0x9f	/* Read JEDEC ID */
-#define	OPCODE_RDCR             0x35    /* Read configuration register */
+#define OPCODE_WREN		0x06	/* Write enable */
+#define OPCODE_RDSR		0x05	/* Read status register */
+#define OPCODE_WRSR		0x01	/* Write status register 1 byte */
+#define OPCODE_NORM_READ	0x03	/* Read data bytes (low frequency) */
+#define OPCODE_FAST_READ	0x0b	/* Read data bytes (high frequency) */
+#define OPCODE_DUAL_READ	0x3b	/* Read data bytes (Dual SPI) */
+#define OPCODE_QUAD_READ	0x6b	/* Read data bytes (Quad SPI) */
+#define OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
+#define OPCODE_BE_4K		0x20	/* Erase 4KiB block */
+#define OPCODE_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
+#define OPCODE_BE_32K		0x52	/* Erase 32KiB block */
+#define OPCODE_CHIP_ERASE	0xc7	/* Erase whole flash chip */
+#define OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
+#define OPCODE_RDID		0x9f	/* Read JEDEC ID */
+#define OPCODE_RDCR		0x35	/* Read configuration register */
 
 /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
-#define	OPCODE_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
-#define	OPCODE_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
-#define	OPCODE_DUAL_READ_4B	0x3c    /* Read data bytes (Dual SPI) */
-#define	OPCODE_QUAD_READ_4B	0x6c    /* Read data bytes (Quad SPI) */
-#define	OPCODE_PP_4B		0x12	/* Page program (up to 256 bytes) */
-#define	OPCODE_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
+#define OPCODE_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
+#define OPCODE_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
+#define OPCODE_DUAL_READ_4B	0x3c	/* Read data bytes (Dual SPI) */
+#define OPCODE_QUAD_READ_4B	0x6c	/* Read data bytes (Quad SPI) */
+#define OPCODE_PP_4B		0x12	/* Page program (up to 256 bytes) */
+#define OPCODE_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
 
 /* Used for SST flashes only. */
-#define	OPCODE_BP		0x02	/* Byte program */
-#define	OPCODE_WRDI		0x04	/* Write disable */
-#define	OPCODE_AAI_WP		0xad	/* Auto address increment word program */
+#define OPCODE_BP		0x02	/* Byte program */
+#define OPCODE_WRDI		0x04	/* Write disable */
+#define OPCODE_AAI_WP		0xad	/* Auto address increment word program */
 
 /* Used for Macronix and Winbond flashes. */
-#define	OPCODE_EN4B		0xb7	/* Enter 4-byte mode */
-#define	OPCODE_EX4B		0xe9	/* Exit 4-byte mode */
+#define OPCODE_EN4B		0xb7	/* Enter 4-byte mode */
+#define OPCODE_EX4B		0xe9	/* Exit 4-byte mode */
 
 /* Used for Spansion flashes only. */
-#define	OPCODE_BRWR		0x17	/* Bank register write */
+#define OPCODE_BRWR		0x17	/* Bank register write */
 
 /* Status Register bits. */
-#define	SR_WIP			1	/* Write in progress */
-#define	SR_WEL			2	/* Write enable latch */
+#define SR_WIP			1	/* Write in progress */
+#define SR_WEL			2	/* Write enable latch */
 /* meaning of other SR_* bits may differ between vendors */
-#define	SR_BP0			4	/* Block protect 0 */
-#define	SR_BP1			8	/* Block protect 1 */
-#define	SR_BP2			0x10	/* Block protect 2 */
-#define	SR_SRWD			0x80	/* SR write protect */
+#define SR_BP0			4	/* Block protect 0 */
+#define SR_BP1			8	/* Block protect 1 */
+#define SR_BP2			0x10	/* Block protect 2 */
+#define SR_SRWD			0x80	/* SR write protect */
 
-#define SR_QUAD_EN_MX           0x40    /* Macronix Quad I/O */
+#define SR_QUAD_EN_MX		0x40	/* Macronix Quad I/O */
 
 /* Configuration Register bits. */
-#define CR_QUAD_EN_SPAN		0x2     /* Spansion Quad I/O */
+#define CR_QUAD_EN_SPAN		0x2	/* Spansion Quad I/O */
 
 enum read_mode {
 	SPI_NOR_NORMAL = 0,
@@ -95,7 +95,7 @@ struct spi_nor_xfer_cfg {
 	u8		dummy_cycles;
 };
 
-#define	SPI_NOR_MAX_CMD_SIZE	8
+#define SPI_NOR_MAX_CMD_SIZE	8
 enum spi_nor_ops {
 	SPI_NOR_OPS_READ = 0,
 	SPI_NOR_OPS_WRITE,
-- 
cgit v1.2.3-71-gd317


From b02e7f3ef0beb72da8fc64542f0ac977996ec56b Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 8 Apr 2014 18:15:31 -0700
Subject: mtd: spi-nor: re-name OPCODE_* to SPINOR_OP_*

Qualify these with a better namespace, and prepare them for use in more
drivers.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Reviewed-by: Marek Vasut <marex@denx.de>
Acked-by: Huang Shijie <b32955@freescale.com>
---
 drivers/mtd/devices/m25p80.c      |  4 +--
 drivers/mtd/spi-nor/fsl-quadspi.c | 58 +++++++++++++++++------------------
 drivers/mtd/spi-nor/spi-nor.c     | 64 +++++++++++++++++++--------------------
 include/linux/mtd/spi-nor.h       | 54 ++++++++++++++++-----------------
 4 files changed, 90 insertions(+), 90 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 4af6400ccd95..1557d8f672c1 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -86,7 +86,7 @@ static void m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
 
 	spi_message_init(&m);
 
-	if (nor->program_opcode == OPCODE_AAI_WP && nor->sst_write_second)
+	if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
 		cmd_sz = 1;
 
 	flash->command[0] = nor->program_opcode;
@@ -171,7 +171,7 @@ static int m25p80_erase(struct spi_nor *nor, loff_t offset)
 		return ret;
 
 	/* Send write enable, then erase commands. */
-	ret = nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0);
+	ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 6dc08ed950c8..2977f026f39d 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -294,12 +294,12 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 	lut_base = SEQID_QUAD_READ * 4;
 
 	if (q->nor_size <= SZ_16M) {
-		cmd = OPCODE_QUAD_READ;
+		cmd = SPINOR_OP_QUAD_READ;
 		addrlen = ADDR24BIT;
 		dummy = 8;
 	} else {
 		/* use the 4-byte address */
-		cmd = OPCODE_QUAD_READ;
+		cmd = SPINOR_OP_QUAD_READ;
 		addrlen = ADDR32BIT;
 		dummy = 8;
 	}
@@ -311,17 +311,17 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 
 	/* Write enable */
 	lut_base = SEQID_WREN * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_WREN), base + QUADSPI_LUT(lut_base));
+	writel(LUT0(CMD, PAD1, SPINOR_OP_WREN), base + QUADSPI_LUT(lut_base));
 
 	/* Page Program */
 	lut_base = SEQID_PP * 4;
 
 	if (q->nor_size <= SZ_16M) {
-		cmd = OPCODE_PP;
+		cmd = SPINOR_OP_PP;
 		addrlen = ADDR24BIT;
 	} else {
 		/* use the 4-byte address */
-		cmd = OPCODE_PP;
+		cmd = SPINOR_OP_PP;
 		addrlen = ADDR32BIT;
 	}
 
@@ -331,18 +331,18 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 
 	/* Read Status */
 	lut_base = SEQID_RDSR * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_RDSR) | LUT1(READ, PAD1, 0x1),
+	writel(LUT0(CMD, PAD1, SPINOR_OP_RDSR) | LUT1(READ, PAD1, 0x1),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Erase a sector */
 	lut_base = SEQID_SE * 4;
 
 	if (q->nor_size <= SZ_16M) {
-		cmd = OPCODE_SE;
+		cmd = SPINOR_OP_SE;
 		addrlen = ADDR24BIT;
 	} else {
 		/* use the 4-byte address */
-		cmd = OPCODE_SE;
+		cmd = SPINOR_OP_SE;
 		addrlen = ADDR32BIT;
 	}
 
@@ -351,35 +351,35 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 
 	/* Erase the whole chip */
 	lut_base = SEQID_CHIP_ERASE * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_CHIP_ERASE),
+	writel(LUT0(CMD, PAD1, SPINOR_OP_CHIP_ERASE),
 			base + QUADSPI_LUT(lut_base));
 
 	/* READ ID */
 	lut_base = SEQID_RDID * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_RDID) | LUT1(READ, PAD1, 0x8),
+	writel(LUT0(CMD, PAD1, SPINOR_OP_RDID) | LUT1(READ, PAD1, 0x8),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Write Register */
 	lut_base = SEQID_WRSR * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_WRSR) | LUT1(WRITE, PAD1, 0x2),
+	writel(LUT0(CMD, PAD1, SPINOR_OP_WRSR) | LUT1(WRITE, PAD1, 0x2),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Read Configuration Register */
 	lut_base = SEQID_RDCR * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_RDCR) | LUT1(READ, PAD1, 0x1),
+	writel(LUT0(CMD, PAD1, SPINOR_OP_RDCR) | LUT1(READ, PAD1, 0x1),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Write disable */
 	lut_base = SEQID_WRDI * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_WRDI), base + QUADSPI_LUT(lut_base));
+	writel(LUT0(CMD, PAD1, SPINOR_OP_WRDI), base + QUADSPI_LUT(lut_base));
 
 	/* Enter 4 Byte Mode (Micron) */
 	lut_base = SEQID_EN4B * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_EN4B), base + QUADSPI_LUT(lut_base));
+	writel(LUT0(CMD, PAD1, SPINOR_OP_EN4B), base + QUADSPI_LUT(lut_base));
 
 	/* Enter 4 Byte Mode (Spansion) */
 	lut_base = SEQID_BRWR * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_BRWR), base + QUADSPI_LUT(lut_base));
+	writel(LUT0(CMD, PAD1, SPINOR_OP_BRWR), base + QUADSPI_LUT(lut_base));
 
 	fsl_qspi_lock_lut(q);
 }
@@ -388,29 +388,29 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
 {
 	switch (cmd) {
-	case OPCODE_QUAD_READ:
+	case SPINOR_OP_QUAD_READ:
 		return SEQID_QUAD_READ;
-	case OPCODE_WREN:
+	case SPINOR_OP_WREN:
 		return SEQID_WREN;
-	case OPCODE_WRDI:
+	case SPINOR_OP_WRDI:
 		return SEQID_WRDI;
-	case OPCODE_RDSR:
+	case SPINOR_OP_RDSR:
 		return SEQID_RDSR;
-	case OPCODE_SE:
+	case SPINOR_OP_SE:
 		return SEQID_SE;
-	case OPCODE_CHIP_ERASE:
+	case SPINOR_OP_CHIP_ERASE:
 		return SEQID_CHIP_ERASE;
-	case OPCODE_PP:
+	case SPINOR_OP_PP:
 		return SEQID_PP;
-	case OPCODE_RDID:
+	case SPINOR_OP_RDID:
 		return SEQID_RDID;
-	case OPCODE_WRSR:
+	case SPINOR_OP_WRSR:
 		return SEQID_WRSR;
-	case OPCODE_RDCR:
+	case SPINOR_OP_RDCR:
 		return SEQID_RDCR;
-	case OPCODE_EN4B:
+	case SPINOR_OP_EN4B:
 		return SEQID_EN4B;
-	case OPCODE_BRWR:
+	case SPINOR_OP_BRWR:
 		return SEQID_BRWR;
 	default:
 		dev_err(q->dev, "Unsupported cmd 0x%.2x\n", cmd);
@@ -688,7 +688,7 @@ static int fsl_qspi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len,
 		if (ret)
 			return ret;
 
-		if (opcode == OPCODE_CHIP_ERASE)
+		if (opcode == SPINOR_OP_CHIP_ERASE)
 			fsl_qspi_invalid(q);
 
 	} else if (len > 0) {
@@ -750,7 +750,7 @@ static int fsl_qspi_erase(struct spi_nor *nor, loff_t offs)
 		return ret;
 
 	/* Send write enable, then erase commands. */
-	ret = nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0);
+	ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 6c64ab95dee2..1716f3ce9949 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -38,7 +38,7 @@ static int read_sr(struct spi_nor *nor)
 	int ret;
 	u8 val;
 
-	ret = nor->read_reg(nor, OPCODE_RDSR, &val, 1);
+	ret = nor->read_reg(nor, SPINOR_OP_RDSR, &val, 1);
 	if (ret < 0) {
 		pr_err("error %d reading SR\n", (int) ret);
 		return ret;
@@ -57,7 +57,7 @@ static int read_cr(struct spi_nor *nor)
 	int ret;
 	u8 val;
 
-	ret = nor->read_reg(nor, OPCODE_RDCR, &val, 1);
+	ret = nor->read_reg(nor, SPINOR_OP_RDCR, &val, 1);
 	if (ret < 0) {
 		dev_err(nor->dev, "error %d reading CR\n", ret);
 		return ret;
@@ -91,7 +91,7 @@ static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor)
 static inline int write_sr(struct spi_nor *nor, u8 val)
 {
 	nor->cmd_buf[0] = val;
-	return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0);
+	return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0);
 }
 
 /*
@@ -100,7 +100,7 @@ static inline int write_sr(struct spi_nor *nor, u8 val)
  */
 static inline int write_enable(struct spi_nor *nor)
 {
-	return nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0);
+	return nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0);
 }
 
 /*
@@ -108,7 +108,7 @@ static inline int write_enable(struct spi_nor *nor)
  */
 static inline int write_disable(struct spi_nor *nor)
 {
-	return nor->write_reg(nor, OPCODE_WRDI, NULL, 0, 0);
+	return nor->write_reg(nor, SPINOR_OP_WRDI, NULL, 0, 0);
 }
 
 static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd)
@@ -132,7 +132,7 @@ static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable)
 		if (need_wren)
 			write_enable(nor);
 
-		cmd = enable ? OPCODE_EN4B : OPCODE_EX4B;
+		cmd = enable ? SPINOR_OP_EN4B : SPINOR_OP_EX4B;
 		status = nor->write_reg(nor, cmd, NULL, 0, 0);
 		if (need_wren)
 			write_disable(nor);
@@ -141,7 +141,7 @@ static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable)
 	default:
 		/* Spansion style */
 		nor->cmd_buf[0] = enable << 7;
-		return nor->write_reg(nor, OPCODE_BRWR, nor->cmd_buf, 1, 0);
+		return nor->write_reg(nor, SPINOR_OP_BRWR, nor->cmd_buf, 1, 0);
 	}
 }
 
@@ -193,7 +193,7 @@ static int erase_chip(struct spi_nor *nor)
 	/* Send write enable, then erase commands. */
 	write_enable(nor);
 
-	return nor->write_reg(nor, OPCODE_CHIP_ERASE, NULL, 0, 0);
+	return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0, 0);
 }
 
 static int spi_nor_lock_and_prep(struct spi_nor *nor, enum spi_nor_ops ops)
@@ -253,7 +253,7 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 		}
 
 	/* REVISIT in some cases we could speed up erasing large regions
-	 * by using OPCODE_SE instead of OPCODE_BE_4K.  We may have set up
+	 * by using SPINOR_OP_SE instead of SPINOR_OP_BE_4K.  We may have set up
 	 * to use "small sector erase", but that's not always optimal.
 	 */
 
@@ -385,7 +385,7 @@ struct flash_info {
 	u32		jedec_id;
 	u16             ext_id;
 
-	/* The size listed here is what works with OPCODE_SE, which isn't
+	/* The size listed here is what works with SPINOR_OP_SE, which isn't
 	 * necessarily called a "sector" by the vendor.
 	 */
 	unsigned	sector_size;
@@ -395,11 +395,11 @@ struct flash_info {
 	u16		addr_width;
 
 	u16		flags;
-#define	SECT_4K			0x01	/* OPCODE_BE_4K works uniformly */
+#define	SECT_4K			0x01	/* SPINOR_OP_BE_4K works uniformly */
 #define	SPI_NOR_NO_ERASE	0x02	/* No erase command needed */
 #define	SST_WRITE		0x04	/* use SST byte programming */
 #define	SPI_NOR_NO_FR		0x08	/* Can't do fastread */
-#define	SECT_4K_PMC		0x10	/* OPCODE_BE_4K_PMC works uniformly */
+#define	SECT_4K_PMC		0x10	/* SPINOR_OP_BE_4K_PMC works uniformly */
 #define	SPI_NOR_DUAL_READ	0x20    /* Flash supports Dual Read */
 #define	SPI_NOR_QUAD_READ	0x40    /* Flash supports Quad Read */
 };
@@ -598,7 +598,7 @@ static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor)
 	u16                     ext_jedec;
 	struct flash_info	*info;
 
-	tmp = nor->read_reg(nor, OPCODE_RDID, id, 5);
+	tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, 5);
 	if (tmp < 0) {
 		dev_dbg(nor->dev, " error %d reading JEDEC ID\n", tmp);
 		return ERR_PTR(tmp);
@@ -670,7 +670,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 	actual = to % 2;
 	/* Start write from odd address. */
 	if (actual) {
-		nor->program_opcode = OPCODE_BP;
+		nor->program_opcode = SPINOR_OP_BP;
 
 		/* write one byte. */
 		nor->write(nor, to, 1, retlen, buf);
@@ -682,7 +682,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 
 	/* Write out most of the data here. */
 	for (; actual < len - 1; actual += 2) {
-		nor->program_opcode = OPCODE_AAI_WP;
+		nor->program_opcode = SPINOR_OP_AAI_WP;
 
 		/* write two bytes. */
 		nor->write(nor, to, 2, retlen, buf + actual);
@@ -703,7 +703,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 	if (actual != len) {
 		write_enable(nor);
 
-		nor->program_opcode = OPCODE_BP;
+		nor->program_opcode = SPINOR_OP_BP;
 		nor->write(nor, to, 1, retlen, buf + actual);
 
 		ret = wait_till_ready(nor);
@@ -777,7 +777,7 @@ static int macronix_quad_enable(struct spi_nor *nor)
 	write_enable(nor);
 
 	nor->cmd_buf[0] = val | SR_QUAD_EN_MX;
-	nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0);
+	nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0);
 
 	if (wait_till_ready(nor))
 		return 1;
@@ -802,7 +802,7 @@ static int write_sr_cr(struct spi_nor *nor, u16 val)
 	nor->cmd_buf[0] = val & 0xff;
 	nor->cmd_buf[1] = (val >> 8);
 
-	return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 2, 0);
+	return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 2, 0);
 }
 
 static int spansion_quad_enable(struct spi_nor *nor)
@@ -967,13 +967,13 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 
 	/* prefer "small sector" erase if possible */
 	if (info->flags & SECT_4K) {
-		nor->erase_opcode = OPCODE_BE_4K;
+		nor->erase_opcode = SPINOR_OP_BE_4K;
 		mtd->erasesize = 4096;
 	} else if (info->flags & SECT_4K_PMC) {
-		nor->erase_opcode = OPCODE_BE_4K_PMC;
+		nor->erase_opcode = SPINOR_OP_BE_4K_PMC;
 		mtd->erasesize = 4096;
 	} else {
-		nor->erase_opcode = OPCODE_SE;
+		nor->erase_opcode = SPINOR_OP_SE;
 		mtd->erasesize = info->sector_size;
 	}
 
@@ -1014,23 +1014,23 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 	/* Default commands */
 	switch (nor->flash_read) {
 	case SPI_NOR_QUAD:
-		nor->read_opcode = OPCODE_QUAD_READ;
+		nor->read_opcode = SPINOR_OP_QUAD_READ;
 		break;
 	case SPI_NOR_DUAL:
-		nor->read_opcode = OPCODE_DUAL_READ;
+		nor->read_opcode = SPINOR_OP_DUAL_READ;
 		break;
 	case SPI_NOR_FAST:
-		nor->read_opcode = OPCODE_FAST_READ;
+		nor->read_opcode = SPINOR_OP_FAST_READ;
 		break;
 	case SPI_NOR_NORMAL:
-		nor->read_opcode = OPCODE_NORM_READ;
+		nor->read_opcode = SPINOR_OP_NORM_READ;
 		break;
 	default:
 		dev_err(dev, "No Read opcode defined\n");
 		return -EINVAL;
 	}
 
-	nor->program_opcode = OPCODE_PP;
+	nor->program_opcode = SPINOR_OP_PP;
 
 	if (info->addr_width)
 		nor->addr_width = info->addr_width;
@@ -1041,21 +1041,21 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 			/* Dedicated 4-byte command set */
 			switch (nor->flash_read) {
 			case SPI_NOR_QUAD:
-				nor->read_opcode = OPCODE_QUAD_READ_4B;
+				nor->read_opcode = SPINOR_OP_QUAD_READ_4B;
 				break;
 			case SPI_NOR_DUAL:
-				nor->read_opcode = OPCODE_DUAL_READ_4B;
+				nor->read_opcode = SPINOR_OP_DUAL_READ_4B;
 				break;
 			case SPI_NOR_FAST:
-				nor->read_opcode = OPCODE_FAST_READ_4B;
+				nor->read_opcode = SPINOR_OP_FAST_READ_4B;
 				break;
 			case SPI_NOR_NORMAL:
-				nor->read_opcode = OPCODE_NORM_READ_4B;
+				nor->read_opcode = SPINOR_OP_NORM_READ_4B;
 				break;
 			}
-			nor->program_opcode = OPCODE_PP_4B;
+			nor->program_opcode = SPINOR_OP_PP_4B;
 			/* No small sector erase for 4-byte command set */
-			nor->erase_opcode = OPCODE_SE_4B;
+			nor->erase_opcode = SPINOR_OP_SE_4B;
 			mtd->erasesize = info->sector_size;
 		} else
 			set_4byte(nor, info->jedec_id, 1);
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index a6e87190ead1..f1fe1a6659a3 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -11,41 +11,41 @@
 #define __LINUX_MTD_SPI_NOR_H
 
 /* Flash opcodes. */
-#define OPCODE_WREN		0x06	/* Write enable */
-#define OPCODE_RDSR		0x05	/* Read status register */
-#define OPCODE_WRSR		0x01	/* Write status register 1 byte */
-#define OPCODE_NORM_READ	0x03	/* Read data bytes (low frequency) */
-#define OPCODE_FAST_READ	0x0b	/* Read data bytes (high frequency) */
-#define OPCODE_DUAL_READ	0x3b	/* Read data bytes (Dual SPI) */
-#define OPCODE_QUAD_READ	0x6b	/* Read data bytes (Quad SPI) */
-#define OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
-#define OPCODE_BE_4K		0x20	/* Erase 4KiB block */
-#define OPCODE_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
-#define OPCODE_BE_32K		0x52	/* Erase 32KiB block */
-#define OPCODE_CHIP_ERASE	0xc7	/* Erase whole flash chip */
-#define OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
-#define OPCODE_RDID		0x9f	/* Read JEDEC ID */
-#define OPCODE_RDCR		0x35	/* Read configuration register */
+#define SPINOR_OP_WREN		0x06	/* Write enable */
+#define SPINOR_OP_RDSR		0x05	/* Read status register */
+#define SPINOR_OP_WRSR		0x01	/* Write status register 1 byte */
+#define SPINOR_OP_NORM_READ	0x03	/* Read data bytes (low frequency) */
+#define SPINOR_OP_FAST_READ	0x0b	/* Read data bytes (high frequency) */
+#define SPINOR_OP_DUAL_READ	0x3b	/* Read data bytes (Dual SPI) */
+#define SPINOR_OP_QUAD_READ	0x6b	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_PP		0x02	/* Page program (up to 256 bytes) */
+#define SPINOR_OP_BE_4K		0x20	/* Erase 4KiB block */
+#define SPINOR_OP_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
+#define SPINOR_OP_BE_32K	0x52	/* Erase 32KiB block */
+#define SPINOR_OP_CHIP_ERASE	0xc7	/* Erase whole flash chip */
+#define SPINOR_OP_SE		0xd8	/* Sector erase (usually 64KiB) */
+#define SPINOR_OP_RDID		0x9f	/* Read JEDEC ID */
+#define SPINOR_OP_RDCR		0x35	/* Read configuration register */
 
 /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
-#define OPCODE_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
-#define OPCODE_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
-#define OPCODE_DUAL_READ_4B	0x3c	/* Read data bytes (Dual SPI) */
-#define OPCODE_QUAD_READ_4B	0x6c	/* Read data bytes (Quad SPI) */
-#define OPCODE_PP_4B		0x12	/* Page program (up to 256 bytes) */
-#define OPCODE_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
+#define SPINOR_OP_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
+#define SPINOR_OP_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
+#define SPINOR_OP_DUAL_READ_4B	0x3c	/* Read data bytes (Dual SPI) */
+#define SPINOR_OP_QUAD_READ_4B	0x6c	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_PP_4B		0x12	/* Page program (up to 256 bytes) */
+#define SPINOR_OP_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
 
 /* Used for SST flashes only. */
-#define OPCODE_BP		0x02	/* Byte program */
-#define OPCODE_WRDI		0x04	/* Write disable */
-#define OPCODE_AAI_WP		0xad	/* Auto address increment word program */
+#define SPINOR_OP_BP		0x02	/* Byte program */
+#define SPINOR_OP_WRDI		0x04	/* Write disable */
+#define SPINOR_OP_AAI_WP	0xad	/* Auto address increment word program */
 
 /* Used for Macronix and Winbond flashes. */
-#define OPCODE_EN4B		0xb7	/* Enter 4-byte mode */
-#define OPCODE_EX4B		0xe9	/* Exit 4-byte mode */
+#define SPINOR_OP_EN4B		0xb7	/* Enter 4-byte mode */
+#define SPINOR_OP_EX4B		0xe9	/* Exit 4-byte mode */
 
 /* Used for Spansion flashes only. */
-#define OPCODE_BRWR		0x17	/* Bank register write */
+#define SPINOR_OP_BRWR		0x17	/* Bank register write */
 
 /* Status Register bits. */
 #define SR_WIP			1	/* Write in progress */
-- 
cgit v1.2.3-71-gd317


From 58b89a1f4c2a65b10b8f7b90b6ff2161b19bb0d1 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 8 Apr 2014 19:16:49 -0700
Subject: mtd: spi-nor: unify read opcode variants with ST SPI FSM

serial_flash_cmds.h defines our opcodes a little differently. Let's
borrow its naming, since it's borrowed from the SFDP standard, and it's
more extensible.

This prepares us for merging serial_flash_cmds.h and spi-nor.h opcode
listing.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Reviewed-by: Marek Vasut <marex@denx.de>
Acked-by: Huang Shijie <b32955@freescale.com>
---
 drivers/mtd/spi-nor/fsl-quadspi.c |  6 +++---
 drivers/mtd/spi-nor/spi-nor.c     | 16 ++++++++--------
 include/linux/mtd/spi-nor.h       | 24 ++++++++++++++++--------
 3 files changed, 27 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 2977f026f39d..b41bbbc531ff 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -294,12 +294,12 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 	lut_base = SEQID_QUAD_READ * 4;
 
 	if (q->nor_size <= SZ_16M) {
-		cmd = SPINOR_OP_QUAD_READ;
+		cmd = SPINOR_OP_READ_1_1_4;
 		addrlen = ADDR24BIT;
 		dummy = 8;
 	} else {
 		/* use the 4-byte address */
-		cmd = SPINOR_OP_QUAD_READ;
+		cmd = SPINOR_OP_READ_1_1_4;
 		addrlen = ADDR32BIT;
 		dummy = 8;
 	}
@@ -388,7 +388,7 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
 {
 	switch (cmd) {
-	case SPINOR_OP_QUAD_READ:
+	case SPINOR_OP_READ_1_1_4:
 		return SEQID_QUAD_READ;
 	case SPINOR_OP_WREN:
 		return SEQID_WREN;
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 1716f3ce9949..d6f44d527701 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1014,16 +1014,16 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 	/* Default commands */
 	switch (nor->flash_read) {
 	case SPI_NOR_QUAD:
-		nor->read_opcode = SPINOR_OP_QUAD_READ;
+		nor->read_opcode = SPINOR_OP_READ_1_1_4;
 		break;
 	case SPI_NOR_DUAL:
-		nor->read_opcode = SPINOR_OP_DUAL_READ;
+		nor->read_opcode = SPINOR_OP_READ_1_1_2;
 		break;
 	case SPI_NOR_FAST:
-		nor->read_opcode = SPINOR_OP_FAST_READ;
+		nor->read_opcode = SPINOR_OP_READ_FAST;
 		break;
 	case SPI_NOR_NORMAL:
-		nor->read_opcode = SPINOR_OP_NORM_READ;
+		nor->read_opcode = SPINOR_OP_READ;
 		break;
 	default:
 		dev_err(dev, "No Read opcode defined\n");
@@ -1041,16 +1041,16 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 			/* Dedicated 4-byte command set */
 			switch (nor->flash_read) {
 			case SPI_NOR_QUAD:
-				nor->read_opcode = SPINOR_OP_QUAD_READ_4B;
+				nor->read_opcode = SPINOR_OP_READ4_1_1_4;
 				break;
 			case SPI_NOR_DUAL:
-				nor->read_opcode = SPINOR_OP_DUAL_READ_4B;
+				nor->read_opcode = SPINOR_OP_READ4_1_1_2;
 				break;
 			case SPI_NOR_FAST:
-				nor->read_opcode = SPINOR_OP_FAST_READ_4B;
+				nor->read_opcode = SPINOR_OP_READ4_FAST;
 				break;
 			case SPI_NOR_NORMAL:
-				nor->read_opcode = SPINOR_OP_NORM_READ_4B;
+				nor->read_opcode = SPINOR_OP_READ4;
 				break;
 			}
 			nor->program_opcode = SPINOR_OP_PP_4B;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index f1fe1a6659a3..53241842a7ab 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -10,14 +10,22 @@
 #ifndef __LINUX_MTD_SPI_NOR_H
 #define __LINUX_MTD_SPI_NOR_H
 
+/*
+ * Note on opcode nomenclature: some opcodes have a format like
+ * SPINOR_OP_FUNCTION{4,}_x_y_z. The numbers x, y, and z stand for the number
+ * of I/O lines used for the opcode, address, and data (respectively). The
+ * FUNCTION has an optional suffix of '4', to represent an opcode which
+ * requires a 4-byte (32-bit) address.
+ */
+
 /* Flash opcodes. */
 #define SPINOR_OP_WREN		0x06	/* Write enable */
 #define SPINOR_OP_RDSR		0x05	/* Read status register */
 #define SPINOR_OP_WRSR		0x01	/* Write status register 1 byte */
-#define SPINOR_OP_NORM_READ	0x03	/* Read data bytes (low frequency) */
-#define SPINOR_OP_FAST_READ	0x0b	/* Read data bytes (high frequency) */
-#define SPINOR_OP_DUAL_READ	0x3b	/* Read data bytes (Dual SPI) */
-#define SPINOR_OP_QUAD_READ	0x6b	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_READ		0x03	/* Read data bytes (low frequency) */
+#define SPINOR_OP_READ_FAST	0x0b	/* Read data bytes (high frequency) */
+#define SPINOR_OP_READ_1_1_2	0x3b	/* Read data bytes (Dual SPI) */
+#define SPINOR_OP_READ_1_1_4	0x6b	/* Read data bytes (Quad SPI) */
 #define SPINOR_OP_PP		0x02	/* Page program (up to 256 bytes) */
 #define SPINOR_OP_BE_4K		0x20	/* Erase 4KiB block */
 #define SPINOR_OP_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
@@ -28,10 +36,10 @@
 #define SPINOR_OP_RDCR		0x35	/* Read configuration register */
 
 /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
-#define SPINOR_OP_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
-#define SPINOR_OP_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
-#define SPINOR_OP_DUAL_READ_4B	0x3c	/* Read data bytes (Dual SPI) */
-#define SPINOR_OP_QUAD_READ_4B	0x6c	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_READ4		0x13	/* Read data bytes (low frequency) */
+#define SPINOR_OP_READ4_FAST	0x0c	/* Read data bytes (high frequency) */
+#define SPINOR_OP_READ4_1_1_2	0x3c	/* Read data bytes (Dual SPI) */
+#define SPINOR_OP_READ4_1_1_4	0x6c	/* Read data bytes (Quad SPI) */
 #define SPINOR_OP_PP_4B		0x12	/* Page program (up to 256 bytes) */
 #define SPINOR_OP_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
 
-- 
cgit v1.2.3-71-gd317


From fdb9c293decf7e06795f7d9ae409df907c7ae1b6 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Tue, 15 Apr 2014 12:39:14 -0500
Subject: percpu: Replace __get_cpu_var with this_cpu_ptr

__this_cpu_ptr is being phased out.  Use raw_cpu_ptr instead which was
introduced in 3.15-rc1.  One case of using __get_cpu_var in the
get_cpu_var macro for address calculation was remaining in
include/linux/percpu.h.

tj: Updated patch description.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index e7a0b95ed527..539b3caa5748 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -29,7 +29,7 @@
  */
 #define get_cpu_var(var) (*({				\
 	preempt_disable();				\
-	&__get_cpu_var(var); }))
+	this_cpu_ptr(&var); }))
 
 /*
  * The weird & is necessary because sparse considers (void)(var) to be
-- 
cgit v1.2.3-71-gd317


From 79f7ae7c45a6ccf04e2908337461dee615f6afb0 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Fri, 14 Mar 2014 21:11:56 +0900
Subject: mmc: clarify DDR timing mode between SD-UHS and eMMC

This change distinguishes DDR timing mode of current
mixed usage to clarify device type.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/debugfs.c | 3 +++
 drivers/mmc/core/mmc.c     | 2 +-
 include/linux/mmc/host.h   | 3 ++-
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 54829c0ed000..509229b48b55 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -135,6 +135,9 @@ static int mmc_ios_show(struct seq_file *s, void *data)
 	case MMC_TIMING_UHS_DDR50:
 		str = "sd uhs DDR50";
 		break;
+	case MMC_TIMING_MMC_DDR52:
+		str = "mmc DDR52";
+		break;
 	case MMC_TIMING_MMC_HS200:
 		str = "mmc high-speed SDR200";
 		break;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 1ab5f3a0af5b..e22d8515ff97 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1264,7 +1264,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 					goto err;
 			}
 			mmc_card_set_ddr_mode(card);
-			mmc_set_timing(card->host, MMC_TIMING_UHS_DDR50);
+			mmc_set_timing(card->host, MMC_TIMING_MMC_DDR52);
 			mmc_set_bus_width(card->host, bus_width);
 		}
 	}
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index cb61ea4d6945..35354207e71f 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -58,7 +58,8 @@ struct mmc_ios {
 #define MMC_TIMING_UHS_SDR50	5
 #define MMC_TIMING_UHS_SDR104	6
 #define MMC_TIMING_UHS_DDR50	7
-#define MMC_TIMING_MMC_HS200	8
+#define MMC_TIMING_MMC_DDR52	8
+#define MMC_TIMING_MMC_HS200	9
 
 #define MMC_SDR_MODE		0
 #define MMC_1_2V_DDR_MODE	1
-- 
cgit v1.2.3-71-gd317


From af6b6967d6e17fe070c0fd1be364c34cbd31a523 Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Wed, 16 Apr 2014 17:19:12 +0200
Subject: net: phy: export genphy_config_init()

This enables other drivers to call this generic implementation, and then
only do specific details on top of it.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 3 ++-
 include/linux/phy.h          | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 0ce606624296..466ae3e06322 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1067,7 +1067,7 @@ int genphy_soft_reset(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(genphy_soft_reset);
 
-static int genphy_config_init(struct phy_device *phydev)
+int genphy_config_init(struct phy_device *phydev)
 {
 	int val;
 	u32 features;
@@ -1118,6 +1118,7 @@ static int gen10g_soft_reset(struct phy_device *phydev)
 	/* Do nothing for now */
 	return 0;
 }
+EXPORT_SYMBOL(genphy_config_init);
 
 static int gen10g_config_init(struct phy_device *phydev)
 {
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 4d0221fd0688..51d15f684e7e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -666,6 +666,7 @@ static inline int phy_read_status(struct phy_device *phydev)
 	return phydev->drv->read_status(phydev);
 }
 
+int genphy_config_init(struct phy_device *phydev);
 int genphy_setup_forced(struct phy_device *phydev);
 int genphy_restart_aneg(struct phy_device *phydev);
 int genphy_config_aneg(struct phy_device *phydev);
-- 
cgit v1.2.3-71-gd317


From a0265d28b3a5877b5b8edd14eb12a2ccb60ab1f3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 17 Apr 2014 13:45:03 +0800
Subject: net: Add __dev_forward_skb

This patch adds the helper __dev_forward_skb which is identical to
dev_forward_skb except that it doesn't actually inject the skb into
the stack.  This is useful where we wish to have finer control over
how the packet is injected, e.g., via netif_rx_ni or netif_receive_skb.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 42 ++++++++++++++++++++++++------------------
 2 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7ed3a3aa6604..a803d792df1e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2633,6 +2633,7 @@ int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq);
+int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index d2c8a06b3a98..11d70e3afefa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1661,6 +1661,29 @@ bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
+int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
+			atomic_long_inc(&dev->rx_dropped);
+			kfree_skb(skb);
+			return NET_RX_DROP;
+		}
+	}
+
+	if (unlikely(!is_skb_forwardable(dev, skb))) {
+		atomic_long_inc(&dev->rx_dropped);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	skb_scrub_packet(skb, true);
+	skb->protocol = eth_type_trans(skb, dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__dev_forward_skb);
+
 /**
  * dev_forward_skb - loopback an skb to another netif
  *
@@ -1681,24 +1704,7 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
  */
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
-			atomic_long_inc(&dev->rx_dropped);
-			kfree_skb(skb);
-			return NET_RX_DROP;
-		}
-	}
-
-	if (unlikely(!is_skb_forwardable(dev, skb))) {
-		atomic_long_inc(&dev->rx_dropped);
-		kfree_skb(skb);
-		return NET_RX_DROP;
-	}
-
-	skb_scrub_packet(skb, true);
-	skb->protocol = eth_type_trans(skb, dev);
-
-	return netif_rx_internal(skb);
+	return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
-- 
cgit v1.2.3-71-gd317


From 599018a71013386119c057a64183e49240c8b4e6 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Thu, 17 Apr 2014 18:22:54 -0700
Subject: 6lowpan: add helper to get 6lowpan namespace

This will simplify the new reassembly backport
with no code changes being required.

CC: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: linux-zigbee-devel@lists.sourceforge.net
Cc: David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 15 +++++++++++++++
 net/ieee802154/reassembly.c | 46 +++++++++++++++++++++++++++++----------------
 2 files changed, 45 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 5f9eb260990f..bc4118ede5b5 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -373,6 +373,21 @@ static inline void rt_genid_bump_ipv6(struct net *net)
 }
 #endif
 
+#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
+static inline struct netns_ieee802154_lowpan *
+net_ieee802154_lowpan(struct net *net)
+{
+	return &net->ieee802154_lowpan;
+}
+#else
+static inline struct netns_ieee802154_lowpan *
+net_ieee802154_lowpan(struct net *net)
+{
+	return NULL;
+}
+#endif
+
+
 /* For callers who don't really care about whether it's IPv4 or IPv6 */
 static inline void rt_genid_bump_all(struct net *net)
 {
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c
index ef2d54372b13..132b65b6d957 100644
--- a/net/ieee802154/reassembly.c
+++ b/net/ieee802154/reassembly.c
@@ -120,6 +120,8 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
 	struct inet_frag_queue *q;
 	struct lowpan_create_arg arg;
 	unsigned int hash;
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
 
 	arg.tag = frag_info->d_tag;
 	arg.d_size = frag_info->d_size;
@@ -129,7 +131,7 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
 	read_lock(&lowpan_frags.lock);
 	hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst);
 
-	q = inet_frag_find(&net->ieee802154_lowpan.frags,
+	q = inet_frag_find(&ieee802154_lowpan->frags,
 			   &lowpan_frags, &arg, hash);
 	if (IS_ERR_OR_NULL(q)) {
 		inet_frag_maybe_warn_overflow(q, pr_fmt());
@@ -357,6 +359,8 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type)
 	struct net *net = dev_net(skb->dev);
 	struct lowpan_frag_info *frag_info = lowpan_cb(skb);
 	struct ieee802154_addr source, dest;
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
 	int err;
 
 	source = mac_cb(skb)->source;
@@ -366,10 +370,10 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type)
 	if (err < 0)
 		goto err;
 
-	if (frag_info->d_size > net->ieee802154_lowpan.max_dsize)
+	if (frag_info->d_size > ieee802154_lowpan->max_dsize)
 		goto err;
 
-	inet_frag_evictor(&net->ieee802154_lowpan.frags, &lowpan_frags, false);
+	inet_frag_evictor(&ieee802154_lowpan->frags, &lowpan_frags, false);
 
 	fq = fq_find(net, frag_info, &source, &dest);
 	if (fq != NULL) {
@@ -436,6 +440,8 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
 {
 	struct ctl_table *table;
 	struct ctl_table_header *hdr;
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
 
 	table = lowpan_frags_ns_ctl_table;
 	if (!net_eq(net, &init_net)) {
@@ -444,10 +450,10 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
 		if (table == NULL)
 			goto err_alloc;
 
-		table[0].data = &net->ieee802154_lowpan.frags.high_thresh;
-		table[1].data = &net->ieee802154_lowpan.frags.low_thresh;
-		table[2].data = &net->ieee802154_lowpan.frags.timeout;
-		table[3].data = &net->ieee802154_lowpan.max_dsize;
+		table[0].data = &ieee802154_lowpan->frags.high_thresh;
+		table[1].data = &ieee802154_lowpan->frags.low_thresh;
+		table[2].data = &ieee802154_lowpan->frags.timeout;
+		table[3].data = &ieee802154_lowpan->max_dsize;
 
 		/* Don't export sysctls to unprivileged users */
 		if (net->user_ns != &init_user_ns)
@@ -458,7 +464,7 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
 	if (hdr == NULL)
 		goto err_reg;
 
-	net->ieee802154_lowpan.sysctl.frags_hdr = hdr;
+	ieee802154_lowpan->sysctl.frags_hdr = hdr;
 	return 0;
 
 err_reg:
@@ -471,9 +477,11 @@ err_alloc:
 static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net)
 {
 	struct ctl_table *table;
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
 
-	table = net->ieee802154_lowpan.sysctl.frags_hdr->ctl_table_arg;
-	unregister_net_sysctl_table(net->ieee802154_lowpan.sysctl.frags_hdr);
+	table = ieee802154_lowpan->sysctl.frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(ieee802154_lowpan->sysctl.frags_hdr);
 	if (!net_eq(net, &init_net))
 		kfree(table);
 }
@@ -514,20 +522,26 @@ static inline void lowpan_frags_sysctl_unregister(void)
 
 static int __net_init lowpan_frags_init_net(struct net *net)
 {
-	net->ieee802154_lowpan.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
-	net->ieee802154_lowpan.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
-	net->ieee802154_lowpan.frags.timeout = IPV6_FRAG_TIMEOUT;
-	net->ieee802154_lowpan.max_dsize = 0xFFFF;
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
 
-	inet_frags_init_net(&net->ieee802154_lowpan.frags);
+	ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+	ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+	ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+	ieee802154_lowpan->max_dsize = 0xFFFF;
+
+	inet_frags_init_net(&ieee802154_lowpan->frags);
 
 	return lowpan_frags_ns_sysctl_register(net);
 }
 
 static void __net_exit lowpan_frags_exit_net(struct net *net)
 {
+	struct netns_ieee802154_lowpan *ieee802154_lowpan =
+		net_ieee802154_lowpan(net);
+
 	lowpan_frags_ns_sysctl_unregister(net);
-	inet_frags_exit_net(&net->ieee802154_lowpan.frags, &lowpan_frags);
+	inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
 }
 
 static struct pernet_operations lowpan_frags_ops = {
-- 
cgit v1.2.3-71-gd317


From 17d8ecb8ff791359c9d9a44bc766c3d4b87f37f7 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Thu, 17 Apr 2014 18:22:56 -0700
Subject: 6lowpan: include net/net_namespace.h on 6lowpan namepsace header

Don't rely on driver files or other headers having this file included.

CC: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: linux-zigbee-devel@lists.sourceforge.net
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/6lowpan.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h
index f7d372b7d4ff..79b530fb2c4d 100644
--- a/include/net/6lowpan.h
+++ b/include/net/6lowpan.h
@@ -54,6 +54,7 @@
 #define __6LOWPAN_H__
 
 #include <net/ipv6.h>
+#include <net/net_namespace.h>
 
 #define UIP_802154_SHORTADDR_LEN	2  /* compressed ipv6 address length */
 #define UIP_IPH_LEN			40 /* ipv6 fixed header size */
-- 
cgit v1.2.3-71-gd317


From 86fd14ad1e8c4b8f5e9a7a27b26bdade91dd4bd0 Mon Sep 17 00:00:00 2001
From: Weiping Pan <wpan@redhat.com>
Date: Fri, 18 Apr 2014 12:27:46 +0800
Subject: tcp: make tcp_cwnd_application_limited() static

Make tcp_cwnd_application_limited() static and move it from tcp_input.c to
tcp_output.c

Signed-off-by: Weiping Pan <wpan@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  1 -
 net/ipv4/tcp_input.c  | 22 ----------------------
 net/ipv4/tcp_output.c | 22 ++++++++++++++++++++++
 3 files changed, 22 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 87d877408188..163d2b467d78 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -558,7 +558,6 @@ void tcp_send_loss_probe(struct sock *sk);
 bool tcp_schedule_loss_probe(struct sock *sk);
 
 /* tcp_input.c */
-void tcp_cwnd_application_limited(struct sock *sk);
 void tcp_resume_early_retransmit(struct sock *sk);
 void tcp_rearm_rto(struct sock *sk);
 void tcp_reset(struct sock *sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d6b46eb2f94c..6efed134ab63 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4703,28 +4703,6 @@ static int tcp_prune_queue(struct sock *sk)
 	return -1;
 }
 
-/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
- * As additional protections, we do not touch cwnd in retransmission phases,
- * and if application hit its sndbuf limit recently.
- */
-void tcp_cwnd_application_limited(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
-	    sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
-		/* Limited by application or receiver window. */
-		u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
-		u32 win_used = max(tp->snd_cwnd_used, init_win);
-		if (win_used < tp->snd_cwnd) {
-			tp->snd_ssthresh = tcp_current_ssthresh(sk);
-			tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
-		}
-		tp->snd_cwnd_used = 0;
-	}
-	tp->snd_cwnd_stamp = tcp_time_stamp;
-}
-
 static bool tcp_should_expand_sndbuf(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 025e25093984..29dde97c3c41 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1387,6 +1387,28 @@ unsigned int tcp_current_mss(struct sock *sk)
 	return mss_now;
 }
 
+/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
+ * As additional protections, we do not touch cwnd in retransmission phases,
+ * and if application hit its sndbuf limit recently.
+ */
+static void tcp_cwnd_application_limited(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
+	    sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+		/* Limited by application or receiver window. */
+		u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
+		u32 win_used = max(tp->snd_cwnd_used, init_win);
+		if (win_used < tp->snd_cwnd) {
+			tp->snd_ssthresh = tcp_current_ssthresh(sk);
+			tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
+		}
+		tp->snd_cwnd_used = 0;
+	}
+	tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
 /* Congestion window validation. (RFC2861) */
 static void tcp_cwnd_validate(struct sock *sk)
 {
-- 
cgit v1.2.3-71-gd317


From 4104d326b670c2b66f575d2004daa28b2d1b4c8d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 10 Jan 2014 17:01:58 -0500
Subject: ftrace: Remove global function list and call function directly

Instead of having a list of global functions that are called,
as only one global function is allow to be enabled at a time, there's
no reason to have a list.

Instead, simply have all the users of the global ops, use the global ops
directly, instead of registering their own ftrace_ops. Just switch what
function is used before enabling the function tracer.

This removes a lot of code as well as the complexity involved with it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h            |  20 ++---
 kernel/trace/ftrace.c             | 152 +++++++++++---------------------------
 kernel/trace/trace.c              |   2 +
 kernel/trace/trace.h              |  19 +++--
 kernel/trace/trace_functions.c    |  55 +++++---------
 kernel/trace/trace_irqsoff.c      |  33 ++++-----
 kernel/trace/trace_sched_wakeup.c |  40 +++++-----
 kernel/trace/trace_selftest.c     |  33 +++++----
 8 files changed, 133 insertions(+), 221 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9212b017bc72..f0ff2c2453e7 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -62,9 +62,6 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  * set in the flags member.
  *
  * ENABLED - set/unset when ftrace_ops is registered/unregistered
- * GLOBAL  - set manualy by ftrace_ops user to denote the ftrace_ops
- *           is part of the global tracers sharing the same filter
- *           via set_ftrace_* debugfs files.
  * DYNAMIC - set when ftrace_ops is registered to denote dynamically
  *           allocated ftrace_ops which need special care
  * CONTROL - set manualy by ftrace_ops user to denote the ftrace_ops
@@ -96,15 +93,14 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  */
 enum {
 	FTRACE_OPS_FL_ENABLED			= 1 << 0,
-	FTRACE_OPS_FL_GLOBAL			= 1 << 1,
-	FTRACE_OPS_FL_DYNAMIC			= 1 << 2,
-	FTRACE_OPS_FL_CONTROL			= 1 << 3,
-	FTRACE_OPS_FL_SAVE_REGS			= 1 << 4,
-	FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED	= 1 << 5,
-	FTRACE_OPS_FL_RECURSION_SAFE		= 1 << 6,
-	FTRACE_OPS_FL_STUB			= 1 << 7,
-	FTRACE_OPS_FL_INITIALIZED		= 1 << 8,
-	FTRACE_OPS_FL_DELETED			= 1 << 9,
+	FTRACE_OPS_FL_DYNAMIC			= 1 << 1,
+	FTRACE_OPS_FL_CONTROL			= 1 << 2,
+	FTRACE_OPS_FL_SAVE_REGS			= 1 << 3,
+	FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED	= 1 << 4,
+	FTRACE_OPS_FL_RECURSION_SAFE		= 1 << 5,
+	FTRACE_OPS_FL_STUB			= 1 << 6,
+	FTRACE_OPS_FL_INITIALIZED		= 1 << 7,
+	FTRACE_OPS_FL_DELETED			= 1 << 8,
 };
 
 /*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1fd4b9479210..8f61ef70a297 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -62,7 +62,7 @@
 #define FTRACE_HASH_DEFAULT_BITS 10
 #define FTRACE_HASH_MAX_BITS 12
 
-#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
+#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL)
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define INIT_REGEX_LOCK(opsname)	\
@@ -103,7 +103,6 @@ static int ftrace_disabled __read_mostly;
 
 static DEFINE_MUTEX(ftrace_lock);
 
-static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
@@ -171,23 +170,6 @@ int ftrace_nr_registered_ops(void)
 	return cnt;
 }
 
-static void
-ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
-			struct ftrace_ops *op, struct pt_regs *regs)
-{
-	int bit;
-
-	bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
-	if (bit < 0)
-		return;
-
-	do_for_each_ftrace_op(op, ftrace_global_list) {
-		op->func(ip, parent_ip, op, regs);
-	} while_for_each_ftrace_op(op);
-
-	trace_clear_recursion(bit);
-}
-
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
 			    struct ftrace_ops *op, struct pt_regs *regs)
 {
@@ -237,43 +219,6 @@ static int control_ops_alloc(struct ftrace_ops *ops)
 	return 0;
 }
 
-static void update_global_ops(void)
-{
-	ftrace_func_t func = ftrace_global_list_func;
-	void *private = NULL;
-
-	/* The list has its own recursion protection. */
-	global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
-
-	/*
-	 * If there's only one function registered, then call that
-	 * function directly. Otherwise, we need to iterate over the
-	 * registered callers.
-	 */
-	if (ftrace_global_list == &ftrace_list_end ||
-	    ftrace_global_list->next == &ftrace_list_end) {
-		func = ftrace_global_list->func;
-		private = ftrace_global_list->private;
-		/*
-		 * As we are calling the function directly.
-		 * If it does not have recursion protection,
-		 * the function_trace_op needs to be updated
-		 * accordingly.
-		 */
-		if (!(ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE))
-			global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
-	}
-
-	/* If we filter on pids, update to use the pid function */
-	if (!list_empty(&ftrace_pids)) {
-		set_ftrace_pid_function(func);
-		func = ftrace_pid_func;
-	}
-
-	global_ops.func = func;
-	global_ops.private = private;
-}
-
 static void ftrace_sync(struct work_struct *work)
 {
 	/*
@@ -301,8 +246,6 @@ static void update_ftrace_function(void)
 {
 	ftrace_func_t func;
 
-	update_global_ops();
-
 	/*
 	 * If we are at the end of the list and this ops is
 	 * recursion safe and not dynamic and the arch supports passing ops,
@@ -314,10 +257,7 @@ static void update_ftrace_function(void)
 	     (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) &&
 	     !FTRACE_FORCE_LIST_FUNC)) {
 		/* Set the ftrace_ops that the arch callback uses */
-		if (ftrace_ops_list == &global_ops)
-			set_function_trace_op = ftrace_global_list;
-		else
-			set_function_trace_op = ftrace_ops_list;
+		set_function_trace_op = ftrace_ops_list;
 		func = ftrace_ops_list->func;
 	} else {
 		/* Just use the default ftrace_ops */
@@ -434,16 +374,9 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if (ops->flags & FTRACE_OPS_FL_DELETED)
 		return -EINVAL;
 
-	if (FTRACE_WARN_ON(ops == &global_ops))
-		return -EINVAL;
-
 	if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
 		return -EBUSY;
 
-	/* We don't support both control and global flags set. */
-	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
-		return -EINVAL;
-
 #ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 	/*
 	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
@@ -461,10 +394,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if (!core_kernel_data((unsigned long)ops))
 		ops->flags |= FTRACE_OPS_FL_DYNAMIC;
 
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
-		ops->flags |= FTRACE_OPS_FL_ENABLED;
-	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+	if (ops->flags & FTRACE_OPS_FL_CONTROL) {
 		if (control_ops_alloc(ops))
 			return -ENOMEM;
 		add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
@@ -484,15 +414,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 	if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
 		return -EBUSY;
 
-	if (FTRACE_WARN_ON(ops == &global_ops))
-		return -EINVAL;
-
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		ret = remove_ftrace_list_ops(&ftrace_global_list,
-					     &global_ops, ops);
-		if (!ret)
-			ops->flags &= ~FTRACE_OPS_FL_ENABLED;
-	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+	if (ops->flags & FTRACE_OPS_FL_CONTROL) {
 		ret = remove_ftrace_list_ops(&ftrace_control_list,
 					     &control_ops, ops);
 	} else
@@ -2128,15 +2050,6 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
 	ftrace_start_up++;
 	command |= FTRACE_UPDATE_CALLS;
 
-	/* ops marked global share the filter hashes */
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		ops = &global_ops;
-		/* Don't update hash if global is already set */
-		if (global_start_up)
-			hash_enable = false;
-		global_start_up++;
-	}
-
 	ops->flags |= FTRACE_OPS_FL_ENABLED;
 	if (hash_enable)
 		ftrace_hash_rec_enable(ops, 1);
@@ -2166,21 +2079,10 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 	 */
 	WARN_ON_ONCE(ftrace_start_up < 0);
 
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		ops = &global_ops;
-		global_start_up--;
-		WARN_ON_ONCE(global_start_up < 0);
-		/* Don't update hash if global still has users */
-		if (global_start_up) {
-			WARN_ON_ONCE(!ftrace_start_up);
-			hash_disable = false;
-		}
-	}
-
 	if (hash_disable)
 		ftrace_hash_rec_disable(ops, 1);
 
-	if (ops != &global_ops || !global_start_up)
+	if (!global_start_up)
 		ops->flags &= ~FTRACE_OPS_FL_ENABLED;
 
 	command |= FTRACE_UPDATE_CALLS;
@@ -3524,10 +3426,6 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
 	struct ftrace_hash *hash;
 	int ret;
 
-	/* All global ops uses the global ops filters */
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL)
-		ops = &global_ops;
-
 	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
@@ -4462,6 +4360,34 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
+__init void ftrace_init_global_array_ops(struct trace_array *tr)
+{
+	tr->ops = &global_ops;
+	tr->ops->private = tr;
+}
+
+void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
+{
+	/* If we filter on pids, update to use the pid function */
+	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
+		if (WARN_ON(tr->ops->func != ftrace_stub))
+			printk("ftrace ops had %pS for function\n",
+			       tr->ops->func);
+		/* Only the top level instance does pid tracing */
+		if (!list_empty(&ftrace_pids)) {
+			set_ftrace_pid_function(func);
+			func = ftrace_pid_func;
+		}
+	}
+	tr->ops->func = func;
+	tr->ops->private = tr;
+}
+
+void ftrace_reset_array_ops(struct trace_array *tr)
+{
+	tr->ops->func = ftrace_stub;
+}
+
 static void
 ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 			struct ftrace_ops *op, struct pt_regs *regs)
@@ -4520,9 +4446,16 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 	 */
 	preempt_disable_notrace();
 	do_for_each_ftrace_op(op, ftrace_ops_list) {
-		if (ftrace_ops_test(op, ip, regs))
+		if (ftrace_ops_test(op, ip, regs)) {
+			if (WARN_ON(!op->func)) {
+				function_trace_stop = 1;
+				printk("op=%p %pS\n", op, op);
+				goto out;
+			}
 			op->func(ip, parent_ip, op, regs);
+		}
 	} while_for_each_ftrace_op(op);
+out:
 	preempt_enable_notrace();
 	trace_clear_recursion(bit);
 }
@@ -5076,8 +5009,7 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
 /* Just a place holder for function graph */
 static struct ftrace_ops fgraph_ops __read_mostly = {
 	.func		= ftrace_stub,
-	.flags		= FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_GLOBAL |
-				FTRACE_OPS_FL_RECURSION_SAFE,
+	.flags		= FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_RECURSION_SAFE,
 };
 
 static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 737b0efa1a62..fdd33aacdf05 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6629,6 +6629,8 @@ __init static int tracer_alloc_buffers(void)
 	 */
 	global_trace.current_trace = &nop_trace;
 
+	ftrace_init_global_array_ops(&global_trace);
+
 	register_tracer(&nop_trace);
 
 	/* All seems OK, enable tracing */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2e29d7ba5a52..df5256be64cd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -416,13 +416,7 @@ enum {
 	TRACE_FTRACE_IRQ_BIT,
 	TRACE_FTRACE_SIRQ_BIT,
 
-	/* GLOBAL_BITs must be greater than FTRACE_BITs */
-	TRACE_GLOBAL_BIT,
-	TRACE_GLOBAL_NMI_BIT,
-	TRACE_GLOBAL_IRQ_BIT,
-	TRACE_GLOBAL_SIRQ_BIT,
-
-	/* INTERNAL_BITs must be greater than GLOBAL_BITs */
+	/* INTERNAL_BITs must be greater than FTRACE_BITs */
 	TRACE_INTERNAL_BIT,
 	TRACE_INTERNAL_NMI_BIT,
 	TRACE_INTERNAL_IRQ_BIT,
@@ -449,9 +443,6 @@ enum {
 #define TRACE_FTRACE_START	TRACE_FTRACE_BIT
 #define TRACE_FTRACE_MAX	((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
 
-#define TRACE_GLOBAL_START	TRACE_GLOBAL_BIT
-#define TRACE_GLOBAL_MAX	((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
-
 #define TRACE_LIST_START	TRACE_INTERNAL_BIT
 #define TRACE_LIST_MAX		((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
 
@@ -823,6 +814,9 @@ extern int ftrace_is_dead(void);
 int ftrace_create_function_files(struct trace_array *tr,
 				 struct dentry *parent);
 void ftrace_destroy_function_files(struct trace_array *tr);
+void ftrace_init_global_array_ops(struct trace_array *tr);
+void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
+void ftrace_reset_array_ops(struct trace_array *tr);
 #else
 static inline int ftrace_trace_task(struct task_struct *task)
 {
@@ -836,6 +830,11 @@ ftrace_create_function_files(struct trace_array *tr,
 	return 0;
 }
 static inline void ftrace_destroy_function_files(struct trace_array *tr) { }
+static inline __init void
+ftrace_init_global_array_ops(struct trace_array *tr) { }
+static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
+/* ftace_func_t type is not defined, use macro instead of static inline */
+#define ftrace_init_array_ops(tr, func) do { } while (0)
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index ffd56351b521..2d9482b8f26a 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -26,8 +26,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
 static void
 function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 			  struct ftrace_ops *op, struct pt_regs *pt_regs);
-static struct ftrace_ops trace_ops;
-static struct ftrace_ops trace_stack_ops;
 static struct tracer_flags func_flags;
 
 /* Our option */
@@ -83,28 +81,24 @@ void ftrace_destroy_function_files(struct trace_array *tr)
 
 static int function_trace_init(struct trace_array *tr)
 {
-	struct ftrace_ops *ops;
-
-	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
-		/* There's only one global tr */
-		if (!trace_ops.private) {
-			trace_ops.private = tr;
-			trace_stack_ops.private = tr;
-		}
+	ftrace_func_t func;
 
-		if (func_flags.val & TRACE_FUNC_OPT_STACK)
-			ops = &trace_stack_ops;
-		else
-			ops = &trace_ops;
-		tr->ops = ops;
-	} else if (!tr->ops) {
-		/*
-		 * Instance trace_arrays get their ops allocated
-		 * at instance creation. Unless it failed
-		 * the allocation.
-		 */
+	/*
+	 * Instance trace_arrays get their ops allocated
+	 * at instance creation. Unless it failed
+	 * the allocation.
+	 */
+	if (!tr->ops)
 		return -ENOMEM;
-	}
+
+	/* Currently only the global instance can do stack tracing */
+	if (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
+	    func_flags.val & TRACE_FUNC_OPT_STACK)
+		func = function_stack_trace_call;
+	else
+		func = function_trace_call;
+
+	ftrace_init_array_ops(tr, func);
 
 	tr->trace_buffer.cpu = get_cpu();
 	put_cpu();
@@ -118,6 +112,7 @@ static void function_trace_reset(struct trace_array *tr)
 {
 	tracing_stop_function_trace(tr);
 	tracing_stop_cmdline_record();
+	ftrace_reset_array_ops(tr);
 }
 
 static void function_trace_start(struct trace_array *tr)
@@ -199,18 +194,6 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 	local_irq_restore(flags);
 }
 
-static struct ftrace_ops trace_ops __read_mostly =
-{
-	.func = function_trace_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
-static struct ftrace_ops trace_stack_ops __read_mostly =
-{
-	.func = function_stack_trace_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
 static struct tracer_opt func_opts[] = {
 #ifdef CONFIG_STACKTRACE
 	{ TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
@@ -248,10 +231,10 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 		unregister_ftrace_function(tr->ops);
 
 		if (set) {
-			tr->ops = &trace_stack_ops;
+			tr->ops->func = function_stack_trace_call;
 			register_ftrace_function(tr->ops);
 		} else {
-			tr->ops = &trace_ops;
+			tr->ops->func = function_trace_call;
 			register_ftrace_function(tr->ops);
 		}
 
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 8ff02cbb892f..b5cb047df3e9 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,12 +151,6 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
 
 	atomic_dec(&data->disabled);
 }
-
-static struct ftrace_ops trace_ops __read_mostly =
-{
-	.func = irqsoff_tracer_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -531,7 +525,7 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
 }
 #endif /* CONFIG_PREEMPT_TRACER */
 
-static int register_irqsoff_function(int graph, int set)
+static int register_irqsoff_function(struct trace_array *tr, int graph, int set)
 {
 	int ret;
 
@@ -543,7 +537,7 @@ static int register_irqsoff_function(int graph, int set)
 		ret = register_ftrace_graph(&irqsoff_graph_return,
 					    &irqsoff_graph_entry);
 	else
-		ret = register_ftrace_function(&trace_ops);
+		ret = register_ftrace_function(tr->ops);
 
 	if (!ret)
 		function_enabled = true;
@@ -551,7 +545,7 @@ static int register_irqsoff_function(int graph, int set)
 	return ret;
 }
 
-static void unregister_irqsoff_function(int graph)
+static void unregister_irqsoff_function(struct trace_array *tr, int graph)
 {
 	if (!function_enabled)
 		return;
@@ -559,17 +553,17 @@ static void unregister_irqsoff_function(int graph)
 	if (graph)
 		unregister_ftrace_graph();
 	else
-		unregister_ftrace_function(&trace_ops);
+		unregister_ftrace_function(tr->ops);
 
 	function_enabled = false;
 }
 
-static void irqsoff_function_set(int set)
+static void irqsoff_function_set(struct trace_array *tr, int set)
 {
 	if (set)
-		register_irqsoff_function(is_graph(), 1);
+		register_irqsoff_function(tr, is_graph(), 1);
 	else
-		unregister_irqsoff_function(is_graph());
+		unregister_irqsoff_function(tr, is_graph());
 }
 
 static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
@@ -577,7 +571,7 @@ static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
 	struct tracer *tracer = tr->current_trace;
 
 	if (mask & TRACE_ITER_FUNCTION)
-		irqsoff_function_set(set);
+		irqsoff_function_set(tr, set);
 
 	return trace_keep_overwrite(tracer, mask, set);
 }
@@ -586,7 +580,7 @@ static int start_irqsoff_tracer(struct trace_array *tr, int graph)
 {
 	int ret;
 
-	ret = register_irqsoff_function(graph, 0);
+	ret = register_irqsoff_function(tr, graph, 0);
 
 	if (!ret && tracing_is_enabled())
 		tracer_enabled = 1;
@@ -600,7 +594,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
 {
 	tracer_enabled = 0;
 
-	unregister_irqsoff_function(graph);
+	unregister_irqsoff_function(tr, graph);
 }
 
 static void __irqsoff_tracer_init(struct trace_array *tr)
@@ -617,7 +611,11 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
 	smp_wmb();
 	tracing_reset_online_cpus(&tr->trace_buffer);
 
-	if (start_irqsoff_tracer(tr, is_graph()))
+	ftrace_init_array_ops(tr, irqsoff_tracer_call);
+
+	/* Only toplevel instance supports graph tracing */
+	if (start_irqsoff_tracer(tr, (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
+				      is_graph())))
 		printk(KERN_ERR "failed to start irqsoff tracer\n");
 }
 
@@ -630,6 +628,7 @@ static void irqsoff_tracer_reset(struct trace_array *tr)
 
 	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
 	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
+	ftrace_reset_array_ops(tr);
 }
 
 static void irqsoff_tracer_start(struct trace_array *tr)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e14da5e97a69..4dd986defa60 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -130,15 +130,9 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
 	atomic_dec(&data->disabled);
 	preempt_enable_notrace();
 }
-
-static struct ftrace_ops trace_ops __read_mostly =
-{
-	.func = wakeup_tracer_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
 #endif /* CONFIG_FUNCTION_TRACER */
 
-static int register_wakeup_function(int graph, int set)
+static int register_wakeup_function(struct trace_array *tr, int graph, int set)
 {
 	int ret;
 
@@ -150,7 +144,7 @@ static int register_wakeup_function(int graph, int set)
 		ret = register_ftrace_graph(&wakeup_graph_return,
 					    &wakeup_graph_entry);
 	else
-		ret = register_ftrace_function(&trace_ops);
+		ret = register_ftrace_function(tr->ops);
 
 	if (!ret)
 		function_enabled = true;
@@ -158,7 +152,7 @@ static int register_wakeup_function(int graph, int set)
 	return ret;
 }
 
-static void unregister_wakeup_function(int graph)
+static void unregister_wakeup_function(struct trace_array *tr, int graph)
 {
 	if (!function_enabled)
 		return;
@@ -166,17 +160,17 @@ static void unregister_wakeup_function(int graph)
 	if (graph)
 		unregister_ftrace_graph();
 	else
-		unregister_ftrace_function(&trace_ops);
+		unregister_ftrace_function(tr->ops);
 
 	function_enabled = false;
 }
 
-static void wakeup_function_set(int set)
+static void wakeup_function_set(struct trace_array *tr, int set)
 {
 	if (set)
-		register_wakeup_function(is_graph(), 1);
+		register_wakeup_function(tr, is_graph(), 1);
 	else
-		unregister_wakeup_function(is_graph());
+		unregister_wakeup_function(tr, is_graph());
 }
 
 static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
@@ -184,16 +178,16 @@ static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
 	struct tracer *tracer = tr->current_trace;
 
 	if (mask & TRACE_ITER_FUNCTION)
-		wakeup_function_set(set);
+		wakeup_function_set(tr, set);
 
 	return trace_keep_overwrite(tracer, mask, set);
 }
 
-static int start_func_tracer(int graph)
+static int start_func_tracer(struct trace_array *tr, int graph)
 {
 	int ret;
 
-	ret = register_wakeup_function(graph, 0);
+	ret = register_wakeup_function(tr, graph, 0);
 
 	if (!ret && tracing_is_enabled())
 		tracer_enabled = 1;
@@ -203,11 +197,11 @@ static int start_func_tracer(int graph)
 	return ret;
 }
 
-static void stop_func_tracer(int graph)
+static void stop_func_tracer(struct trace_array *tr, int graph)
 {
 	tracer_enabled = 0;
 
-	unregister_wakeup_function(graph);
+	unregister_wakeup_function(tr, graph);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -221,12 +215,12 @@ wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 	if (!(is_graph() ^ set))
 		return 0;
 
-	stop_func_tracer(!set);
+	stop_func_tracer(tr, !set);
 
 	wakeup_reset(wakeup_trace);
 	tracing_max_latency = 0;
 
-	return start_func_tracer(set);
+	return start_func_tracer(tr, set);
 }
 
 static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
@@ -587,7 +581,7 @@ static void start_wakeup_tracer(struct trace_array *tr)
 	 */
 	smp_wmb();
 
-	if (start_func_tracer(is_graph()))
+	if (start_func_tracer(tr, is_graph()))
 		printk(KERN_ERR "failed to start wakeup tracer\n");
 
 	return;
@@ -600,7 +594,7 @@ fail_deprobe:
 static void stop_wakeup_tracer(struct trace_array *tr)
 {
 	tracer_enabled = 0;
-	stop_func_tracer(is_graph());
+	stop_func_tracer(tr, is_graph());
 	unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
 	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
 	unregister_trace_sched_wakeup(probe_wakeup, NULL);
@@ -617,6 +611,7 @@ static int __wakeup_tracer_init(struct trace_array *tr)
 
 	tracing_max_latency = 0;
 	wakeup_trace = tr;
+	ftrace_init_array_ops(tr, wakeup_tracer_call);
 	start_wakeup_tracer(tr);
 	return 0;
 }
@@ -653,6 +648,7 @@ static void wakeup_tracer_reset(struct trace_array *tr)
 
 	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
 	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
+	ftrace_reset_array_ops(tr);
 }
 
 static void wakeup_tracer_start(struct trace_array *tr)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index e98fca60974f..519d04affe38 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -161,11 +161,6 @@ static struct ftrace_ops test_probe3 = {
 	.flags			= FTRACE_OPS_FL_RECURSION_SAFE,
 };
 
-static struct ftrace_ops test_global = {
-	.func		= trace_selftest_test_global_func,
-	.flags		= FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
 static void print_counts(void)
 {
 	printk("(%d %d %d %d %d) ",
@@ -185,7 +180,7 @@ static void reset_counts(void)
 	trace_selftest_test_dyn_cnt = 0;
 }
 
-static int trace_selftest_ops(int cnt)
+static int trace_selftest_ops(struct trace_array *tr, int cnt)
 {
 	int save_ftrace_enabled = ftrace_enabled;
 	struct ftrace_ops *dyn_ops;
@@ -220,7 +215,11 @@ static int trace_selftest_ops(int cnt)
 	register_ftrace_function(&test_probe1);
 	register_ftrace_function(&test_probe2);
 	register_ftrace_function(&test_probe3);
-	register_ftrace_function(&test_global);
+	/* First time we are running with main function */
+	if (cnt > 1) {
+		ftrace_init_array_ops(tr, trace_selftest_test_global_func);
+		register_ftrace_function(tr->ops);
+	}
 
 	DYN_FTRACE_TEST_NAME();
 
@@ -232,8 +231,10 @@ static int trace_selftest_ops(int cnt)
 		goto out;
 	if (trace_selftest_test_probe3_cnt != 1)
 		goto out;
-	if (trace_selftest_test_global_cnt == 0)
-		goto out;
+	if (cnt > 1) {
+		if (trace_selftest_test_global_cnt == 0)
+			goto out;
+	}
 
 	DYN_FTRACE_TEST_NAME2();
 
@@ -269,8 +270,10 @@ static int trace_selftest_ops(int cnt)
 		goto out_free;
 	if (trace_selftest_test_probe3_cnt != 3)
 		goto out_free;
-	if (trace_selftest_test_global_cnt == 0)
-		goto out;
+	if (cnt > 1) {
+		if (trace_selftest_test_global_cnt == 0)
+			goto out;
+	}
 	if (trace_selftest_test_dyn_cnt == 0)
 		goto out_free;
 
@@ -295,7 +298,9 @@ static int trace_selftest_ops(int cnt)
 	unregister_ftrace_function(&test_probe1);
 	unregister_ftrace_function(&test_probe2);
 	unregister_ftrace_function(&test_probe3);
-	unregister_ftrace_function(&test_global);
+	if (cnt > 1)
+		unregister_ftrace_function(tr->ops);
+	ftrace_reset_array_ops(tr);
 
 	/* Make sure everything is off */
 	reset_counts();
@@ -388,7 +393,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 	}
 
 	/* Test the ops with global tracing running */
-	ret = trace_selftest_ops(1);
+	ret = trace_selftest_ops(tr, 1);
 	trace->reset(tr);
 
  out:
@@ -399,7 +404,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 
 	/* Test the ops with global tracing off */
 	if (!ret)
-		ret = trace_selftest_ops(2);
+		ret = trace_selftest_ops(tr, 2);
 
 	return ret;
 }
-- 
cgit v1.2.3-71-gd317


From 68957303f44a501af5cf37913208a2acaa6bcdf1 Mon Sep 17 00:00:00 2001
From: Christophe Ricard <christophe.ricard@gmail.com>
Date: Tue, 25 Mar 2014 06:51:47 +0100
Subject: NFC: ST21NFCA: Add driver for STMicroelectronics ST21NFCA NFC Chip

Add driver for STMicroelectronics ST21NFCA NFC controller.
ST21NFCA is using HCI protocol, shdlc as LLC layer & I2C as
communication protocol.

Adding support for Reader/Writer mode with Tag type 1/2/3/4 A & B.
It is using proprietary gate 15 for ISO14443-3 such as type 1 &
type 2 tags. It is using proprietary gate 14 for type F tags.
ST21NFCA_DEVICE_MGNT_GATE gives access to proprietary CLF configuration.
Standard gate for ISO14443-4 A (13) & B (11) are also used.

ST21NFCA specific mecanism:

One particular point to notice for the data handling is that frame
does not contain any length value. Therefore the i2c part of this driver
is managing the reception with a read length sequence until the end of
frame (0x7e) is reached.

In order to avoid conflict between sof & eof a mecanism
called byte stuffing concist of an escape byte (0x7d) insertion before
special byte (0x7e, 0x7d). The special byte is then xored with 0x20.

In this driver, When data are available in the CLF, the interrupt
gpio is driven to active state and triggered an interrupt.
Once the i2c_master_recv start, the interrupt gpio is driven to idle
state until its complete. If the frame is incomplete or data are still
available, interrupts will be triggered again.

Signed-off-by: Christophe Ricard <christophe-h.ricard@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/nfc/Kconfig                    |   1 +
 drivers/nfc/Makefile                   |   1 +
 drivers/nfc/st21nfca/Kconfig           |  23 ++
 drivers/nfc/st21nfca/Makefile          |   8 +
 drivers/nfc/st21nfca/i2c.c             | 595 +++++++++++++++++++++++++++++++++
 drivers/nfc/st21nfca/st21nfca.c        | 506 ++++++++++++++++++++++++++++
 drivers/nfc/st21nfca/st21nfca.h        |  87 +++++
 include/linux/platform_data/st21nfca.h |  32 ++
 8 files changed, 1253 insertions(+)
 create mode 100644 drivers/nfc/st21nfca/Kconfig
 create mode 100644 drivers/nfc/st21nfca/Makefile
 create mode 100644 drivers/nfc/st21nfca/i2c.c
 create mode 100644 drivers/nfc/st21nfca/st21nfca.c
 create mode 100644 drivers/nfc/st21nfca/st21nfca.h
 create mode 100644 include/linux/platform_data/st21nfca.h

(limited to 'include')

diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig
index 65d4ca19d132..26c66a126551 100644
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig
@@ -71,5 +71,6 @@ config NFC_PORT100
 source "drivers/nfc/pn544/Kconfig"
 source "drivers/nfc/microread/Kconfig"
 source "drivers/nfc/nfcmrvl/Kconfig"
+source "drivers/nfc/st21nfca/Kconfig"
 
 endmenu
diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile
index ae42a3fa60c9..23225b0287fd 100644
--- a/drivers/nfc/Makefile
+++ b/drivers/nfc/Makefile
@@ -11,5 +11,6 @@ obj-$(CONFIG_NFC_SIM)		+= nfcsim.o
 obj-$(CONFIG_NFC_PORT100)	+= port100.o
 obj-$(CONFIG_NFC_MRVL)		+= nfcmrvl/
 obj-$(CONFIG_NFC_TRF7970A)	+= trf7970a.o
+obj-$(CONFIG_NFC_ST21NFCA)  += st21nfca/
 
 ccflags-$(CONFIG_NFC_DEBUG) := -DDEBUG
diff --git a/drivers/nfc/st21nfca/Kconfig b/drivers/nfc/st21nfca/Kconfig
new file mode 100644
index 000000000000..ee459f066ade
--- /dev/null
+++ b/drivers/nfc/st21nfca/Kconfig
@@ -0,0 +1,23 @@
+config NFC_ST21NFCA
+	tristate "STMicroelectronics ST21NFCA NFC driver"
+	depends on NFC_HCI
+	select CRC_CCITT
+	default n
+	---help---
+	  STMicroelectronics ST21NFCA core driver. It implements the chipset
+	  HCI logic and hooks into the NFC kernel APIs. Physical layers will
+	  register against it.
+
+	  To compile this driver as a module, choose m here. The module will
+	  be called st21nfca.
+	  Say N if unsure.
+
+config NFC_ST21NFCA_I2C
+	tristate "NFC ST21NFCA i2c support"
+	depends on NFC_ST21NFCA && I2C && NFC_SHDLC
+	---help---
+	  This module adds support for the STMicroelectronics st21nfca i2c interface.
+	  Select this if your platform is using the i2c bus.
+
+	  If you choose to build a module, it'll be called st21nfca_i2c.
+	  Say N if unsure.
diff --git a/drivers/nfc/st21nfca/Makefile b/drivers/nfc/st21nfca/Makefile
new file mode 100644
index 000000000000..038ed093a119
--- /dev/null
+++ b/drivers/nfc/st21nfca/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for ST21NFCA HCI based NFC driver
+#
+
+st21nfca_i2c-objs  = i2c.o
+
+obj-$(CONFIG_NFC_ST21NFCA)     += st21nfca.o
+obj-$(CONFIG_NFC_ST21NFCA_I2C) += st21nfca_i2c.o
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
new file mode 100644
index 000000000000..3b0fd0f76d1c
--- /dev/null
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -0,0 +1,595 @@
+/*
+ * I2C Link Layer for ST21NFCA HCI based Driver
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/crc-ccitt.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/nfc.h>
+#include <linux/firmware.h>
+#include <linux/unaligned/access_ok.h>
+#include <linux/platform_data/st21nfca.h>
+
+#include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
+#include <net/nfc/nfc.h>
+
+#include "st21nfca.h"
+
+/*
+ * Every frame starts with ST21NFCA_SOF_EOF and ends with ST21NFCA_SOF_EOF.
+ * Because ST21NFCA_SOF_EOF is a possible data value, there is a mecanism
+ * called byte stuffing has been introduced.
+ *
+ * if byte == ST21NFCA_SOF_EOF or ST21NFCA_ESCAPE_BYTE_STUFFING
+ * - insert ST21NFCA_ESCAPE_BYTE_STUFFING (escape byte)
+ * - xor byte with ST21NFCA_BYTE_STUFFING_MASK
+ */
+#define ST21NFCA_SOF_EOF		0x7e
+#define ST21NFCA_BYTE_STUFFING_MASK	0x20
+#define ST21NFCA_ESCAPE_BYTE_STUFFING	0x7d
+
+/* SOF + 00 fill size */
+#define ST21NFCA_FRAME_HEADROOM			2
+
+/* 4 bytes crc (worst case byte stuffing) + EOF */
+#define ST21NFCA_FRAME_TAILROOM 5
+
+#define ST21NFCA_HCI_I2C_DRIVER_NAME "st21nfca_hci_i2c"
+
+static struct i2c_device_id st21nfca_hci_i2c_id_table[] = {
+	{ST21NFCA_HCI_DRIVER_NAME, 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, st21nfca_hci_i2c_id_table);
+
+struct st21nfca_i2c_phy {
+	struct i2c_client *i2c_dev;
+	struct nfc_hci_dev *hdev;
+
+	unsigned int gpio_ena;
+	unsigned int gpio_irq;
+	unsigned int irq_polarity;
+
+	struct sk_buff *pending_skb;
+	int current_read_len;
+	/*
+	 * crc might have fail because i2c macro
+	 * is disable due to other interface activity
+	 */
+	int crc_trials;
+
+	int powered;
+	int run_mode;
+
+	/*
+	 * < 0 if hardware error occured (e.g. i2c err)
+	 * and prevents normal operation.
+	 */
+	int hard_fault;
+};
+static u8 len_seq[] = { 13, 24, 15, 29 };
+static u16 wait_tab[] = { 2, 3, 5, 15, 20, 40};
+
+#define I2C_DUMP_SKB(info, skb)					\
+do {								\
+	pr_debug("%s:\n", info);				\
+	print_hex_dump(KERN_DEBUG, "i2c: ", DUMP_PREFIX_OFFSET,	\
+		       16, 1, (skb)->data, (skb)->len, 0);	\
+} while (0)
+
+static void st21nfca_hci_platform_init(struct st21nfca_i2c_phy *phy)
+{
+	u16 wait_tab[] = { 50, 300, 1000 };
+	char reboot_cmd[] = { 0x7E, 0x66, 0x48, 0xF6, 0x7E };
+	u8 tmp[ST21NFCA_HCI_LLC_MAX_SIZE];
+	int i, r = -1;
+
+	for (i = 0; i < ARRAY_SIZE(wait_tab) && r < 0; i++)
+		r = i2c_master_recv(phy->i2c_dev, tmp,
+				    ST21NFCA_HCI_LLC_MAX_SIZE);
+
+	r = -1;
+	for (i = 0; i < ARRAY_SIZE(wait_tab) && r < 0; i++)
+		r = i2c_master_send(phy->i2c_dev, reboot_cmd,
+				    sizeof(reboot_cmd));
+	usleep_range(1000, 1500);
+
+}
+
+static int st21nfca_hci_i2c_enable(void *phy_id)
+{
+	struct st21nfca_i2c_phy *phy = phy_id;
+
+	gpio_set_value(phy->gpio_ena, 1);
+	phy->powered = 1;
+	phy->run_mode = ST21NFCA_HCI_MODE;
+
+	usleep_range(10000, 15000);
+
+	return 0;
+}
+
+static void st21nfca_hci_i2c_disable(void *phy_id)
+{
+	struct st21nfca_i2c_phy *phy = phy_id;
+
+	pr_info("\n");
+	gpio_set_value(phy->gpio_ena, 0);
+
+	phy->powered = 0;
+}
+
+static int st21nfca_hci_add_len_crc(struct sk_buff *skb)
+{
+	int ret = 2;
+	u16 crc;
+	u8 tmp;
+
+	*skb_push(skb, 1) = 0;
+
+	crc = crc_ccitt(0xffff, skb->data, skb->len);
+	crc = ~crc;
+
+	tmp = crc & 0x00ff;
+	*skb_put(skb, 1) = tmp;
+
+	tmp = (crc >> 8) & 0x00ff;
+	*skb_put(skb, 1) = tmp;
+
+	return ret;
+}
+
+static void st21nfca_hci_remove_len_crc(struct sk_buff *skb, int crc_len)
+{
+	skb_pull(skb, ST21NFCA_FRAME_HEADROOM);
+	skb_trim(skb, crc_len);
+}
+
+/*
+ * Writing a frame must not return the number of written bytes.
+ * It must return either zero for success, or <0 for error.
+ * In addition, it must not alter the skb
+ */
+static int st21nfca_hci_i2c_write(void *phy_id, struct sk_buff *skb)
+{
+	int r = -1, i, j, len;
+	struct st21nfca_i2c_phy *phy = phy_id;
+	struct i2c_client *client = phy->i2c_dev;
+	u16 wait_tab[] = { 2, 3, 5, 15, 20, 40};
+	u8 tmp[ST21NFCA_HCI_LLC_MAX_SIZE * 2];
+
+	I2C_DUMP_SKB("st21nfca_hci_i2c_write", skb);
+
+
+	if (phy->hard_fault != 0)
+		return phy->hard_fault;
+
+	/*
+	 * Compute CRC before byte stuffing computation on frame
+	 * Note st21nfca_hci_add_len_crc is doing a byte stuffing
+	 * on its own value
+	 */
+	len = st21nfca_hci_add_len_crc(skb);
+
+	/* add ST21NFCA_SOF_EOF on tail */
+	*skb_put(skb, 1) = ST21NFCA_SOF_EOF;
+	/* add ST21NFCA_SOF_EOF on head */
+	*skb_push(skb, 1) = ST21NFCA_SOF_EOF;
+
+	/*
+	 * Compute byte stuffing
+	 * if byte == ST21NFCA_SOF_EOF or ST21NFCA_ESCAPE_BYTE_STUFFING
+	 * insert ST21NFCA_ESCAPE_BYTE_STUFFING (escape byte)
+	 * xor byte with ST21NFCA_BYTE_STUFFING_MASK
+	 */
+	tmp[0] = skb->data[0];
+	for (i = 1, j = 1; i < skb->len - 1; i++, j++) {
+		if (skb->data[i] == ST21NFCA_SOF_EOF
+		    || skb->data[i] == ST21NFCA_ESCAPE_BYTE_STUFFING) {
+			tmp[j] = ST21NFCA_ESCAPE_BYTE_STUFFING;
+			j++;
+			tmp[j] = skb->data[i] ^ ST21NFCA_BYTE_STUFFING_MASK;
+		} else {
+			tmp[j] = skb->data[i];
+		}
+	}
+	tmp[j] = skb->data[i];
+	j++;
+
+	/*
+	 * Manage sleep mode
+	 * Try 3 times to send data with delay between each
+	 */
+	for (i = 0; i < ARRAY_SIZE(wait_tab) && r < 0; i++) {
+		r = i2c_master_send(client, tmp, j);
+		if (r < 0)
+			msleep(wait_tab[i]);
+	}
+
+	if (r >= 0) {
+		if (r != j)
+			r = -EREMOTEIO;
+		else
+			r = 0;
+	}
+
+	st21nfca_hci_remove_len_crc(skb, len);
+
+	return r;
+}
+
+static int get_frame_size(u8 *buf, int buflen)
+{
+	int len = 0;
+	if (buf[len + 1] == ST21NFCA_SOF_EOF)
+		return 0;
+
+	for (len = 1; len < buflen && buf[len] != ST21NFCA_SOF_EOF; len++)
+		;
+
+	return len;
+}
+
+static int check_crc(u8 *buf, int buflen)
+{
+	u16 crc;
+
+	crc = crc_ccitt(0xffff, buf, buflen - 2);
+	crc = ~crc;
+
+	if (buf[buflen - 2] != (crc & 0xff) || buf[buflen - 1] != (crc >> 8)) {
+		pr_err(ST21NFCA_HCI_DRIVER_NAME
+		       ": CRC error 0x%x != 0x%x 0x%x\n", crc, buf[buflen - 1],
+		       buf[buflen - 2]);
+
+		pr_info(DRIVER_DESC ": %s : BAD CRC\n", __func__);
+		print_hex_dump(KERN_DEBUG, "crc: ", DUMP_PREFIX_NONE,
+			       16, 2, buf, buflen, false);
+		return -EPERM;
+	}
+	return 0;
+}
+
+/*
+ * Prepare received data for upper layer.
+ * Received data include byte stuffing, crc and sof/eof
+ * which is not usable by hci part.
+ * returns:
+ * frame size without sof/eof, header and byte stuffing
+ * -EBADMSG : frame was incorrect and discarded
+ */
+static int st21nfca_hci_i2c_repack(struct sk_buff *skb)
+{
+	int i, j, r, size;
+	if (skb->len < 1 || (skb->len > 1 && skb->data[1] != 0))
+		return -EBADMSG;
+
+	size = get_frame_size(skb->data, skb->len);
+	if (size > 0) {
+		skb_trim(skb, size);
+		/* remove ST21NFCA byte stuffing for upper layer */
+		for (i = 1, j = 0; i < skb->len; i++) {
+			if (skb->data[i] ==
+					(u8) ST21NFCA_ESCAPE_BYTE_STUFFING) {
+				skb->data[i] =
+				    skb->data[i +
+					      1] | ST21NFCA_BYTE_STUFFING_MASK;
+				i++;
+				j++;
+			}
+			skb->data[i] = skb->data[i + j];
+		}
+		/* remove byte stuffing useless byte */
+		skb_trim(skb, i - j);
+		/* remove ST21NFCA_SOF_EOF from head */
+		skb_pull(skb, 1);
+
+		r = check_crc(skb->data, skb->len);
+		if (r != 0) {
+			i = 0;
+			return -EBADMSG;
+		}
+
+		/* remove headbyte */
+		skb_pull(skb, 1);
+		/* remove crc. Byte Stuffing is already removed here */
+		skb_trim(skb, skb->len - 2);
+		return skb->len;
+	}
+	return 0;
+}
+
+/*
+ * Reads an shdlc frame and returns it in a newly allocated sk_buff. Guarantees
+ * that i2c bus will be flushed and that next read will start on a new frame.
+ * returned skb contains only LLC header and payload.
+ * returns:
+ * frame size : if received frame is complete (find ST21NFCA_SOF_EOF at
+ * end of read)
+ * -EAGAIN : if received frame is incomplete (not find ST21NFCA_SOF_EOF
+ * at end of read)
+ * -EREMOTEIO : i2c read error (fatal)
+ * -EBADMSG : frame was incorrect and discarded
+ * (value returned from st21nfca_hci_i2c_repack)
+ * -EIO : if no ST21NFCA_SOF_EOF is found after reaching
+ * the read length end sequence
+ */
+static int st21nfca_hci_i2c_read(struct st21nfca_i2c_phy *phy,
+				 struct sk_buff *skb)
+{
+	int r, i;
+	u8 len;
+	struct i2c_client *client = phy->i2c_dev;
+
+	if (phy->current_read_len < ARRAY_SIZE(len_seq)) {
+		len = len_seq[phy->current_read_len];
+
+		/*
+		 * Add retry mecanism
+		 * Operation on I2C interface may fail in case of operation on
+		 * RF or SWP interface
+		 */
+		r = 0;
+		for (i = 0; i < ARRAY_SIZE(wait_tab) && r <= 0; i++) {
+			r = i2c_master_recv(client, skb_put(skb, len), len);
+			if (r < 0)
+				msleep(wait_tab[i]);
+		}
+
+		if (r != len) {
+			phy->current_read_len = 0;
+			return -EREMOTEIO;
+		}
+
+		if (memchr(skb->data + 2, ST21NFCA_SOF_EOF,
+				skb->len - 2) != NULL) {
+			phy->current_read_len = 0;
+			return st21nfca_hci_i2c_repack(skb);
+		}
+		phy->current_read_len++;
+		return -EAGAIN;
+	}
+	return -EIO;
+}
+
+/*
+ * Reads an shdlc frame from the chip. This is not as straightforward as it
+ * seems. The frame format is data-crc, and corruption can occur anywhere
+ * while transiting on i2c bus, such that we could read an invalid data.
+ * The tricky case is when we read a corrupted data or crc. We must detect
+ * this here in order to determine that data can be transmitted to the hci
+ * core. This is the reason why we check the crc here.
+ * The CLF will repeat a frame until we send a RR on that frame.
+ *
+ * On ST21NFCA, IRQ goes in idle when read starts. As no size information are
+ * available in the incoming data, other IRQ might come. Every IRQ will trigger
+ * a read sequence with different length and will fill the current frame.
+ * The reception is complete once we reach a ST21NFCA_SOF_EOF.
+ */
+static irqreturn_t st21nfca_hci_irq_thread_fn(int irq, void *phy_id)
+{
+	struct st21nfca_i2c_phy *phy = phy_id;
+	struct i2c_client *client;
+
+	int r;
+
+	if (!phy || irq != phy->i2c_dev->irq) {
+		WARN_ON_ONCE(1);
+		return IRQ_NONE;
+	}
+
+	client = phy->i2c_dev;
+	dev_dbg(&client->dev, "IRQ\n");
+
+	if (phy->hard_fault != 0)
+		return IRQ_HANDLED;
+
+	r = st21nfca_hci_i2c_read(phy, phy->pending_skb);
+	if (r == -EREMOTEIO) {
+		phy->hard_fault = r;
+
+		nfc_hci_recv_frame(phy->hdev, NULL);
+
+		return IRQ_HANDLED;
+	} else if (r == -EAGAIN || r == -EIO) {
+		return IRQ_HANDLED;
+	} else if (r == -EBADMSG && phy->crc_trials < ARRAY_SIZE(wait_tab)) {
+		/*
+		 * With ST21NFCA, only one interface (I2C, RF or SWP)
+		 * may be active at a time.
+		 * Having incorrect crc is usually due to i2c macrocell
+		 * deactivation in the middle of a transmission.
+		 * It may generate corrupted data on i2c.
+		 * We give sometime to get i2c back.
+		 * The complete frame will be repeated.
+		 */
+		msleep(wait_tab[phy->crc_trials]);
+		phy->crc_trials++;
+		phy->current_read_len = 0;
+	} else if (r > 0) {
+		/*
+		 * We succeeded to read data from the CLF and
+		 * data is valid.
+		 * Reset counter.
+		 */
+		nfc_hci_recv_frame(phy->hdev, phy->pending_skb);
+		phy->crc_trials = 0;
+	}
+
+	phy->pending_skb = alloc_skb(ST21NFCA_HCI_LLC_MAX_SIZE * 2, GFP_KERNEL);
+	if (phy->pending_skb == NULL) {
+		phy->hard_fault = -ENOMEM;
+		nfc_hci_recv_frame(phy->hdev, NULL);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static struct nfc_phy_ops i2c_phy_ops = {
+	.write = st21nfca_hci_i2c_write,
+	.enable = st21nfca_hci_i2c_enable,
+	.disable = st21nfca_hci_i2c_disable,
+};
+
+static int st21nfca_request_resources(struct st21nfca_i2c_phy *phy,
+				      struct i2c_client *client)
+{
+	struct st21nfca_nfc_platform_data *pdata;
+	int r;
+
+	pdata = client->dev.platform_data;
+	if (pdata == NULL) {
+		nfc_err(&client->dev, "No platform data\n");
+		return -EINVAL;
+	}
+
+	/* store for later use */
+	phy->gpio_irq = pdata->gpio_irq;
+	phy->gpio_ena = pdata->gpio_ena;
+	phy->irq_polarity = pdata->irq_polarity;
+	phy->i2c_dev = client;
+
+	r = devm_gpio_request(&client->dev, phy->gpio_irq, "wake_up");
+	if (r) {
+		pr_err("%s : gpio_request failed\n", __FILE__);
+		return -ENODEV;
+	}
+
+	r = gpio_direction_input(phy->gpio_irq);
+	if (r) {
+		pr_err("%s : gpio_direction_input failed\n", __FILE__);
+		return -ENODEV;
+	}
+
+	if (phy->gpio_ena != 0) {
+		r = devm_gpio_request(&client->dev,
+					phy->gpio_ena, "clf_enable");
+		if (r) {
+			pr_err("%s : ena gpio_request failed\n", __FILE__);
+			return -ENODEV;
+		}
+		r = gpio_direction_output(phy->gpio_ena, 1);
+
+		if (r) {
+			pr_err("%s : ena gpio_direction_output failed\n",
+			       __FILE__);
+			return -ENODEV;
+		}
+	}
+
+	phy->pending_skb = alloc_skb(ST21NFCA_HCI_LLC_MAX_SIZE * 2, GFP_KERNEL);
+	if (phy->pending_skb == NULL)
+		return -ENOMEM;
+
+	phy->current_read_len = 0;
+	phy->crc_trials = 0;
+	return r;
+}
+
+static int st21nfca_hci_i2c_probe(struct i2c_client *client,
+				  const struct i2c_device_id *id)
+{
+	struct st21nfca_i2c_phy *phy;
+	struct st21nfca_nfc_platform_data *pdata;
+	int r = 0;
+
+	dev_dbg(&client->dev, "%s\n", __func__);
+	dev_dbg(&client->dev, "IRQ: %d\n", client->irq);
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		nfc_err(&client->dev, "Need I2C_FUNC_I2C\n");
+		return -ENODEV;
+	}
+
+	phy = devm_kzalloc(&client->dev, sizeof(struct st21nfca_i2c_phy),
+			   GFP_KERNEL);
+	if (!phy) {
+		nfc_err(&client->dev,
+			"Cannot allocate memory for st21nfca i2c phy.\n");
+		return -ENOMEM;
+	}
+
+	phy->i2c_dev = client;
+
+	i2c_set_clientdata(client, phy);
+
+	pdata = client->dev.platform_data;
+	if (pdata == NULL) {
+		nfc_err(&client->dev, "No platform data\n");
+		return -EINVAL;
+	}
+
+	r = st21nfca_request_resources(phy, client);
+	if (r) {
+		nfc_err(&client->dev, "Cannot get platform resources\n");
+		return r;
+	}
+
+	st21nfca_hci_platform_init(phy);
+	r = devm_request_threaded_irq(&client->dev, client->irq, NULL,
+				st21nfca_hci_irq_thread_fn,
+				phy->irq_polarity | IRQF_ONESHOT,
+				ST21NFCA_HCI_DRIVER_NAME, phy);
+	if (r < 0) {
+		nfc_err(&client->dev, "Unable to register IRQ handler\n");
+		return r;
+	}
+
+	r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
+			       ST21NFCA_FRAME_HEADROOM, ST21NFCA_FRAME_TAILROOM,
+			       ST21NFCA_HCI_LLC_MAX_PAYLOAD, &phy->hdev);
+
+	if (r < 0)
+		return r;
+
+	return 0;
+}
+
+static int st21nfca_hci_i2c_remove(struct i2c_client *client)
+{
+	struct st21nfca_i2c_phy *phy = i2c_get_clientdata(client);
+
+	dev_dbg(&client->dev, "%s\n", __func__);
+
+	st21nfca_hci_remove(phy->hdev);
+
+	if (phy->powered)
+		st21nfca_hci_i2c_disable(phy);
+
+	return 0;
+}
+
+static struct i2c_driver st21nfca_hci_i2c_driver = {
+	.driver = {
+		   .name = ST21NFCA_HCI_I2C_DRIVER_NAME,
+		   },
+	.probe = st21nfca_hci_i2c_probe,
+	.id_table = st21nfca_hci_i2c_id_table,
+	.remove = st21nfca_hci_i2c_remove,
+};
+
+module_i2c_driver(st21nfca_hci_i2c_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/nfc/st21nfca/st21nfca.c b/drivers/nfc/st21nfca/st21nfca.c
new file mode 100644
index 000000000000..69213f37b7ba
--- /dev/null
+++ b/drivers/nfc/st21nfca/st21nfca.c
@@ -0,0 +1,506 @@
+/*
+ * HCI based Driver for STMicroelectronics NFC Chip
+ *
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/crc-ccitt.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+
+#include <linux/nfc.h>
+#include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
+
+#include <uapi/linux/nfc.h>
+
+#include "st21nfca.h"
+#include <linux/platform_data/st21nfca.h>
+
+#define DRIVER_DESC "HCI NFC driver for ST21NFCA"
+
+#define FULL_VERSION_LEN 3
+
+/* Proprietary gates, events, commands and registers */
+
+/* Commands that apply to all RF readers */
+#define ST21NFCA_RF_READER_CMD_PRESENCE_CHECK	0x30
+
+#define ST21NFCA_RF_READER_ISO15693_GATE	0x12
+
+/*
+ * Reader gate for communication with contact-less cards using Type A
+ * protocol ISO14443-3 but not compliant with ISO14443-4
+ */
+#define ST21NFCA_RF_READER_14443_3_A_GATE	0x15
+#define ST21NFCA_RF_READER_14443_3_A_UID	0x02
+#define ST21NFCA_RF_READER_14443_3_A_ATQA	0x03
+#define ST21NFCA_RF_READER_14443_3_A_SAK	0x04
+
+#define ST21NFCA_DEVICE_MGNT_GATE		0x01
+#define ST21NFCA_DEVICE_MGNT_PIPE		0x02
+#define ST21NFCA_NFC_MODE	0x03	/* NFC_MODE parameter*/
+
+
+static DECLARE_BITMAP(dev_mask, ST21NFCA_NUM_DEVICES);
+
+static struct nfc_hci_gate st21nfca_gates[] = {
+	{NFC_HCI_ADMIN_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_LOOPBACK_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_ID_MGMT_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_LINK_MGMT_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_RF_READER_B_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_RF_READER_A_GATE, NFC_HCI_INVALID_PIPE},
+	{ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE},
+	{ST21NFCA_RF_READER_F_GATE, NFC_HCI_INVALID_PIPE},
+	{ST21NFCA_RF_READER_14443_3_A_GATE, NFC_HCI_INVALID_PIPE},
+};
+/* Largest headroom needed for outgoing custom commands */
+#define ST21NFCA_CMDS_HEADROOM  7
+
+static int st21nfca_hci_open(struct nfc_hci_dev *hdev)
+{
+	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
+	int r;
+
+	mutex_lock(&info->info_lock);
+
+	if (info->state != ST21NFCA_ST_COLD) {
+		r = -EBUSY;
+		goto out;
+	}
+
+	r = info->phy_ops->enable(info->phy_id);
+
+	if (r == 0)
+		info->state = ST21NFCA_ST_READY;
+
+out:
+	mutex_unlock(&info->info_lock);
+	return r;
+}
+
+static void st21nfca_hci_close(struct nfc_hci_dev *hdev)
+{
+	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
+
+	mutex_lock(&info->info_lock);
+
+	if (info->state == ST21NFCA_ST_COLD)
+		goto out;
+
+	info->phy_ops->disable(info->phy_id);
+	info->state = ST21NFCA_ST_COLD;
+
+out:
+	mutex_unlock(&info->info_lock);
+}
+
+static int st21nfca_hci_ready(struct nfc_hci_dev *hdev)
+{
+	struct sk_buff *skb;
+
+	u8 param;
+	int r;
+
+	param = NFC_HCI_UICC_HOST_ID;
+	r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE,
+			      NFC_HCI_ADMIN_WHITELIST, &param, 1);
+	if (r < 0)
+		return r;
+
+	/* Set NFC_MODE in device management gate to enable */
+	r = nfc_hci_get_param(hdev, ST21NFCA_DEVICE_MGNT_GATE,
+			      ST21NFCA_NFC_MODE, &skb);
+	if (r < 0)
+		return r;
+
+	if (skb->data[0] == 0) {
+		kfree_skb(skb);
+		param = 1;
+
+		r = nfc_hci_set_param(hdev, ST21NFCA_DEVICE_MGNT_GATE,
+					ST21NFCA_NFC_MODE, &param, 1);
+		if (r < 0)
+			return r;
+	}
+
+	r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+			       NFC_HCI_EVT_END_OPERATION, NULL, 0);
+	if (r < 0)
+		return r;
+
+	r = nfc_hci_get_param(hdev, NFC_HCI_ID_MGMT_GATE,
+			      NFC_HCI_ID_MGMT_VERSION_SW, &skb);
+	if (r < 0)
+		return r;
+
+	if (skb->len != FULL_VERSION_LEN) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	print_hex_dump(KERN_DEBUG, "FULL VERSION SOFTWARE INFO: ",
+		       DUMP_PREFIX_NONE, 16, 1,
+		       skb->data, FULL_VERSION_LEN, false);
+
+	kfree_skb(skb);
+
+	return 0;
+}
+
+static int st21nfca_hci_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb)
+{
+	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
+
+	return info->phy_ops->write(info->phy_id, skb);
+}
+
+static int st21nfca_hci_start_poll(struct nfc_hci_dev *hdev,
+				   u32 im_protocols, u32 tm_protocols)
+{
+	int r;
+
+	pr_info(DRIVER_DESC ": %s protocols 0x%x 0x%x\n",
+		__func__, im_protocols, tm_protocols);
+
+	r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+			       NFC_HCI_EVT_END_OPERATION, NULL, 0);
+	if (r < 0)
+		return r;
+	if (im_protocols) {
+		/*
+		 * enable polling according to im_protocols & tm_protocols
+		 * - CLOSE pipe according to im_protocols & tm_protocols
+		 */
+		if ((NFC_HCI_RF_READER_B_GATE & im_protocols) == 0) {
+			r = nfc_hci_disconnect_gate(hdev,
+					NFC_HCI_RF_READER_B_GATE);
+			if (r < 0)
+				return r;
+		}
+
+		if ((NFC_HCI_RF_READER_A_GATE & im_protocols) == 0) {
+			r = nfc_hci_disconnect_gate(hdev,
+					NFC_HCI_RF_READER_A_GATE);
+			if (r < 0)
+				return r;
+		}
+
+		if ((ST21NFCA_RF_READER_F_GATE & im_protocols) == 0) {
+			r = nfc_hci_disconnect_gate(hdev,
+					ST21NFCA_RF_READER_F_GATE);
+			if (r < 0)
+				return r;
+		}
+
+		if ((ST21NFCA_RF_READER_14443_3_A_GATE & im_protocols) == 0) {
+			r = nfc_hci_disconnect_gate(hdev,
+					ST21NFCA_RF_READER_14443_3_A_GATE);
+			if (r < 0)
+				return r;
+		}
+
+		if ((ST21NFCA_RF_READER_ISO15693_GATE & im_protocols) == 0) {
+			r = nfc_hci_disconnect_gate(hdev,
+					ST21NFCA_RF_READER_ISO15693_GATE);
+			if (r < 0)
+				return r;
+		}
+
+		r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+				       NFC_HCI_EVT_READER_REQUESTED, NULL, 0);
+		if (r < 0)
+			nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+					   NFC_HCI_EVT_END_OPERATION, NULL, 0);
+	}
+	return r;
+}
+
+static int st21nfca_get_iso14443_3_atqa(struct nfc_hci_dev *hdev, u16 *atqa)
+{
+	int r;
+	struct sk_buff *atqa_skb = NULL;
+
+	r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE,
+			      ST21NFCA_RF_READER_14443_3_A_ATQA, &atqa_skb);
+	if (r < 0)
+		goto exit;
+
+	if (atqa_skb->len != 2) {
+		r = -EPROTO;
+		goto exit;
+	}
+
+	*atqa = be16_to_cpu(*(u16 *) atqa_skb->data);
+
+exit:
+	kfree_skb(atqa_skb);
+	return r;
+}
+
+static int st21nfca_get_iso14443_3_sak(struct nfc_hci_dev *hdev, u8 *sak)
+{
+	int r;
+	struct sk_buff *sak_skb = NULL;
+
+	r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE,
+			      ST21NFCA_RF_READER_14443_3_A_SAK, &sak_skb);
+	if (r < 0)
+		goto exit;
+
+	if (sak_skb->len != 1) {
+		r = -EPROTO;
+		goto exit;
+	}
+
+	*sak = sak_skb->data[0];
+
+exit:
+	kfree_skb(sak_skb);
+	return r;
+}
+
+static int st21nfca_get_iso14443_3_uid(struct nfc_hci_dev *hdev, u8 *gate,
+				       int *len)
+{
+	int r;
+	struct sk_buff *uid_skb = NULL;
+
+	r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE,
+			      ST21NFCA_RF_READER_14443_3_A_UID, &uid_skb);
+	if (r < 0)
+		goto exit;
+
+	if (uid_skb->len == 0 || uid_skb->len > NFC_NFCID1_MAXSIZE) {
+		r = -EPROTO;
+		goto exit;
+	}
+
+	gate = uid_skb->data;
+	*len = uid_skb->len;
+exit:
+	kfree_skb(uid_skb);
+	return r;
+}
+
+static int st21nfca_hci_target_from_gate(struct nfc_hci_dev *hdev, u8 gate,
+					 struct nfc_target *target)
+{
+	int r, len;
+	u16 atqa;
+	u8 sak;
+	u8 uid[NFC_NFCID1_MAXSIZE];
+
+	switch (gate) {
+	case ST21NFCA_RF_READER_F_GATE:
+		target->supported_protocols = NFC_PROTO_FELICA_MASK;
+		break;
+	case ST21NFCA_RF_READER_14443_3_A_GATE:
+		/* ISO14443-3 type 1 or 2 tags */
+		r = st21nfca_get_iso14443_3_atqa(hdev, &atqa);
+		if (r < 0)
+			return r;
+		if (atqa == 0x000c) {
+			target->supported_protocols = NFC_PROTO_JEWEL_MASK;
+			target->sens_res = 0x0c00;
+		} else {
+			r = st21nfca_get_iso14443_3_sak(hdev, &sak);
+			if (r < 0)
+				return r;
+
+			r = st21nfca_get_iso14443_3_uid(hdev, uid, &len);
+			if (r < 0)
+				return r;
+
+			target->supported_protocols =
+			    nfc_hci_sak_to_protocol(sak);
+			if (target->supported_protocols == 0xffffffff)
+				return -EPROTO;
+
+			target->sens_res = atqa;
+			target->sel_res = sak;
+			memcpy(target->nfcid1, uid, len);
+			target->nfcid1_len = len;
+		}
+
+		break;
+	default:
+		return -EPROTO;
+	}
+
+	return 0;
+}
+
+/*
+ * Returns:
+ * <= 0: driver handled the data exchange
+ *    1: driver doesn't especially handle, please do standard processing
+ */
+static int st21nfca_hci_im_transceive(struct nfc_hci_dev *hdev,
+				      struct nfc_target *target,
+				      struct sk_buff *skb,
+				      data_exchange_cb_t cb, void *cb_context)
+{
+	pr_info(DRIVER_DESC ": %s for gate=%d len=%d\n", __func__,
+		target->hci_reader_gate, skb->len);
+
+	switch (target->hci_reader_gate) {
+	case ST21NFCA_RF_READER_F_GATE:
+		*skb_push(skb, 1) = 0x1a;
+		return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+					      ST21NFCA_WR_XCHG_DATA, skb->data,
+					      skb->len, cb, cb_context);
+	case ST21NFCA_RF_READER_14443_3_A_GATE:
+		*skb_push(skb, 1) = 0x1a;	/* CTR, see spec:10.2.2.1 */
+
+		return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+					      ST21NFCA_WR_XCHG_DATA, skb->data,
+					      skb->len, cb, cb_context);
+	default:
+		return 1;
+	}
+}
+
+static int st21nfca_hci_check_presence(struct nfc_hci_dev *hdev,
+				       struct nfc_target *target)
+{
+	u8 fwi = 0x11;
+	switch (target->hci_reader_gate) {
+	case NFC_HCI_RF_READER_A_GATE:
+	case NFC_HCI_RF_READER_B_GATE:
+		/*
+		 * PRESENCE_CHECK on those gates is available
+		 * However, the answer to this command is taking 3 * fwi
+		 * if the card is no present.
+		 * Instead, we send an empty I-Frame with a very short
+		 * configurable fwi ~604µs.
+		 */
+		return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
+					ST21NFCA_WR_XCHG_DATA, &fwi, 1, NULL);
+	case ST21NFCA_RF_READER_14443_3_A_GATE:
+		return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
+					ST21NFCA_RF_READER_CMD_PRESENCE_CHECK,
+					NULL, 0, NULL);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static struct nfc_hci_ops st21nfca_hci_ops = {
+	.open = st21nfca_hci_open,
+	.close = st21nfca_hci_close,
+	.hci_ready = st21nfca_hci_ready,
+	.xmit = st21nfca_hci_xmit,
+	.start_poll = st21nfca_hci_start_poll,
+	.target_from_gate = st21nfca_hci_target_from_gate,
+	.im_transceive = st21nfca_hci_im_transceive,
+	.check_presence = st21nfca_hci_check_presence,
+};
+
+int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops,
+		       char *llc_name, int phy_headroom, int phy_tailroom,
+		       int phy_payload, struct nfc_hci_dev **hdev)
+{
+	struct st21nfca_hci_info *info;
+	int r = 0;
+	int dev_num;
+	u32 protocols;
+	struct nfc_hci_init_data init_data;
+	unsigned long quirks = 0;
+
+	info = kzalloc(sizeof(struct st21nfca_hci_info), GFP_KERNEL);
+	if (!info) {
+		r = -ENOMEM;
+		goto err_alloc_hdev;
+	}
+
+	info->phy_ops = phy_ops;
+	info->phy_id = phy_id;
+	info->state = ST21NFCA_ST_COLD;
+	mutex_init(&info->info_lock);
+
+	init_data.gate_count = ARRAY_SIZE(st21nfca_gates);
+
+	memcpy(init_data.gates, st21nfca_gates, sizeof(st21nfca_gates));
+
+	/*
+	 * Session id must include the driver name + i2c bus addr
+	 * persistent info to discriminate 2 identical chips
+	 */
+	dev_num = find_first_zero_bit(dev_mask, ST21NFCA_NUM_DEVICES);
+	if (dev_num >= ST21NFCA_NUM_DEVICES)
+		goto err_alloc_hdev;
+
+	scnprintf(init_data.session_id, sizeof(init_data.session_id), "%s%2x",
+		  "ST21AH", dev_num);
+
+	protocols = NFC_PROTO_JEWEL_MASK |
+	    NFC_PROTO_MIFARE_MASK |
+	    NFC_PROTO_FELICA_MASK |
+	    NFC_PROTO_ISO14443_MASK |
+	    NFC_PROTO_ISO14443_B_MASK;
+
+	set_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &quirks);
+
+	info->hdev =
+	    nfc_hci_allocate_device(&st21nfca_hci_ops, &init_data, quirks,
+				    protocols, llc_name,
+				    phy_headroom + ST21NFCA_CMDS_HEADROOM,
+				    phy_tailroom, phy_payload);
+
+	if (!info->hdev) {
+		pr_err("Cannot allocate nfc hdev.\n");
+		r = -ENOMEM;
+		goto err_alloc_hdev;
+	}
+
+	nfc_hci_set_clientdata(info->hdev, info);
+
+	r = nfc_hci_register_device(info->hdev);
+	if (r)
+		goto err_regdev;
+
+	*hdev = info->hdev;
+
+	return 0;
+
+err_regdev:
+	nfc_hci_free_device(info->hdev);
+
+err_alloc_hdev:
+	kfree(info);
+
+	return r;
+}
+EXPORT_SYMBOL(st21nfca_hci_probe);
+
+void st21nfca_hci_remove(struct nfc_hci_dev *hdev)
+{
+	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
+
+	nfc_hci_unregister_device(hdev);
+	nfc_hci_free_device(hdev);
+	kfree(info);
+}
+EXPORT_SYMBOL(st21nfca_hci_remove);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h
new file mode 100644
index 000000000000..334cd90bcc8c
--- /dev/null
+++ b/drivers/nfc/st21nfca/st21nfca.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __LOCAL_ST21NFCA_H_
+#define __LOCAL_ST21NFCA_H_
+
+#include <net/nfc/hci.h>
+
+#define HCI_MODE 0
+
+/* framing in HCI mode */
+#define ST21NFCA_SOF_EOF_LEN    2
+
+/* Almost every time value is 0 */
+#define ST21NFCA_HCI_LLC_LEN    1
+
+/* Size in worst case :
+ * In normal case CRC len = 2 but byte stuffing
+ * may appear in case one CRC byte = ST21NFCA_SOF_EOF
+ */
+#define ST21NFCA_HCI_LLC_CRC    4
+
+#define ST21NFCA_HCI_LLC_LEN_CRC        (ST21NFCA_SOF_EOF_LEN + \
+						ST21NFCA_HCI_LLC_LEN + \
+						ST21NFCA_HCI_LLC_CRC)
+#define ST21NFCA_HCI_LLC_MIN_SIZE       (1 + ST21NFCA_HCI_LLC_LEN_CRC)
+
+/* Worst case when adding byte stuffing between each byte */
+#define ST21NFCA_HCI_LLC_MAX_PAYLOAD    29
+#define ST21NFCA_HCI_LLC_MAX_SIZE       (ST21NFCA_HCI_LLC_LEN_CRC + 1 + \
+					ST21NFCA_HCI_LLC_MAX_PAYLOAD)
+
+#define DRIVER_DESC "HCI NFC driver for ST21NFCA"
+
+#define ST21NFCA_HCI_MODE 0
+
+#define ST21NFCA_NUM_DEVICES 256
+
+int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops,
+		       char *llc_name, int phy_headroom, int phy_tailroom,
+		       int phy_payload, struct nfc_hci_dev **hdev);
+void st21nfca_hci_remove(struct nfc_hci_dev *hdev);
+
+enum st21nfca_state {
+	ST21NFCA_ST_COLD,
+	ST21NFCA_ST_READY,
+};
+
+struct st21nfca_hci_info {
+	struct nfc_phy_ops *phy_ops;
+	void *phy_id;
+
+	struct nfc_hci_dev *hdev;
+
+	enum st21nfca_state state;
+
+	struct mutex info_lock;
+
+	int async_cb_type;
+	data_exchange_cb_t async_cb;
+	void *async_cb_context;
+
+} __packed;
+
+/* Reader RF commands */
+#define ST21NFCA_WR_XCHG_DATA            0x10
+
+#define ST21NFCA_RF_READER_F_GATE               0x14
+#define ST21NFCA_RF_READER_F_DATARATE 0x01
+#define ST21NFCA_RF_READER_F_DATARATE_106 0x01
+#define ST21NFCA_RF_READER_F_DATARATE_212 0x02
+#define ST21NFCA_RF_READER_F_DATARATE_424 0x04
+
+#endif /* __LOCAL_ST21NFCA_H_ */
diff --git a/include/linux/platform_data/st21nfca.h b/include/linux/platform_data/st21nfca.h
new file mode 100644
index 000000000000..1730312398ff
--- /dev/null
+++ b/include/linux/platform_data/st21nfca.h
@@ -0,0 +1,32 @@
+/*
+ * Driver include for the ST21NFCA NFC chip.
+ *
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ST21NFCA_HCI_H_
+#define _ST21NFCA_HCI_H_
+
+#include <linux/i2c.h>
+
+#define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci"
+
+struct st21nfca_nfc_platform_data {
+	unsigned int gpio_irq;
+	unsigned int gpio_ena;
+	unsigned int irq_polarity;
+};
+
+#endif /* _ST21NFCA_HCI_H_ */
-- 
cgit v1.2.3-71-gd317


From e240bc36125691b0e18e70407c2d18ca6117c2f5 Mon Sep 17 00:00:00 2001
From: Christophe Ricard <christophe.ricard@gmail.com>
Date: Tue, 25 Mar 2014 06:51:49 +0100
Subject: NFC: hci: Add load_session HCI operand

load_session allows a CLF to restore the gate <-> pipe table from some
proprietary location.
The main advantage to add this function is to reduce the memory wear by
running pipe creation (and storing) only once.

Signed-off-by: Christophe Ricard <christophe-h.ricard@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/hci.h |  1 +
 net/nfc/hci/core.c    | 45 +++++++++++++++++++++------------------------
 2 files changed, 22 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index 03c4650b548c..61286db54388 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -27,6 +27,7 @@ struct nfc_hci_dev;
 struct nfc_hci_ops {
 	int (*open) (struct nfc_hci_dev *hdev);
 	void (*close) (struct nfc_hci_dev *hdev);
+	int (*load_session) (struct nfc_hci_dev *hdev);
 	int (*hci_ready) (struct nfc_hci_dev *hdev);
 	/*
 	 * xmit must always send the complete buffer before
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index d45b638e77c7..c4d251a6fd67 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -380,34 +380,31 @@ static int hci_dev_session_init(struct nfc_hci_dev *hdev)
 	if (r < 0)
 		goto disconnect_all;
 
-	if (skb->len && skb->len == strlen(hdev->init_data.session_id))
-		if (memcmp(hdev->init_data.session_id, skb->data,
-			   skb->len) == 0) {
-			/* TODO ELa: restore gate<->pipe table from
-			 * some TBD location.
-			 * note: it doesn't seem possible to get the chip
-			 * currently open gate/pipe table.
-			 * It is only possible to obtain the supported
-			 * gate list.
-			 */
+	if (skb->len && skb->len == strlen(hdev->init_data.session_id) &&
+		(memcmp(hdev->init_data.session_id, skb->data,
+			   skb->len) == 0) && hdev->ops->load_session) {
+		/* Restore gate<->pipe table from some proprietary location. */
 
-			/* goto exit
-			 * For now, always do a full initialization */
-		}
+		r = hdev->ops->load_session(hdev);
 
-	r = nfc_hci_disconnect_all_gates(hdev);
-	if (r < 0)
-		goto exit;
+		if (r < 0)
+			goto disconnect_all;
+	} else {
 
-	r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count,
-				  hdev->init_data.gates);
-	if (r < 0)
-		goto disconnect_all;
+		r = nfc_hci_disconnect_all_gates(hdev);
+		if (r < 0)
+			goto exit;
 
-	r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE,
-			      NFC_HCI_ADMIN_SESSION_IDENTITY,
-			      hdev->init_data.session_id,
-			      strlen(hdev->init_data.session_id));
+		r = hci_dev_connect_gates(hdev, hdev->init_data.gate_count,
+					  hdev->init_data.gates);
+		if (r < 0)
+			goto disconnect_all;
+
+		r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE,
+				NFC_HCI_ADMIN_SESSION_IDENTITY,
+				hdev->init_data.session_id,
+				strlen(hdev->init_data.session_id));
+	}
 	if (r == 0)
 		goto exit;
 
-- 
cgit v1.2.3-71-gd317


From 51d98fa47c9c3f5d34cd4097ce08e8e8669a89b4 Mon Sep 17 00:00:00 2001
From: "Mark A. Greer" <mgreer@animalcreek.com>
Date: Mon, 31 Mar 2014 17:36:37 -0700
Subject: NFC: digital: Add macros for the ISO/IEC 14443-B Protocol

Add RF tech and framing macros for the ISO/IEC 14443-B Protocol.

Cc: Thierry Escande <thierry.escande@linux.intel.com>
Signed-off-by: Mark A. Greer <mgreer@animalcreek.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/digital.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/nfc/digital.h b/include/net/nfc/digital.h
index 7655cfe27c34..bdf55c3b7a19 100644
--- a/include/net/nfc/digital.h
+++ b/include/net/nfc/digital.h
@@ -36,6 +36,7 @@ enum {
 	NFC_DIGITAL_RF_TECH_212F,
 	NFC_DIGITAL_RF_TECH_424F,
 	NFC_DIGITAL_RF_TECH_ISO15693,
+	NFC_DIGITAL_RF_TECH_106B,
 
 	NFC_DIGITAL_RF_TECH_LAST,
 };
@@ -62,6 +63,9 @@ enum {
 	NFC_DIGITAL_FRAMING_ISO15693_INVENTORY,
 	NFC_DIGITAL_FRAMING_ISO15693_T5T,
 
+	NFC_DIGITAL_FRAMING_NFCB,
+	NFC_DIGITAL_FRAMING_NFCB_T4T,
+
 	NFC_DIGITAL_FRAMING_LAST,
 };
 
-- 
cgit v1.2.3-71-gd317


From f1370cc4a01e61007ab3020c761cef6b88ae3729 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Fri, 18 Apr 2014 16:23:46 +0900
Subject: xfrm: Remove useless secid field from xfrm_audit.

It seems to me that commit ab5f5e8b "[XFRM]: xfrm audit calls" is doing
something strange at xfrm_audit_helper_usrinfo().
If secid != 0 && security_secid_to_secctx(secid) != 0, the caller calls
audit_log_task_context() which basically does
secid != 0 && security_secid_to_secctx(secid) == 0 case
except that secid is obtained from current thread's context.

Oh, what happens if secid passed to xfrm_audit_helper_usrinfo() was
obtained from other thread's context? It might audit current thread's
context rather than other thread's context if security_secid_to_secctx()
in xfrm_audit_helper_usrinfo() failed for some reason.

Then, are all the caller of xfrm_audit_helper_usrinfo() passing either
secid obtained from current thread's context or secid == 0?
It seems to me that they are.

If I didn't miss something, we don't need to pass secid to
xfrm_audit_helper_usrinfo() because audit_log_task_context() will
obtain secid from current thread's context.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     | 29 ++++++++++-------------------
 net/key/af_key.c       | 12 +++++-------
 net/xfrm/xfrm_policy.c | 22 ++++++++--------------
 net/xfrm/xfrm_state.c  | 17 +++++++----------
 net/xfrm/xfrm_user.c   | 27 ++++++---------------------
 5 files changed, 36 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 116e9c7e19cb..882889eb156b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -693,7 +693,6 @@ struct xfrm_spi_skb_cb {
 
 /* Audit Information */
 struct xfrm_audit {
-	u32	secid;
 	kuid_t	loginuid;
 	unsigned int sessionid;
 };
@@ -713,30 +712,22 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op)
 	return audit_buf;
 }
 
-static inline void xfrm_audit_helper_usrinfo(kuid_t auid, unsigned int ses, u32 secid,
+static inline void xfrm_audit_helper_usrinfo(kuid_t auid, unsigned int ses,
 					     struct audit_buffer *audit_buf)
 {
-	char *secctx;
-	u32 secctx_len;
-
 	audit_log_format(audit_buf, " auid=%u ses=%u",
 			 from_kuid(&init_user_ns, auid), ses);
-	if (secid != 0 &&
-	    security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) {
-		audit_log_format(audit_buf, " subj=%s", secctx);
-		security_release_secctx(secctx, secctx_len);
-	} else
-		audit_log_task_context(audit_buf);
+	audit_log_task_context(audit_buf);
 }
 
 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, kuid_t auid,
-			   unsigned int ses, u32 secid);
+			   unsigned int ses);
 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, kuid_t auid,
-			      unsigned int ses, u32 secid);
+			      unsigned int ses);
 void xfrm_audit_state_add(struct xfrm_state *x, int result, kuid_t auid,
-			  unsigned int ses, u32 secid);
+			  unsigned int ses);
 void xfrm_audit_state_delete(struct xfrm_state *x, int result, kuid_t auid,
-			     unsigned int ses, u32 secid);
+			     unsigned int ses);
 void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
 				      struct sk_buff *skb);
 void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb,
@@ -749,22 +740,22 @@ void xfrm_audit_state_icvfail(struct xfrm_state *x, struct sk_buff *skb,
 #else
 
 static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
-				  kuid_t auid, unsigned int ses, u32 secid)
+				  kuid_t auid, unsigned int ses)
 {
 }
 
 static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
-				  kuid_t auid, unsigned int ses, u32 secid)
+				  kuid_t auid, unsigned int ses)
 {
 }
 
 static inline void xfrm_audit_state_add(struct xfrm_state *x, int result,
-				 kuid_t auid, unsigned int ses, u32 secid)
+				 kuid_t auid, unsigned int ses)
 {
 }
 
 static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result,
-				    kuid_t auid, unsigned int ses, u32 secid)
+				    kuid_t auid, unsigned int ses)
 {
 }
 
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f3c83073afc4..d66ff72adefb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1478,7 +1478,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg
 
 	xfrm_audit_state_add(x, err ? 0 : 1,
 			     audit_get_loginuid(current),
-			     audit_get_sessionid(current), 0);
+			     audit_get_sessionid(current));
 
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
@@ -1534,7 +1534,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_
 out:
 	xfrm_audit_state_delete(x, err ? 0 : 1,
 				audit_get_loginuid(current),
-				audit_get_sessionid(current), 0);
+				audit_get_sessionid(current));
 	xfrm_state_put(x);
 
 	return err;
@@ -1735,7 +1735,6 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
 
 	audit_info.loginuid = audit_get_loginuid(current);
 	audit_info.sessionid = audit_get_sessionid(current);
-	audit_info.secid = 0;
 	err = xfrm_state_flush(net, proto, &audit_info);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
@@ -2290,7 +2289,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
 
 	xfrm_audit_policy_add(xp, err ? 0 : 1,
 			      audit_get_loginuid(current),
-			      audit_get_sessionid(current), 0);
+			      audit_get_sessionid(current));
 
 	if (err)
 		goto out;
@@ -2374,7 +2373,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 
 	xfrm_audit_policy_delete(xp, err ? 0 : 1,
 				 audit_get_loginuid(current),
-				 audit_get_sessionid(current), 0);
+				 audit_get_sessionid(current));
 
 	if (err)
 		goto out;
@@ -2624,7 +2623,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
 	if (delete) {
 		xfrm_audit_policy_delete(xp, err ? 0 : 1,
 				audit_get_loginuid(current),
-				audit_get_sessionid(current), 0);
+				audit_get_sessionid(current));
 
 		if (err)
 			goto out;
@@ -2738,7 +2737,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
 
 	audit_info.loginuid = audit_get_loginuid(current);
 	audit_info.sessionid = audit_get_sessionid(current);
-	audit_info.secid = 0;
 	err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c08fbd11ceff..bd001b7062c0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -785,8 +785,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
 			if (err) {
 				xfrm_audit_policy_delete(pol, 0,
 							 audit_info->loginuid,
-							 audit_info->sessionid,
-							 audit_info->secid);
+							 audit_info->sessionid);
 				return err;
 			}
 		}
@@ -801,8 +800,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
 				if (err) {
 					xfrm_audit_policy_delete(pol, 0,
 							audit_info->loginuid,
-							audit_info->sessionid,
-							audit_info->secid);
+							audit_info->sessionid);
 					return err;
 				}
 			}
@@ -842,8 +840,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 			cnt++;
 
 			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
-						 audit_info->sessionid,
-						 audit_info->secid);
+						 audit_info->sessionid);
 
 			xfrm_policy_kill(pol);
 
@@ -864,8 +861,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 
 				xfrm_audit_policy_delete(pol, 1,
 							 audit_info->loginuid,
-							 audit_info->sessionid,
-							 audit_info->secid);
+							 audit_info->sessionid);
 				xfrm_policy_kill(pol);
 
 				write_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -2870,12 +2866,10 @@ static void xfrm_policy_fini(struct net *net)
 #ifdef CONFIG_XFRM_SUB_POLICY
 	audit_info.loginuid = INVALID_UID;
 	audit_info.sessionid = (unsigned int)-1;
-	audit_info.secid = 0;
 	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
 #endif
 	audit_info.loginuid = INVALID_UID;
 	audit_info.sessionid = (unsigned int)-1;
-	audit_info.secid = 0;
 	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
 
 	WARN_ON(!list_empty(&net->xfrm.policy_all));
@@ -2992,14 +2986,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
 }
 
 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
-			   kuid_t auid, unsigned int sessionid, u32 secid)
+			   kuid_t auid, unsigned int sessionid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SPD-add");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	xfrm_audit_common_policyinfo(xp, audit_buf);
 	audit_log_end(audit_buf);
@@ -3007,14 +3001,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
 
 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
-			      kuid_t auid, unsigned int sessionid, u32 secid)
+			      kuid_t auid, unsigned int sessionid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SPD-delete");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	xfrm_audit_common_policyinfo(xp, audit_buf);
 	audit_log_end(audit_buf);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8e9c781a6bba..d91312b5ceb0 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -465,7 +465,7 @@ expired:
 
 	xfrm_audit_state_delete(x, err ? 0 : 1,
 				audit_get_loginuid(current),
-				audit_get_sessionid(current), 0);
+				audit_get_sessionid(current));
 
 out:
 	spin_unlock(&x->lock);
@@ -574,8 +574,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi
 			   (err = security_xfrm_state_delete(x)) != 0) {
 				xfrm_audit_state_delete(x, 0,
 							audit_info->loginuid,
-							audit_info->sessionid,
-							audit_info->secid);
+							audit_info->sessionid);
 				return err;
 			}
 		}
@@ -613,8 +612,7 @@ restart:
 				err = xfrm_state_delete(x);
 				xfrm_audit_state_delete(x, err ? 0 : 1,
 							audit_info->loginuid,
-							audit_info->sessionid,
-							audit_info->secid);
+							audit_info->sessionid);
 				xfrm_state_put(x);
 				if (!err)
 					cnt++;
@@ -2134,7 +2132,6 @@ void xfrm_state_fini(struct net *net)
 	flush_work(&net->xfrm.state_hash_work);
 	audit_info.loginuid = INVALID_UID;
 	audit_info.sessionid = (unsigned int)-1;
-	audit_info.secid = 0;
 	xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info);
 	flush_work(&net->xfrm.state_gc_work);
 
@@ -2199,14 +2196,14 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
 }
 
 void xfrm_audit_state_add(struct xfrm_state *x, int result,
-			  kuid_t auid, unsigned int sessionid, u32 secid)
+			  kuid_t auid, unsigned int sessionid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SAD-add");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf);
 	xfrm_audit_helper_sainfo(x, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	audit_log_end(audit_buf);
@@ -2214,14 +2211,14 @@ void xfrm_audit_state_add(struct xfrm_state *x, int result,
 EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
 
 void xfrm_audit_state_delete(struct xfrm_state *x, int result,
-			     kuid_t auid, unsigned int sessionid, u32 secid)
+			     kuid_t auid, unsigned int sessionid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SAD-delete");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
+	xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf);
 	xfrm_audit_helper_sainfo(x, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	audit_log_end(audit_buf);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8f131c10a6f3..d6409d927b82 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -599,7 +599,6 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct km_event c;
 	kuid_t loginuid = audit_get_loginuid(current);
 	unsigned int sessionid = audit_get_sessionid(current);
-	u32 sid;
 
 	err = verify_newsa_info(p, attrs);
 	if (err)
@@ -615,8 +614,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	else
 		err = xfrm_state_update(x);
 
-	security_task_getsecid(current, &sid);
-	xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid);
+	xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid);
 
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
@@ -678,7 +676,6 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_usersa_id *p = nlmsg_data(nlh);
 	kuid_t loginuid = audit_get_loginuid(current);
 	unsigned int sessionid = audit_get_sessionid(current);
-	u32 sid;
 
 	x = xfrm_user_state_lookup(net, p, attrs, &err);
 	if (x == NULL)
@@ -703,8 +700,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	km_state_notify(x, &c);
 
 out:
-	security_task_getsecid(current, &sid);
-	xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid, sid);
+	xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid);
 	xfrm_state_put(x);
 	return err;
 }
@@ -1416,7 +1412,6 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int excl;
 	kuid_t loginuid = audit_get_loginuid(current);
 	unsigned int sessionid = audit_get_sessionid(current);
-	u32 sid;
 
 	err = verify_newpolicy_info(p);
 	if (err)
@@ -1435,8 +1430,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	 * a type XFRM_MSG_UPDPOLICY - JHS */
 	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
 	err = xfrm_policy_insert(p->dir, xp, excl);
-	security_task_getsecid(current, &sid);
-	xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid);
+	xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid);
 
 	if (err) {
 		security_xfrm_policy_free(xp->security);
@@ -1675,11 +1669,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	} else {
 		kuid_t loginuid = audit_get_loginuid(current);
 		unsigned int sessionid = audit_get_sessionid(current);
-		u32 sid;
 
-		security_task_getsecid(current, &sid);
-		xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid,
-					 sid);
+		xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid);
 
 		if (err != 0)
 			goto out;
@@ -1709,7 +1700,6 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	audit_info.loginuid = audit_get_loginuid(current);
 	audit_info.sessionid = audit_get_sessionid(current);
-	security_task_getsecid(current, &audit_info.secid);
 	err = xfrm_state_flush(net, p->proto, &audit_info);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
@@ -1902,7 +1892,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	audit_info.loginuid = audit_get_loginuid(current);
 	audit_info.sessionid = audit_get_sessionid(current);
-	security_task_getsecid(current, &audit_info.secid);
 	err = xfrm_policy_flush(net, type, &audit_info);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
@@ -1971,11 +1960,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (up->hard) {
 		kuid_t loginuid = audit_get_loginuid(current);
 		unsigned int sessionid = audit_get_sessionid(current);
-		u32 sid;
 
-		security_task_getsecid(current, &sid);
 		xfrm_policy_delete(xp, p->dir);
-		xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid);
+		xfrm_audit_policy_delete(xp, 1, loginuid, sessionid);
 
 	} else {
 		// reset the timers here?
@@ -2014,11 +2001,9 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (ue->hard) {
 		kuid_t loginuid = audit_get_loginuid(current);
 		unsigned int sessionid = audit_get_sessionid(current);
-		u32 sid;
 
-		security_task_getsecid(current, &sid);
 		__xfrm_state_delete(x);
-		xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid);
+		xfrm_audit_state_delete(x, 1, loginuid, sessionid);
 	}
 	err = 0;
 out:
-- 
cgit v1.2.3-71-gd317


From 0967e6a5070e336507ce52f09f7297413b966981 Mon Sep 17 00:00:00 2001
From: Vandana Kannan <vandana.kannan@intel.com>
Date: Tue, 1 Apr 2014 16:26:59 +0530
Subject: drm/edid: Fill PAR in AVI infoframe based on CEA mode list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Populate PAR in infoframe structure. If there is a user setting for PAR, then
that value is set. Else, value is taken from CEA mode list if VIC is found.
Else, PAR is calculated from resolution. If none of these conditions are
satisfied, PAR is NONE as per initialization.

v2: Removed the part which sets PAR according to user input, based on
Daniel's review comments.

A separate patch will be submitted to create a property that would enable a
user space app to set aspect ratio for AVI infoframe.

v2: Removed the part which sets PAR according to user input, based on
Daniel's review comments.

v3: Removed calculation of PAR for non-CEA modes as per discussion with
Ville.

A separate patch will be submitted to create a property that would enable a
user space app to set aspect ratio for AVI infoframe.

Signed-off-by: Vandana Kannan <vandana.kannan@intel.com>
Cc: Jesse Barnes <jesse.barnes@intel.com>
Cc: Vijay Purushothaman <vijay.a.purushothaman@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: Squash in fixup for htmldocs.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_edid.c | 22 ++++++++++++++++++++++
 include/drm/drm_crtc.h     |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index d4e3f9d9370f..b8d6c5163575 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -2452,6 +2452,22 @@ u8 drm_match_cea_mode(const struct drm_display_mode *to_match)
 }
 EXPORT_SYMBOL(drm_match_cea_mode);
 
+/**
+ * drm_get_cea_aspect_ratio - get the picture aspect ratio corresponding to
+ * the input VIC from the CEA mode list
+ * @video_code: ID given to each of the CEA modes
+ *
+ * Returns picture aspect ratio
+ */
+enum hdmi_picture_aspect drm_get_cea_aspect_ratio(const u8 video_code)
+{
+	/* return picture aspect ratio for video_code - 1 to access the
+	 * right array element
+	*/
+	return edid_cea_modes[video_code-1].picture_aspect_ratio;
+}
+EXPORT_SYMBOL(drm_get_cea_aspect_ratio);
+
 /*
  * Calculate the alternate clock for HDMI modes (those from the HDMI vendor
  * specific block).
@@ -3613,6 +3629,12 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,
 	frame->video_code = drm_match_cea_mode(mode);
 
 	frame->picture_aspect = HDMI_PICTURE_ASPECT_NONE;
+
+	/* Populate picture aspect ratio from CEA mode list */
+	if (frame->video_code > 0)
+		frame->picture_aspect = drm_get_cea_aspect_ratio(
+						frame->video_code);
+
 	frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE;
 	frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN;
 
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index e55fccbe7c42..c061bb372199 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -1020,6 +1020,7 @@ extern int drm_mode_gamma_get_ioctl(struct drm_device *dev,
 extern int drm_mode_gamma_set_ioctl(struct drm_device *dev,
 				    void *data, struct drm_file *file_priv);
 extern u8 drm_match_cea_mode(const struct drm_display_mode *to_match);
+extern enum hdmi_picture_aspect drm_get_cea_aspect_ratio(const u8 video_code);
 extern bool drm_detect_hdmi_monitor(struct edid *edid);
 extern bool drm_detect_monitor_audio(struct edid *edid);
 extern bool drm_rgb_quant_range_selectable(struct edid *edid);
-- 
cgit v1.2.3-71-gd317


From f9b0e251dfbf2c4da642ec9210db29a7ac63b81a Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Wed, 2 Apr 2014 12:29:46 +0200
Subject: drm: make mode_valid callback optional

Many drm connectors do not need mode validation.
The patch makes this callback optional and removes dumb implementations.

v2: Rebase:
- imx move to a shared (but still dummy) ->mode_valid implementation.
- probe helpers have been extracted to drm_probe_helper.c

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com> (v1)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 Documentation/DocBook/drm.tmpl             | 6 +++---
 drivers/gpu/drm/ast/ast_mode.c             | 7 -------
 drivers/gpu/drm/bridge/ptn3460.c           | 7 -------
 drivers/gpu/drm/cirrus/cirrus_mode.c       | 8 --------
 drivers/gpu/drm/drm_probe_helper.c         | 2 +-
 drivers/gpu/drm/exynos/exynos_dp_core.c    | 7 -------
 drivers/gpu/drm/exynos/exynos_drm_dpi.c    | 7 -------
 drivers/gpu/drm/exynos/exynos_drm_vidi.c   | 7 -------
 drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c  | 7 -------
 drivers/gpu/drm/rcar-du/rcar_du_vgacon.c   | 7 -------
 drivers/gpu/drm/shmobile/shmob_drm_crtc.c  | 7 -------
 drivers/staging/imx-drm/imx-drm-core.c     | 7 -------
 drivers/staging/imx-drm/imx-drm.h          | 2 --
 drivers/staging/imx-drm/imx-hdmi.c         | 1 -
 drivers/staging/imx-drm/imx-ldb.c          | 1 -
 drivers/staging/imx-drm/imx-tve.c          | 4 ----
 drivers/staging/imx-drm/parallel-display.c | 1 -
 include/drm/drm_crtc_helper.h              | 2 +-
 18 files changed, 5 insertions(+), 85 deletions(-)

(limited to 'include')

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 677a02553ec0..c72e146e58d3 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -1903,8 +1903,8 @@ void intel_crt_init(struct drm_device *dev)
           <para>
             The function filters out modes larger than
             <parameter>max_width</parameter> and <parameter>max_height</parameter>
-            if specified. It then calls the connector
-            <methodname>mode_valid</methodname> helper operation for  each mode in
+            if specified. It then calls the optional connector
+            <methodname>mode_valid</methodname> helper operation for each mode in
             the probed list to check whether the mode is valid for the connector.
           </para>
         </listitem>
@@ -2265,7 +2265,7 @@ void intel_crt_init(struct drm_device *dev)
           <para>
             Verify whether a mode is valid for the connector. Return MODE_OK for
             supported modes and one of the enum drm_mode_status values (MODE_*)
-            for unsupported modes. This operation is mandatory.
+            for unsupported modes. This operation is optional.
           </para>
           <para>
             As the mode rejection reason is currently not used beside for
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index a4afdc8bb578..e599d64a2620 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -743,12 +743,6 @@ static int ast_get_modes(struct drm_connector *connector)
 	return 0;
 }
 
-static int ast_mode_valid(struct drm_connector *connector,
-			  struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static void ast_connector_destroy(struct drm_connector *connector)
 {
 	struct ast_connector *ast_connector = to_ast_connector(connector);
@@ -765,7 +759,6 @@ ast_connector_detect(struct drm_connector *connector, bool force)
 }
 
 static const struct drm_connector_helper_funcs ast_connector_helper_funcs = {
-	.mode_valid = ast_mode_valid,
 	.get_modes = ast_get_modes,
 	.best_encoder = ast_best_single_encoder,
 };
diff --git a/drivers/gpu/drm/bridge/ptn3460.c b/drivers/gpu/drm/bridge/ptn3460.c
index b171901a3553..98fd17ae4916 100644
--- a/drivers/gpu/drm/bridge/ptn3460.c
+++ b/drivers/gpu/drm/bridge/ptn3460.c
@@ -225,12 +225,6 @@ out:
 	return num_modes;
 }
 
-static int ptn3460_mode_valid(struct drm_connector *connector,
-		struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 struct drm_encoder *ptn3460_best_encoder(struct drm_connector *connector)
 {
 	struct ptn3460_bridge *ptn_bridge;
@@ -242,7 +236,6 @@ struct drm_encoder *ptn3460_best_encoder(struct drm_connector *connector)
 
 struct drm_connector_helper_funcs ptn3460_connector_helper_funcs = {
 	.get_modes = ptn3460_get_modes,
-	.mode_valid = ptn3460_mode_valid,
 	.best_encoder = ptn3460_best_encoder,
 };
 
diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c
index f59433b7610c..49332c5fe35b 100644
--- a/drivers/gpu/drm/cirrus/cirrus_mode.c
+++ b/drivers/gpu/drm/cirrus/cirrus_mode.c
@@ -505,13 +505,6 @@ static int cirrus_vga_get_modes(struct drm_connector *connector)
 	return count;
 }
 
-static int cirrus_vga_mode_valid(struct drm_connector *connector,
-				 struct drm_display_mode *mode)
-{
-	/* Any mode we've added is valid */
-	return MODE_OK;
-}
-
 static struct drm_encoder *cirrus_connector_best_encoder(struct drm_connector
 						  *connector)
 {
@@ -546,7 +539,6 @@ static void cirrus_connector_destroy(struct drm_connector *connector)
 
 struct drm_connector_helper_funcs cirrus_vga_connector_helper_funcs = {
 	.get_modes = cirrus_vga_get_modes,
-	.mode_valid = cirrus_vga_mode_valid,
 	.best_encoder = cirrus_connector_best_encoder,
 };
 
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index e70f54d4a581..d06340985a72 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -169,7 +169,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
 	drm_mode_validate_flag(connector, mode_flags);
 
 	list_for_each_entry(mode, &connector->modes, head) {
-		if (mode->status == MODE_OK)
+		if (mode->status == MODE_OK && connector_funcs->mode_valid)
 			mode->status = connector_funcs->mode_valid(connector,
 								   mode);
 	}
diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c
index aed533bbfd31..bb74472b4e4b 100644
--- a/drivers/gpu/drm/exynos/exynos_dp_core.c
+++ b/drivers/gpu/drm/exynos/exynos_dp_core.c
@@ -949,12 +949,6 @@ static int exynos_dp_get_modes(struct drm_connector *connector)
 	return 1;
 }
 
-static int exynos_dp_mode_valid(struct drm_connector *connector,
-			struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static struct drm_encoder *exynos_dp_best_encoder(
 			struct drm_connector *connector)
 {
@@ -965,7 +959,6 @@ static struct drm_encoder *exynos_dp_best_encoder(
 
 static struct drm_connector_helper_funcs exynos_dp_connector_helper_funcs = {
 	.get_modes = exynos_dp_get_modes,
-	.mode_valid = exynos_dp_mode_valid,
 	.best_encoder = exynos_dp_best_encoder,
 };
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
index 2b09c7c0bfcc..82e52c71bccc 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
@@ -94,12 +94,6 @@ static int exynos_dpi_get_modes(struct drm_connector *connector)
 	return 0;
 }
 
-static int exynos_dpi_mode_valid(struct drm_connector *connector,
-				 struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static struct drm_encoder *
 exynos_dpi_best_encoder(struct drm_connector *connector)
 {
@@ -110,7 +104,6 @@ exynos_dpi_best_encoder(struct drm_connector *connector)
 
 static struct drm_connector_helper_funcs exynos_dpi_connector_helper_funcs = {
 	.get_modes = exynos_dpi_get_modes,
-	.mode_valid = exynos_dpi_mode_valid,
 	.best_encoder = exynos_dpi_best_encoder,
 };
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index 7afead9c3f30..b6980865dd50 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -533,12 +533,6 @@ static int vidi_get_modes(struct drm_connector *connector)
 	return drm_add_edid_modes(connector, edid);
 }
 
-static int vidi_mode_valid(struct drm_connector *connector,
-			struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static struct drm_encoder *vidi_best_encoder(struct drm_connector *connector)
 {
 	struct vidi_context *ctx = ctx_from_connector(connector);
@@ -548,7 +542,6 @@ static struct drm_encoder *vidi_best_encoder(struct drm_connector *connector)
 
 static struct drm_connector_helper_funcs vidi_connector_helper_funcs = {
 	.get_modes = vidi_get_modes,
-	.mode_valid = vidi_mode_valid,
 	.best_encoder = vidi_best_encoder,
 };
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
index 4f3ba93cd91d..289048d1c7b2 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
@@ -57,15 +57,8 @@ static int rcar_du_lvds_connector_get_modes(struct drm_connector *connector)
 	return 1;
 }
 
-static int rcar_du_lvds_connector_mode_valid(struct drm_connector *connector,
-					    struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static const struct drm_connector_helper_funcs connector_helper_funcs = {
 	.get_modes = rcar_du_lvds_connector_get_modes,
-	.mode_valid = rcar_du_lvds_connector_mode_valid,
 	.best_encoder = rcar_du_connector_best_encoder,
 };
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c
index 41d563adfeaa..ccfe64c7188f 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c
@@ -25,15 +25,8 @@ static int rcar_du_vga_connector_get_modes(struct drm_connector *connector)
 	return 0;
 }
 
-static int rcar_du_vga_connector_mode_valid(struct drm_connector *connector,
-					    struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static const struct drm_connector_helper_funcs connector_helper_funcs = {
 	.get_modes = rcar_du_vga_connector_get_modes,
-	.mode_valid = rcar_du_vga_connector_mode_valid,
 	.best_encoder = rcar_du_connector_best_encoder,
 };
 
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
index e9e5e6d368cc..faf176b2daf9 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
@@ -674,12 +674,6 @@ static int shmob_drm_connector_get_modes(struct drm_connector *connector)
 	return 1;
 }
 
-static int shmob_drm_connector_mode_valid(struct drm_connector *connector,
-					  struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
 static struct drm_encoder *
 shmob_drm_connector_best_encoder(struct drm_connector *connector)
 {
@@ -690,7 +684,6 @@ shmob_drm_connector_best_encoder(struct drm_connector *connector)
 
 static const struct drm_connector_helper_funcs connector_helper_funcs = {
 	.get_modes = shmob_drm_connector_get_modes,
-	.mode_valid = shmob_drm_connector_mode_valid,
 	.best_encoder = shmob_drm_connector_best_encoder,
 };
 
diff --git a/drivers/staging/imx-drm/imx-drm-core.c b/drivers/staging/imx-drm/imx-drm-core.c
index 4144a75e5f71..53ff005245c5 100644
--- a/drivers/staging/imx-drm/imx-drm-core.c
+++ b/drivers/staging/imx-drm/imx-drm-core.c
@@ -200,13 +200,6 @@ static const struct file_operations imx_drm_driver_fops = {
 	.llseek = noop_llseek,
 };
 
-int imx_drm_connector_mode_valid(struct drm_connector *connector,
-	struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-EXPORT_SYMBOL(imx_drm_connector_mode_valid);
-
 void imx_drm_connector_destroy(struct drm_connector *connector)
 {
 	drm_sysfs_connector_remove(connector);
diff --git a/drivers/staging/imx-drm/imx-drm.h b/drivers/staging/imx-drm/imx-drm.h
index a322bac55414..7453ae00c412 100644
--- a/drivers/staging/imx-drm/imx-drm.h
+++ b/drivers/staging/imx-drm/imx-drm.h
@@ -50,8 +50,6 @@ int imx_drm_encoder_get_mux_id(struct device_node *node,
 int imx_drm_encoder_parse_of(struct drm_device *drm,
 	struct drm_encoder *encoder, struct device_node *np);
 
-int imx_drm_connector_mode_valid(struct drm_connector *connector,
-	struct drm_display_mode *mode);
 void imx_drm_connector_destroy(struct drm_connector *connector);
 void imx_drm_encoder_destroy(struct drm_encoder *encoder);
 
diff --git a/drivers/staging/imx-drm/imx-hdmi.c b/drivers/staging/imx-drm/imx-hdmi.c
index d47dedd2cdb4..9fbe6d6a989d 100644
--- a/drivers/staging/imx-drm/imx-hdmi.c
+++ b/drivers/staging/imx-drm/imx-hdmi.c
@@ -1492,7 +1492,6 @@ static struct drm_connector_funcs imx_hdmi_connector_funcs = {
 
 static struct drm_connector_helper_funcs imx_hdmi_connector_helper_funcs = {
 	.get_modes = imx_hdmi_connector_get_modes,
-	.mode_valid = imx_drm_connector_mode_valid,
 	.best_encoder = imx_hdmi_connector_best_encoder,
 };
 
diff --git a/drivers/staging/imx-drm/imx-ldb.c b/drivers/staging/imx-drm/imx-ldb.c
index fe4c1ef4e7a5..7e3f019d7e72 100644
--- a/drivers/staging/imx-drm/imx-ldb.c
+++ b/drivers/staging/imx-drm/imx-ldb.c
@@ -317,7 +317,6 @@ static struct drm_connector_funcs imx_ldb_connector_funcs = {
 static struct drm_connector_helper_funcs imx_ldb_connector_helper_funcs = {
 	.get_modes = imx_ldb_connector_get_modes,
 	.best_encoder = imx_ldb_connector_best_encoder,
-	.mode_valid = imx_drm_connector_mode_valid,
 };
 
 static struct drm_encoder_funcs imx_ldb_encoder_funcs = {
diff --git a/drivers/staging/imx-drm/imx-tve.c b/drivers/staging/imx-drm/imx-tve.c
index 575533f4fd64..5a5a5287a86a 100644
--- a/drivers/staging/imx-drm/imx-tve.c
+++ b/drivers/staging/imx-drm/imx-tve.c
@@ -251,10 +251,6 @@ static int imx_tve_connector_mode_valid(struct drm_connector *connector,
 	unsigned long rate;
 	int ret;
 
-	ret = imx_drm_connector_mode_valid(connector, mode);
-	if (ret != MODE_OK)
-		return ret;
-
 	/* pixel clock with 2x oversampling */
 	rate = clk_round_rate(tve->clk, 2000UL * mode->clock) / 2000;
 	if (rate == mode->clock)
diff --git a/drivers/staging/imx-drm/parallel-display.c b/drivers/staging/imx-drm/parallel-display.c
index c60b6c645f42..52598e489a4b 100644
--- a/drivers/staging/imx-drm/parallel-display.c
+++ b/drivers/staging/imx-drm/parallel-display.c
@@ -148,7 +148,6 @@ static struct drm_connector_funcs imx_pd_connector_funcs = {
 static struct drm_connector_helper_funcs imx_pd_connector_helper_funcs = {
 	.get_modes = imx_pd_connector_get_modes,
 	.best_encoder = imx_pd_connector_best_encoder,
-	.mode_valid = imx_drm_connector_mode_valid,
 };
 
 static struct drm_encoder_funcs imx_pd_encoder_funcs = {
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index 36a5febac2a6..7ffb59232e84 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -114,7 +114,7 @@ struct drm_encoder_helper_funcs {
 /**
  * drm_connector_helper_funcs - helper operations for connectors
  * @get_modes: get mode list for this connector
- * @mode_valid: is this mode valid on the given connector?
+ * @mode_valid (optional): is this mode valid on the given connector?
  *
  * The helper operations are called by the mid-layer CRTC helper.
  */
-- 
cgit v1.2.3-71-gd317


From eaaf8f0fc32468d4dedbb5b5f9c5bfb27744be4c Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 28 Aug 2013 15:19:23 +0200
Subject: drm/pci: fold in irq_by_busid support

This is a ums-only ioctl, and we've only ever supported ums (at least
in upstream) on pci devices. So no point in keeping that piece of
legacy logic abstracted within the drm bus driver.

To keep things work without CONFIG_PCI also add a dummy ioctl.

v2: Block the irq_by_busid ioctl for modeset drivers.

v3: Spelling/whitespace polish (Thierry)

Reviewed-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_irq.c | 27 ---------------------------
 drivers/gpu/drm/drm_pci.c | 39 +++++++++++++++++++++++++++++++++++++--
 include/drm/drmP.h        |  1 -
 3 files changed, 37 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index d4cc47690d65..e5920e7a4392 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -56,33 +56,6 @@
  */
 #define DRM_REDUNDANT_VBLIRQ_THRESH_NS 1000000
 
-/**
- * Get interrupt from bus id.
- *
- * \param inode device inode.
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument, pointing to a drm_irq_busid structure.
- * \return zero on success or a negative number on failure.
- *
- * Finds the PCI device with the specified bus id and gets its IRQ number.
- * This IOCTL is deprecated, and will now return EINVAL for any busid not equal
- * to that of the device that this DRM instance attached to.
- */
-int drm_irq_by_busid(struct drm_device *dev, void *data,
-		     struct drm_file *file_priv)
-{
-	struct drm_irq_busid *p = data;
-
-	if (!dev->driver->bus->irq_by_busid)
-		return -EINVAL;
-
-	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
-		return -EINVAL;
-
-	return dev->driver->bus->irq_by_busid(dev, p);
-}
-
 /*
  * Clear vblank timestamp buffer for a crtc.
  */
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 9c696a5ad74d..8d4221683af4 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -247,7 +247,6 @@ err:
 	return ret;
 }
 
-
 static int drm_pci_irq_by_busid(struct drm_device *dev, struct drm_irq_busid *p)
 {
 	if ((p->busnum >> 8) != drm_get_pci_domain(dev) ||
@@ -262,6 +261,37 @@ static int drm_pci_irq_by_busid(struct drm_device *dev, struct drm_irq_busid *p)
 	return 0;
 }
 
+/**
+ * Get interrupt from bus id.
+ *
+ * \param inode device inode.
+ * \param file_priv DRM file private.
+ * \param cmd command.
+ * \param arg user argument, pointing to a drm_irq_busid structure.
+ * \return zero on success or a negative number on failure.
+ *
+ * Finds the PCI device with the specified bus id and gets its IRQ number.
+ * This IOCTL is deprecated, and will now return EINVAL for any busid not equal
+ * to that of the device that this DRM instance attached to.
+ */
+int drm_irq_by_busid(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct drm_irq_busid *p = data;
+
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
+	/* UMS was only ever support on PCI devices. */
+	if (WARN_ON(!dev->pdev))
+		return -EINVAL;
+
+	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
+		return -EINVAL;
+
+	return drm_pci_irq_by_busid(dev, p);
+}
+
 static void drm_pci_agp_init(struct drm_device *dev)
 {
 	if (drm_core_check_feature(dev, DRIVER_USE_AGP)) {
@@ -292,7 +322,6 @@ static struct drm_bus drm_pci_bus = {
 	.get_name = drm_pci_get_name,
 	.set_busid = drm_pci_set_busid,
 	.set_unique = drm_pci_set_unique,
-	.irq_by_busid = drm_pci_irq_by_busid,
 };
 
 /**
@@ -453,6 +482,12 @@ int drm_pci_init(struct drm_driver *driver, struct pci_driver *pdriver)
 }
 
 void drm_pci_agp_destroy(struct drm_device *dev) {}
+
+int drm_irq_by_busid(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	return -EINVAL;
+}
 #endif
 
 EXPORT_SYMBOL(drm_pci_init);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index a7c2a862b4f4..6d7ca98d0143 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -737,7 +737,6 @@ struct drm_bus {
 	int (*set_busid)(struct drm_device *dev, struct drm_master *master);
 	int (*set_unique)(struct drm_device *dev, struct drm_master *master,
 			  struct drm_unique *unique);
-	int (*irq_by_busid)(struct drm_device *dev, struct drm_irq_busid *p);
 };
 
 /**
-- 
cgit v1.2.3-71-gd317


From ebfa4324930618e72645d2eb7db1c9773228a868 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 3 Nov 2013 20:27:09 +0100
Subject: drm: remove drm_dev_to_irq from drivers

Only used in some legacy pci drivers, and dereferencing the PCI irq is
actually shorter ...

Since this removes all users for drm_dev_to_irq from the tree except
in drm_irq.c, move the inline helper in there. It'll disappear soon,
too.

v2: Polish commit message (Thierry)

Reviewed-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_irq.c             | 5 +++++
 drivers/gpu/drm/mga/mga_state.c       | 2 +-
 drivers/gpu/drm/r128/r128_state.c     | 2 +-
 drivers/gpu/drm/radeon/radeon_state.c | 2 +-
 include/drm/drmP.h                    | 5 -----
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 06a53f8b618c..589e865832cd 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -233,6 +233,11 @@ static void drm_irq_vgaarb_nokms(void *cookie, bool state)
 	}
 }
 
+static inline int drm_dev_to_irq(struct drm_device *dev)
+{
+	return dev->driver->bus->get_irq(dev);
+}
+
 /**
  * Install IRQ handler.
  *
diff --git a/drivers/gpu/drm/mga/mga_state.c b/drivers/gpu/drm/mga/mga_state.c
index 314685b7f41f..3cb58df5237e 100644
--- a/drivers/gpu/drm/mga/mga_state.c
+++ b/drivers/gpu/drm/mga/mga_state.c
@@ -1020,7 +1020,7 @@ static int mga_getparam(struct drm_device *dev, void *data, struct drm_file *fil
 
 	switch (param->param) {
 	case MGA_PARAM_IRQ_NR:
-		value = drm_dev_to_irq(dev);
+		value = dev->pdev->irq;
 		break;
 	case MGA_PARAM_CARD_TYPE:
 		value = dev_priv->chipset;
diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c
index e806dacd452f..97064dd434c2 100644
--- a/drivers/gpu/drm/r128/r128_state.c
+++ b/drivers/gpu/drm/r128/r128_state.c
@@ -1594,7 +1594,7 @@ static int r128_getparam(struct drm_device *dev, void *data, struct drm_file *fi
 
 	switch (param->param) {
 	case R128_PARAM_IRQ_NR:
-		value = drm_dev_to_irq(dev);
+		value = dev->pdev->irq;
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 956ab7f14e16..b576549fc783 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -3054,7 +3054,7 @@ static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_fil
 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
 			value = 0;
 		else
-			value = drm_dev_to_irq(dev);
+			value = dev->pdev->irq;
 		break;
 	case RADEON_PARAM_GART_BASE:
 		value = dev_priv->gart_vm_start;
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 6d7ca98d0143..41839ea0c1ee 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1185,11 +1185,6 @@ static __inline__ int drm_core_check_feature(struct drm_device *dev,
 	return ((dev->driver->driver_features & feature) ? 1 : 0);
 }
 
-static inline int drm_dev_to_irq(struct drm_device *dev)
-{
-	return dev->driver->bus->get_irq(dev);
-}
-
 static inline void drm_device_set_unplugged(struct drm_device *dev)
 {
 	smp_wmb();
-- 
cgit v1.2.3-71-gd317


From 42b21049fc26513ca8e732f47559b1525b04a992 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 3 Nov 2013 20:30:25 +0100
Subject: drm: kill drm_bus->bus_type

Completely unused. Hooray, midlayer mistakes that didn't cause work to
undo!

v2: Rebase on top of the recent tegra changes which added a host1x drm
bus.

Reviewed-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_pci.c      | 1 -
 drivers/gpu/drm/drm_platform.c | 1 -
 drivers/gpu/drm/drm_usb.c      | 1 -
 drivers/gpu/drm/tegra/bus.c    | 1 -
 include/drm/drmP.h             | 6 ------
 5 files changed, 10 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 8d4221683af4..08c76af920d6 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -317,7 +317,6 @@ void drm_pci_agp_destroy(struct drm_device *dev)
 }
 
 static struct drm_bus drm_pci_bus = {
-	.bus_type = DRIVER_BUS_PCI,
 	.get_irq = drm_pci_get_irq,
 	.get_name = drm_pci_get_name,
 	.set_busid = drm_pci_set_busid,
diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c
index 319ff5385601..5cd8c695824f 100644
--- a/drivers/gpu/drm/drm_platform.c
+++ b/drivers/gpu/drm/drm_platform.c
@@ -123,7 +123,6 @@ err:
 }
 
 static struct drm_bus drm_platform_bus = {
-	.bus_type = DRIVER_BUS_PLATFORM,
 	.get_irq = drm_platform_get_irq,
 	.get_name = drm_platform_get_name,
 	.set_busid = drm_platform_set_busid,
diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c
index c3406aad2944..ed60f887c805 100644
--- a/drivers/gpu/drm/drm_usb.c
+++ b/drivers/gpu/drm/drm_usb.c
@@ -53,7 +53,6 @@ static int drm_usb_set_busid(struct drm_device *dev,
 }
 
 static struct drm_bus drm_usb_bus = {
-	.bus_type = DRIVER_BUS_USB,
 	.get_irq = drm_usb_get_irq,
 	.get_name = drm_usb_get_name,
 	.set_busid = drm_usb_set_busid,
diff --git a/drivers/gpu/drm/tegra/bus.c b/drivers/gpu/drm/tegra/bus.c
index 71cef5c13dc8..6916fa51cfa4 100644
--- a/drivers/gpu/drm/tegra/bus.c
+++ b/drivers/gpu/drm/tegra/bus.c
@@ -37,7 +37,6 @@ static int drm_host1x_set_busid(struct drm_device *dev,
 }
 
 static struct drm_bus drm_host1x_bus = {
-	.bus_type = DRIVER_BUS_HOST1X,
 	.set_busid = drm_host1x_set_busid,
 };
 
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 41839ea0c1ee..9f1fb8d36b67 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -143,11 +143,6 @@ int drm_err(const char *func, const char *format, ...);
 #define DRIVER_PRIME       0x4000
 #define DRIVER_RENDER      0x8000
 
-#define DRIVER_BUS_PCI 0x1
-#define DRIVER_BUS_PLATFORM 0x2
-#define DRIVER_BUS_USB 0x3
-#define DRIVER_BUS_HOST1X 0x4
-
 /***********************************************************************/
 /** \name Begin the DRM... */
 /*@{*/
@@ -731,7 +726,6 @@ struct drm_master {
 #define DRM_SCANOUTPOS_ACCURATE     (1 << 2)
 
 struct drm_bus {
-	int bus_type;
 	int (*get_irq)(struct drm_device *dev);
 	const char *(*get_name)(struct drm_device *dev);
 	int (*set_busid)(struct drm_device *dev, struct drm_master *master);
-- 
cgit v1.2.3-71-gd317


From fa372a51cb5f93800f711473e5a36e0e0c9a8f00 Mon Sep 17 00:00:00 2001
From: Markus Mayer <markus.mayer@linaro.org>
Date: Tue, 8 Apr 2014 15:19:43 -0700
Subject: mmc: Delay the card_event callback into the mmc_rescan worker

This change removes the callback from atomic context which it doesn't
need to be in, and puts it in line with the debounced rescan.

This code is based on these e-mail threads with Christian Daudt:

  https://lkml.org/lkml/2013/8/19/539
  https://lkml.org/lkml/2014/3/19/79

Signed-off-by: Markus Mayer <markus.mayer@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/core.c      | 5 +++++
 drivers/mmc/core/slot-gpio.c | 4 +---
 include/linux/mmc/host.h     | 2 ++
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index acbc3f2aaaf9..f396d1bb4ac4 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2403,6 +2403,11 @@ void mmc_rescan(struct work_struct *work)
 		container_of(work, struct mmc_host, detect.work);
 	int i;
 
+	if (host->trigger_card_event && host->ops->card_event) {
+		host->ops->card_event(host);
+		host->trigger_card_event = false;
+	}
+
 	if (host->rescan_disable)
 		return;
 
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index f7650b899e3d..5f89cb83d5f0 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -32,9 +32,7 @@ static irqreturn_t mmc_gpio_cd_irqt(int irq, void *dev_id)
 	/* Schedule a card detection after a debounce timeout */
 	struct mmc_host *host = dev_id;
 
-	if (host->ops->card_event)
-		host->ops->card_event(host);
-
+	host->trigger_card_event = true;
 	mmc_detect_change(host, msecs_to_jiffies(200));
 
 	return IRQ_HANDLED;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 35354207e71f..0cf705c83998 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -319,6 +319,8 @@ struct mmc_host {
 	int			rescan_disable;	/* disable card detection */
 	int			rescan_entered;	/* used with nonremovable devices */
 
+	bool			trigger_card_event; /* card_event necessary */
+
 	struct mmc_card		*card;		/* device attached to this host */
 
 	wait_queue_head_t	wq;
-- 
cgit v1.2.3-71-gd317


From 297d40560bc8f474adbb43178e3118321fa702ea Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@freescale.com>
Date: Fri, 4 Apr 2014 22:42:48 -0300
Subject: mmc: card.h: Use NULL instead of 0 for END_FIXUP

Fix the following sparse warnings:

drivers/mmc/card/block.c:2421:9: warning: Using plain integer as NULL pointer

drivers/mmc/core/quirks.c:69:9: warning: Using plain integer as NULL pointer

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 include/linux/mmc/card.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index b73027298b3a..aa7e57f60fb2 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -353,7 +353,7 @@ struct mmc_fixup {
 #define CID_OEMID_ANY ((unsigned short) -1)
 #define CID_NAME_ANY (NULL)
 
-#define END_FIXUP { 0 }
+#define END_FIXUP { NULL }
 
 #define _FIXUP_EXT(_name, _manfid, _oemid, _rev_start, _rev_end,	\
 		   _cis_vendor, _cis_device,				\
-- 
cgit v1.2.3-71-gd317


From 3de0b592394d17b2c41a261a6a493a521213f299 Mon Sep 17 00:00:00 2001
From: Venkata Duvvuru <VenkatKumar.Duvvuru@Emulex.Com>
Date: Mon, 21 Apr 2014 15:37:59 +0530
Subject: ethtool: Support for configurable RSS hash key

This ethtool patch primarily copies the ioctl command data structures
from/to the User space and invokes the driver hook.

Signed-off-by: Venkat Duvvuru <VenkatKumar.Duvvuru@Emulex.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h      |  13 +++
 include/uapi/linux/ethtool.h |  32 +++++++
 net/core/ethtool.c           | 221 ++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 252 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 0a114d05f68d..212f537fc686 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -154,13 +154,23 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
  * @reset: Reset (part of) the device, as specified by a bitmask of
  *	flags from &enum ethtool_reset_flags.  Returns a negative
  *	error code or zero.
+ * @get_rxfh_key_size: Get the size of the RX flow hash key.
+ *	Returns zero if not supported for this specific device.
  * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
  *	Returns zero if not supported for this specific device.
  * @get_rxfh_indir: Get the contents of the RX flow hash indirection table.
  *	Will not be called if @get_rxfh_indir_size returns zero.
+ * @get_rxfh: Get the contents of the RX flow hash indirection table and hash
+ *	key.
+ *	Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size
+ *	returns zero.
  *	Returns a negative error code or zero.
  * @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
  *	Will not be called if @get_rxfh_indir_size returns zero.
+ * @set_rxfh: Set the contents of the RX flow hash indirection table and
+ *	hash key.
+ *	Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size
+ *	returns zero.
  *	Returns a negative error code or zero.
  * @get_channels: Get number of channels.
  * @set_channels: Set number of channels.  Returns a negative error code or
@@ -232,7 +242,10 @@ struct ethtool_ops {
 	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
 	int	(*flash_device)(struct net_device *, struct ethtool_flash *);
 	int	(*reset)(struct net_device *, u32 *);
+	u32	(*get_rxfh_key_size)(struct net_device *);
 	u32	(*get_rxfh_indir_size)(struct net_device *);
+	int	(*get_rxfh)(struct net_device *, u32 *, u8 *);
+	int	(*set_rxfh)(struct net_device *, u32 *, u8 *);
 	int	(*get_rxfh_indir)(struct net_device *, u32 *);
 	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index fd161e91b6d7..d47d31d6fa0e 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -846,6 +846,35 @@ struct ethtool_rxfh_indir {
 	__u32	ring_index[0];
 };
 
+/**
+ * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
+ * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
+ * @rss_context: RSS context identifier.
+ * @indir_size: On entry, the array size of the user buffer, which may be zero.
+ *		On return from %ETHTOOL_GRSSH, the array size of the hardware
+ *		indirection table.
+ * @key_size:	On entry, the array size of the user buffer in bytes,
+ *		which may be zero.
+ *		On return from %ETHTOOL_GRSSH, the size of the RSS hash key.
+ * @rsvd:	Reserved for future extensions.
+ * @rss_config: RX ring/queue index for each hash value i.e., indirection table
+ *		of size @indir_size followed by hash key of size @key_size.
+ *
+ * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the
+ * size should be returned.  For %ETHTOOL_SRSSH, a @indir_size of 0xDEADBEEF
+ * means that indir table setting is not requested and a @indir_size of zero
+ * means the indir table should be reset to default values.  This last feature
+ * is not supported by the original implementations.
+ */
+struct ethtool_rxfh {
+	__u32   cmd;
+	__u32	rss_context;
+	__u32   indir_size;
+	__u32   key_size;
+	__u32	rsvd[2];
+	__u32   rss_config[0];
+};
+
 /**
  * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter
  * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW
@@ -1118,6 +1147,9 @@ enum ethtool_sfeatures_retval_bits {
 #define ETHTOOL_GEEE		0x00000044 /* Get EEE settings */
 #define ETHTOOL_SEEE		0x00000045 /* Set EEE settings */
 
+#define ETHTOOL_GRSSH		0x00000046 /* Get RX flow hash configuration */
+#define ETHTOOL_SRSSH		0x00000047 /* Set RX flow hash configuration */
+
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
 #define SPARC_ETH_SSET		ETHTOOL_SSET
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 640ba0e5831c..1d72786ef866 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -557,6 +557,23 @@ err_out:
 	return ret;
 }
 
+static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
+					struct ethtool_rxnfc *rx_rings,
+					u32 size)
+{
+	int ret = 0, i;
+
+	if (copy_from_user(indir, useraddr, size * sizeof(indir[0])))
+		ret = -EFAULT;
+
+	/* Validate ring indices */
+	for (i = 0; i < size; i++) {
+		if (indir[i] >= rx_rings->data)
+			ret = -EINVAL;
+	}
+	return ret;
+}
+
 static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
 						     void __user *useraddr)
 {
@@ -613,6 +630,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 	u32 *indir;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	int ret;
+	u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
 
 	if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
 	    !ops->get_rxnfc)
@@ -643,28 +661,196 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 		for (i = 0; i < dev_size; i++)
 			indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
 	} else {
-		if (copy_from_user(indir,
-				  useraddr +
-				  offsetof(struct ethtool_rxfh_indir,
-					   ring_index[0]),
-				  dev_size * sizeof(indir[0]))) {
+		ret = ethtool_copy_validate_indir(indir,
+						  useraddr + ringidx_offset,
+						  &rx_rings,
+						  dev_size);
+		if (ret)
+			goto out;
+	}
+
+	ret = ops->set_rxfh_indir(dev, indir);
+
+out:
+	kfree(indir);
+	return ret;
+}
+
+static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
+					       void __user *useraddr)
+{
+	int ret;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u32 user_indir_size = 0, user_key_size = 0;
+	u32 dev_indir_size = 0, dev_key_size = 0;
+	u32 total_size;
+	u32 indir_offset, indir_bytes;
+	u32 key_offset;
+	u32 *indir = NULL;
+	u8 *hkey = NULL;
+	u8 *rss_config;
+
+	if (!(dev->ethtool_ops->get_rxfh_indir_size ||
+	      dev->ethtool_ops->get_rxfh_key_size) ||
+	      !dev->ethtool_ops->get_rxfh)
+		return -EOPNOTSUPP;
+
+	if (ops->get_rxfh_indir_size)
+		dev_indir_size = ops->get_rxfh_indir_size(dev);
+
+	indir_offset = offsetof(struct ethtool_rxfh, indir_size);
+
+	if (copy_from_user(&user_indir_size,
+			   useraddr + indir_offset,
+			   sizeof(user_indir_size)))
+		return -EFAULT;
+
+	if (copy_to_user(useraddr + indir_offset,
+			 &dev_indir_size, sizeof(dev_indir_size)))
+		return -EFAULT;
+
+	if (ops->get_rxfh_key_size)
+		dev_key_size = ops->get_rxfh_key_size(dev);
+
+	if ((dev_key_size + dev_indir_size) == 0)
+		return -EOPNOTSUPP;
+
+	key_offset = offsetof(struct ethtool_rxfh, key_size);
+
+	if (copy_from_user(&user_key_size,
+			   useraddr + key_offset,
+			   sizeof(user_key_size)))
+		return -EFAULT;
+
+	if (copy_to_user(useraddr + key_offset,
+			 &dev_key_size, sizeof(dev_key_size)))
+		return -EFAULT;
+
+	/* If the user buffer size is 0, this is just a query for the
+	 * device table size and key size.  Otherwise, if the User size is
+	 * not equal to device table size or key size it's an error.
+	 */
+	if (!user_indir_size && !user_key_size)
+		return 0;
+
+	if ((user_indir_size && (user_indir_size != dev_indir_size)) ||
+	    (user_key_size && (user_key_size != dev_key_size)))
+		return -EINVAL;
+
+	indir_bytes = user_indir_size * sizeof(indir[0]);
+	total_size = indir_bytes + user_key_size;
+	rss_config = kzalloc(total_size, GFP_USER);
+	if (!rss_config)
+		return -ENOMEM;
+
+	if (user_indir_size)
+		indir = (u32 *)rss_config;
+
+	if (user_key_size)
+		hkey = rss_config + indir_bytes;
+
+	ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey);
+	if (!ret) {
+		if (copy_to_user(useraddr +
+				 offsetof(struct ethtool_rxfh, rss_config[0]),
+				 rss_config, total_size))
 			ret = -EFAULT;
+	}
+
+	kfree(rss_config);
+
+	return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
+					       void __user *useraddr)
+{
+	int ret;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_rxnfc rx_rings;
+	u32 user_indir_size = 0, dev_indir_size = 0, i;
+	u32 user_key_size = 0, dev_key_size = 0;
+	u32 *indir = NULL, indir_bytes = 0;
+	u8 *hkey = NULL;
+	u8 *rss_config;
+	u32 indir_offset, key_offset;
+	u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+
+	if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) ||
+	    !ops->get_rxnfc || !ops->set_rxfh)
+		return -EOPNOTSUPP;
+
+	if (ops->get_rxfh_indir_size)
+		dev_indir_size = ops->get_rxfh_indir_size(dev);
+
+	indir_offset = offsetof(struct ethtool_rxfh, indir_size);
+	if (copy_from_user(&user_indir_size,
+			   useraddr + indir_offset,
+			   sizeof(user_indir_size)))
+		return -EFAULT;
+
+	if (ops->get_rxfh_key_size)
+		dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
+
+	if ((dev_key_size + dev_indir_size) == 0)
+		return -EOPNOTSUPP;
+
+	key_offset = offsetof(struct ethtool_rxfh, key_size);
+	if (copy_from_user(&user_key_size,
+			   useraddr + key_offset,
+			   sizeof(user_key_size)))
+		return -EFAULT;
+
+	/* If either indir or hash key is valid, proceed further.
+	 */
+	if ((user_indir_size && ((user_indir_size != 0xDEADBEEF) &&
+				 user_indir_size != dev_indir_size)) ||
+	    (user_key_size && (user_key_size != dev_key_size)))
+		return -EINVAL;
+
+	if (user_indir_size != 0xDEADBEEF)
+		indir_bytes = dev_indir_size * sizeof(indir[0]);
+
+	rss_config = kzalloc(indir_bytes + user_key_size, GFP_USER);
+	if (!rss_config)
+		return -ENOMEM;
+
+	rx_rings.cmd = ETHTOOL_GRXRINGS;
+	ret = ops->get_rxnfc(dev, &rx_rings, NULL);
+	if (ret)
+		goto out;
+
+	/* user_indir_size == 0 means reset the indir table to default.
+	 * user_indir_size == 0xDEADBEEF means indir setting is not requested.
+	 */
+	if (user_indir_size && user_indir_size != 0xDEADBEEF) {
+		indir = (u32 *)rss_config;
+		ret = ethtool_copy_validate_indir(indir,
+						  useraddr + rss_cfg_offset,
+						  &rx_rings,
+						  user_indir_size);
+		if (ret)
 			goto out;
-		}
+	} else if (user_indir_size == 0) {
+		indir = (u32 *)rss_config;
+		for (i = 0; i < dev_indir_size; i++)
+			indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+	}
 
-		/* Validate ring indices */
-		for (i = 0; i < dev_size; i++) {
-			if (indir[i] >= rx_rings.data) {
-				ret = -EINVAL;
-				goto out;
-			}
+	if (user_key_size) {
+		hkey = rss_config + indir_bytes;
+		if (copy_from_user(hkey,
+				   useraddr + rss_cfg_offset + indir_bytes,
+				   user_key_size)) {
+			ret = -EFAULT;
+			goto out;
 		}
 	}
 
-	ret = ops->set_rxfh_indir(dev, indir);
+	ret = ops->set_rxfh(dev, indir, hkey);
 
 out:
-	kfree(indir);
+	kfree(rss_config);
 	return ret;
 }
 
@@ -1491,6 +1677,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCLSRULE:
 	case ETHTOOL_GRXCLSRLALL:
 	case ETHTOOL_GRXFHINDIR:
+	case ETHTOOL_GRSSH:
 	case ETHTOOL_GFEATURES:
 	case ETHTOOL_GCHANNELS:
 	case ETHTOOL_GET_TS_INFO:
@@ -1628,6 +1815,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SRXFHINDIR:
 		rc = ethtool_set_rxfh_indir(dev, useraddr);
 		break;
+	case ETHTOOL_GRSSH:
+		rc = ethtool_get_rxfh(dev, useraddr);
+		break;
+	case ETHTOOL_SRSSH:
+		rc = ethtool_set_rxfh(dev, useraddr);
+		break;
 	case ETHTOOL_GFEATURES:
 		rc = ethtool_get_features(dev, useraddr);
 		break;
-- 
cgit v1.2.3-71-gd317


From 4cd3675ebf74d7f559038ded6aa8088e4099a83d Mon Sep 17 00:00:00 2001
From: Chema Gonzalez <chema@google.com>
Date: Mon, 21 Apr 2014 09:21:24 -0700
Subject: filter: added BPF random opcode

Added a new ancillary load (bpf call in eBPF parlance) that produces
a 32-bit random number. We are implementing it as an ancillary load
(instead of an ISA opcode) because (a) it is simpler, (b) allows easy
JITing, and (c) seems more in line with generic ISAs that do not have
"get a random number" as a instruction, but as an OS call.

The main use for this ancillary load is to perform random packet sampling.

Signed-off-by: Chema Gonzalez <chema@google.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/filter.txt | 13 +++++++++++++
 include/linux/filter.h              |  1 +
 include/uapi/linux/filter.h         |  3 ++-
 net/core/filter.c                   | 12 ++++++++++++
 tools/net/bpf_exp.l                 |  1 +
 tools/net/bpf_exp.y                 | 11 ++++++++++-
 6 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 81f940f4e884..82e1cb0b3da8 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -281,6 +281,7 @@ Possible BPF extensions are shown in the following table:
   cpu                                   raw_smp_processor_id()
   vlan_tci                              vlan_tx_tag_get(skb)
   vlan_pr                               vlan_tx_tag_present(skb)
+  rand                                  prandom_u32()
 
 These extensions can also be prefixed with '#'.
 Examples for low-level BPF:
@@ -308,6 +309,18 @@ Examples for low-level BPF:
   ret #-1
   drop: ret #0
 
+** icmp random packet sampling, 1 in 4
+  ldh [12]
+  jne #0x800, drop
+  ldb [23]
+  jneq #1, drop
+  # get a random uint32 number
+  ld rand
+  mod #4
+  jneq #1, drop
+  ret #-1
+  drop: ret #0
+
 ** SECCOMP filter example:
 
   ld [4]                  /* offsetof(struct seccomp_data, arch) */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 024fd03e5d18..759abf78dd61 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -223,6 +223,7 @@ enum {
 	BPF_S_ANC_VLAN_TAG,
 	BPF_S_ANC_VLAN_TAG_PRESENT,
 	BPF_S_ANC_PAY_OFFSET,
+	BPF_S_ANC_RANDOM,
 };
 
 #endif /* __LINUX_FILTER_H__ */
diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h
index 8eb9ccaa5b48..253b4d42cf2b 100644
--- a/include/uapi/linux/filter.h
+++ b/include/uapi/linux/filter.h
@@ -130,7 +130,8 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_VLAN_TAG	44
 #define SKF_AD_VLAN_TAG_PRESENT 48
 #define SKF_AD_PAY_OFFSET	52
-#define SKF_AD_MAX	56
+#define SKF_AD_RANDOM	56
+#define SKF_AD_MAX	60
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index cd58614660cf..78a636e60a0b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -643,6 +643,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
 	return raw_smp_processor_id();
 }
 
+/* note that this only generates 32-bit random numbers */
+static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+{
+	return (u64)prandom_u32();
+}
+
 /* Register mappings for user programs. */
 #define A_REG		0
 #define X_REG		7
@@ -779,6 +785,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_NLATTR:
 	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
 	case SKF_AD_OFF + SKF_AD_CPU:
+	case SKF_AD_OFF + SKF_AD_RANDOM:
 		/* arg1 = ctx */
 		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
 		insn->a_reg = ARG1_REG;
@@ -812,6 +819,9 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		case SKF_AD_OFF + SKF_AD_CPU:
 			insn->imm = __get_raw_cpu_id - __bpf_call_base;
 			break;
+		case SKF_AD_OFF + SKF_AD_RANDOM:
+			insn->imm = __get_random_u32 - __bpf_call_base;
+			break;
 		}
 		break;
 
@@ -1362,6 +1372,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 			ANCILLARY(VLAN_TAG);
 			ANCILLARY(VLAN_TAG_PRESENT);
 			ANCILLARY(PAY_OFFSET);
+			ANCILLARY(RANDOM);
 			}
 
 			/* ancillary operation unknown or unsupported */
@@ -1746,6 +1757,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
 		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
 		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
 		[BPF_S_ANC_PAY_OFFSET]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_RANDOM]	= BPF_LD|BPF_B|BPF_ABS,
 		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
 		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
 		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l
index bf7be77ddd62..833a96611da6 100644
--- a/tools/net/bpf_exp.l
+++ b/tools/net/bpf_exp.l
@@ -92,6 +92,7 @@ extern void yyerror(const char *str);
 "#"?("cpu")	{ return K_CPU; }
 "#"?("vlan_tci") { return K_VLANT; }
 "#"?("vlan_pr")	{ return K_VLANP; }
+"#"?("rand")	{ return K_RAND; }
 
 ":"		{ return ':'; }
 ","		{ return ','; }
diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y
index d15efc989ef5..e6306c51c26f 100644
--- a/tools/net/bpf_exp.y
+++ b/tools/net/bpf_exp.y
@@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type);
 %token OP_LDXI
 
 %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE
-%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF
+%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND
 
 %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%'
 
@@ -164,6 +164,9 @@ ldb
 	| OP_LDB K_POFF {
 		bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+	| OP_LDB K_RAND {
+		bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_RANDOM); }
 	;
 
 ldh
@@ -212,6 +215,9 @@ ldh
 	| OP_LDH K_POFF {
 		bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+	| OP_LDH K_RAND {
+		bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_RANDOM); }
 	;
 
 ldi
@@ -265,6 +271,9 @@ ld
 	| OP_LD K_POFF {
 		bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+	| OP_LD K_RAND {
+		bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_RANDOM); }
 	| OP_LD 'M' '[' number ']' {
 		bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); }
 	| OP_LD '[' 'x' '+' number ']' {
-- 
cgit v1.2.3-71-gd317


From 4f520900522fd596e336c07e9aafd5b7a9564235 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 22 Apr 2014 21:31:54 -0400
Subject: netlink: have netlink per-protocol bind function return an error
 code.

Have the netlink per-protocol optional bind function return an int error code
rather than void to signal a failure.

This will enable netlink protocols to perform extra checks including
capabilities and permissions verifications when updating memberships in
multicast groups.

In netlink_bind() and netlink_setsockopt() the call to the per-protocol bind
function was moved above the multicast group update to prevent any access to
the multicast socket groups before checking with the per-protocol bind
function.  This will enable the per-protocol bind function to be used to check
permissions which could be denied before making them available, and to avoid
the messy job of undoing the addition should the per-protocol bind function
fail.

The netfilter subsystem seems to be the only one currently using the
per-protocol bind function.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h   |  3 ++-
 net/netfilter/nfnetlink.c |  3 ++-
 net/netlink/af_netlink.c  | 68 +++++++++++++++++++++++++++++++++--------------
 net/netlink/af_netlink.h  |  6 +++--
 4 files changed, 56 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index aad8eeaf416d..5146ce066498 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -45,7 +45,8 @@ struct netlink_kernel_cfg {
 	unsigned int	flags;
 	void		(*input)(struct sk_buff *skb);
 	struct mutex	*cb_mutex;
-	void		(*bind)(int group);
+	int		(*bind)(int group);
+	void		(*unbind)(int group);
 	bool		(*compare)(struct net *net, struct sock *sk);
 };
 
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 0df800a454ec..6e42dcfad40a 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -400,7 +400,7 @@ static void nfnetlink_rcv(struct sk_buff *skb)
 }
 
 #ifdef CONFIG_MODULES
-static void nfnetlink_bind(int group)
+static int nfnetlink_bind(int group)
 {
 	const struct nfnetlink_subsystem *ss;
 	int type = nfnl_group2type[group];
@@ -410,6 +410,7 @@ static void nfnetlink_bind(int group)
 	rcu_read_unlock();
 	if (!ss)
 		request_module("nfnetlink-subsys-%d", type);
+	return 0;
 }
 #endif
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 894cda0206bb..7e8d229bc010 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1206,7 +1206,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
 	struct module *module = NULL;
 	struct mutex *cb_mutex;
 	struct netlink_sock *nlk;
-	void (*bind)(int group);
+	int (*bind)(int group);
+	void (*unbind)(int group);
 	int err = 0;
 
 	sock->state = SS_UNCONNECTED;
@@ -1232,6 +1233,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
 		err = -EPROTONOSUPPORT;
 	cb_mutex = nl_table[protocol].cb_mutex;
 	bind = nl_table[protocol].bind;
+	unbind = nl_table[protocol].unbind;
 	netlink_unlock_table();
 
 	if (err < 0)
@@ -1248,6 +1250,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
 	nlk = nlk_sk(sock->sk);
 	nlk->module = module;
 	nlk->netlink_bind = bind;
+	nlk->netlink_unbind = unbind;
 out:
 	return err;
 
@@ -1301,6 +1304,7 @@ static int netlink_release(struct socket *sock)
 			kfree_rcu(old, rcu);
 			nl_table[sk->sk_protocol].module = NULL;
 			nl_table[sk->sk_protocol].bind = NULL;
+			nl_table[sk->sk_protocol].unbind = NULL;
 			nl_table[sk->sk_protocol].flags = 0;
 			nl_table[sk->sk_protocol].registered = 0;
 		}
@@ -1411,6 +1415,19 @@ static int netlink_realloc_groups(struct sock *sk)
 	return err;
 }
 
+static void netlink_unbind(int group, long unsigned int groups,
+			   struct netlink_sock *nlk)
+{
+	int undo;
+
+	if (!nlk->netlink_unbind)
+		return;
+
+	for (undo = 0; undo < group; undo++)
+		if (test_bit(group, &groups))
+			nlk->netlink_unbind(undo);
+}
+
 static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			int addr_len)
 {
@@ -1419,6 +1436,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
 	int err;
+	long unsigned int groups = nladdr->nl_groups;
 
 	if (addr_len < sizeof(struct sockaddr_nl))
 		return -EINVAL;
@@ -1427,7 +1445,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 		return -EINVAL;
 
 	/* Only superuser is allowed to listen multicasts */
-	if (nladdr->nl_groups) {
+	if (groups) {
 		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
 			return -EPERM;
 		err = netlink_realloc_groups(sk);
@@ -1435,37 +1453,45 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			return err;
 	}
 
-	if (nlk->portid) {
+	if (nlk->portid)
 		if (nladdr->nl_pid != nlk->portid)
 			return -EINVAL;
-	} else {
+
+	if (nlk->netlink_bind && groups) {
+		int group;
+
+		for (group = 0; group < nlk->ngroups; group++) {
+			if (!test_bit(group, &groups))
+				continue;
+			err = nlk->netlink_bind(group);
+			if (!err)
+				continue;
+			netlink_unbind(group, groups, nlk);
+			return err;
+		}
+	}
+
+	if (!nlk->portid) {
 		err = nladdr->nl_pid ?
 			netlink_insert(sk, net, nladdr->nl_pid) :
 			netlink_autobind(sock);
-		if (err)
+		if (err) {
+			netlink_unbind(nlk->ngroups - 1, groups, nlk);
 			return err;
+		}
 	}
 
-	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
+	if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
 		return 0;
 
 	netlink_table_grab();
 	netlink_update_subscriptions(sk, nlk->subscriptions +
-					 hweight32(nladdr->nl_groups) -
+					 hweight32(groups) -
 					 hweight32(nlk->groups[0]));
-	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
+	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
 	netlink_update_listeners(sk);
 	netlink_table_ungrab();
 
-	if (nlk->netlink_bind && nlk->groups[0]) {
-		int i;
-
-		for (i = 0; i < nlk->ngroups; i++) {
-			if (test_bit(i, nlk->groups))
-				nlk->netlink_bind(i);
-		}
-	}
-
 	return 0;
 }
 
@@ -2103,14 +2129,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 			return err;
 		if (!val || val - 1 >= nlk->ngroups)
 			return -EINVAL;
+		if (nlk->netlink_bind) {
+			err = nlk->netlink_bind(val);
+			if (err)
+				return err;
+		}
 		netlink_table_grab();
 		netlink_update_socket_mc(nlk, val,
 					 optname == NETLINK_ADD_MEMBERSHIP);
 		netlink_table_ungrab();
 
-		if (nlk->netlink_bind)
-			nlk->netlink_bind(val);
-
 		err = 0;
 		break;
 	}
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index ed13a790b00e..0b59d441f5b6 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -38,7 +38,8 @@ struct netlink_sock {
 	struct mutex		*cb_mutex;
 	struct mutex		cb_def_mutex;
 	void			(*netlink_rcv)(struct sk_buff *skb);
-	void			(*netlink_bind)(int group);
+	int			(*netlink_bind)(int group);
+	void			(*netlink_unbind)(int group);
 	struct module		*module;
 #ifdef CONFIG_NETLINK_MMAP
 	struct mutex		pg_vec_lock;
@@ -74,7 +75,8 @@ struct netlink_table {
 	unsigned int		groups;
 	struct mutex		*cb_mutex;
 	struct module		*module;
-	void			(*bind)(int group);
+	int			(*bind)(int group);
+	void			(*unbind)(int group);
 	bool			(*compare)(struct net *net, struct sock *sock);
 	int			registered;
 };
-- 
cgit v1.2.3-71-gd317


From 3a101b8de0d39403b2c7e5c23fd0b005668acf48 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 22 Apr 2014 21:31:56 -0400
Subject: audit: add netlink audit protocol bind to check capabilities on
 multicast join

Register a netlink per-protocol bind fuction for audit to check userspace
process capabilities before allowing a multicast group connection.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/capability.h     |  7 ++++++-
 kernel/audit.c                      | 10 ++++++++++
 security/selinux/include/classmap.h |  2 +-
 3 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index 154dd6d3c8fe..12c37a197d24 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h
@@ -347,7 +347,12 @@ struct vfs_cap_data {
 
 #define CAP_BLOCK_SUSPEND    36
 
-#define CAP_LAST_CAP         CAP_BLOCK_SUSPEND
+/* Allow reading the audit log via multicast netlink socket */
+
+#define CAP_AUDIT_READ		37
+
+
+#define CAP_LAST_CAP         CAP_AUDIT_READ
 
 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 7c2893602d06..223cb746f141 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1076,10 +1076,20 @@ static void audit_receive(struct sk_buff  *skb)
 	mutex_unlock(&audit_cmd_mutex);
 }
 
+/* Run custom bind function on netlink socket group connect or bind requests. */
+static int audit_bind(int group)
+{
+	if (!capable(CAP_AUDIT_READ))
+		return -EPERM;
+
+	return 0;
+}
+
 static int __net_init audit_net_init(struct net *net)
 {
 	struct netlink_kernel_cfg cfg = {
 		.input	= audit_receive,
+		.bind	= audit_bind,
 	};
 
 	struct audit_net *aunet = net_generic(net, audit_net_id);
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 14d04e63b1f0..be491a74c1ed 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -147,7 +147,7 @@ struct security_class_mapping secclass_map[] = {
 	{ "peer", { "recv", NULL } },
 	{ "capability2",
 	  { "mac_override", "mac_admin", "syslog", "wake_alarm", "block_suspend",
-	    NULL } },
+	    "audit_read", NULL } },
 	{ "kernel_service", { "use_as_override", "create_files_as", NULL } },
 	{ "tun_socket",
 	  { COMMON_SOCK_PERMS, "attach_queue", NULL } },
-- 
cgit v1.2.3-71-gd317


From 451f921639fea4600dfb9ab2889332bdcc7b48d3 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 22 Apr 2014 21:31:57 -0400
Subject: audit: add netlink multicast group for log read

Add a netlink multicast socket with one group to kaudit for "best-effort"
delivery to read-only userspace clients such as systemd, in addition to the
existing bidirectional unicast auditd userspace client.

Currently, auditd is intended to use the CAP_AUDIT_CONTROL and CAP_AUDIT_WRITE
capabilities, but actually uses CAP_NET_ADMIN.  The CAP_AUDIT_READ capability
is added for use by read-only AUDIT_NLGRP_READLOG netlink multicast group
clients to the kaudit subsystem.

This will safely give access to services such as systemd to consume audit logs
while ensuring write access remains restricted for integrity.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/audit.h |  8 ++++++++
 kernel/audit.c             | 51 ++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 55 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 11917f747cb4..dfa4c860ccef 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -373,6 +373,14 @@ enum {
  */
 #define AUDIT_MESSAGE_TEXT_MAX	8560
 
+/* Multicast Netlink socket groups (default up to 32) */
+enum audit_nlgrps {
+	AUDIT_NLGRP_NONE,	/* Group 0 not used */
+	AUDIT_NLGRP_READLOG,	/* "best effort" read only socket */
+	__AUDIT_NLGRP_MAX
+};
+#define AUDIT_NLGRP_MAX                (__AUDIT_NLGRP_MAX - 1)
+
 struct audit_status {
 	__u32		mask;		/* Bit mask for valid entries */
 	__u32		enabled;	/* 1 = enabled, 0 = disabled */
diff --git a/kernel/audit.c b/kernel/audit.c
index 223cb746f141..d272cc11dff4 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -423,6 +423,35 @@ static void kauditd_send_skb(struct sk_buff *skb)
 		consume_skb(skb);
 }
 
+/*
+ * kauditd_send_multicast_skb - send the skb to multicast userspace listeners
+ *
+ * This function doesn't consume an skb as might be expected since it has to
+ * copy it anyways.
+ */
+static void kauditd_send_multicast_skb(struct sk_buff *skb)
+{
+	struct sk_buff		*copy;
+	struct audit_net	*aunet = net_generic(&init_net, audit_net_id);
+	struct sock		*sock = aunet->nlsk;
+
+	/*
+	 * The seemingly wasteful skb_copy() rather than bumping the refcount
+	 * using skb_get() is necessary because non-standard mods are made to
+	 * the skb by the original kaudit unicast socket send routine.  The
+	 * existing auditd daemon assumes this breakage.  Fixing this would
+	 * require co-ordinating a change in the established protocol between
+	 * the kaudit kernel subsystem and the auditd userspace code.  There is
+	 * no reason for new multicast clients to continue with this
+	 * non-compliance.
+	 */
+	copy = skb_copy(skb, GFP_KERNEL);
+	if (!copy)
+		return;
+
+	nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
+}
+
 /*
  * flush_hold_queue - empty the hold queue if auditd appears
  *
@@ -1090,6 +1119,8 @@ static int __net_init audit_net_init(struct net *net)
 	struct netlink_kernel_cfg cfg = {
 		.input	= audit_receive,
 		.bind	= audit_bind,
+		.flags	= NL_CFG_F_NONROOT_RECV,
+		.groups	= AUDIT_NLGRP_MAX,
 	};
 
 	struct audit_net *aunet = net_generic(net, audit_net_id);
@@ -1911,10 +1942,10 @@ out:
  * audit_log_end - end one audit record
  * @ab: the audit_buffer
  *
- * The netlink_* functions cannot be called inside an irq context, so
- * the audit buffer is placed on a queue and a tasklet is scheduled to
- * remove them from the queue outside the irq context.  May be called in
- * any context.
+ * netlink_unicast() cannot be called inside an irq context because it blocks
+ * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed
+ * on a queue and a tasklet is scheduled to remove them from the queue outside
+ * the irq context.  May be called in any context.
  */
 void audit_log_end(struct audit_buffer *ab)
 {
@@ -1924,6 +1955,18 @@ void audit_log_end(struct audit_buffer *ab)
 		audit_log_lost("rate limit exceeded");
 	} else {
 		struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
+
+		kauditd_send_multicast_skb(ab->skb);
+
+		/*
+		 * The original kaudit unicast socket sends up messages with
+		 * nlmsg_len set to the payload length rather than the entire
+		 * message length.  This breaks the standard set by netlink.
+		 * The existing auditd daemon assumes this breakage.  Fixing
+		 * this would require co-ordinating a change in the established
+		 * protocol between the kaudit kernel subsystem and the auditd
+		 * userspace code.
+		 */
 		nlh->nlmsg_len = ab->skb->len - NLMSG_HDRLEN;
 
 		if (audit_pid) {
-- 
cgit v1.2.3-71-gd317


From 2e71029e2c32ecd59a2e8f351517bfbbad42ac11 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Tue, 22 Apr 2014 21:48:30 +0900
Subject: xfrm: Remove useless xfrm_audit struct.

Commit f1370cc4 "xfrm: Remove useless secid field from xfrm_audit." changed
"struct xfrm_audit" to have either
{ audit_get_loginuid(current) / audit_get_sessionid(current) } or
{ INVALID_UID / -1 } pair.

This means that we can represent "struct xfrm_audit" as "bool".
This patch replaces "struct xfrm_audit" argument with "bool".

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     | 42 +++++++++++++++++++-----------------------
 net/key/af_key.c       | 30 +++++++-----------------------
 net/xfrm/xfrm_policy.c | 40 ++++++++++++++--------------------------
 net/xfrm/xfrm_state.c  | 34 ++++++++++++----------------------
 net/xfrm/xfrm_user.c   | 38 ++++++++------------------------------
 5 files changed, 60 insertions(+), 124 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 882889eb156b..721e9c3b11bd 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -691,12 +691,6 @@ struct xfrm_spi_skb_cb {
 
 #define XFRM_SPI_SKB_CB(__skb) ((struct xfrm_spi_skb_cb *)&((__skb)->cb[0]))
 
-/* Audit Information */
-struct xfrm_audit {
-	kuid_t	loginuid;
-	unsigned int sessionid;
-};
-
 #ifdef CONFIG_AUDITSYSCALL
 static inline struct audit_buffer *xfrm_audit_start(const char *op)
 {
@@ -712,22 +706,24 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op)
 	return audit_buf;
 }
 
-static inline void xfrm_audit_helper_usrinfo(kuid_t auid, unsigned int ses,
+static inline void xfrm_audit_helper_usrinfo(bool task_valid,
 					     struct audit_buffer *audit_buf)
 {
-	audit_log_format(audit_buf, " auid=%u ses=%u",
-			 from_kuid(&init_user_ns, auid), ses);
+	const unsigned int auid = from_kuid(&init_user_ns, task_valid ?
+					    audit_get_loginuid(current) :
+					    INVALID_UID);
+	const unsigned int ses = task_valid ? audit_get_sessionid(current) :
+		(unsigned int) -1;
+
+	audit_log_format(audit_buf, " auid=%u ses=%u", auid, ses);
 	audit_log_task_context(audit_buf);
 }
 
-void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, kuid_t auid,
-			   unsigned int ses);
-void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, kuid_t auid,
-			      unsigned int ses);
-void xfrm_audit_state_add(struct xfrm_state *x, int result, kuid_t auid,
-			  unsigned int ses);
-void xfrm_audit_state_delete(struct xfrm_state *x, int result, kuid_t auid,
-			     unsigned int ses);
+void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid);
+void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
+			      bool task_valid);
+void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid);
+void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid);
 void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
 				      struct sk_buff *skb);
 void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb,
@@ -740,22 +736,22 @@ void xfrm_audit_state_icvfail(struct xfrm_state *x, struct sk_buff *skb,
 #else
 
 static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
-				  kuid_t auid, unsigned int ses)
+					 bool task_valid)
 {
 }
 
 static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
-				  kuid_t auid, unsigned int ses)
+					    bool task_valid)
 {
 }
 
 static inline void xfrm_audit_state_add(struct xfrm_state *x, int result,
-				 kuid_t auid, unsigned int ses)
+					bool task_valid)
 {
 }
 
 static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result,
-				    kuid_t auid, unsigned int ses)
+					   bool task_valid)
 {
 }
 
@@ -1499,7 +1495,7 @@ struct xfrmk_spdinfo {
 
 struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
 int xfrm_state_delete(struct xfrm_state *x);
-int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info);
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid);
 void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
 u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
@@ -1594,7 +1590,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark,
 					  int *err);
 struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir,
 				     u32 id, int delete, int *err);
-int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info);
+int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
 u32 xfrm_get_acqseq(void);
 int verify_spi_info(u8 proto, u32 min, u32 max);
 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index d66ff72adefb..b47f8e542aae 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1476,9 +1476,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg
 	else
 		err = xfrm_state_update(x);
 
-	xfrm_audit_state_add(x, err ? 0 : 1,
-			     audit_get_loginuid(current),
-			     audit_get_sessionid(current));
+	xfrm_audit_state_add(x, err ? 0 : 1, true);
 
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
@@ -1532,9 +1530,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_
 	c.event = XFRM_MSG_DELSA;
 	km_state_notify(x, &c);
 out:
-	xfrm_audit_state_delete(x, err ? 0 : 1,
-				audit_get_loginuid(current),
-				audit_get_sessionid(current));
+	xfrm_audit_state_delete(x, err ? 0 : 1, true);
 	xfrm_state_put(x);
 
 	return err;
@@ -1726,16 +1722,13 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
 	struct net *net = sock_net(sk);
 	unsigned int proto;
 	struct km_event c;
-	struct xfrm_audit audit_info;
 	int err, err2;
 
 	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
 	if (proto == 0)
 		return -EINVAL;
 
-	audit_info.loginuid = audit_get_loginuid(current);
-	audit_info.sessionid = audit_get_sessionid(current);
-	err = xfrm_state_flush(net, proto, &audit_info);
+	err = xfrm_state_flush(net, proto, true);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - go quietly */
@@ -2287,9 +2280,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
 	err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
 				 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
 
-	xfrm_audit_policy_add(xp, err ? 0 : 1,
-			      audit_get_loginuid(current),
-			      audit_get_sessionid(current));
+	xfrm_audit_policy_add(xp, err ? 0 : 1, true);
 
 	if (err)
 		goto out;
@@ -2371,9 +2362,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 	if (xp == NULL)
 		return -ENOENT;
 
-	xfrm_audit_policy_delete(xp, err ? 0 : 1,
-				 audit_get_loginuid(current),
-				 audit_get_sessionid(current));
+	xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
 
 	if (err)
 		goto out;
@@ -2621,9 +2610,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
 		return -ENOENT;
 
 	if (delete) {
-		xfrm_audit_policy_delete(xp, err ? 0 : 1,
-				audit_get_loginuid(current),
-				audit_get_sessionid(current));
+		xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
 
 		if (err)
 			goto out;
@@ -2732,12 +2719,9 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
 {
 	struct net *net = sock_net(sk);
 	struct km_event c;
-	struct xfrm_audit audit_info;
 	int err, err2;
 
-	audit_info.loginuid = audit_get_loginuid(current);
-	audit_info.sessionid = audit_get_sessionid(current);
-	err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+	err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true);
 	err2 = unicast_flush_resp(sk, hdr);
 	if (err || err2) {
 		if (err == -ESRCH) /* empty table - old silent behavior */
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index bd001b7062c0..375267d15c8f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -769,7 +769,7 @@ EXPORT_SYMBOL(xfrm_policy_byid);
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 static inline int
-xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
 {
 	int dir, err = 0;
 
@@ -783,9 +783,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
 				continue;
 			err = security_xfrm_policy_delete(pol->security);
 			if (err) {
-				xfrm_audit_policy_delete(pol, 0,
-							 audit_info->loginuid,
-							 audit_info->sessionid);
+				xfrm_audit_policy_delete(pol, 0, task_valid);
 				return err;
 			}
 		}
@@ -799,8 +797,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
 								pol->security);
 				if (err) {
 					xfrm_audit_policy_delete(pol, 0,
-							audit_info->loginuid,
-							audit_info->sessionid);
+								 task_valid);
 					return err;
 				}
 			}
@@ -810,19 +807,19 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
 }
 #else
 static inline int
-xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
+xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
 {
 	return 0;
 }
 #endif
 
-int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
+int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 {
 	int dir, err = 0, cnt = 0;
 
 	write_lock_bh(&net->xfrm.xfrm_policy_lock);
 
-	err = xfrm_policy_flush_secctx_check(net, type, audit_info);
+	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
 	if (err)
 		goto out;
 
@@ -839,8 +836,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 			cnt++;
 
-			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
-						 audit_info->sessionid);
+			xfrm_audit_policy_delete(pol, 1, task_valid);
 
 			xfrm_policy_kill(pol);
 
@@ -859,9 +855,7 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
 				cnt++;
 
-				xfrm_audit_policy_delete(pol, 1,
-							 audit_info->loginuid,
-							 audit_info->sessionid);
+				xfrm_audit_policy_delete(pol, 1, task_valid);
 				xfrm_policy_kill(pol);
 
 				write_lock_bh(&net->xfrm.xfrm_policy_lock);
@@ -2858,19 +2852,14 @@ out_byidx:
 
 static void xfrm_policy_fini(struct net *net)
 {
-	struct xfrm_audit audit_info;
 	unsigned int sz;
 	int dir;
 
 	flush_work(&net->xfrm.policy_hash_work);
 #ifdef CONFIG_XFRM_SUB_POLICY
-	audit_info.loginuid = INVALID_UID;
-	audit_info.sessionid = (unsigned int)-1;
-	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
+	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
 #endif
-	audit_info.loginuid = INVALID_UID;
-	audit_info.sessionid = (unsigned int)-1;
-	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
 
 	WARN_ON(!list_empty(&net->xfrm.policy_all));
 
@@ -2985,15 +2974,14 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
 	}
 }
 
-void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
-			   kuid_t auid, unsigned int sessionid)
+void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SPD-add");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	xfrm_audit_common_policyinfo(xp, audit_buf);
 	audit_log_end(audit_buf);
@@ -3001,14 +2989,14 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
 
 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
-			      kuid_t auid, unsigned int sessionid)
+			      bool task_valid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SPD-delete");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	xfrm_audit_common_policyinfo(xp, audit_buf);
 	audit_log_end(audit_buf);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index d91312b5ceb0..0ab54134bb40 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -463,9 +463,7 @@ expired:
 	if (!err)
 		km_state_expired(x, 1, 0);
 
-	xfrm_audit_state_delete(x, err ? 0 : 1,
-				audit_get_loginuid(current),
-				audit_get_sessionid(current));
+	xfrm_audit_state_delete(x, err ? 0 : 1, true);
 
 out:
 	spin_unlock(&x->lock);
@@ -562,7 +560,7 @@ EXPORT_SYMBOL(xfrm_state_delete);
 
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
 static inline int
-xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
 {
 	int i, err = 0;
 
@@ -572,9 +570,7 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi
 		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 			if (xfrm_id_proto_match(x->id.proto, proto) &&
 			   (err = security_xfrm_state_delete(x)) != 0) {
-				xfrm_audit_state_delete(x, 0,
-							audit_info->loginuid,
-							audit_info->sessionid);
+				xfrm_audit_state_delete(x, 0, task_valid);
 				return err;
 			}
 		}
@@ -584,18 +580,18 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi
 }
 #else
 static inline int
-xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
 {
 	return 0;
 }
 #endif
 
-int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
 {
 	int i, err = 0, cnt = 0;
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
-	err = xfrm_state_flush_secctx_check(net, proto, audit_info);
+	err = xfrm_state_flush_secctx_check(net, proto, task_valid);
 	if (err)
 		goto out;
 
@@ -611,8 +607,7 @@ restart:
 
 				err = xfrm_state_delete(x);
 				xfrm_audit_state_delete(x, err ? 0 : 1,
-							audit_info->loginuid,
-							audit_info->sessionid);
+							task_valid);
 				xfrm_state_put(x);
 				if (!err)
 					cnt++;
@@ -2126,13 +2121,10 @@ out_bydst:
 
 void xfrm_state_fini(struct net *net)
 {
-	struct xfrm_audit audit_info;
 	unsigned int sz;
 
 	flush_work(&net->xfrm.state_hash_work);
-	audit_info.loginuid = INVALID_UID;
-	audit_info.sessionid = (unsigned int)-1;
-	xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info);
+	xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
 	flush_work(&net->xfrm.state_gc_work);
 
 	WARN_ON(!list_empty(&net->xfrm.state_all));
@@ -2195,30 +2187,28 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
 	}
 }
 
-void xfrm_audit_state_add(struct xfrm_state *x, int result,
-			  kuid_t auid, unsigned int sessionid)
+void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SAD-add");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
 	xfrm_audit_helper_sainfo(x, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	audit_log_end(audit_buf);
 }
 EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
 
-void xfrm_audit_state_delete(struct xfrm_state *x, int result,
-			     kuid_t auid, unsigned int sessionid)
+void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
 {
 	struct audit_buffer *audit_buf;
 
 	audit_buf = xfrm_audit_start("SAD-delete");
 	if (audit_buf == NULL)
 		return;
-	xfrm_audit_helper_usrinfo(auid, sessionid, audit_buf);
+	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
 	xfrm_audit_helper_sainfo(x, audit_buf);
 	audit_log_format(audit_buf, " res=%u", result);
 	audit_log_end(audit_buf);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d6409d927b82..3d4b4c464091 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -597,8 +597,6 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct xfrm_state *x;
 	int err;
 	struct km_event c;
-	kuid_t loginuid = audit_get_loginuid(current);
-	unsigned int sessionid = audit_get_sessionid(current);
 
 	err = verify_newsa_info(p, attrs);
 	if (err)
@@ -614,7 +612,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	else
 		err = xfrm_state_update(x);
 
-	xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid);
+	xfrm_audit_state_add(x, err ? 0 : 1, true);
 
 	if (err < 0) {
 		x->km.state = XFRM_STATE_DEAD;
@@ -674,8 +672,6 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int err = -ESRCH;
 	struct km_event c;
 	struct xfrm_usersa_id *p = nlmsg_data(nlh);
-	kuid_t loginuid = audit_get_loginuid(current);
-	unsigned int sessionid = audit_get_sessionid(current);
 
 	x = xfrm_user_state_lookup(net, p, attrs, &err);
 	if (x == NULL)
@@ -700,7 +696,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	km_state_notify(x, &c);
 
 out:
-	xfrm_audit_state_delete(x, err ? 0 : 1, loginuid, sessionid);
+	xfrm_audit_state_delete(x, err ? 0 : 1, true);
 	xfrm_state_put(x);
 	return err;
 }
@@ -1410,8 +1406,6 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct km_event c;
 	int err;
 	int excl;
-	kuid_t loginuid = audit_get_loginuid(current);
-	unsigned int sessionid = audit_get_sessionid(current);
 
 	err = verify_newpolicy_info(p);
 	if (err)
@@ -1430,7 +1424,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	 * a type XFRM_MSG_UPDPOLICY - JHS */
 	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
 	err = xfrm_policy_insert(p->dir, xp, excl);
-	xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid);
+	xfrm_audit_policy_add(xp, err ? 0 : 1, true);
 
 	if (err) {
 		security_xfrm_policy_free(xp->security);
@@ -1667,10 +1661,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 					    NETLINK_CB(skb).portid);
 		}
 	} else {
-		kuid_t loginuid = audit_get_loginuid(current);
-		unsigned int sessionid = audit_get_sessionid(current);
-
-		xfrm_audit_policy_delete(xp, err ? 0 : 1, loginuid, sessionid);
+		xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
 
 		if (err != 0)
 			goto out;
@@ -1695,12 +1686,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net *net = sock_net(skb->sk);
 	struct km_event c;
 	struct xfrm_usersa_flush *p = nlmsg_data(nlh);
-	struct xfrm_audit audit_info;
 	int err;
 
-	audit_info.loginuid = audit_get_loginuid(current);
-	audit_info.sessionid = audit_get_sessionid(current);
-	err = xfrm_state_flush(net, p->proto, &audit_info);
+	err = xfrm_state_flush(net, p->proto, true);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
 			return 0;
@@ -1884,15 +1872,12 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct km_event c;
 	u8 type = XFRM_POLICY_TYPE_MAIN;
 	int err;
-	struct xfrm_audit audit_info;
 
 	err = copy_from_user_policy_type(&type, attrs);
 	if (err)
 		return err;
 
-	audit_info.loginuid = audit_get_loginuid(current);
-	audit_info.sessionid = audit_get_sessionid(current);
-	err = xfrm_policy_flush(net, type, &audit_info);
+	err = xfrm_policy_flush(net, type, true);
 	if (err) {
 		if (err == -ESRCH) /* empty table */
 			return 0;
@@ -1958,12 +1943,8 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	err = 0;
 	if (up->hard) {
-		kuid_t loginuid = audit_get_loginuid(current);
-		unsigned int sessionid = audit_get_sessionid(current);
-
 		xfrm_policy_delete(xp, p->dir);
-		xfrm_audit_policy_delete(xp, 1, loginuid, sessionid);
-
+		xfrm_audit_policy_delete(xp, 1, true);
 	} else {
 		// reset the timers here?
 		WARN(1, "Dont know what to do with soft policy expire\n");
@@ -1999,11 +1980,8 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	km_state_expired(x, ue->hard, nlh->nlmsg_pid);
 
 	if (ue->hard) {
-		kuid_t loginuid = audit_get_loginuid(current);
-		unsigned int sessionid = audit_get_sessionid(current);
-
 		__xfrm_state_delete(x);
-		xfrm_audit_state_delete(x, 1, loginuid, sessionid);
+		xfrm_audit_state_delete(x, 1, true);
 	}
 	err = 0;
 out:
-- 
cgit v1.2.3-71-gd317


From fc8fd40eb29a936cc689d0008863d39a67741c67 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 3 Nov 2013 20:46:34 +0100
Subject: drm: Rip out totally bogus vga_switcheroo->can_switch locking

So I just wanted to add a new field to struct drm_device and
accidentally stumbled over something. According to comments
dev->open_count is protected by dev->count_lock, but that's totally
not the case. It's protected by drm_global_mutex.

Unfortunately the vga switcheroo callbacks took this comment at face
value. The problem is that we can't just take the drm_global_mutex
because:
- It would lead to a locking inversion with the driver load/unload
  paths.
- It wouldn't actually protect anything, for that we'd need to wrap
  the entire vga switcheroo code in the drm_global_mutex. And I'm not
  sure whether that would actually solve anything.

What we probably want is a try_to_grab_switcheroo reference kind of
thing which is used in the driver's ->open callback. Then we could
move all that ->can_switch madness into the vga switcheroo core where
it really belongs.

But since that would amount to real work take the easy way out and
just add a comment. It's definitely not going to make anything worse
since doing switcheroo state changes while restarting X just isn't
recommended. Even though the delayed switching code does exactly that.

v2:
- Simplify the ->can_switch implementations more (Thierry)
- Fix comment about the dev->open_count locking (Thierry)

Cc: Thierry Reding <treding@nvidia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> (v1)
Reviewed-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_dma.c        | 11 ++++++-----
 drivers/gpu/drm/nouveau/nouveau_vga.c  | 11 ++++++-----
 drivers/gpu/drm/radeon/radeon_device.c | 11 ++++++-----
 include/drm/drmP.h                     |  2 +-
 4 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 96177eec0a0e..283ff06001bc 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1277,12 +1277,13 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_
 static bool i915_switcheroo_can_switch(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
-	bool can_switch;
 
-	spin_lock(&dev->count_lock);
-	can_switch = (dev->open_count == 0);
-	spin_unlock(&dev->count_lock);
-	return can_switch;
+	/*
+	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
+	 * locking inversion with the driver load path. And the access here is
+	 * completely racy anyway. So don't bother with locking for now.
+	 */
+	return dev->open_count == 0;
 }
 
 static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index fb84da3cb50d..4f4c3fec6916 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -64,12 +64,13 @@ static bool
 nouveau_switcheroo_can_switch(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
-	bool can_switch;
 
-	spin_lock(&dev->count_lock);
-	can_switch = (dev->open_count == 0);
-	spin_unlock(&dev->count_lock);
-	return can_switch;
+	/*
+	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
+	 * locking inversion with the driver load path. And the access here is
+	 * completely racy anyway. So don't bother with locking for now.
+	 */
+	return dev->open_count == 0;
 }
 
 static const struct vga_switcheroo_client_ops
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 511fe26198e4..9aa1afd1786e 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1125,12 +1125,13 @@ static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
 static bool radeon_switcheroo_can_switch(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
-	bool can_switch;
 
-	spin_lock(&dev->count_lock);
-	can_switch = (dev->open_count == 0);
-	spin_unlock(&dev->count_lock);
-	return can_switch;
+	/*
+	 * FIXME: open_count is protected by drm_global_mutex but that would lead to
+	 * locking inversion with the driver load path. And the access here is
+	 * completely racy anyway. So don't bother with locking for now.
+	 */
+	return dev->open_count == 0;
 }
 
 static const struct vga_switcheroo_client_ops radeon_switcheroo_ops = {
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 9f1fb8d36b67..a20d882ca265 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1076,7 +1076,7 @@ struct drm_device {
 
 	/** \name Usage Counters */
 	/*@{ */
-	int open_count;			/**< Outstanding files open */
+	int open_count;			/**< Outstanding files open, protected by drm_global_mutex. */
 	int buf_use;			/**< Buffers in use -- cannot alloc */
 	atomic_t buf_alloc;		/**< Buffer allocation in progress */
 	/*@} */
-- 
cgit v1.2.3-71-gd317


From 2177a2182f3f375f64d9938dd884895c3872c380 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 16 Dec 2013 11:21:06 +0100
Subject: drm: rename dev->count_lock to dev->buf_lock

Since really that's all it protects - legacy horror stories in
drm_bufs.c. Since I don't want to waste any more time on this I didn't
bother to actually look at what it protects in there, but it's at
least contained now.

v2: Move the spurious hunk to the right patch (Thierry).

Cc: Thierry Reding <thierry.reding@gmail.com>
Reviewed-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_bufs.c | 32 ++++++++++++++++----------------
 drivers/gpu/drm/drm_stub.c |  2 +-
 include/drm/drmP.h         |  2 +-
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index edec31fe3fed..ef7f0199a0f1 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -656,13 +656,13 @@ int drm_addbufs_agp(struct drm_device * dev, struct drm_buf_desc * request)
 		DRM_DEBUG("zone invalid\n");
 		return -EINVAL;
 	}
-	spin_lock(&dev->count_lock);
+	spin_lock(&dev->buf_lock);
 	if (dev->buf_use) {
-		spin_unlock(&dev->count_lock);
+		spin_unlock(&dev->buf_lock);
 		return -EBUSY;
 	}
 	atomic_inc(&dev->buf_alloc);
-	spin_unlock(&dev->count_lock);
+	spin_unlock(&dev->buf_lock);
 
 	mutex_lock(&dev->struct_mutex);
 	entry = &dma->bufs[order];
@@ -805,13 +805,13 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request)
 	page_order = order - PAGE_SHIFT > 0 ? order - PAGE_SHIFT : 0;
 	total = PAGE_SIZE << page_order;
 
-	spin_lock(&dev->count_lock);
+	spin_lock(&dev->buf_lock);
 	if (dev->buf_use) {
-		spin_unlock(&dev->count_lock);
+		spin_unlock(&dev->buf_lock);
 		return -EBUSY;
 	}
 	atomic_inc(&dev->buf_alloc);
-	spin_unlock(&dev->count_lock);
+	spin_unlock(&dev->buf_lock);
 
 	mutex_lock(&dev->struct_mutex);
 	entry = &dma->bufs[order];
@@ -1015,13 +1015,13 @@ static int drm_addbufs_sg(struct drm_device * dev, struct drm_buf_desc * request
 	if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER)
 		return -EINVAL;
 
-	spin_lock(&dev->count_lock);
+	spin_lock(&dev->buf_lock);
 	if (dev->buf_use) {
-		spin_unlock(&dev->count_lock);
+		spin_unlock(&dev->buf_lock);
 		return -EBUSY;
 	}
 	atomic_inc(&dev->buf_alloc);
-	spin_unlock(&dev->count_lock);
+	spin_unlock(&dev->buf_lock);
 
 	mutex_lock(&dev->struct_mutex);
 	entry = &dma->bufs[order];
@@ -1175,7 +1175,7 @@ int drm_addbufs(struct drm_device *dev, void *data,
  * \param arg pointer to a drm_buf_info structure.
  * \return zero on success or a negative number on failure.
  *
- * Increments drm_device::buf_use while holding the drm_device::count_lock
+ * Increments drm_device::buf_use while holding the drm_device::buf_lock
  * lock, preventing of allocating more buffers after this call. Information
  * about each requested buffer is then copied into user space.
  */
@@ -1196,13 +1196,13 @@ int drm_infobufs(struct drm_device *dev, void *data,
 	if (!dma)
 		return -EINVAL;
 
-	spin_lock(&dev->count_lock);
+	spin_lock(&dev->buf_lock);
 	if (atomic_read(&dev->buf_alloc)) {
-		spin_unlock(&dev->count_lock);
+		spin_unlock(&dev->buf_lock);
 		return -EBUSY;
 	}
 	++dev->buf_use;		/* Can't allocate more after this call */
-	spin_unlock(&dev->count_lock);
+	spin_unlock(&dev->buf_lock);
 
 	for (i = 0, count = 0; i < DRM_MAX_ORDER + 1; i++) {
 		if (dma->bufs[i].buf_count)
@@ -1381,13 +1381,13 @@ int drm_mapbufs(struct drm_device *dev, void *data,
 	if (!dma)
 		return -EINVAL;
 
-	spin_lock(&dev->count_lock);
+	spin_lock(&dev->buf_lock);
 	if (atomic_read(&dev->buf_alloc)) {
-		spin_unlock(&dev->count_lock);
+		spin_unlock(&dev->buf_lock);
 		return -EBUSY;
 	}
 	dev->buf_use++;		/* Can't allocate more after this call */
-	spin_unlock(&dev->count_lock);
+	spin_unlock(&dev->buf_lock);
 
 	if (request->count >= dma->buf_count) {
 		if ((dev->agp && (dma->flags & _DRM_DMA_USE_AGP))
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 4c24c3ac1efa..5394b201c3d0 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -569,7 +569,7 @@ struct drm_device *drm_dev_alloc(struct drm_driver *driver,
 	INIT_LIST_HEAD(&dev->maplist);
 	INIT_LIST_HEAD(&dev->vblank_event_list);
 
-	spin_lock_init(&dev->count_lock);
+	spin_lock_init(&dev->buf_lock);
 	spin_lock_init(&dev->event_lock);
 	mutex_init(&dev->struct_mutex);
 	mutex_init(&dev->ctxlist_mutex);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index a20d882ca265..85682d959c7e 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1069,7 +1069,6 @@ struct drm_device {
 
 	/** \name Locks */
 	/*@{ */
-	spinlock_t count_lock;		/**< For inuse, drm_device::open_count, drm_device::buf_use */
 	struct mutex struct_mutex;	/**< For others */
 	struct mutex master_mutex;      /**< For drm_minor::master and drm_file::is_master */
 	/*@} */
@@ -1077,6 +1076,7 @@ struct drm_device {
 	/** \name Usage Counters */
 	/*@{ */
 	int open_count;			/**< Outstanding files open, protected by drm_global_mutex. */
+	spinlock_t buf_lock;		/**< For drm_device::buf_use and a few other things. */
 	int buf_use;			/**< Buffers in use -- cannot alloc */
 	atomic_t buf_alloc;		/**< Buffer allocation in progress */
 	/*@} */
-- 
cgit v1.2.3-71-gd317


From 7c1a38e391745cca72627979e5e02924bed5b43d Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 16 Dec 2013 11:21:15 +0100
Subject: drm/irq: track the irq installed in drm_irq_install in dev->irq

To get rid of the dev->bus->get_irq callback we need to pass in the
desired irq explicitly into drm_irq_install. To avoid having to do the
same for drm_irq_unistall just track it internally. That leaves
drivers with less room to botch things up.

v2: Add the hunk lost in an earlier patch to this one (Thierry).

v3: Fix up the totally fumbled logic in drm_irq_install and use the
local variable consistently. Spotted by both Thierry and Laurent.
Shame on me for failing to properly test the rebase version of this
patch ...

Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_irq.c | 18 +++++++++++-------
 include/drm/drmP.h        |  2 ++
 2 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 589e865832cd..7cf407bbfed5 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -249,14 +249,16 @@ static inline int drm_dev_to_irq(struct drm_device *dev)
  */
 int drm_irq_install(struct drm_device *dev)
 {
-	int ret;
+	int ret, irq;
 	unsigned long sh_flags = 0;
 	char *irqname;
 
+	irq = drm_dev_to_irq(dev);
+
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 		return -EINVAL;
 
-	if (drm_dev_to_irq(dev) == 0)
+	if (irq == 0)
 		return -EINVAL;
 
 	/* Driver must have been initialized */
@@ -267,7 +269,7 @@ int drm_irq_install(struct drm_device *dev)
 		return -EBUSY;
 	dev->irq_enabled = true;
 
-	DRM_DEBUG("irq=%d\n", drm_dev_to_irq(dev));
+	DRM_DEBUG("irq=%d\n", irq);
 
 	/* Before installing handler */
 	if (dev->driver->irq_preinstall)
@@ -282,7 +284,7 @@ int drm_irq_install(struct drm_device *dev)
 	else
 		irqname = dev->driver->name;
 
-	ret = request_irq(drm_dev_to_irq(dev), dev->driver->irq_handler,
+	ret = request_irq(irq, dev->driver->irq_handler,
 			  sh_flags, irqname, dev);
 
 	if (ret < 0) {
@@ -301,7 +303,9 @@ int drm_irq_install(struct drm_device *dev)
 		dev->irq_enabled = false;
 		if (!drm_core_check_feature(dev, DRIVER_MODESET))
 			vga_client_register(dev->pdev, NULL, NULL, NULL);
-		free_irq(drm_dev_to_irq(dev), dev);
+		free_irq(irq, dev);
+	} else {
+		dev->irq = irq;
 	}
 
 	return ret;
@@ -344,7 +348,7 @@ int drm_irq_uninstall(struct drm_device *dev)
 	if (!irq_enabled)
 		return -EINVAL;
 
-	DRM_DEBUG("irq=%d\n", drm_dev_to_irq(dev));
+	DRM_DEBUG("irq=%d\n", dev->irq);
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		vga_client_register(dev->pdev, NULL, NULL, NULL);
@@ -352,7 +356,7 @@ int drm_irq_uninstall(struct drm_device *dev)
 	if (dev->driver->irq_uninstall)
 		dev->driver->irq_uninstall(dev);
 
-	free_irq(drm_dev_to_irq(dev), dev);
+	free_irq(dev->irq, dev);
 
 	return 0;
 }
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 85682d959c7e..8e8c392d6fa8 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1107,6 +1107,8 @@ struct drm_device {
 	/** \name Context support */
 	/*@{ */
 	bool irq_enabled;		/**< True if irq handler is enabled */
+	int irq;
+
 	__volatile__ long context_flag;	/**< Context swapping flag */
 	int last_context;		/**< Last current context */
 	/*@} */
-- 
cgit v1.2.3-71-gd317


From bb0f1b5c1695b4399cfd2359c114ae63edbb3ad8 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 3 Nov 2013 21:09:27 +0100
Subject: drm: pass the irq explicitly to drm_irq_install

Unfortunately this requires a drm-wide change, and I didn't see a sane
way around that. Luckily it's fairly simple, we just need to inline
the respective get_irq implementation from either drm_pci.c or
drm_platform.c.

With that we can now also remove drm_dev_to_irq from drm_irq.c.

Reviewed-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 Documentation/DocBook/drm.tmpl           | 10 +---------
 drivers/gpu/drm/armada/armada_drv.c      |  2 +-
 drivers/gpu/drm/drm_irq.c                | 13 +++----------
 drivers/gpu/drm/gma500/psb_drv.c         |  2 +-
 drivers/gpu/drm/i915/i915_dma.c          |  2 +-
 drivers/gpu/drm/i915/i915_drv.c          |  4 ++--
 drivers/gpu/drm/i915/i915_gem.c          |  2 +-
 drivers/gpu/drm/msm/msm_drv.c            |  2 +-
 drivers/gpu/drm/qxl/qxl_irq.c            |  2 +-
 drivers/gpu/drm/radeon/radeon_irq_kms.c  |  2 +-
 drivers/gpu/drm/shmobile/shmob_drm_drv.c |  2 +-
 drivers/gpu/drm/tilcdc/tilcdc_drv.c      |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c      |  2 +-
 include/drm/drmP.h                       |  2 +-
 14 files changed, 17 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 677a02553ec0..83dd0b043c28 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -341,14 +341,6 @@ char *date;</synopsis>
         </para>
         <sect4>
           <title>Managed IRQ Registration</title>
-          <para>
-            Both the <function>drm_irq_install</function> and
-	    <function>drm_irq_uninstall</function> functions get the device IRQ by
-	    calling <function>drm_dev_to_irq</function>. This inline function will
-	    call a bus-specific operation to retrieve the IRQ number. For platform
-	    devices, <function>platform_get_irq</function>(..., 0) is used to
-	    retrieve the IRQ number.
-          </para>
           <para>
             <function>drm_irq_install</function> starts by calling the
             <methodname>irq_preinstall</methodname> driver operation. The operation
@@ -356,7 +348,7 @@ char *date;</synopsis>
             clearing all pending interrupt flags or disabling the interrupt.
           </para>
           <para>
-            The IRQ will then be requested by a call to
+            The passed-in IRQ will then be requested by a call to
             <function>request_irq</function>. If the DRIVER_IRQ_SHARED driver
             feature flag is set, a shared (IRQF_SHARED) IRQ handler will be
             requested.
diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c
index 32982da82694..567cfbde0883 100644
--- a/drivers/gpu/drm/armada/armada_drv.c
+++ b/drivers/gpu/drm/armada/armada_drv.c
@@ -173,7 +173,7 @@ static int armada_drm_load(struct drm_device *dev, unsigned long flags)
 	if (ret)
 		goto err_kms;
 
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0));
 	if (ret)
 		goto err_kms;
 
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index cbf6f259bfe4..de38bc9b6581 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -233,11 +233,6 @@ static void drm_irq_vgaarb_nokms(void *cookie, bool state)
 	}
 }
 
-static inline int drm_dev_to_irq(struct drm_device *dev)
-{
-	return dev->driver->bus->get_irq(dev);
-}
-
 /**
  * Install IRQ handler.
  *
@@ -247,14 +242,12 @@ static inline int drm_dev_to_irq(struct drm_device *dev)
  * \c irq_preinstall() and \c irq_postinstall() functions
  * before and after the installation.
  */
-int drm_irq_install(struct drm_device *dev)
+int drm_irq_install(struct drm_device *dev, int irq)
 {
-	int ret, irq;
+	int ret;
 	unsigned long sh_flags = 0;
 	char *irqname;
 
-	irq = drm_dev_to_irq(dev);
-
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 		return -EINVAL;
 
@@ -399,7 +392,7 @@ int drm_control(struct drm_device *dev, void *data,
 		    ctl->irq != irq)
 			return -EINVAL;
 		mutex_lock(&dev->struct_mutex);
-		ret = drm_irq_install(dev);
+		ret = drm_irq_install(dev, irq);
 		mutex_unlock(&dev->struct_mutex);
 
 		return ret;
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index b686e56646eb..0a3101a3db19 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -354,7 +354,7 @@ static int psb_driver_load(struct drm_device *dev, unsigned long flags)
 	PSB_WVDC32(0xFFFFFFFF, PSB_INT_MASK_R);
 	spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags);
 
-	drm_irq_install(dev);
+	drm_irq_install(dev, dev->pdev->irq);
 
 	dev->vblank_disable_allowed = true;
 	dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 283ff06001bc..42de2808e53d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1327,7 +1327,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 	intel_power_domains_init_hw(dev_priv);
 
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, dev->pdev->irq);
 	if (ret)
 		goto cleanup_gem_stolen;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 87ce105910f2..6124b491a19e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -574,7 +574,7 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings)
 		mutex_unlock(&dev->struct_mutex);
 
 		/* We need working interrupts for modeset enabling ... */
-		drm_irq_install(dev);
+		drm_irq_install(dev, dev->pdev->irq);
 
 		intel_modeset_init_hw(dev);
 
@@ -752,7 +752,7 @@ int i915_reset(struct drm_device *dev)
 		 * being false when they shouldn't be able to.
 		 */
 		drm_irq_uninstall(dev);
-		drm_irq_install(dev);
+		drm_irq_install(dev, dev->pdev->irq);
 
 		/* rps/rc6 re-init is necessary to restore state lost after the
 		 * reset and the re-install of drm irq. Skip for ironlake per
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d2cc19a7023f..a94938f08698 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4523,7 +4523,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 
 	BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
 
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, dev->pdev->irq);
 	if (ret)
 		goto cleanup_ringbuffer;
 	mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index f9de156b9e65..50ec1bed5820 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -288,7 +288,7 @@ static int msm_load(struct drm_device *dev, unsigned long flags)
 	}
 
 	pm_runtime_get_sync(dev->dev);
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0));
 	pm_runtime_put_sync(dev->dev);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to install IRQ handler\n");
diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c
index 28f84b4fce32..34d6a85e9023 100644
--- a/drivers/gpu/drm/qxl/qxl_irq.c
+++ b/drivers/gpu/drm/qxl/qxl_irq.c
@@ -87,7 +87,7 @@ int qxl_irq_init(struct qxl_device *qdev)
 	atomic_set(&qdev->irq_received_cursor, 0);
 	atomic_set(&qdev->irq_received_io_cmd, 0);
 	qdev->irq_received_error = 0;
-	ret = drm_irq_install(qdev->ddev);
+	ret = drm_irq_install(qdev->ddev, qdev->ddev->pdev->irq);
 	qdev->ram_header->int_mask = QXL_INTERRUPT_MASK;
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Failed installing irq: %d\n", ret);
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 089c9ffb0aa9..16807afab362 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -287,7 +287,7 @@ int radeon_irq_kms_init(struct radeon_device *rdev)
 	INIT_WORK(&rdev->reset_work, radeon_irq_reset_work_func);
 
 	rdev->irq.installed = true;
-	r = drm_irq_install(rdev->ddev);
+	r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq);
 	if (r) {
 		rdev->irq.installed = false;
 		flush_work(&rdev->hotplug_work);
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
index c839c9c89efb..82c84c7fd4f6 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
@@ -185,7 +185,7 @@ static int shmob_drm_load(struct drm_device *dev, unsigned long flags)
 		goto done;
 	}
 
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0));
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to install IRQ handler\n");
 		goto done;
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index 171a8203892c..b20b69488dc9 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -268,7 +268,7 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 	}
 
 	pm_runtime_get_sync(dev->dev);
-	ret = drm_irq_install(dev);
+	ret = drm_irq_install(dev, platform_get_irq(dev->platformdev, 0));
 	pm_runtime_put_sync(dev->dev);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to install IRQ handler\n");
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 4a223bbea3b3..6bdd15eea7e8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -806,7 +806,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	}
 
 	if (dev_priv->capabilities & SVGA_CAP_IRQMASK) {
-		ret = drm_irq_install(dev);
+		ret = drm_irq_install(dev, dev->pdev->irq);
 		if (ret != 0) {
 			DRM_ERROR("Failed installing irq: %d\n", ret);
 			goto out_no_irq;
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 8e8c392d6fa8..7a7cfe88b9bf 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1353,7 +1353,7 @@ extern void drm_core_reclaim_buffers(struct drm_device *dev,
 				/* IRQ support (drm_irq.h) */
 extern int drm_control(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv);
-extern int drm_irq_install(struct drm_device *dev);
+extern int drm_irq_install(struct drm_device *dev, int irq);
 extern int drm_irq_uninstall(struct drm_device *dev);
 
 extern int drm_vblank_init(struct drm_device *dev, int num_crtcs);
-- 
cgit v1.2.3-71-gd317


From b2a21aa25a39837d06eb24a7f0fef1733f9843eb Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 3 Nov 2013 21:13:09 +0100
Subject: drm: remove bus->get_irq implementations

Now that they're all unused we can get rid of them, including the
dummy version in drm_usb.c.

Reviewed-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_pci.c      | 6 ------
 drivers/gpu/drm/drm_platform.c | 6 ------
 drivers/gpu/drm/drm_usb.c      | 6 ------
 include/drm/drmP.h             | 1 -
 4 files changed, 19 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 08c76af920d6..eff29e311f70 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -137,11 +137,6 @@ static int drm_get_pci_domain(struct drm_device *dev)
 	return pci_domain_nr(dev->pdev->bus);
 }
 
-static int drm_pci_get_irq(struct drm_device *dev)
-{
-	return dev->pdev->irq;
-}
-
 static const char *drm_pci_get_name(struct drm_device *dev)
 {
 	struct pci_driver *pdriver = dev->driver->kdriver.pci;
@@ -317,7 +312,6 @@ void drm_pci_agp_destroy(struct drm_device *dev)
 }
 
 static struct drm_bus drm_pci_bus = {
-	.get_irq = drm_pci_get_irq,
 	.get_name = drm_pci_get_name,
 	.set_busid = drm_pci_set_busid,
 	.set_unique = drm_pci_set_unique,
diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c
index 5cd8c695824f..5b918212b1c3 100644
--- a/drivers/gpu/drm/drm_platform.c
+++ b/drivers/gpu/drm/drm_platform.c
@@ -68,11 +68,6 @@ err_free:
 	return ret;
 }
 
-static int drm_platform_get_irq(struct drm_device *dev)
-{
-	return platform_get_irq(dev->platformdev, 0);
-}
-
 static const char *drm_platform_get_name(struct drm_device *dev)
 {
 	return dev->platformdev->name;
@@ -123,7 +118,6 @@ err:
 }
 
 static struct drm_bus drm_platform_bus = {
-	.get_irq = drm_platform_get_irq,
 	.get_name = drm_platform_get_name,
 	.set_busid = drm_platform_set_busid,
 };
diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c
index ed60f887c805..abdc265c9cc8 100644
--- a/drivers/gpu/drm/drm_usb.c
+++ b/drivers/gpu/drm/drm_usb.c
@@ -36,11 +36,6 @@ err_free:
 }
 EXPORT_SYMBOL(drm_get_usb_dev);
 
-static int drm_usb_get_irq(struct drm_device *dev)
-{
-	return 0;
-}
-
 static const char *drm_usb_get_name(struct drm_device *dev)
 {
 	return "USB";
@@ -53,7 +48,6 @@ static int drm_usb_set_busid(struct drm_device *dev,
 }
 
 static struct drm_bus drm_usb_bus = {
-	.get_irq = drm_usb_get_irq,
 	.get_name = drm_usb_get_name,
 	.set_busid = drm_usb_set_busid,
 };
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 7a7cfe88b9bf..f4d0eaa3da46 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -726,7 +726,6 @@ struct drm_master {
 #define DRM_SCANOUTPOS_ACCURATE     (1 << 2)
 
 struct drm_bus {
-	int (*get_irq)(struct drm_device *dev);
 	const char *(*get_name)(struct drm_device *dev);
 	int (*set_busid)(struct drm_device *dev, struct drm_master *master);
 	int (*set_unique)(struct drm_device *dev, struct drm_master *master,
-- 
cgit v1.2.3-71-gd317


From 53bf2a2bca9b56e23fa8862159c75868672d7f1e Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 3 Nov 2013 21:47:18 +0100
Subject: drm: inline drm_pci_set_unique

This is only used for drm versions 1.0, and kms drivers have never
been there. So we can appropriately restrict this to legacy and hence
pci devices and inline everything.

v2: Make the dummy function actually return something, caught by Wu
Fengguang's 0-day tester.

v3: Fix spelling in comment (Thierry)

Reviewed-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_ioctl.c | 10 +++++++---
 drivers/gpu/drm/drm_pci.c   | 14 ++++++++++----
 include/drm/drmP.h          |  5 +++--
 3 files changed, 20 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 93a42040bedb..53560ef53f99 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -93,7 +93,8 @@ drm_unset_busid(struct drm_device *dev,
  * Copies the bus id from userspace into drm_device::unique, and verifies that
  * it matches the device this DRM is attached to (EINVAL otherwise).  Deprecated
  * in interface version 1.1 and will return EBUSY when setversion has requested
- * version 1.1 or greater.
+ * version 1.1 or greater. Also note that KMS is all version 1.1 and later and
+ * UMS was only ever supported on pci devices.
  */
 int drm_setunique(struct drm_device *dev, void *data,
 		  struct drm_file *file_priv)
@@ -108,10 +109,13 @@ int drm_setunique(struct drm_device *dev, void *data,
 	if (!u->unique_len || u->unique_len > 1024)
 		return -EINVAL;
 
-	if (!dev->driver->bus->set_unique)
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return 0;
+
+	if (WARN_ON(!dev->pdev))
 		return -EINVAL;
 
-	ret = dev->driver->bus->set_unique(dev, master, u);
+	ret = drm_pci_set_unique(dev, master, u);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index eff29e311f70..c550b349f202 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -185,9 +185,9 @@ err:
 	return ret;
 }
 
-static int drm_pci_set_unique(struct drm_device *dev,
-			      struct drm_master *master,
-			      struct drm_unique *u)
+int drm_pci_set_unique(struct drm_device *dev,
+		       struct drm_master *master,
+		       struct drm_unique *u)
 {
 	int domain, bus, slot, func, ret;
 	const char *bus_name;
@@ -314,7 +314,6 @@ void drm_pci_agp_destroy(struct drm_device *dev)
 static struct drm_bus drm_pci_bus = {
 	.get_name = drm_pci_get_name,
 	.set_busid = drm_pci_set_busid,
-	.set_unique = drm_pci_set_unique,
 };
 
 /**
@@ -481,6 +480,13 @@ int drm_irq_by_busid(struct drm_device *dev, void *data,
 {
 	return -EINVAL;
 }
+
+int drm_pci_set_unique(struct drm_device *dev,
+		       struct drm_master *master,
+		       struct drm_unique *u)
+{
+	return -EINVAL;
+}
 #endif
 
 EXPORT_SYMBOL(drm_pci_init);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index f4d0eaa3da46..1a9d509bef66 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -728,8 +728,6 @@ struct drm_master {
 struct drm_bus {
 	const char *(*get_name)(struct drm_device *dev);
 	int (*set_busid)(struct drm_device *dev, struct drm_master *master);
-	int (*set_unique)(struct drm_device *dev, struct drm_master *master,
-			  struct drm_unique *unique);
 };
 
 /**
@@ -1511,6 +1509,9 @@ extern drm_dma_handle_t *drm_pci_alloc(struct drm_device *dev, size_t size,
 				       size_t align);
 extern void __drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah);
 extern void drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah);
+extern int drm_pci_set_unique(struct drm_device *dev,
+			      struct drm_master *master,
+			      struct drm_unique *u);
 
 			       /* sysfs support (drm_sysfs.c) */
 struct drm_sysfs_class;
-- 
cgit v1.2.3-71-gd317


From 5829d1834e5486e83547f36576b160023c9609c2 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 3 Nov 2013 21:48:48 +0100
Subject: drm: rip out dev->devname

This was only ever used to pretty-print the irq driver name. And on
kms systems due to set_version bonghits we never set up the prettier
name, ever. Which make this a bit pointless.

Also, we can always dig out the driver-instance/irq relationship
through other means, so this isn't that useful. So just rip it out to
simplify the set_version/set_busid insanity a bit.

Also delete the temporary busname from drm_pci_set_busid, it's now
unused.

v2: Rebase on top of the new host1x drm_bus for tegra.

Reviewed-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_ioctl.c    |  3 ---
 drivers/gpu/drm/drm_irq.c      |  8 +-------
 drivers/gpu/drm/drm_pci.c      | 25 -------------------------
 drivers/gpu/drm/drm_platform.c | 11 -----------
 drivers/gpu/drm/drm_stub.c     |  5 -----
 drivers/gpu/drm/tegra/bus.c    | 10 ----------
 include/drm/drmP.h             |  1 -
 7 files changed, 1 insertion(+), 62 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 53560ef53f99..38269d5aa333 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -72,9 +72,6 @@ static void
 drm_unset_busid(struct drm_device *dev,
 		struct drm_master *master)
 {
-	kfree(dev->devname);
-	dev->devname = NULL;
-
 	kfree(master->unique);
 	master->unique = NULL;
 	master->unique_len = 0;
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index de38bc9b6581..7583767ec619 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -246,7 +246,6 @@ int drm_irq_install(struct drm_device *dev, int irq)
 {
 	int ret;
 	unsigned long sh_flags = 0;
-	char *irqname;
 
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
 		return -EINVAL;
@@ -272,13 +271,8 @@ int drm_irq_install(struct drm_device *dev, int irq)
 	if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED))
 		sh_flags = IRQF_SHARED;
 
-	if (dev->devname)
-		irqname = dev->devname;
-	else
-		irqname = dev->driver->name;
-
 	ret = request_irq(irq, dev->driver->irq_handler,
-			  sh_flags, irqname, dev);
+			  sh_flags, dev->driver->name, dev);
 
 	if (ret < 0) {
 		dev->irq_enabled = false;
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index c550b349f202..047c51046d94 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -146,7 +146,6 @@ static const char *drm_pci_get_name(struct drm_device *dev)
 static int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master)
 {
 	int len, ret;
-	struct pci_driver *pdriver = dev->driver->kdriver.pci;
 	master->unique_len = 40;
 	master->unique_size = master->unique_len;
 	master->unique = kmalloc(master->unique_size, GFP_KERNEL);
@@ -168,18 +167,6 @@ static int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master)
 	} else
 		master->unique_len = len;
 
-	dev->devname =
-		kmalloc(strlen(pdriver->name) +
-			master->unique_len + 2, GFP_KERNEL);
-
-	if (dev->devname == NULL) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	sprintf(dev->devname, "%s@%s", pdriver->name,
-		master->unique);
-
 	return 0;
 err:
 	return ret;
@@ -190,7 +177,6 @@ int drm_pci_set_unique(struct drm_device *dev,
 		       struct drm_unique *u)
 {
 	int domain, bus, slot, func, ret;
-	const char *bus_name;
 
 	master->unique_len = u->unique_len;
 	master->unique_size = u->unique_len + 1;
@@ -207,17 +193,6 @@ int drm_pci_set_unique(struct drm_device *dev,
 
 	master->unique[master->unique_len] = '\0';
 
-	bus_name = dev->driver->bus->get_name(dev);
-	dev->devname = kmalloc(strlen(bus_name) +
-			       strlen(master->unique) + 2, GFP_KERNEL);
-	if (!dev->devname) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	sprintf(dev->devname, "%s@%s", bus_name,
-		master->unique);
-
 	/* Return error if the busid submitted doesn't match the device's actual
 	 * busid.
 	 */
diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c
index 5b918212b1c3..b97b11ea2dc4 100644
--- a/drivers/gpu/drm/drm_platform.c
+++ b/drivers/gpu/drm/drm_platform.c
@@ -101,17 +101,6 @@ static int drm_platform_set_busid(struct drm_device *dev, struct drm_master *mas
 		goto err;
 	}
 
-	dev->devname =
-		kmalloc(strlen(dev->platformdev->name) +
-			master->unique_len + 2, GFP_KERNEL);
-
-	if (dev->devname == NULL) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	sprintf(dev->devname, "%s@%s", dev->platformdev->name,
-		master->unique);
 	return 0;
 err:
 	return ret;
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 5394b201c3d0..3a8e832ad151 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -166,9 +166,6 @@ static void drm_master_destroy(struct kref *kref)
 		master->unique_len = 0;
 	}
 
-	kfree(dev->devname);
-	dev->devname = NULL;
-
 	list_for_each_entry_safe(pt, next, &master->magicfree, head) {
 		list_del(&pt->head);
 		drm_ht_remove_item(&master->magiclist, &pt->hash_item);
@@ -648,8 +645,6 @@ static void drm_dev_release(struct kref *ref)
 	drm_minor_free(dev, DRM_MINOR_RENDER);
 	drm_minor_free(dev, DRM_MINOR_CONTROL);
 
-	kfree(dev->devname);
-
 	mutex_destroy(&dev->master_mutex);
 	kfree(dev);
 }
diff --git a/drivers/gpu/drm/tegra/bus.c b/drivers/gpu/drm/tegra/bus.c
index 6916fa51cfa4..b3a66d65cb53 100644
--- a/drivers/gpu/drm/tegra/bus.c
+++ b/drivers/gpu/drm/tegra/bus.c
@@ -12,9 +12,7 @@ static int drm_host1x_set_busid(struct drm_device *dev,
 				struct drm_master *master)
 {
 	const char *device = dev_name(dev->dev);
-	const char *driver = dev->driver->name;
 	const char *bus = dev->dev->bus->name;
-	int length;
 
 	master->unique_len = strlen(bus) + 1 + strlen(device);
 	master->unique_size = master->unique_len;
@@ -25,14 +23,6 @@ static int drm_host1x_set_busid(struct drm_device *dev,
 
 	snprintf(master->unique, master->unique_len + 1, "%s:%s", bus, device);
 
-	length = strlen(driver) + 1 + master->unique_len;
-
-	dev->devname = kmalloc(length + 1, GFP_KERNEL);
-	if (!dev->devname)
-		return -ENOMEM;
-
-	snprintf(dev->devname, length + 1, "%s@%s", driver, master->unique);
-
 	return 0;
 }
 
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 1a9d509bef66..56b028d6bc98 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1048,7 +1048,6 @@ struct drm_vblank_crtc {
  */
 struct drm_device {
 	struct list_head legacy_dev_list;/**< list of devices per driver for stealth attach cleanup */
-	char *devname;			/**< For /proc/interrupts */
 	int if_version;			/**< Highest interface version set */
 
 	/** \name Lifetime Management */
-- 
cgit v1.2.3-71-gd317


From 9de1b51f1fae6476155350a0670dc637c762e718 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 3 Nov 2013 22:24:37 +0100
Subject: drm: remove drm_bus->get_name

The only user is the info debugfs file, so we only need something
human readable. Now for both pci and platform devices we've used the
name of the underlying device driver, which matches the name of the
drm driver in all cases. So we can just use that instead.

The exception is usb, which used a generic "USB". Not to harmful with
just one usb driver, but better to use "udl", too.

With that converted we can rip out all the ->get_name implementations.

Reviewed-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_info.c     | 6 ++----
 drivers/gpu/drm/drm_pci.c      | 7 -------
 drivers/gpu/drm/drm_platform.c | 6 ------
 drivers/gpu/drm/drm_usb.c      | 6 ------
 include/drm/drmP.h             | 1 -
 5 files changed, 2 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c
index 7473035dd28b..86feedd5e6f6 100644
--- a/drivers/gpu/drm/drm_info.c
+++ b/drivers/gpu/drm/drm_info.c
@@ -47,18 +47,16 @@ int drm_name_info(struct seq_file *m, void *data)
 	struct drm_minor *minor = node->minor;
 	struct drm_device *dev = minor->dev;
 	struct drm_master *master = minor->master;
-	const char *bus_name;
 	if (!master)
 		return 0;
 
-	bus_name = dev->driver->bus->get_name(dev);
 	if (master->unique) {
 		seq_printf(m, "%s %s %s\n",
-			   bus_name,
+			   dev->driver->name,
 			   dev_name(dev->dev), master->unique);
 	} else {
 		seq_printf(m, "%s %s\n",
-			   bus_name, dev_name(dev->dev));
+			   dev->driver->name, dev_name(dev->dev));
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 047c51046d94..1fbe22a254e1 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -137,12 +137,6 @@ static int drm_get_pci_domain(struct drm_device *dev)
 	return pci_domain_nr(dev->pdev->bus);
 }
 
-static const char *drm_pci_get_name(struct drm_device *dev)
-{
-	struct pci_driver *pdriver = dev->driver->kdriver.pci;
-	return pdriver->name;
-}
-
 static int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master)
 {
 	int len, ret;
@@ -287,7 +281,6 @@ void drm_pci_agp_destroy(struct drm_device *dev)
 }
 
 static struct drm_bus drm_pci_bus = {
-	.get_name = drm_pci_get_name,
 	.set_busid = drm_pci_set_busid,
 };
 
diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c
index b97b11ea2dc4..c7ec27bbe7c6 100644
--- a/drivers/gpu/drm/drm_platform.c
+++ b/drivers/gpu/drm/drm_platform.c
@@ -68,11 +68,6 @@ err_free:
 	return ret;
 }
 
-static const char *drm_platform_get_name(struct drm_device *dev)
-{
-	return dev->platformdev->name;
-}
-
 static int drm_platform_set_busid(struct drm_device *dev, struct drm_master *master)
 {
 	int len, ret, id;
@@ -107,7 +102,6 @@ err:
 }
 
 static struct drm_bus drm_platform_bus = {
-	.get_name = drm_platform_get_name,
 	.set_busid = drm_platform_set_busid,
 };
 
diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c
index abdc265c9cc8..fae4aa4e1426 100644
--- a/drivers/gpu/drm/drm_usb.c
+++ b/drivers/gpu/drm/drm_usb.c
@@ -36,11 +36,6 @@ err_free:
 }
 EXPORT_SYMBOL(drm_get_usb_dev);
 
-static const char *drm_usb_get_name(struct drm_device *dev)
-{
-	return "USB";
-}
-
 static int drm_usb_set_busid(struct drm_device *dev,
 			       struct drm_master *master)
 {
@@ -48,7 +43,6 @@ static int drm_usb_set_busid(struct drm_device *dev,
 }
 
 static struct drm_bus drm_usb_bus = {
-	.get_name = drm_usb_get_name,
 	.set_busid = drm_usb_set_busid,
 };
     
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 56b028d6bc98..493bbbb300e6 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -726,7 +726,6 @@ struct drm_master {
 #define DRM_SCANOUTPOS_ACCURATE     (1 << 2)
 
 struct drm_bus {
-	const char *(*get_name)(struct drm_device *dev);
 	int (*set_busid)(struct drm_device *dev, struct drm_master *master);
 };
 
-- 
cgit v1.2.3-71-gd317


From f93227759d9bd3b299c288a6bd448161f849cdfd Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 3 Nov 2013 22:32:41 +0100
Subject: drm: Remove dev->kdriver

With the last patch to ditch the ->get_name callbacks the last
user is now gone.

Reviewed-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_pci.c      | 1 -
 drivers/gpu/drm/drm_platform.c | 1 -
 drivers/gpu/drm/drm_usb.c      | 1 -
 include/drm/drmP.h             | 5 -----
 4 files changed, 8 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 1fbe22a254e1..d237de36a07a 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -364,7 +364,6 @@ int drm_pci_init(struct drm_driver *driver, struct pci_driver *pdriver)
 
 	DRM_DEBUG("\n");
 
-	driver->kdriver.pci = pdriver;
 	driver->bus = &drm_pci_bus;
 
 	if (driver->driver_features & DRIVER_MODESET)
diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c
index c7ec27bbe7c6..234e0bc1ae51 100644
--- a/drivers/gpu/drm/drm_platform.c
+++ b/drivers/gpu/drm/drm_platform.c
@@ -121,7 +121,6 @@ int drm_platform_init(struct drm_driver *driver, struct platform_device *platfor
 {
 	DRM_DEBUG("\n");
 
-	driver->kdriver.platform_device = platform_device;
 	driver->bus = &drm_platform_bus;
 	return drm_get_platform_dev(platform_device, driver);
 }
diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c
index fae4aa4e1426..c6c7c29ad46f 100644
--- a/drivers/gpu/drm/drm_usb.c
+++ b/drivers/gpu/drm/drm_usb.c
@@ -51,7 +51,6 @@ int drm_usb_init(struct drm_driver *driver, struct usb_driver *udriver)
 	int res;
 	DRM_DEBUG("\n");
 
-	driver->kdriver.usb = udriver;
 	driver->bus = &drm_usb_bus;
 
 	res = usb_register(udriver);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 493bbbb300e6..19daabeeffbe 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -963,11 +963,6 @@ struct drm_driver {
 	const struct drm_ioctl_desc *ioctls;
 	int num_ioctls;
 	const struct file_operations *fops;
-	union {
-		struct pci_driver *pci;
-		struct platform_device *platform_device;
-		struct usb_driver *usb;
-	} kdriver;
 	struct drm_bus *bus;
 
 	/* List of devices hanging off this driver with stealth attach. */
-- 
cgit v1.2.3-71-gd317


From aa45660c6b59388fac3995a8c2998d710ef28fd4 Mon Sep 17 00:00:00 2001
From: Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>
Date: Mon, 14 Apr 2014 15:41:27 +0300
Subject: netfilter: nf_tables: Make meta expression core functions public

This will be useful to create network family dedicated META expression
as for NFPROTO_BRIDGE for instance.

Signed-off-by: Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nft_meta.h | 36 +++++++++++++++++++++++++++++
 net/netfilter/nft_meta.c         | 50 ++++++++++++++++++++--------------------
 2 files changed, 61 insertions(+), 25 deletions(-)
 create mode 100644 include/net/netfilter/nft_meta.h

(limited to 'include')

diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
new file mode 100644
index 000000000000..0ee47c3e2e31
--- /dev/null
+++ b/include/net/netfilter/nft_meta.h
@@ -0,0 +1,36 @@
+#ifndef _NFT_META_H_
+#define _NFT_META_H_
+
+struct nft_meta {
+	enum nft_meta_keys	key:8;
+	union {
+		enum nft_registers	dreg:8;
+		enum nft_registers	sreg:8;
+	};
+};
+
+extern const struct nla_policy nft_meta_policy[];
+
+int nft_meta_get_init(const struct nft_ctx *ctx,
+		      const struct nft_expr *expr,
+		      const struct nlattr * const tb[]);
+
+int nft_meta_set_init(const struct nft_ctx *ctx,
+		      const struct nft_expr *expr,
+		      const struct nlattr * const tb[]);
+
+int nft_meta_get_dump(struct sk_buff *skb,
+		      const struct nft_expr *expr);
+
+int nft_meta_set_dump(struct sk_buff *skb,
+		      const struct nft_expr *expr);
+
+void nft_meta_get_eval(const struct nft_expr *expr,
+		       struct nft_data data[NFT_REG_MAX + 1],
+		       const struct nft_pktinfo *pkt);
+
+void nft_meta_set_eval(const struct nft_expr *expr,
+		       struct nft_data data[NFT_REG_MAX + 1],
+		       const struct nft_pktinfo *pkt);
+
+#endif
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 6d0b8cc27f2a..852b178c6ae7 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -18,18 +18,11 @@
 #include <net/sock.h>
 #include <net/tcp_states.h> /* for TCP_TIME_WAIT */
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
 
-struct nft_meta {
-	enum nft_meta_keys	key:8;
-	union {
-		enum nft_registers	dreg:8;
-		enum nft_registers	sreg:8;
-	};
-};
-
-static void nft_meta_get_eval(const struct nft_expr *expr,
-			      struct nft_data data[NFT_REG_MAX + 1],
-			      const struct nft_pktinfo *pkt)
+void nft_meta_get_eval(const struct nft_expr *expr,
+		       struct nft_data data[NFT_REG_MAX + 1],
+		       const struct nft_pktinfo *pkt)
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
@@ -140,10 +133,11 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 err:
 	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
 }
+EXPORT_SYMBOL_GPL(nft_meta_get_eval);
 
-static void nft_meta_set_eval(const struct nft_expr *expr,
-			      struct nft_data data[NFT_REG_MAX + 1],
-			      const struct nft_pktinfo *pkt)
+void nft_meta_set_eval(const struct nft_expr *expr,
+		       struct nft_data data[NFT_REG_MAX + 1],
+		       const struct nft_pktinfo *pkt)
 {
 	const struct nft_meta *meta = nft_expr_priv(expr);
 	struct sk_buff *skb = pkt->skb;
@@ -163,16 +157,18 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
 		WARN_ON(1);
 	}
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_eval);
 
-static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
 	[NFTA_META_DREG]	= { .type = NLA_U32 },
 	[NFTA_META_KEY]		= { .type = NLA_U32 },
 	[NFTA_META_SREG]	= { .type = NLA_U32 },
 };
+EXPORT_SYMBOL_GPL(nft_meta_policy);
 
-static int nft_meta_get_init(const struct nft_ctx *ctx,
-			     const struct nft_expr *expr,
-			     const struct nlattr * const tb[])
+int nft_meta_get_init(const struct nft_ctx *ctx,
+		      const struct nft_expr *expr,
+		      const struct nlattr * const tb[])
 {
 	struct nft_meta *priv = nft_expr_priv(expr);
 	int err;
@@ -215,10 +211,11 @@ static int nft_meta_get_init(const struct nft_ctx *ctx,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nft_meta_get_init);
 
-static int nft_meta_set_init(const struct nft_ctx *ctx,
-			     const struct nft_expr *expr,
-			     const struct nlattr * const tb[])
+int nft_meta_set_init(const struct nft_ctx *ctx,
+		      const struct nft_expr *expr,
+		      const struct nlattr * const tb[])
 {
 	struct nft_meta *priv = nft_expr_priv(expr);
 	int err;
@@ -240,9 +237,10 @@ static int nft_meta_set_init(const struct nft_ctx *ctx,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_init);
 
-static int nft_meta_get_dump(struct sk_buff *skb,
-			     const struct nft_expr *expr)
+int nft_meta_get_dump(struct sk_buff *skb,
+		      const struct nft_expr *expr)
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 
@@ -255,9 +253,10 @@ static int nft_meta_get_dump(struct sk_buff *skb,
 nla_put_failure:
 	return -1;
 }
+EXPORT_SYMBOL_GPL(nft_meta_get_dump);
 
-static int nft_meta_set_dump(struct sk_buff *skb,
-			     const struct nft_expr *expr)
+int nft_meta_set_dump(struct sk_buff *skb,
+		      const struct nft_expr *expr)
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 
@@ -271,6 +270,7 @@ static int nft_meta_set_dump(struct sk_buff *skb,
 nla_put_failure:
 	return -1;
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_dump);
 
 static struct nft_expr_type nft_meta_type;
 static const struct nft_expr_ops nft_meta_get_ops = {
-- 
cgit v1.2.3-71-gd317


From f392e51cd6ae6f6ee5b9b6d611cdc282b4c1711e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:14 -0400
Subject: cgroup: update cgroup->subsys_mask to ->child_subsys_mask and restore
 cgroup_root->subsys_mask

944196278d3d ("cgroup: move ->subsys_mask from cgroupfs_root to
cgroup") moved ->subsys_mask from cgroup_root to cgroup to prepare for
the unified hierarhcy; however, it turns out that carrying the
subsys_mask of the children in the parent, instead of itself, is a lot
more natural.  This patch restores cgroup_root->subsys_mask and morphs
cgroup->subsys_mask into cgroup->child_subsys_mask.

* Uses of root->cgrp.subsys_mask are restored to root->subsys_mask.

* Remove automatic setting and clearing of cgrp->subsys_mask and
  instead just inherit ->child_subsys_mask from the parent during
  cgroup creation.  Note that this doesn't affect any current
  behaviors.

* Undo __kill_css() separation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  7 ++++--
 kernel/cgroup.c        | 64 +++++++++++++++++++++-----------------------------
 2 files changed, 32 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c2515851c1aa..1b5b2fe1b228 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -173,8 +173,8 @@ struct cgroup {
 	 */
 	u64 serial_nr;
 
-	/* The bitmask of subsystems attached to this cgroup */
-	unsigned long subsys_mask;
+	/* the bitmask of subsystems enabled on the child cgroups */
+	unsigned long child_subsys_mask;
 
 	/* Private pointers for each registered subsystem */
 	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
@@ -282,6 +282,9 @@ enum {
 struct cgroup_root {
 	struct kernfs_root *kf_root;
 
+	/* The bitmask of subsystems attached to this hierarchy */
+	unsigned long subsys_mask;
+
 	/* Unique id for this hierarchy. */
 	int hierarchy_id;
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a6894272353b..f944619077f4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -529,7 +529,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
 	 * won't change, so no need for locking.
 	 */
 	for_each_subsys(ss, i) {
-		if (root->cgrp.subsys_mask & (1UL << i)) {
+		if (root->subsys_mask & (1UL << i)) {
 			/* Subsystem is in this hierarchy. So we want
 			 * the subsystem state from the new
 			 * cgroup */
@@ -742,7 +742,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 	BUG_ON(!list_empty(&cgrp->children));
 
 	/* Rebind all subsystems back to the default hierarchy */
-	rebind_subsystems(&cgrp_dfl_root, cgrp->subsys_mask);
+	rebind_subsystems(&cgrp_dfl_root, root->subsys_mask);
 
 	/*
 	 * Release all the links from cset_links to this hierarchy's
@@ -1050,8 +1050,11 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 		ss->root = dst_root;
 		css->cgroup = &dst_root->cgrp;
 
-		src_root->cgrp.subsys_mask &= ~(1 << ssid);
-		dst_root->cgrp.subsys_mask |= 1 << ssid;
+		src_root->subsys_mask &= ~(1 << ssid);
+		src_root->cgrp.child_subsys_mask &= ~(1 << ssid);
+
+		dst_root->subsys_mask |= 1 << ssid;
+		dst_root->cgrp.child_subsys_mask |= 1 << ssid;
 
 		if (ss->bind)
 			ss->bind(css);
@@ -1069,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq,
 	int ssid;
 
 	for_each_subsys(ss, ssid)
-		if (root->cgrp.subsys_mask & (1 << ssid))
+		if (root->subsys_mask & (1 << ssid))
 			seq_printf(seq, ",%s", ss->name);
 	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
 		seq_puts(seq, ",sane_behavior");
@@ -1273,12 +1276,12 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	if (ret)
 		goto out_unlock;
 
-	if (opts.subsys_mask != root->cgrp.subsys_mask || opts.release_agent)
+	if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
 		pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
 			   task_tgid_nr(current), current->comm);
 
-	added_mask = opts.subsys_mask & ~root->cgrp.subsys_mask;
-	removed_mask = root->cgrp.subsys_mask & ~opts.subsys_mask;
+	added_mask = opts.subsys_mask & ~root->subsys_mask;
+	removed_mask = root->subsys_mask & ~opts.subsys_mask;
 
 	/* Don't allow flags or name to change at remount */
 	if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
@@ -1535,7 +1538,7 @@ retry:
 		 * subsystems) then they must match.
 		 */
 		if ((opts.subsys_mask || opts.none) &&
-		    (opts.subsys_mask != root->cgrp.subsys_mask)) {
+		    (opts.subsys_mask != root->subsys_mask)) {
 			if (!name_match)
 				continue;
 			ret = -EBUSY;
@@ -3658,8 +3661,6 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 	cgroup_get(cgrp);
 	css_get(css->parent);
 
-	cgrp->subsys_mask |= 1 << ss->id;
-
 	if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
 	    parent->parent) {
 		pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
@@ -3780,13 +3781,15 @@ static long cgroup_create(struct cgroup *parent, const char *name,
 
 	/* let's create and online css's */
 	for_each_subsys(ss, ssid) {
-		if (root->cgrp.subsys_mask & (1 << ssid)) {
+		if (parent->child_subsys_mask & (1 << ssid)) {
 			err = create_css(cgrp, ss);
 			if (err)
 				goto err_destroy;
 		}
 	}
 
+	cgrp->child_subsys_mask = parent->child_subsys_mask;
+
 	kernfs_activate(kn);
 
 	mutex_unlock(&cgroup_mutex);
@@ -3882,7 +3885,16 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
 	queue_work(cgroup_destroy_wq, &css->destroy_work);
 }
 
-static void __kill_css(struct cgroup_subsys_state *css)
+/**
+ * kill_css - destroy a css
+ * @css: css to destroy
+ *
+ * This function initiates destruction of @css by removing cgroup interface
+ * files and putting its base reference.  ->css_offline() will be invoked
+ * asynchronously once css_tryget() is guaranteed to fail and when the
+ * reference count reaches zero, @css will be released.
+ */
+static void kill_css(struct cgroup_subsys_state *css)
 {
 	lockdep_assert_held(&cgroup_tree_mutex);
 
@@ -3911,28 +3923,6 @@ static void __kill_css(struct cgroup_subsys_state *css)
 	percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
 }
 
-/**
- * kill_css - destroy a css
- * @css: css to destroy
- *
- * This function initiates destruction of @css by removing cgroup interface
- * files and putting its base reference.  ->css_offline() will be invoked
- * asynchronously once css_tryget() is guaranteed to fail and when the
- * reference count reaches zero, @css will be released.
- */
-static void kill_css(struct cgroup_subsys_state *css)
-{
-	struct cgroup *cgrp = css->cgroup;
-
-	lockdep_assert_held(&cgroup_tree_mutex);
-
-	/* if already killed, noop */
-	if (cgrp->subsys_mask & (1 << css->ss->id)) {
-		cgrp->subsys_mask &= ~(1 << css->ss->id);
-		__kill_css(css);
-	}
-}
-
 /**
  * cgroup_destroy_locked - the first stage of cgroup destruction
  * @cgrp: cgroup to be destroyed
@@ -4145,7 +4135,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 
 	BUG_ON(online_css(css));
 
-	cgrp_dfl_root.cgrp.subsys_mask |= 1 << ss->id;
+	cgrp_dfl_root.subsys_mask |= 1 << ss->id;
 
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&cgroup_tree_mutex);
@@ -4302,7 +4292,7 @@ int proc_cgroup_show(struct seq_file *m, void *v)
 
 		seq_printf(m, "%d:", root->hierarchy_id);
 		for_each_subsys(ss, ssid)
-			if (root->cgrp.subsys_mask & (1 << ssid))
+			if (root->subsys_mask & (1 << ssid))
 				seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
 		if (strlen(root->name))
 			seq_printf(m, "%sname=%s", count ? "," : "",
-- 
cgit v1.2.3-71-gd317


From 2d8f243a5e6efa57fb7c46fe83fafa45b33d0ec2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:15 -0400
Subject: cgroup: implement cgroup->e_csets[]

On the default unified hierarchy, a cgroup may be associated with
csses of its ancestors, which means that a css of a given cgroup may
be associated with css_sets of descendant cgroups.  This means that we
can't walk all tasks associated with a css by iterating the css_sets
associated with the cgroup as there are css_sets which are pointing to
the css but linked on the descendants.

This patch adds per-subsystem list heads cgroup->e_csets[].  Any
css_set which is pointing to a css is linked to
css->cgroup->e_csets[$SUBSYS_ID] through
css_set->e_cset_node[$SUBSYS_ID].  The lists are protected by
css_set_rwsem and will allow us to walk all css_sets associated with a
given css so that we can find out all associated tasks.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 18 ++++++++++++++++++
 kernel/cgroup.c        | 30 ++++++++++++++++++++++++++++--
 2 files changed, 46 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1b5b2fe1b228..33a0043ef454 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -187,6 +187,15 @@ struct cgroup {
 	 */
 	struct list_head cset_links;
 
+	/*
+	 * On the default hierarchy, a css_set for a cgroup with some
+	 * susbsys disabled will point to css's which are associated with
+	 * the closest ancestor which has the subsys enabled.  The
+	 * following lists all css_sets which point to this cgroup's css
+	 * for the given subsystem.
+	 */
+	struct list_head e_csets[CGROUP_SUBSYS_COUNT];
+
 	/*
 	 * Linked list running through all cgroups that can
 	 * potentially be reaped by the release agent. Protected by
@@ -369,6 +378,15 @@ struct css_set {
 	struct cgroup *mg_src_cgrp;
 	struct css_set *mg_dst_cset;
 
+	/*
+	 * On the default hierarhcy, ->subsys[ssid] may point to a css
+	 * attached to an ancestor instead of the cgroup this css_set is
+	 * associated with.  The following node is anchored at
+	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
+	 * iterate through all css's attached to a given cgroup.
+	 */
+	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
+
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
 };
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4eb2dd1bb5b1..37d966289978 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -425,6 +425,8 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
 static void put_css_set_locked(struct css_set *cset, bool taskexit)
 {
 	struct cgrp_cset_link *link, *tmp_link;
+	struct cgroup_subsys *ss;
+	int ssid;
 
 	lockdep_assert_held(&css_set_rwsem);
 
@@ -432,6 +434,8 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
 		return;
 
 	/* This css_set is dead. unlink it and release cgroup refcounts */
+	for_each_subsys(ss, ssid)
+		list_del(&cset->e_cset_node[ssid]);
 	hash_del(&cset->hlist);
 	css_set_count--;
 
@@ -673,7 +677,9 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	struct css_set *cset;
 	struct list_head tmp_links;
 	struct cgrp_cset_link *link;
+	struct cgroup_subsys *ss;
 	unsigned long key;
+	int ssid;
 
 	lockdep_assert_held(&cgroup_mutex);
 
@@ -724,10 +730,14 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 
 	css_set_count++;
 
-	/* Add this cgroup group to the hash table */
+	/* Add @cset to the hash table */
 	key = css_set_hash(cset->subsys);
 	hash_add(css_set_table, &cset->hlist, key);
 
+	for_each_subsys(ss, ssid)
+		list_add_tail(&cset->e_cset_node[ssid],
+			      &cset->subsys[ssid]->cgroup->e_csets[ssid]);
+
 	up_write(&css_set_rwsem);
 
 	return cset;
@@ -1028,7 +1038,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned long ss_mask)
 {
 	struct cgroup_subsys *ss;
-	int ssid, ret;
+	int ssid, i, ret;
 
 	lockdep_assert_held(&cgroup_tree_mutex);
 	lockdep_assert_held(&cgroup_mutex);
@@ -1081,6 +1091,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 	for_each_subsys(ss, ssid) {
 		struct cgroup_root *src_root;
 		struct cgroup_subsys_state *css;
+		struct css_set *cset;
 
 		if (!(ss_mask & (1 << ssid)))
 			continue;
@@ -1095,6 +1106,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 		ss->root = dst_root;
 		css->cgroup = &dst_root->cgrp;
 
+		down_write(&css_set_rwsem);
+		hash_for_each(css_set_table, i, cset, hlist)
+			list_move_tail(&cset->e_cset_node[ss->id],
+				       &dst_root->cgrp.e_csets[ss->id]);
+		up_write(&css_set_rwsem);
+
 		src_root->subsys_mask &= ~(1 << ssid);
 		src_root->cgrp.child_subsys_mask &= ~(1 << ssid);
 
@@ -1417,6 +1434,9 @@ out_unlock:
 
 static void init_cgroup_housekeeping(struct cgroup *cgrp)
 {
+	struct cgroup_subsys *ss;
+	int ssid;
+
 	atomic_set(&cgrp->refcnt, 1);
 	INIT_LIST_HEAD(&cgrp->sibling);
 	INIT_LIST_HEAD(&cgrp->children);
@@ -1425,6 +1445,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
 	cgrp->dummy_css.cgroup = cgrp;
+
+	for_each_subsys(ss, ssid)
+		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
 }
 
 static void init_cgroup_root(struct cgroup_root *root,
@@ -4249,6 +4272,9 @@ int __init cgroup_init(void)
 		if (!ss->early_init)
 			cgroup_init_subsys(ss);
 
+		list_add_tail(&init_css_set.e_cset_node[ssid],
+			      &cgrp_dfl_root.cgrp.e_csets[ssid]);
+
 		/*
 		 * cftype registration needs kmalloc and can't be done
 		 * during early_init.  Register base cftypes separately.
-- 
cgit v1.2.3-71-gd317


From 0f0a2b4fa6210147131082999f1f16d7fb79abf8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:15 -0400
Subject: cgroup: reorganize css_task_iter

This patch reorganizes css_task_iter so that adding effective css
support is easier.

* s/->cset_link/->cset_pos/ and s/->task/->task_pos/ for consistency

* ->origin_css is used to determine whether the iteration reached the
  last css_set.  Replace it with explicit ->cset_head so that
  css_advance_task_iter() doesn't have to know the termination
  condition directly.

* css_task_iter_next() currently assumes that it's walking list of
  cgrp_cset_link and reaches into the current cset through the current
  link to determine the termination conditions for task walking.  As
  this won't always be true for effective css walking, add
  ->tasks_head and ->mg_tasks_head and use them to control task
  walking so that css_task_iter_next() doesn't have to know how
  css_sets are being walked.

This patch doesn't make any behavior changes.  The iteration logic
stays unchanged after the patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  9 ++++++---
 kernel/cgroup.c        | 33 +++++++++++++++++----------------
 2 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 33a0043ef454..bee390586120 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -842,9 +842,12 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
 
 /* A css_task_iter should be treated as an opaque object */
 struct css_task_iter {
-	struct cgroup_subsys_state	*origin_css;
-	struct list_head		*cset_link;
-	struct list_head		*task;
+	struct list_head		*cset_pos;
+	struct list_head		*cset_head;
+
+	struct list_head		*task_pos;
+	struct list_head		*tasks_head;
+	struct list_head		*mg_tasks_head;
 };
 
 void css_task_iter_start(struct cgroup_subsys_state *css,
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0edc186cd545..d48163b26196 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2857,27 +2857,30 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
  */
 static void css_advance_task_iter(struct css_task_iter *it)
 {
-	struct list_head *l = it->cset_link;
+	struct list_head *l = it->cset_pos;
 	struct cgrp_cset_link *link;
 	struct css_set *cset;
 
 	/* Advance to the next non-empty css_set */
 	do {
 		l = l->next;
-		if (l == &it->origin_css->cgroup->cset_links) {
-			it->cset_link = NULL;
+		if (l == it->cset_head) {
+			it->cset_pos = NULL;
 			return;
 		}
 		link = list_entry(l, struct cgrp_cset_link, cset_link);
 		cset = link->cset;
 	} while (list_empty(&cset->tasks) && list_empty(&cset->mg_tasks));
 
-	it->cset_link = l;
+	it->cset_pos = l;
 
 	if (!list_empty(&cset->tasks))
-		it->task = cset->tasks.next;
+		it->task_pos = cset->tasks.next;
 	else
-		it->task = cset->mg_tasks.next;
+		it->task_pos = cset->mg_tasks.next;
+
+	it->tasks_head = &cset->tasks;
+	it->mg_tasks_head = &cset->mg_tasks;
 }
 
 /**
@@ -2903,8 +2906,8 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
 	down_read(&css_set_rwsem);
 
-	it->origin_css = css;
-	it->cset_link = &css->cgroup->cset_links;
+	it->cset_pos = &css->cgroup->cset_links;
+	it->cset_head = it->cset_pos;
 
 	css_advance_task_iter(it);
 }
@@ -2920,12 +2923,10 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 struct task_struct *css_task_iter_next(struct css_task_iter *it)
 {
 	struct task_struct *res;
-	struct list_head *l = it->task;
-	struct cgrp_cset_link *link = list_entry(it->cset_link,
-					struct cgrp_cset_link, cset_link);
+	struct list_head *l = it->task_pos;
 
 	/* If the iterator cg is NULL, we have no tasks */
-	if (!it->cset_link)
+	if (!it->cset_pos)
 		return NULL;
 	res = list_entry(l, struct task_struct, cg_list);
 
@@ -2936,13 +2937,13 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 	 */
 	l = l->next;
 
-	if (l == &link->cset->tasks)
-		l = link->cset->mg_tasks.next;
+	if (l == it->tasks_head)
+		l = it->mg_tasks_head->next;
 
-	if (l == &link->cset->mg_tasks)
+	if (l == it->mg_tasks_head)
 		css_advance_task_iter(it);
 	else
-		it->task = l;
+		it->task_pos = l;
 
 	return res;
 }
-- 
cgit v1.2.3-71-gd317


From 3ebb2b6ef38875b866ec0118bfae7bc52afd0166 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:15 -0400
Subject: cgroup: teach css_task_iter about effective csses

Currently, css_task_iter iterates tasks associated with a css by
visiting each css_set associated with the owning cgroup and walking
tasks of each of them.  This works fine for !unified hierarchies as
each cgroup has its own css for each associated subsystem on the
hierarchy; however, on the planned unified hierarchy, a cgroup may not
have csses associated and its tasks would be considered associated
with the matching css of the nearest ancestor which has the subsystem
enabled.

This means that on the default unified hierarchy, just walking all
tasks associated with a cgroup isn't enough to walk all tasks which
are associated with the specified css.  If any of its children doesn't
have the matching css enabled, task iteration should also include all
tasks from the subtree.  We already added cgroup->e_csets[] to list
all css_sets effectively associated with a given css and walk css_sets
on that list instead to achieve such iteration.

This patch updates css_task_iter iteration such that it walks css_sets
on cgroup->e_csets[] instead of cgroup->cset_links if iteration is
requested on an non-dummy css.  Thanks to the previous iteration
update, this change can be achieved with the addition of
css_task_iter->ss and minimal updates to css_advance_task_iter() and
css_task_iter_start().

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  2 ++
 kernel/cgroup.c        | 18 +++++++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index bee390586120..18fcae39e63e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -842,6 +842,8 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
 
 /* A css_task_iter should be treated as an opaque object */
 struct css_task_iter {
+	struct cgroup_subsys		*ss;
+
 	struct list_head		*cset_pos;
 	struct list_head		*cset_head;
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d48163b26196..ad28866ed44c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2868,8 +2868,14 @@ static void css_advance_task_iter(struct css_task_iter *it)
 			it->cset_pos = NULL;
 			return;
 		}
-		link = list_entry(l, struct cgrp_cset_link, cset_link);
-		cset = link->cset;
+
+		if (it->ss) {
+			cset = container_of(l, struct css_set,
+					    e_cset_node[it->ss->id]);
+		} else {
+			link = list_entry(l, struct cgrp_cset_link, cset_link);
+			cset = link->cset;
+		}
 	} while (list_empty(&cset->tasks) && list_empty(&cset->mg_tasks));
 
 	it->cset_pos = l;
@@ -2906,7 +2912,13 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
 	down_read(&css_set_rwsem);
 
-	it->cset_pos = &css->cgroup->cset_links;
+	it->ss = css->ss;
+
+	if (it->ss)
+		it->cset_pos = &css->cgroup->e_csets[css->ss->id];
+	else
+		it->cset_pos = &css->cgroup->cset_links;
+
 	it->cset_head = it->cset_pos;
 
 	css_advance_task_iter(it);
-- 
cgit v1.2.3-71-gd317


From 6803c006282768ec850760766a6e4eb1a6ff87df Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:16 -0400
Subject: cgroup: add css_set->dfl_cgrp

To implement the unified hierarchy behavior, we'll need to be able to
determine the associated cgroup on the default hierarchy from css_set.
Let's add css_set->dfl_cgrp so that it can be accessed conveniently
and efficiently.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 3 +++
 kernel/cgroup.c        | 4 ++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 18fcae39e63e..c49d161a71cd 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -354,6 +354,9 @@ struct css_set {
 	 */
 	struct list_head cgrp_links;
 
+	/* the default cgroup associated with this css_set */
+	struct cgroup *dfl_cgrp;
+
 	/*
 	 * Set of subsystem states, one for each subsystem. This array is
 	 * immutable after creation apart from the init_css_set during
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2a4f88db3205..c66bfc8ee8a7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -651,6 +651,10 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
 	struct cgrp_cset_link *link;
 
 	BUG_ON(list_empty(tmp_links));
+
+	if (cgroup_on_dfl(cgrp))
+		cset->dfl_cgrp = cgrp;
+
 	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
 	link->cset = cset;
 	link->cgrp = cgrp;
-- 
cgit v1.2.3-71-gd317


From f8f22e53a262ebee37fc98004f16b066cf5bc125 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:16 -0400
Subject: cgroup: implement dynamic subtree controller enable/disable on the
 default hierarchy

cgroup is switching away from multiple hierarchies and will use one
unified default hierarchy where controllers can be dynamically enabled
and disabled per subtree.  The default hierarchy will serve as the
unified hierarchy to which all controllers are attached and a css on
the default hierarchy would need to also serve the tasks of descendant
cgroups which don't have the controller enabled - ie. the tree may be
collapsed from leaf towards root when viewed from specific
controllers.  This has been implemented through effective css in the
previous patches.

This patch finally implements dynamic subtree controller
enable/disable on the default hierarchy via a new knob -
"cgroup.subtree_control" which controls which controllers are enabled
on the child cgroups.  Let's assume a hierarchy like the following.

  root - A - B - C
               \ D

root's "cgroup.subtree_control" determines which controllers are
enabled on A.  A's on B.  B's on C and D.  This coincides with the
fact that controllers on the immediate sub-level are used to
distribute the resources of the parent.  In fact, it's natural to
assume that resource control knobs of a child belong to its parent.
Enabling a controller in "cgroup.subtree_control" declares that
distribution of the respective resources of the cgroup will be
controlled.  Note that this means that controller enable states are
shared among siblings.

The default hierarchy has an extra restriction - only cgroups which
don't contain any task may have controllers enabled in
"cgroup.subtree_control".  Combined with the other properties of the
default hierarchy, this guarantees that, from the view point of
controllers, tasks are only on the leaf cgroups.  In other words, only
leaf csses may contain tasks.  This rules out situations where child
cgroups compete against internal tasks of the parent, which is a
competition between two different types of entities without any clear
way to determine resource distribution between the two.  Different
controllers handle it differently and all the implemented behaviors
are ambiguous, ad-hoc, cumbersome and/or just wrong.  Having this
structural constraints imposed from cgroup core removes the burden
from controller implementations and enables showing one consistent
behavior across all controllers.

When a controller is enabled or disabled, css associations for the
controller in the subtrees of each child should be updated.  After
enabling, the whole subtree of a child should point to the new css of
the child.  After disabling, the whole subtree of a child should point
to the cgroup's css.  This is implemented by first updating cgroup
states such that cgroup_e_css() result points to the appropriate css
and then invoking cgroup_update_dfl_csses() which migrates all tasks
in the affected subtrees to the self cgroup on the default hierarchy.

* When read, "cgroup.subtree_control" lists all the currently enabled
  controllers on the children of the cgroup.

* White-space separated list of controller names prefixed with either
  '+' or '-' can be written to "cgroup.subtree_control".  The ones
  prefixed with '+' are enabled on the controller and '-' disabled.

* A controller can be enabled iff the parent's
  "cgroup.subtree_control" enables it and disabled iff no child's
  "cgroup.subtree_control" has it enabled.

* If a cgroup has tasks, no controller can be enabled via
  "cgroup.subtree_control".  Likewise, if "cgroup.subtree_control" has
  some controllers enabled, tasks can't be migrated into the cgroup.

* All controllers which aren't bound on other hierarchies are
  automatically associated with the root cgroup of the default
  hierarchy.  All the controllers which are bound to the default
  hierarchy are listed in the read-only file "cgroup.controllers" in
  the root directory.

* "cgroup.controllers" in all non-root cgroups is read-only file whose
  content is equal to that of "cgroup.subtree_control" of the parent.
  This indicates which controllers can be used in the cgroup's
  "cgroup.subtree_control".

This is still experimental and there are some holes, one of which is
that ->can_attach() failure during cgroup_update_dfl_csses() may leave
the cgroups in an undefined state.  The issues will be addressed by
future patches.

v2: Non-root cgroups now also have "cgroup.controllers".

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |   5 +
 kernel/cgroup.c        | 367 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 370 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c49d161a71cd..ada239253ec7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -21,6 +21,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/seq_file.h>
 #include <linux/kernfs.h>
+#include <linux/wait.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -164,6 +165,7 @@ struct cgroup {
 
 	struct cgroup *parent;		/* my parent */
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
+	struct kernfs_node *control_kn;	/* kn for "cgroup.subtree_control" */
 
 	/*
 	 * Monotonically increasing unique serial number which defines a
@@ -216,6 +218,9 @@ struct cgroup {
 	/* For css percpu_ref killing and RCU-protected deletion */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
+
+	/* used to wait for offlining of csses */
+	wait_queue_head_t offline_waitq;
 };
 
 #define MAX_CGROUP_ROOT_NAMELEN 64
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c2835a9e192..809dd903ceb8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -182,6 +182,8 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned long ss_mask);
 static void cgroup_destroy_css_killed(struct cgroup *cgrp);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
+static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
+static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			      bool is_add);
 static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
@@ -338,6 +340,14 @@ static int notify_on_release(const struct cgroup *cgrp)
 #define for_each_root(root)						\
 	list_for_each_entry((root), &cgroup_roots, root_list)
 
+/* iterate over child cgrps, lock should be held throughout iteration */
+#define cgroup_for_each_live_child(child, cgrp)				\
+	list_for_each_entry((child), &(cgrp)->children, sibling)	\
+		if (({ lockdep_assert_held(&cgroup_tree_mutex);		\
+		       cgroup_is_dead(child); }))			\
+			;						\
+		else
+
 /**
  * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
  * @cgrp: the cgroup to be checked for liveness
@@ -1450,6 +1460,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 
 	for_each_subsys(ss, ssid)
 		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
+
+	init_waitqueue_head(&cgrp->offline_waitq);
 }
 
 static void init_cgroup_root(struct cgroup_root *root,
@@ -1938,6 +1950,14 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
 
 	lockdep_assert_held(&cgroup_mutex);
 
+	/*
+	 * Except for the root, child_subsys_mask must be zero for a cgroup
+	 * with tasks so that child cgroups don't compete against tasks.
+	 */
+	if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && dst_cgrp->parent &&
+	    dst_cgrp->child_subsys_mask)
+		return -EBUSY;
+
 	/* look up the dst cset for each src cset and link it to src */
 	list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) {
 		struct css_set *dst_cset;
@@ -2303,6 +2323,326 @@ static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+static void cgroup_print_ss_mask(struct seq_file *seq, unsigned int ss_mask)
+{
+	struct cgroup_subsys *ss;
+	bool printed = false;
+	int ssid;
+
+	for_each_subsys(ss, ssid) {
+		if (ss_mask & (1 << ssid)) {
+			if (printed)
+				seq_putc(seq, ' ');
+			seq_printf(seq, "%s", ss->name);
+			printed = true;
+		}
+	}
+	if (printed)
+		seq_putc(seq, '\n');
+}
+
+/* show controllers which are currently attached to the default hierarchy */
+static int cgroup_root_controllers_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	cgroup_print_ss_mask(seq, cgrp->root->subsys_mask);
+	return 0;
+}
+
+/* show controllers which are enabled from the parent */
+static int cgroup_controllers_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	cgroup_print_ss_mask(seq, cgrp->parent->child_subsys_mask);
+	return 0;
+}
+
+/* show controllers which are enabled for a given cgroup's children */
+static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	cgroup_print_ss_mask(seq, cgrp->child_subsys_mask);
+	return 0;
+}
+
+/**
+ * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
+ * @cgrp: root of the subtree to update csses for
+ *
+ * @cgrp's child_subsys_mask has changed and its subtree's (self excluded)
+ * css associations need to be updated accordingly.  This function looks up
+ * all css_sets which are attached to the subtree, creates the matching
+ * updated css_sets and migrates the tasks to the new ones.
+ */
+static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+{
+	LIST_HEAD(preloaded_csets);
+	struct cgroup_subsys_state *css;
+	struct css_set *src_cset;
+	int ret;
+
+	lockdep_assert_held(&cgroup_tree_mutex);
+	lockdep_assert_held(&cgroup_mutex);
+
+	/* look up all csses currently attached to @cgrp's subtree */
+	down_read(&css_set_rwsem);
+	css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
+		struct cgrp_cset_link *link;
+
+		/* self is not affected by child_subsys_mask change */
+		if (css->cgroup == cgrp)
+			continue;
+
+		list_for_each_entry(link, &css->cgroup->cset_links, cset_link)
+			cgroup_migrate_add_src(link->cset, cgrp,
+					       &preloaded_csets);
+	}
+	up_read(&css_set_rwsem);
+
+	/* NULL dst indicates self on default hierarchy */
+	ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets);
+	if (ret)
+		goto out_finish;
+
+	list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
+		struct task_struct *last_task = NULL, *task;
+
+		/* src_csets precede dst_csets, break on the first dst_cset */
+		if (!src_cset->mg_src_cgrp)
+			break;
+
+		/*
+		 * All tasks in src_cset need to be migrated to the
+		 * matching dst_cset.  Empty it process by process.  We
+		 * walk tasks but migrate processes.  The leader might even
+		 * belong to a different cset but such src_cset would also
+		 * be among the target src_csets because the default
+		 * hierarchy enforces per-process membership.
+		 */
+		while (true) {
+			down_read(&css_set_rwsem);
+			task = list_first_entry_or_null(&src_cset->tasks,
+						struct task_struct, cg_list);
+			if (task) {
+				task = task->group_leader;
+				WARN_ON_ONCE(!task_css_set(task)->mg_src_cgrp);
+				get_task_struct(task);
+			}
+			up_read(&css_set_rwsem);
+
+			if (!task)
+				break;
+
+			/* guard against possible infinite loop */
+			if (WARN(last_task == task,
+				 "cgroup: update_dfl_csses failed to make progress, aborting in inconsistent state\n"))
+				goto out_finish;
+			last_task = task;
+
+			threadgroup_lock(task);
+			/* raced against de_thread() from another thread? */
+			if (!thread_group_leader(task)) {
+				threadgroup_unlock(task);
+				put_task_struct(task);
+				continue;
+			}
+
+			ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
+
+			threadgroup_unlock(task);
+			put_task_struct(task);
+
+			if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
+				goto out_finish;
+		}
+	}
+
+out_finish:
+	cgroup_migrate_finish(&preloaded_csets);
+	return ret;
+}
+
+/* change the enabled child controllers for a cgroup in the default hierarchy */
+static int cgroup_subtree_control_write(struct cgroup_subsys_state *dummy_css,
+					struct cftype *cft, char *buffer)
+{
+	unsigned long enable_req = 0, disable_req = 0, enable, disable;
+	struct cgroup *cgrp = dummy_css->cgroup, *child;
+	struct cgroup_subsys *ss;
+	char *tok, *p;
+	int ssid, ret;
+
+	/*
+	 * Parse input - white space separated list of subsystem names
+	 * prefixed with either + or -.
+	 */
+	p = buffer;
+	while ((tok = strsep(&p, " \t\n"))) {
+		for_each_subsys(ss, ssid) {
+			if (ss->disabled || strcmp(tok + 1, ss->name))
+				continue;
+
+			if (*tok == '+') {
+				enable_req |= 1 << ssid;
+				disable_req &= ~(1 << ssid);
+			} else if (*tok == '-') {
+				disable_req |= 1 << ssid;
+				enable_req &= ~(1 << ssid);
+			} else {
+				return -EINVAL;
+			}
+			break;
+		}
+		if (ssid == CGROUP_SUBSYS_COUNT)
+			return -EINVAL;
+	}
+
+	/*
+	 * We're gonna grab cgroup_tree_mutex which nests outside kernfs
+	 * active_ref.  cgroup_lock_live_group() already provides enough
+	 * protection.  Ensure @cgrp stays accessible and break the
+	 * active_ref protection.
+	 */
+	cgroup_get(cgrp);
+	kernfs_break_active_protection(cgrp->control_kn);
+retry:
+	enable = enable_req;
+	disable = disable_req;
+
+	mutex_lock(&cgroup_tree_mutex);
+
+	for_each_subsys(ss, ssid) {
+		if (enable & (1 << ssid)) {
+			if (cgrp->child_subsys_mask & (1 << ssid)) {
+				enable &= ~(1 << ssid);
+				continue;
+			}
+
+			/*
+			 * Because css offlining is asynchronous, userland
+			 * might try to re-enable the same controller while
+			 * the previous instance is still around.  In such
+			 * cases, wait till it's gone using offline_waitq.
+			 */
+			cgroup_for_each_live_child(child, cgrp) {
+				wait_queue_t wait;
+
+				if (!cgroup_css(child, ss))
+					continue;
+
+				prepare_to_wait(&child->offline_waitq, &wait,
+						TASK_UNINTERRUPTIBLE);
+				mutex_unlock(&cgroup_tree_mutex);
+				schedule();
+				finish_wait(&child->offline_waitq, &wait);
+				goto retry;
+			}
+
+			/* unavailable or not enabled on the parent? */
+			if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
+			    (cgrp->parent &&
+			     !(cgrp->parent->child_subsys_mask & (1 << ssid)))) {
+				ret = -ENOENT;
+				goto out_unlock_tree;
+			}
+		} else if (disable & (1 << ssid)) {
+			if (!(cgrp->child_subsys_mask & (1 << ssid))) {
+				disable &= ~(1 << ssid);
+				continue;
+			}
+
+			/* a child has it enabled? */
+			cgroup_for_each_live_child(child, cgrp) {
+				if (child->child_subsys_mask & (1 << ssid)) {
+					ret = -EBUSY;
+					goto out_unlock_tree;
+				}
+			}
+		}
+	}
+
+	if (!enable && !disable) {
+		ret = 0;
+		goto out_unlock_tree;
+	}
+
+	if (!cgroup_lock_live_group(cgrp)) {
+		ret = -ENODEV;
+		goto out_unlock_tree;
+	}
+
+	/*
+	 * Except for the root, child_subsys_mask must be zero for a cgroup
+	 * with tasks so that child cgroups don't compete against tasks.
+	 */
+	if (enable && cgrp->parent && !list_empty(&cgrp->cset_links)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	/*
+	 * Create csses for enables and update child_subsys_mask.  This
+	 * changes cgroup_e_css() results which in turn makes the
+	 * subsequent cgroup_update_dfl_csses() associate all tasks in the
+	 * subtree to the updated csses.
+	 */
+	for_each_subsys(ss, ssid) {
+		if (!(enable & (1 << ssid)))
+			continue;
+
+		cgroup_for_each_live_child(child, cgrp) {
+			ret = create_css(child, ss);
+			if (ret)
+				goto err_undo_css;
+		}
+	}
+
+	cgrp->child_subsys_mask |= enable;
+	cgrp->child_subsys_mask &= ~disable;
+
+	ret = cgroup_update_dfl_csses(cgrp);
+	if (ret)
+		goto err_undo_css;
+
+	/* all tasks are now migrated away from the old csses, kill them */
+	for_each_subsys(ss, ssid) {
+		if (!(disable & (1 << ssid)))
+			continue;
+
+		cgroup_for_each_live_child(child, cgrp)
+			kill_css(cgroup_css(child, ss));
+	}
+
+	kernfs_activate(cgrp->kn);
+	ret = 0;
+out_unlock:
+	mutex_unlock(&cgroup_mutex);
+out_unlock_tree:
+	mutex_unlock(&cgroup_tree_mutex);
+	kernfs_unbreak_active_protection(cgrp->control_kn);
+	cgroup_put(cgrp);
+	return ret;
+
+err_undo_css:
+	cgrp->child_subsys_mask &= ~enable;
+	cgrp->child_subsys_mask |= disable;
+
+	for_each_subsys(ss, ssid) {
+		if (!(enable & (1 << ssid)))
+			continue;
+
+		cgroup_for_each_live_child(child, cgrp) {
+			struct cgroup_subsys_state *css = cgroup_css(child, ss);
+			if (css)
+				kill_css(css);
+		}
+	}
+	goto out_unlock;
+}
+
 static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 				 size_t nbytes, loff_t off)
 {
@@ -2462,9 +2802,14 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
 		return PTR_ERR(kn);
 
 	ret = cgroup_kn_set_ugid(kn);
-	if (ret)
+	if (ret) {
 		kernfs_remove(kn);
-	return ret;
+		return ret;
+	}
+
+	if (cft->seq_show == cgroup_subtree_control_show)
+		cgrp->control_kn = kn;
+	return 0;
 }
 
 /**
@@ -3557,6 +3902,22 @@ static struct cftype cgroup_base_files[] = {
 		.flags = CFTYPE_ONLY_ON_ROOT,
 		.seq_show = cgroup_sane_behavior_show,
 	},
+	{
+		.name = "cgroup.controllers",
+		.flags = CFTYPE_ONLY_ON_DFL | CFTYPE_ONLY_ON_ROOT,
+		.seq_show = cgroup_root_controllers_show,
+	},
+	{
+		.name = "cgroup.controllers",
+		.flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_controllers_show,
+	},
+	{
+		.name = "cgroup.subtree_control",
+		.flags = CFTYPE_ONLY_ON_DFL,
+		.seq_show = cgroup_subtree_control_show,
+		.write_string = cgroup_subtree_control_write,
+	},
 
 	/*
 	 * Historical crazy stuff.  These don't have "cgroup."  prefix and
@@ -3725,6 +4086,8 @@ static void offline_css(struct cgroup_subsys_state *css)
 	css->flags &= ~CSS_ONLINE;
 	css->cgroup->nr_css--;
 	RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
+
+	wake_up_all(&css->cgroup->offline_waitq);
 }
 
 /**
-- 
cgit v1.2.3-71-gd317


From be8f274323c26ddc7e6fd6c44254b7abcdbe6389 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Thu, 17 Apr 2014 17:16:58 +0900
Subject: kprobes: Prohibit probing on .entry.text code

.entry.text is a code area which is used for interrupt/syscall
entries, which includes many sensitive code.
Thus, it is better to prohibit probing on all of such code
instead of a part of that.
Since some symbols are already registered on kprobe blacklist,
this also removes them from the blacklist.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Jonathan Lebon <jlebon@redhat.com>
Cc: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/20140417081658.26341.57354.stgit@ltc230.yrl.intra.hitachi.co.jp
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/entry_32.S     | 33 ---------------------------------
 arch/x86/kernel/entry_64.S     | 20 --------------------
 arch/x86/kernel/kprobes/core.c |  8 ++++++++
 include/linux/kprobes.h        |  1 +
 kernel/kprobes.c               | 13 ++++++++-----
 5 files changed, 17 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index a2a4f4697889..0ca5bf1697bb 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -314,10 +314,6 @@ ENTRY(ret_from_kernel_thread)
 	CFI_ENDPROC
 ENDPROC(ret_from_kernel_thread)
 
-/*
- * Interrupt exit functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 /*
  * Return to user mode is not as complex as all this looks,
  * but we want the default path for a system call return to
@@ -372,10 +368,6 @@ need_resched:
 END(resume_kernel)
 #endif
 	CFI_ENDPROC
-/*
- * End of kprobes section
- */
-	.popsection
 
 /* SYSENTER_RETURN points to after the "sysenter" instruction in
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
@@ -495,10 +487,6 @@ sysexit_audit:
 	PTGS_TO_GS_EX
 ENDPROC(ia32_sysenter_target)
 
-/*
- * syscall stub including irq exit should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 	# system call handler stub
 ENTRY(system_call)
 	RING0_INT_FRAME			# can't unwind into user space anyway
@@ -691,10 +679,6 @@ syscall_badsys:
 	jmp resume_userspace
 END(syscall_badsys)
 	CFI_ENDPROC
-/*
- * End of kprobes section
- */
-	.popsection
 
 .macro FIXUP_ESPFIX_STACK
 /*
@@ -781,10 +765,6 @@ common_interrupt:
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
-/*
- *  Irq entries should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 #define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
@@ -961,10 +941,6 @@ ENTRY(spurious_interrupt_bug)
 	jmp error_code
 	CFI_ENDPROC
 END(spurious_interrupt_bug)
-/*
- * End of kprobes section
- */
-	.popsection
 
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
@@ -1239,11 +1215,6 @@ return_to_handler:
 	jmp *%ecx
 #endif
 
-/*
- * Some functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
-
 #ifdef CONFIG_TRACING
 ENTRY(trace_page_fault)
 	RING0_EC_FRAME
@@ -1453,7 +1424,3 @@ ENTRY(async_page_fault)
 END(async_page_fault)
 #endif
 
-/*
- * End of kprobes section
- */
-	.popsection
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1e96c3628bf2..43bb38951660 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -487,8 +487,6 @@ ENDPROC(native_usergs_sysret64)
 	TRACE_IRQS_OFF
 	.endm
 
-/* save complete stack frame */
-	.pushsection .kprobes.text, "ax"
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
@@ -517,7 +515,6 @@ ENTRY(save_paranoid)
 1:	ret
 	CFI_ENDPROC
 END(save_paranoid)
-	.popsection
 
 /*
  * A newly forked process directly context switches into this address.
@@ -975,10 +972,6 @@ END(interrupt)
 	call \func
 	.endm
 
-/*
- * Interrupt entry/exit should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 	/*
 	 * The interrupt stubs push (~vector+0x80) onto the stack and
 	 * then jump to common_interrupt.
@@ -1113,10 +1106,6 @@ ENTRY(retint_kernel)
 
 	CFI_ENDPROC
 END(common_interrupt)
-/*
- * End of kprobes section
- */
-       .popsection
 
 /*
  * APIC interrupts.
@@ -1477,11 +1466,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 	hyperv_callback_vector hyperv_vector_handler
 #endif /* CONFIG_HYPERV */
 
-/*
- * Some functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
-
 paranoidzeroentry_ist debug do_debug DEBUG_STACK
 paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
 paranoiderrorentry stack_segment do_stack_segment
@@ -1898,7 +1882,3 @@ ENTRY(ignore_sysret)
 	CFI_ENDPROC
 END(ignore_sysret)
 
-/*
- * End of kprobes section
- */
-	.popsection
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index da7bdaa3ce15..7751b3dee53a 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1065,6 +1065,14 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+	return  (addr >= (unsigned long)__kprobes_text_start &&
+		 addr < (unsigned long)__kprobes_text_end) ||
+		(addr >= (unsigned long)__entry_text_start &&
+		 addr < (unsigned long)__entry_text_end);
+}
+
 int __init arch_init_kprobes(void)
 {
 	return 0;
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 925eaf28fca9..cdf9251f8249 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -265,6 +265,7 @@ extern void arch_disarm_kprobe(struct kprobe *p);
 extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
+extern bool arch_within_kprobe_blacklist(unsigned long addr);
 
 struct kprobe_insn_cache {
 	struct mutex mutex;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ceeadfcabb76..5b5ac76671e7 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -96,9 +96,6 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
 static struct kprobe_blackpoint kprobe_blacklist[] = {
 	{"preempt_schedule",},
 	{"native_get_debugreg",},
-	{"irq_entries_start",},
-	{"common_interrupt",},
-	{"mcount",},	/* mcount can be called from everywhere */
 	{NULL}    /* Terminator */
 };
 
@@ -1324,12 +1321,18 @@ out:
 	return ret;
 }
 
+bool __weak arch_within_kprobe_blacklist(unsigned long addr)
+{
+	/* The __kprobes marked functions and entry code must not be probed */
+	return addr >= (unsigned long)__kprobes_text_start &&
+	       addr < (unsigned long)__kprobes_text_end;
+}
+
 static int __kprobes in_kprobes_functions(unsigned long addr)
 {
 	struct kprobe_blackpoint *kb;
 
-	if (addr >= (unsigned long)__kprobes_text_start &&
-	    addr < (unsigned long)__kprobes_text_end)
+	if (arch_within_kprobe_blacklist(addr))
 		return -EINVAL;
 	/*
 	 * If there exists a kprobe_blacklist, verify and
-- 
cgit v1.2.3-71-gd317


From 376e242429bf8539ef39a080ac113c8799840b13 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Thu, 17 Apr 2014 17:17:05 +0900
Subject: kprobes: Introduce NOKPROBE_SYMBOL() macro to maintain kprobes
 blacklist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce NOKPROBE_SYMBOL() macro which builds a kprobes
blacklist at kernel build time.

The usage of this macro is similar to EXPORT_SYMBOL(),
placed after the function definition:

  NOKPROBE_SYMBOL(function);

Since this macro will inhibit inlining of static/inline
functions, this patch also introduces a nokprobe_inline macro
for static/inline functions. In this case, we must use
NOKPROBE_SYMBOL() for the inline function caller.

When CONFIG_KPROBES=y, the macro stores the given function
address in the "_kprobe_blacklist" section.

Since the data structures are not fully initialized by the
macro (because there is no "size" information),  those
are re-initialized at boot time by using kallsyms.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Link: http://lkml.kernel.org/r/20140417081705.26341.96719.stgit@ltc230.yrl.intra.hitachi.co.jp
Cc: Alok Kataria <akataria@vmware.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christopher Li <sparse@chrisli.org>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jan-Simon Möller <dl9pf@gmx.de>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-sparse@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/kprobes.txt         |  16 +++++-
 arch/x86/include/asm/asm.h        |   7 +++
 arch/x86/kernel/paravirt.c        |   4 ++
 include/asm-generic/vmlinux.lds.h |   9 ++++
 include/linux/compiler.h          |   2 +
 include/linux/kprobes.h           |  20 ++++++--
 kernel/kprobes.c                  | 100 ++++++++++++++++++++------------------
 kernel/sched/core.c               |   1 +
 8 files changed, 107 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 0cfb00fd86ff..4bbeca8483ed 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -22,8 +22,9 @@ Appendix B: The kprobes sysctl interface
 
 Kprobes enables you to dynamically break into any kernel routine and
 collect debugging and performance information non-disruptively. You
-can trap at almost any kernel code address, specifying a handler
+can trap at almost any kernel code address(*), specifying a handler
 routine to be invoked when the breakpoint is hit.
+(*: some parts of the kernel code can not be trapped, see 1.5 Blacklist)
 
 There are currently three types of probes: kprobes, jprobes, and
 kretprobes (also called return probes).  A kprobe can be inserted
@@ -273,6 +274,19 @@ using one of the following techniques:
  or
 - Execute 'sysctl -w debug.kprobes_optimization=n'
 
+1.5 Blacklist
+
+Kprobes can probe most of the kernel except itself. This means
+that there are some functions where kprobes cannot probe. Probing
+(trapping) such functions can cause a recursive trap (e.g. double
+fault) or the nested probe handler may never be called.
+Kprobes manages such functions as a blacklist.
+If you want to add a function into the blacklist, you just need
+to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro
+to specify a blacklisted function.
+Kprobes checks the given probe address against the blacklist and
+rejects registering it, if the given address is in the blacklist.
+
 2. Architectures Supported
 
 Kprobes, jprobes, and return probes are implemented on the following
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 4582e8e1cd1a..7730c1c5c83a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -57,6 +57,12 @@
 	.long (from) - . ;					\
 	.long (to) - . + 0x7ffffff0 ;				\
 	.popsection
+
+# define _ASM_NOKPROBE(entry)					\
+	.pushsection "_kprobe_blacklist","aw" ;			\
+	_ASM_ALIGN ;						\
+	_ASM_PTR (entry);					\
+	.popsection
 #else
 # define _ASM_EXTABLE(from,to)					\
 	" .pushsection \"__ex_table\",\"a\"\n"			\
@@ -71,6 +77,7 @@
 	" .long (" #from ") - .\n"				\
 	" .long (" #to ") - . + 0x7ffffff0\n"			\
 	" .popsection\n"
+/* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
 #endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1b10af835c31..e136869ae42e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -23,6 +23,7 @@
 #include <linux/efi.h>
 #include <linux/bcd.h>
 #include <linux/highmem.h>
+#include <linux/kprobes.h>
 
 #include <asm/bug.h>
 #include <asm/paravirt.h>
@@ -389,6 +390,9 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
 	.end_context_switch = paravirt_nop,
 };
 
+/* At this point, native_get_debugreg has a real function entry */
+NOKPROBE_SYMBOL(native_get_debugreg);
+
 struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.startup_ipi_hook = paravirt_nop,
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 146e4fffd710..40ceb3ceba79 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -109,6 +109,14 @@
 #define BRANCH_PROFILE()
 #endif
 
+#ifdef CONFIG_KPROBES
+#define KPROBE_BLACKLIST()	VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \
+				*(_kprobe_blacklist)			      \
+				VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .;
+#else
+#define KPROBE_BLACKLIST()
+#endif
+
 #ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()	. = ALIGN(8);					\
 			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
@@ -507,6 +515,7 @@
 	*(.init.rodata)							\
 	FTRACE_EVENTS()							\
 	TRACE_SYSCALLS()						\
+	KPROBE_BLACKLIST()						\
 	MEM_DISCARD(init.rodata)					\
 	CLK_OF_TABLES()							\
 	RESERVEDMEM_OF_TABLES()						\
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ee7239ea1583..0300c0f5c88b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -374,7 +374,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
 #ifdef CONFIG_KPROBES
 # define __kprobes	__attribute__((__section__(".kprobes.text")))
+# define nokprobe_inline	__always_inline
 #else
 # define __kprobes
+# define nokprobe_inline	inline
 #endif
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index cdf9251f8249..e059507c465d 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -205,10 +205,10 @@ struct kretprobe_blackpoint {
 	void *addr;
 };
 
-struct kprobe_blackpoint {
-	const char *name;
+struct kprobe_blacklist_entry {
+	struct list_head list;
 	unsigned long start_addr;
-	unsigned long range;
+	unsigned long end_addr;
 };
 
 #ifdef CONFIG_KPROBES
@@ -477,4 +477,18 @@ static inline int enable_jprobe(struct jprobe *jp)
 	return enable_kprobe(&jp->kp);
 }
 
+#ifdef CONFIG_KPROBES
+/*
+ * Blacklist ganerating macro. Specify functions which is not probed
+ * by using this macro.
+ */
+#define __NOKPROBE_SYMBOL(fname)			\
+static unsigned long __used				\
+	__attribute__((section("_kprobe_blacklist")))	\
+	_kbl_addr_##fname = (unsigned long)fname;
+#define NOKPROBE_SYMBOL(fname)	__NOKPROBE_SYMBOL(fname)
+#else
+#define NOKPROBE_SYMBOL(fname)
+#endif
+
 #endif /* _LINUX_KPROBES_H */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 5b5ac76671e7..5ffc6875d2a7 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -86,18 +86,8 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
 	return &(kretprobe_table_locks[hash].lock);
 }
 
-/*
- * Normally, functions that we'd want to prohibit kprobes in, are marked
- * __kprobes. But, there are cases where such functions already belong to
- * a different section (__sched for preempt_schedule)
- *
- * For such cases, we now have a blacklist
- */
-static struct kprobe_blackpoint kprobe_blacklist[] = {
-	{"preempt_schedule",},
-	{"native_get_debugreg",},
-	{NULL}    /* Terminator */
-};
+/* Blacklist -- list of struct kprobe_blacklist_entry */
+static LIST_HEAD(kprobe_blacklist);
 
 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
 /*
@@ -1328,24 +1318,22 @@ bool __weak arch_within_kprobe_blacklist(unsigned long addr)
 	       addr < (unsigned long)__kprobes_text_end;
 }
 
-static int __kprobes in_kprobes_functions(unsigned long addr)
+static bool __kprobes within_kprobe_blacklist(unsigned long addr)
 {
-	struct kprobe_blackpoint *kb;
+	struct kprobe_blacklist_entry *ent;
 
 	if (arch_within_kprobe_blacklist(addr))
-		return -EINVAL;
+		return true;
 	/*
 	 * If there exists a kprobe_blacklist, verify and
 	 * fail any probe registration in the prohibited area
 	 */
-	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
-		if (kb->start_addr) {
-			if (addr >= kb->start_addr &&
-			    addr < (kb->start_addr + kb->range))
-				return -EINVAL;
-		}
+	list_for_each_entry(ent, &kprobe_blacklist, list) {
+		if (addr >= ent->start_addr && addr < ent->end_addr)
+			return true;
 	}
-	return 0;
+
+	return false;
 }
 
 /*
@@ -1436,7 +1424,7 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
 
 	/* Ensure it is not in reserved area nor out of text */
 	if (!kernel_text_address((unsigned long) p->addr) ||
-	    in_kprobes_functions((unsigned long) p->addr) ||
+	    within_kprobe_blacklist((unsigned long) p->addr) ||
 	    jump_label_text_reserved(p->addr, p->addr)) {
 		ret = -EINVAL;
 		goto out;
@@ -2022,6 +2010,38 @@ void __kprobes dump_kprobe(struct kprobe *kp)
 	       kp->symbol_name, kp->addr, kp->offset);
 }
 
+/*
+ * Lookup and populate the kprobe_blacklist.
+ *
+ * Unlike the kretprobe blacklist, we'll need to determine
+ * the range of addresses that belong to the said functions,
+ * since a kprobe need not necessarily be at the beginning
+ * of a function.
+ */
+static int __init populate_kprobe_blacklist(unsigned long *start,
+					     unsigned long *end)
+{
+	unsigned long *iter;
+	struct kprobe_blacklist_entry *ent;
+	unsigned long offset = 0, size = 0;
+
+	for (iter = start; iter < end; iter++) {
+		if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) {
+			pr_err("Failed to find blacklist %p\n", (void *)*iter);
+			continue;
+		}
+
+		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+		if (!ent)
+			return -ENOMEM;
+		ent->start_addr = *iter;
+		ent->end_addr = *iter + size;
+		INIT_LIST_HEAD(&ent->list);
+		list_add_tail(&ent->list, &kprobe_blacklist);
+	}
+	return 0;
+}
+
 /* Module notifier call back, checking kprobes on the module */
 static int __kprobes kprobes_module_callback(struct notifier_block *nb,
 					     unsigned long val, void *data)
@@ -2065,14 +2085,13 @@ static struct notifier_block kprobe_module_nb = {
 	.priority = 0
 };
 
+/* Markers of _kprobe_blacklist section */
+extern unsigned long __start_kprobe_blacklist[];
+extern unsigned long __stop_kprobe_blacklist[];
+
 static int __init init_kprobes(void)
 {
 	int i, err = 0;
-	unsigned long offset = 0, size = 0;
-	char *modname, namebuf[KSYM_NAME_LEN];
-	const char *symbol_name;
-	void *addr;
-	struct kprobe_blackpoint *kb;
 
 	/* FIXME allocate the probe table, currently defined statically */
 	/* initialize all list heads */
@@ -2082,26 +2101,11 @@ static int __init init_kprobes(void)
 		raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
 	}
 
-	/*
-	 * Lookup and populate the kprobe_blacklist.
-	 *
-	 * Unlike the kretprobe blacklist, we'll need to determine
-	 * the range of addresses that belong to the said functions,
-	 * since a kprobe need not necessarily be at the beginning
-	 * of a function.
-	 */
-	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
-		kprobe_lookup_name(kb->name, addr);
-		if (!addr)
-			continue;
-
-		kb->start_addr = (unsigned long)addr;
-		symbol_name = kallsyms_lookup(kb->start_addr,
-				&size, &offset, &modname, namebuf);
-		if (!symbol_name)
-			kb->range = 0;
-		else
-			kb->range = size;
+	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
+					__stop_kprobe_blacklist);
+	if (err) {
+		pr_err("kprobes: failed to populate blacklist: %d\n", err);
+		pr_err("Please take care of using kprobes.\n");
 	}
 
 	if (kretprobe_blacklist_size) {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 268a45ea238c..6863631e8cd0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2804,6 +2804,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
 		barrier();
 	} while (need_resched());
 }
+NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
 #endif /* CONFIG_PREEMPT */
 
-- 
cgit v1.2.3-71-gd317


From f5efc696cc711021cc73e7543cc3038e58459707 Mon Sep 17 00:00:00 2001
From: Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>
Date: Mon, 14 Apr 2014 15:41:28 +0300
Subject: netfilter: nf_tables: Add meta expression key for bridge interface
 name

NFT_META_BRI_IIFNAME to get packet input bridge interface name
NFT_META_BRI_OIFNAME to get packet output bridge interface name

Such meta key are accessible only through NFPROTO_BRIDGE family, on a
dedicated nft meta module: nft_meta_bridge.

Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |   4 +
 net/bridge/Makefile                      |   2 +-
 net/bridge/netfilter/Kconfig             |  14 +++-
 net/bridge/netfilter/Makefile            |   1 +
 net/bridge/netfilter/nft_meta_bridge.c   | 139 +++++++++++++++++++++++++++++++
 5 files changed, 158 insertions(+), 2 deletions(-)
 create mode 100644 net/bridge/netfilter/nft_meta_bridge.c

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 160159274cab..7d6433f66bf8 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -563,6 +563,8 @@ enum nft_exthdr_attributes {
  * @NFT_META_SECMARK: packet secmark (skb->secmark)
  * @NFT_META_NFPROTO: netfilter protocol
  * @NFT_META_L4PROTO: layer 4 protocol number
+ * @NFT_META_BRI_IIFNAME: packet input bridge interface name
+ * @NFT_META_BRI_OIFNAME: packet output bridge interface name
  */
 enum nft_meta_keys {
 	NFT_META_LEN,
@@ -582,6 +584,8 @@ enum nft_meta_keys {
 	NFT_META_SECMARK,
 	NFT_META_NFPROTO,
 	NFT_META_L4PROTO,
+	NFT_META_BRI_IIFNAME,
+	NFT_META_BRI_OIFNAME,
 };
 
 /**
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index e85498b2f166..906a18b4e74a 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -16,4 +16,4 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o
 
 bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o
 
-obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/
+obj-$(CONFIG_BRIDGE_NETFILTER) += netfilter/
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 5ca74a0e595f..3baf29d34e62 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -2,13 +2,25 @@
 # Bridge netfilter configuration
 #
 #
-config NF_TABLES_BRIDGE
+menuconfig NF_TABLES_BRIDGE
 	depends on NF_TABLES
+	select BRIDGE_NETFILTER
 	tristate "Ethernet Bridge nf_tables support"
 
+if NF_TABLES_BRIDGE
+
+config NFT_BRIDGE_META
+	tristate "Netfilter nf_table bridge meta support"
+	depends on NFT_META
+	help
+	  Add support for bridge dedicated meta key.
+
+endif # NF_TABLES_BRIDGE
+
 menuconfig BRIDGE_NF_EBTABLES
 	tristate "Ethernet Bridge tables (ebtables) support"
 	depends on BRIDGE && NETFILTER
+	select BRIDGE_NETFILTER
 	select NETFILTER_XTABLES
 	help
 	  ebtables is a general, extensible frame/packet identification
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index ea7629f58b3d..6f2f3943d66f 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -3,6 +3,7 @@
 #
 
 obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
+obj-$(CONFIG_NFT_BRIDGE_META)  += nft_meta_bridge.o
 
 obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
 
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
new file mode 100644
index 000000000000..4f02109d708f
--- /dev/null
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_meta.h>
+
+#include "../br_private.h"
+
+static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
+				     struct nft_data data[NFT_REG_MAX + 1],
+				     const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	const struct net_device *in = pkt->in, *out = pkt->out;
+	struct nft_data *dest = &data[priv->dreg];
+	const struct net_bridge_port *p;
+
+	switch (priv->key) {
+	case NFT_META_BRI_IIFNAME:
+		if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
+			goto err;
+		break;
+	case NFT_META_BRI_OIFNAME:
+		if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
+			goto err;
+		break;
+	default:
+		goto out;
+	}
+
+	strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data));
+	return;
+out:
+	return nft_meta_get_eval(expr, data, pkt);
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
+				    const struct nft_expr *expr,
+				    const struct nlattr * const tb[])
+{
+	struct nft_meta *priv = nft_expr_priv(expr);
+	int err;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+	switch (priv->key) {
+	case NFT_META_BRI_IIFNAME:
+	case NFT_META_BRI_OIFNAME:
+		break;
+	default:
+		return nft_meta_get_init(ctx, expr, tb);
+	}
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static struct nft_expr_type nft_meta_bridge_type;
+static const struct nft_expr_ops nft_meta_bridge_get_ops = {
+	.type		= &nft_meta_bridge_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.eval		= nft_meta_bridge_get_eval,
+	.init		= nft_meta_bridge_get_init,
+	.dump		= nft_meta_get_dump,
+};
+
+static const struct nft_expr_ops nft_meta_bridge_set_ops = {
+	.type		= &nft_meta_bridge_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.eval		= nft_meta_set_eval,
+	.init		= nft_meta_set_init,
+	.dump		= nft_meta_set_dump,
+};
+
+static const struct nft_expr_ops *
+nft_meta_bridge_select_ops(const struct nft_ctx *ctx,
+			   const struct nlattr * const tb[])
+{
+	if (tb[NFTA_META_KEY] == NULL)
+		return ERR_PTR(-EINVAL);
+
+	if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
+		return ERR_PTR(-EINVAL);
+
+	if (tb[NFTA_META_DREG])
+		return &nft_meta_bridge_get_ops;
+
+	if (tb[NFTA_META_SREG])
+		return &nft_meta_bridge_set_ops;
+
+	return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_meta_bridge_type __read_mostly = {
+	.family         = NFPROTO_BRIDGE,
+	.name           = "meta",
+	.select_ops     = &nft_meta_bridge_select_ops,
+	.policy         = nft_meta_policy,
+	.maxattr        = NFTA_META_MAX,
+	.owner          = THIS_MODULE,
+};
+
+static int __init nft_meta_bridge_module_init(void)
+{
+	return nft_register_expr(&nft_meta_bridge_type);
+}
+
+static void __exit nft_meta_bridge_module_exit(void)
+{
+	nft_unregister_expr(&nft_meta_bridge_type);
+}
+
+module_init(nft_meta_bridge_module_init);
+module_exit(nft_meta_bridge_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_BRIDGE, "meta");
-- 
cgit v1.2.3-71-gd317


From 7e65eac8e36f3f4e2553e83249e3d9bdf055456d Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@suse.com>
Date: Tue, 22 Apr 2014 12:03:58 -0700
Subject: 6lowpan: nuke net_ieee802154_lowpan() accessor when 6lowpan is
 disabled

Johannes noted this is not needed, all of the fragment
accessors don't need CONFIG_NET_NS. This goes test compiled with
CONFIG_BT_6LOWPAN=y and a disabled CONFIG_NET_NS.

CC: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: linux-zigbee-devel@lists.sourceforge.net
Cc: David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index bc4118ede5b5..361d26077196 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -379,15 +379,8 @@ net_ieee802154_lowpan(struct net *net)
 {
 	return &net->ieee802154_lowpan;
 }
-#else
-static inline struct netns_ieee802154_lowpan *
-net_ieee802154_lowpan(struct net *net)
-{
-	return NULL;
-}
 #endif
 
-
 /* For callers who don't really care about whether it's IPv4 or IPv6 */
 static inline void rt_genid_bump_all(struct net *net)
 {
-- 
cgit v1.2.3-71-gd317


From f01ec1c017dead42092997a2b8684fcab4cbf126 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 24 Apr 2014 10:02:49 +0200
Subject: vxlan: add x-netns support

This patch allows to switch the netns when packet is encapsulated or
decapsulated.
The vxlan socket is openned into the i/o netns, ie into the netns where
encapsulated packets are received. The socket lookup is done into this netns to
find the corresponding vxlan tunnel. After decapsulation, the packet is
injecting into the corresponding interface which may stand to another netns.

When one of the two netns is removed, the tunnel is destroyed.

Configuration example:
ip netns add netns1
ip netns exec netns1 ip link set lo up
ip link add vxlan10 type vxlan id 10 group 239.0.0.10 dev eth0 dstport 0
ip link set vxlan10 netns netns1
ip netns exec netns1 ip addr add 192.168.0.249/24 broadcast 192.168.0.255 dev vxlan10
ip netns exec netns1 ip link set vxlan10 up

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c           | 63 ++++++++++++++++++++++++++++++++-----------
 include/net/vxlan.h           |  2 +-
 net/openvswitch/vport-vxlan.c |  3 ++-
 3 files changed, 50 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 4dbb2ed85b97..1dfee9a7fbf7 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -127,6 +127,7 @@ struct vxlan_dev {
 	struct list_head  next;		/* vxlan's per namespace list */
 	struct vxlan_sock *vn_sock;	/* listening socket */
 	struct net_device *dev;
+	struct net	  *net;		/* netns for packet i/o */
 	struct vxlan_rdst default_dst;	/* default destination */
 	union vxlan_addr  saddr;	/* source address */
 	__be16		  dst_port;
@@ -1203,6 +1204,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
 
 	remote_ip = &vxlan->default_dst.remote_ip;
 	skb_reset_mac_header(skb);
+	skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
 	skb->protocol = eth_type_trans(skb, vxlan->dev);
 
 	/* Ignore packet loops (and multicast echo) */
@@ -1618,7 +1620,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 			   struct dst_entry *dst, struct sk_buff *skb,
 			   struct net_device *dev, struct in6_addr *saddr,
 			   struct in6_addr *daddr, __u8 prio, __u8 ttl,
-			   __be16 src_port, __be16 dst_port, __be32 vni)
+			   __be16 src_port, __be16 dst_port, __be32 vni,
+			   bool xnet)
 {
 	struct ipv6hdr *ip6h;
 	struct vxlanhdr *vxh;
@@ -1631,7 +1634,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 		skb->encapsulation = 1;
 	}
 
-	skb_scrub_packet(skb, false);
+	skb_scrub_packet(skb, xnet);
 
 	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
 			+ VXLAN_HLEN + sizeof(struct ipv6hdr)
@@ -1711,7 +1714,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni)
+		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
 {
 	struct vxlanhdr *vxh;
 	struct udphdr *uh;
@@ -1760,7 +1763,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 		return err;
 
 	return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
-			     tos, ttl, df, false);
+			     tos, ttl, df, xnet);
 }
 EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
 
@@ -1853,7 +1856,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		fl4.daddr = dst->sin.sin_addr.s_addr;
 		fl4.saddr = vxlan->saddr.sin.sin_addr.s_addr;
 
-		rt = ip_route_output_key(dev_net(dev), &fl4);
+		rt = ip_route_output_key(vxlan->net, &fl4);
 		if (IS_ERR(rt)) {
 			netdev_dbg(dev, "no route to %pI4\n",
 				   &dst->sin.sin_addr.s_addr);
@@ -1874,7 +1877,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 			struct vxlan_dev *dst_vxlan;
 
 			ip_rt_put(rt);
-			dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port);
+			dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
 			if (!dst_vxlan)
 				goto tx_error;
 			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1887,7 +1890,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
 				     fl4.saddr, dst->sin.sin_addr.s_addr,
 				     tos, ttl, df, src_port, dst_port,
-				     htonl(vni << 8));
+				     htonl(vni << 8),
+				     !net_eq(vxlan->net, dev_net(vxlan->dev)));
 
 		if (err < 0)
 			goto rt_tx_error;
@@ -1927,7 +1931,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 			struct vxlan_dev *dst_vxlan;
 
 			dst_release(ndst);
-			dst_vxlan = vxlan_find_vni(dev_net(dev), vni, dst_port);
+			dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
 			if (!dst_vxlan)
 				goto tx_error;
 			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1938,7 +1942,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 		err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
 				      dev, &fl6.saddr, &fl6.daddr, 0, ttl,
-				      src_port, dst_port, htonl(vni << 8));
+				      src_port, dst_port, htonl(vni << 8),
+				      !net_eq(vxlan->net, dev_net(vxlan->dev)));
 #endif
 	}
 
@@ -2082,7 +2087,7 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
 static int vxlan_init(struct net_device *dev)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
-	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
+	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 	struct vxlan_sock *vs;
 
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
@@ -2090,7 +2095,7 @@ static int vxlan_init(struct net_device *dev)
 		return -ENOMEM;
 
 	spin_lock(&vn->sock_lock);
-	vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port);
+	vs = vxlan_find_sock(vxlan->net, vxlan->dst_port);
 	if (vs) {
 		/* If we have a socket with same port already, reuse it */
 		atomic_inc(&vs->refcnt);
@@ -2172,8 +2177,8 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
 /* Cleanup timer and forwarding table on shutdown */
 static int vxlan_stop(struct net_device *dev)
 {
-	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 	struct vxlan_sock *vs = vxlan->vn_sock;
 
 	if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
@@ -2202,7 +2207,7 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
 	struct net_device *lowerdev;
 	int max_mtu;
 
-	lowerdev = __dev_get_by_index(dev_net(dev), dst->remote_ifindex);
+	lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex);
 	if (lowerdev == NULL)
 		return eth_change_mtu(dev, new_mtu);
 
@@ -2285,7 +2290,6 @@ static void vxlan_setup(struct net_device *dev)
 
 	dev->tx_queue_len = 0;
 	dev->features	|= NETIF_F_LLTX;
-	dev->features	|= NETIF_F_NETNS_LOCAL;
 	dev->features	|= NETIF_F_SG | NETIF_F_HW_CSUM;
 	dev->features   |= NETIF_F_RXCSUM;
 	dev->features   |= NETIF_F_GSO_SOFTWARE;
@@ -2578,7 +2582,7 @@ EXPORT_SYMBOL_GPL(vxlan_sock_add);
 static void vxlan_sock_work(struct work_struct *work)
 {
 	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work);
-	struct net *net = dev_net(vxlan->dev);
+	struct net *net = vxlan->net;
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	__be16 port = vxlan->dst_port;
 	struct vxlan_sock *nvs;
@@ -2605,6 +2609,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 	if (!data[IFLA_VXLAN_ID])
 		return -EINVAL;
 
+	vxlan->net = dev_net(dev);
+
 	vni = nla_get_u32(data[IFLA_VXLAN_ID]);
 	dst->remote_vni = vni;
 
@@ -2739,8 +2745,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 
 static void vxlan_dellink(struct net_device *dev, struct list_head *head)
 {
-	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
 
 	spin_lock(&vn->sock_lock);
 	if (!hlist_unhashed(&vxlan->hlist))
@@ -2905,8 +2911,33 @@ static __net_init int vxlan_init_net(struct net *net)
 	return 0;
 }
 
+static void __net_exit vxlan_exit_net(struct net *net)
+{
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct vxlan_dev *vxlan, *next;
+	struct net_device *dev, *aux;
+	LIST_HEAD(list);
+
+	rtnl_lock();
+	for_each_netdev_safe(net, dev, aux)
+		if (dev->rtnl_link_ops == &vxlan_link_ops)
+			unregister_netdevice_queue(dev, &list);
+
+	list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
+		/* If vxlan->dev is in the same netns, it has already been added
+		 * to the list by the previous loop.
+		 */
+		if (!net_eq(dev_net(vxlan->dev), net))
+			unregister_netdevice_queue(dev, &list);
+	}
+
+	unregister_netdevice_many(&list);
+	rtnl_unlock();
+}
+
 static struct pernet_operations vxlan_net_ops = {
 	.init = vxlan_init_net,
+	.exit = vxlan_exit_net,
 	.id   = &vxlan_net_id,
 	.size = sizeof(struct vxlan_net),
 };
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 5deef1ae78c9..7bb4084b1bd0 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -33,7 +33,7 @@ void vxlan_sock_release(struct vxlan_sock *vs);
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni);
+		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
 
 __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
 
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index e797a50ac2be..21cceb3bdf78 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -180,7 +180,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 			     OVS_CB(skb)->tun_key->ipv4_tos,
 			     OVS_CB(skb)->tun_key->ipv4_ttl, df,
 			     src_port, dst_port,
-			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
+			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8),
+			     false);
 	if (err < 0)
 		ip_rt_put(rt);
 error:
-- 
cgit v1.2.3-71-gd317


From 65a124dd719d6e90591e4756bb04e1719489705e Mon Sep 17 00:00:00 2001
From: Michal Kazior <michal.kazior@tieto.com>
Date: Wed, 9 Apr 2014 15:29:22 +0200
Subject: cfg80211: allow drivers to iterate over matching combinations

The patch splits cfg80211_check_combinations()
into an iterator function and a simple iteration
user.

This makes it possible for drivers to asses how
many channels can use given iftype setup. This in
turn can be used for future
multi-interface/multi-channel channel switching.

Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 27 +++++++++++++++++++++++++++
 net/wireless/util.c    | 44 +++++++++++++++++++++++++++++++++++++-------
 2 files changed, 64 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9496fe5ea6b4..3dd2cb465540 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4715,6 +4715,33 @@ int cfg80211_check_combinations(struct wiphy *wiphy,
 				const u8 radar_detect,
 				const int iftype_num[NUM_NL80211_IFTYPES]);
 
+/**
+ * cfg80211_iter_combinations - iterate over matching combinations
+ *
+ * @wiphy: the wiphy
+ * @num_different_channels: the number of different channels we want
+ *	to use for verification
+ * @radar_detect: a bitmap where each bit corresponds to a channel
+ *	width where radar detection is needed, as in the definition of
+ *	&struct ieee80211_iface_combination.@radar_detect_widths
+ * @iftype_num: array with the numbers of interfaces of each interface
+ *	type.  The index is the interface type as specified in &enum
+ *	nl80211_iftype.
+ * @iter: function to call for each matching combination
+ * @data: pointer to pass to iter function
+ *
+ * This function can be called by the driver to check what possible
+ * combinations it fits in at a given moment, e.g. for channel switching
+ * purposes.
+ */
+int cfg80211_iter_combinations(struct wiphy *wiphy,
+			       const int num_different_channels,
+			       const u8 radar_detect,
+			       const int iftype_num[NUM_NL80211_IFTYPES],
+			       void (*iter)(const struct ieee80211_iface_combination *c,
+					    void *data),
+			       void *data);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/util.c b/net/wireless/util.c
index d032a31828f1..d8b599575a5e 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1263,10 +1263,13 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
 	return res;
 }
 
-int cfg80211_check_combinations(struct wiphy *wiphy,
-				const int num_different_channels,
-				const u8 radar_detect,
-				const int iftype_num[NUM_NL80211_IFTYPES])
+int cfg80211_iter_combinations(struct wiphy *wiphy,
+			       const int num_different_channels,
+			       const u8 radar_detect,
+			       const int iftype_num[NUM_NL80211_IFTYPES],
+			       void (*iter)(const struct ieee80211_iface_combination *c,
+					    void *data),
+			       void *data)
 {
 	int i, j, iftype;
 	int num_interfaces = 0;
@@ -1323,13 +1326,40 @@ int cfg80211_check_combinations(struct wiphy *wiphy,
 		/* This combination covered all interface types and
 		 * supported the requested numbers, so we're good.
 		 */
-		kfree(limits);
-		return 0;
+
+		(*iter)(c, data);
  cont:
 		kfree(limits);
 	}
 
-	return -EBUSY;
+	return 0;
+}
+EXPORT_SYMBOL(cfg80211_iter_combinations);
+
+static void
+cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c,
+			  void *data)
+{
+	int *num = data;
+	(*num)++;
+}
+
+int cfg80211_check_combinations(struct wiphy *wiphy,
+				const int num_different_channels,
+				const u8 radar_detect,
+				const int iftype_num[NUM_NL80211_IFTYPES])
+{
+	int err, num = 0;
+
+	err = cfg80211_iter_combinations(wiphy, num_different_channels,
+					 radar_detect, iftype_num,
+					 cfg80211_iter_sum_ifcombs, &num);
+	if (err)
+		return err;
+	if (num == 0)
+		return -EBUSY;
+
+	return 0;
 }
 EXPORT_SYMBOL(cfg80211_check_combinations);
 
-- 
cgit v1.2.3-71-gd317


From 17d38fa8c20a9c3ec76943da46264ce657ac56d0 Mon Sep 17 00:00:00 2001
From: Marek Kwaczynski <marek.kwaczynski@tieto.com>
Date: Mon, 14 Apr 2014 11:27:21 +0200
Subject: mac80211: add option to generate CCMP IVs only for mgmt frames

Some chips can encrypt managment frames in HW, but
require generated IV in the frame. Add a key flag
that allows us to achieve this.

Signed-off-by: Marek Kwaczynski <marek.kwaczynski@tieto.com>
[use BIT(0) to fill that spot, fix indentation]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 16 ++++++++++------
 net/mac80211/wpa.c     |  5 ++++-
 2 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a3044e124229..451c1bf00df9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1202,14 +1202,18 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev);
  *	fall back to software crypto. Note that this flag deals only with
  *	RX, if your crypto engine can't deal with TX you can also set the
  *	%IEEE80211_KEY_FLAG_SW_MGMT_TX flag to encrypt such frames in SW.
+ * @IEEE80211_KEY_FLAG_GENERATE_IV_MGMT: This flag should be set by the
+ *	driver for a CCMP key to indicate that is requires IV generation
+ *	only for managment frames (MFP).
  */
 enum ieee80211_key_flags {
-	IEEE80211_KEY_FLAG_GENERATE_IV	= 1<<1,
-	IEEE80211_KEY_FLAG_GENERATE_MMIC= 1<<2,
-	IEEE80211_KEY_FLAG_PAIRWISE	= 1<<3,
-	IEEE80211_KEY_FLAG_SW_MGMT_TX	= 1<<4,
-	IEEE80211_KEY_FLAG_PUT_IV_SPACE = 1<<5,
-	IEEE80211_KEY_FLAG_RX_MGMT	= 1<<6,
+	IEEE80211_KEY_FLAG_GENERATE_IV_MGMT	= BIT(0),
+	IEEE80211_KEY_FLAG_GENERATE_IV		= BIT(1),
+	IEEE80211_KEY_FLAG_GENERATE_MMIC	= BIT(2),
+	IEEE80211_KEY_FLAG_PAIRWISE		= BIT(3),
+	IEEE80211_KEY_FLAG_SW_MGMT_TX		= BIT(4),
+	IEEE80211_KEY_FLAG_PUT_IV_SPACE		= BIT(5),
+	IEEE80211_KEY_FLAG_RX_MGMT		= BIT(6),
 };
 
 /**
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index b8600e3c29c8..9b3dcc201145 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -406,7 +406,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 
 	if (info->control.hw_key &&
 	    !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) &&
-	    !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) {
+	    !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) &&
+	    !((info->control.hw_key->flags &
+	       IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) &&
+	      ieee80211_is_mgmt(hdr->frame_control))) {
 		/*
 		 * hwaccel has no need for preallocated room for CCMP
 		 * header or MIC fields
-- 
cgit v1.2.3-71-gd317


From ea077c1cea36a6b5ded1256dcd56c72ff2a22c62 Mon Sep 17 00:00:00 2001
From: Rostislav Lisovy <lisovy@gmail.com>
Date: Tue, 15 Apr 2014 14:37:55 +0200
Subject: cfg80211: Add attributes describing prohibited channel bandwidth

Since there are frequency bands (e.g. 5.9GHz) allowing channels
with only 10 or 5 MHz bandwidth, this patch adds attributes that
allow keeping track about this information.

When channel attributes are reported to user-space, make sure to
not break old tools, i.e. if the 'split wiphy dump' is enabled,
report the extra attributes (if present) describing the bandwidth
restrictions.  If the 'split wiphy dump' is not enabled,
completely omit those channels that have flags set to either
IEEE80211_CHAN_NO_10MHZ or IEEE80211_CHAN_NO_20MHZ.

Add the check for new bandwidth restriction flags in
cfg80211_chandef_usable() to comply with the restrictions.

Signed-off-by: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  6 ++++++
 include/uapi/linux/nl80211.h |  6 ++++++
 net/wireless/chan.c          |  2 ++
 net/wireless/nl80211.c       | 13 +++++++++++++
 4 files changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 3dd2cb465540..c98cf08538b9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -111,6 +111,10 @@ enum ieee80211_band {
  *	restrictions.
  * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY
  * @IEEE80211_CHAN_GO_CONCURRENT: see %NL80211_FREQUENCY_ATTR_GO_CONCURRENT
+ * @IEEE80211_CHAN_NO_20MHZ: 20 MHz bandwidth is not permitted
+ *	on this channel.
+ * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted
+ *	on this channel.
  *
  */
 enum ieee80211_channel_flags {
@@ -125,6 +129,8 @@ enum ieee80211_channel_flags {
 	IEEE80211_CHAN_NO_160MHZ	= 1<<8,
 	IEEE80211_CHAN_INDOOR_ONLY	= 1<<9,
 	IEEE80211_CHAN_GO_CONCURRENT	= 1<<10,
+	IEEE80211_CHAN_NO_20MHZ		= 1<<11,
+	IEEE80211_CHAN_NO_10MHZ		= 1<<12,
 };
 
 #define IEEE80211_CHAN_NO_HT40 \
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 513bfd7b2e5f..0592032ff160 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2358,6 +2358,10 @@ enum nl80211_band_attr {
  *	connected to an AP with DFS and radar detection on the UNII band (it is
  *	up to user-space, i.e., wpa_supplicant to perform the required
  *	verifications)
+ * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed
+ *	on this channel in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed
+ *	on this channel in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
  *	currently defined
  * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
@@ -2384,6 +2388,8 @@ enum nl80211_frequency_attr {
 	NL80211_FREQUENCY_ATTR_DFS_CAC_TIME,
 	NL80211_FREQUENCY_ATTR_INDOOR_ONLY,
 	NL80211_FREQUENCY_ATTR_GO_CONCURRENT,
+	NL80211_FREQUENCY_ATTR_NO_20MHZ,
+	NL80211_FREQUENCY_ATTR_NO_10MHZ,
 
 	/* keep last */
 	__NL80211_FREQUENCY_ATTR_AFTER_LAST,
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 2adf7b2eccbc..84d686e2dbd0 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -616,12 +616,14 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 		width = 5;
 		break;
 	case NL80211_CHAN_WIDTH_10:
+		prohibited_flags |= IEEE80211_CHAN_NO_10MHZ;
 		width = 10;
 		break;
 	case NL80211_CHAN_WIDTH_20:
 		if (!ht_cap->ht_supported)
 			return false;
 	case NL80211_CHAN_WIDTH_20_NOHT:
+		prohibited_flags |= IEEE80211_CHAN_NO_20MHZ;
 		width = 20;
 		break;
 	case NL80211_CHAN_WIDTH_40:
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fce423a4e96a..ca75f60041d2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -567,6 +567,13 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
 				   struct ieee80211_channel *chan,
 				   bool large)
 {
+	/* Some channels must be completely excluded from the
+	 * list to protect old user-space tools from breaking
+	 */
+	if (!large && chan->flags &
+	    (IEEE80211_CHAN_NO_10MHZ | IEEE80211_CHAN_NO_20MHZ))
+		return 0;
+
 	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ,
 			chan->center_freq))
 		goto nla_put_failure;
@@ -620,6 +627,12 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
 		if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) &&
 		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT))
 			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_NO_10MHZ) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_10MHZ))
+			goto nla_put_failure;
 	}
 
 	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
-- 
cgit v1.2.3-71-gd317


From 842b597ee0a7e1aa5a3148164ffdba00ec17f614 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 25 Apr 2014 18:28:02 -0400
Subject: cgroup: implement cgroup.populated for the default hierarchy

cgroup users often need a way to determine when a cgroup's
subhierarchy becomes empty so that it can be cleaned up.  cgroup
currently provides release_agent for it; unfortunately, this mechanism
is riddled with issues.

* It delivers events by forking and execing a userland binary
  specified as the release_agent.  This is a long deprecated method of
  notification delivery.  It's extremely heavy, slow and cumbersome to
  integrate with larger infrastructure.

* There is single monitoring point at the root.  There's no way to
  delegate management of a subtree.

* The event isn't recursive.  It triggers when a cgroup doesn't have
  any tasks or child cgroups.  Events for internal nodes trigger only
  after all children are removed.  This again makes it impossible to
  delegate management of a subtree.

* Events are filtered from the kernel side.  "notify_on_release" file
  is used to subscribe to or suppress release event.  This is
  unnecessarily complicated and probably done this way because event
  delivery itself was expensive.

This patch implements interface file "cgroup.populated" which can be
used to monitor whether the cgroup's subhierarchy has tasks in it or
not.  Its value is 0 if there is no task in the cgroup and its
descendants; otherwise, 1, and kernfs_notify() notificaiton is
triggers when the value changes, which can be monitored through poll
and [di]notify.

This is a lot ligther and simpler and trivially allows delegating
management of subhierarchy - subhierarchy monitoring can block further
propgation simply by putting itself or another process in the root of
the subhierarchy and monitor events that it's interested in from there
without interfering with monitoring higher in the tree.

v2: Patch description updated as per Serge.

v3: "cgroup.subtree_populated" renamed to "cgroup.populated".  The
    subtree_ prefix was a bit confusing because
    "cgroup.subtree_control" uses it to denote the tree rooted at the
    cgroup sans the cgroup itself while the populated state includes
    the cgroup itself.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Serge Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Lennart Poettering <lennart@poettering.net>
---
 include/linux/cgroup.h | 15 ++++++++++++
 kernel/cgroup.c        | 65 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 76 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ada239253ec7..4b38e2d6110d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -154,6 +154,14 @@ struct cgroup {
 	/* the number of attached css's */
 	int nr_css;
 
+	/*
+	 * If this cgroup contains any tasks, it contributes one to
+	 * populated_cnt.  All children with non-zero popuplated_cnt of
+	 * their own contribute one.  The count is zero iff there's no task
+	 * in this cgroup or its subtree.
+	 */
+	int populated_cnt;
+
 	atomic_t refcnt;
 
 	/*
@@ -166,6 +174,7 @@ struct cgroup {
 	struct cgroup *parent;		/* my parent */
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
 	struct kernfs_node *control_kn;	/* kn for "cgroup.subtree_control" */
+	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
 	/*
 	 * Monotonically increasing unique serial number which defines a
@@ -264,6 +273,12 @@ enum {
 	 *
 	 * - "cgroup.clone_children" is removed.
 	 *
+	 * - "cgroup.subtree_populated" is available.  Its value is 0 if
+	 *   the cgroup and its descendants contain no task; otherwise, 1.
+	 *   The file also generates kernfs notification which can be
+	 *   monitored through poll and [di]notify when the value of the
+	 *   file changes.
+	 *
 	 * - If mount is requested with sane_behavior but without any
 	 *   subsystem, the default unified hierarchy is mounted.
 	 *
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 809dd903ceb8..0f986f7afee4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -411,6 +411,43 @@ static struct css_set init_css_set = {
 
 static int css_set_count	= 1;	/* 1 for init_css_set */
 
+/**
+ * cgroup_update_populated - updated populated count of a cgroup
+ * @cgrp: the target cgroup
+ * @populated: inc or dec populated count
+ *
+ * @cgrp is either getting the first task (css_set) or losing the last.
+ * Update @cgrp->populated_cnt accordingly.  The count is propagated
+ * towards root so that a given cgroup's populated_cnt is zero iff the
+ * cgroup and all its descendants are empty.
+ *
+ * @cgrp's interface file "cgroup.populated" is zero if
+ * @cgrp->populated_cnt is zero and 1 otherwise.  When @cgrp->populated_cnt
+ * changes from or to zero, userland is notified that the content of the
+ * interface file has changed.  This can be used to detect when @cgrp and
+ * its descendants become populated or empty.
+ */
+static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
+{
+	lockdep_assert_held(&css_set_rwsem);
+
+	do {
+		bool trigger;
+
+		if (populated)
+			trigger = !cgrp->populated_cnt++;
+		else
+			trigger = !--cgrp->populated_cnt;
+
+		if (!trigger)
+			break;
+
+		if (cgrp->populated_kn)
+			kernfs_notify(cgrp->populated_kn);
+		cgrp = cgrp->parent;
+	} while (cgrp);
+}
+
 /*
  * hash table for cgroup groups. This improves the performance to find
  * an existing css_set. This hash doesn't (currently) take into
@@ -456,10 +493,13 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
 		list_del(&link->cgrp_link);
 
 		/* @cgrp can't go away while we're holding css_set_rwsem */
-		if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) {
-			if (taskexit)
-				set_bit(CGRP_RELEASABLE, &cgrp->flags);
-			check_for_release(cgrp);
+		if (list_empty(&cgrp->cset_links)) {
+			cgroup_update_populated(cgrp, false);
+			if (notify_on_release(cgrp)) {
+				if (taskexit)
+					set_bit(CGRP_RELEASABLE, &cgrp->flags);
+				check_for_release(cgrp);
+			}
 		}
 
 		kfree(link);
@@ -668,7 +708,11 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
 	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
 	link->cset = cset;
 	link->cgrp = cgrp;
+
+	if (list_empty(&cgrp->cset_links))
+		cgroup_update_populated(cgrp, true);
 	list_move(&link->cset_link, &cgrp->cset_links);
+
 	/*
 	 * Always add links to the tail of the list so that the list
 	 * is sorted by order of hierarchy creation
@@ -2643,6 +2687,12 @@ err_undo_css:
 	goto out_unlock;
 }
 
+static int cgroup_populated_show(struct seq_file *seq, void *v)
+{
+	seq_printf(seq, "%d\n", (bool)seq_css(seq)->cgroup->populated_cnt);
+	return 0;
+}
+
 static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 				 size_t nbytes, loff_t off)
 {
@@ -2809,6 +2859,8 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
 
 	if (cft->seq_show == cgroup_subtree_control_show)
 		cgrp->control_kn = kn;
+	else if (cft->seq_show == cgroup_populated_show)
+		cgrp->populated_kn = kn;
 	return 0;
 }
 
@@ -3918,6 +3970,11 @@ static struct cftype cgroup_base_files[] = {
 		.seq_show = cgroup_subtree_control_show,
 		.write_string = cgroup_subtree_control_write,
 	},
+	{
+		.name = "cgroup.populated",
+		.flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_populated_show,
+	},
 
 	/*
 	 * Historical crazy stuff.  These don't have "cgroup."  prefix and
-- 
cgit v1.2.3-71-gd317


From a89778d8baf19cd7e728d81121a294a06cedaad1 Mon Sep 17 00:00:00 2001
From: Erik Hugne <erik.hugne@ericsson.com>
Date: Thu, 24 Apr 2014 16:26:46 +0200
Subject: tipc: add support for link state subscriptions

When links are established over a bearer plane, we create a node
local publication containing information about the peer node and
bearer plane. This allows TIPC applications to use the standard
TIPC topology server subscription mechanism to get notifications
when a link goes up or down.

Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc.h | 1 +
 net/tipc/node.c           | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 852373d27dbb..53cd7902d34e 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -87,6 +87,7 @@ static inline unsigned int tipc_node(__u32 addr)
 
 #define TIPC_CFG_SRV		0	/* configuration service name type */
 #define TIPC_TOP_SRV		1	/* topology service name type */
+#define TIPC_LINK_STATE		2	/* link state name type */
 #define TIPC_RESERVED_TYPES	64	/* lowest user-publishable name type */
 
 /*
diff --git a/net/tipc/node.c b/net/tipc/node.c
index be90115cda1a..3b86a74cb31f 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -144,9 +144,11 @@ void tipc_node_stop(void)
 void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
 {
 	struct tipc_link **active = &n_ptr->active_links[0];
+	u32 addr = n_ptr->addr;
 
 	n_ptr->working_links++;
-
+	tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE,
+			     l_ptr->bearer_id, addr);
 	pr_info("Established link <%s> on network plane %c\n",
 		l_ptr->name, l_ptr->net_plane);
 
@@ -203,8 +205,10 @@ static void node_select_active_links(struct tipc_node *n_ptr)
 void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
 {
 	struct tipc_link **active;
+	u32 addr = n_ptr->addr;
 
 	n_ptr->working_links--;
+	tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr);
 
 	if (!tipc_link_is_active(l_ptr)) {
 		pr_info("Lost standby link <%s> on network plane %c\n",
-- 
cgit v1.2.3-71-gd317


From 78acb1f9b898e85fa2c1e28e700b54b66b288e8d Mon Sep 17 00:00:00 2001
From: Erik Hugne <erik.hugne@ericsson.com>
Date: Thu, 24 Apr 2014 16:26:47 +0200
Subject: tipc: add ioctl to fetch link names

We add a new ioctl for AF_TIPC that can be used to fetch the
logical name for a link to a remote node on a given bearer. This
should be used in combination with link state subscriptions.
The logical name size limit definitions are moved to tipc.h, as
they are now also needed by the new ioctl.

Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc.h        | 22 ++++++++++++++++++++++
 include/uapi/linux/tipc_config.h | 10 +---------
 net/tipc/node.c                  | 27 +++++++++++++++++++++++++++
 net/tipc/node.h                  |  1 +
 net/tipc/socket.c                | 29 ++++++++++++++++++++++++++---
 5 files changed, 77 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 53cd7902d34e..6f71b9b41595 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -38,6 +38,7 @@
 #define _LINUX_TIPC_H_
 
 #include <linux/types.h>
+#include <linux/sockios.h>
 
 /*
  * TIPC addressing primitives
@@ -207,4 +208,25 @@ struct sockaddr_tipc {
 #define TIPC_NODE_RECVQ_DEPTH	131	/* Default: none (read only) */
 #define TIPC_SOCK_RECVQ_DEPTH	132	/* Default: none (read only) */
 
+/*
+ * Maximum sizes of TIPC bearer-related names (including terminating NULL)
+ * The string formatting for each name element is:
+ * media: media
+ * interface: media:interface name
+ * link: Z.C.N:interface-Z.C.N:interface
+ *
+ */
+
+#define TIPC_MAX_MEDIA_NAME	16
+#define TIPC_MAX_IF_NAME	16
+#define TIPC_MAX_BEARER_NAME	32
+#define TIPC_MAX_LINK_NAME	60
+
+#define SIOCGETLINKNAME		SIOCPROTOPRIVATE
+
+struct tipc_sioc_ln_req {
+	__u32 peer;
+	__u32 bearer_id;
+	char linkname[TIPC_MAX_LINK_NAME];
+};
 #endif
diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index 6b0bff09b3a7..41a76acbb305 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -39,6 +39,7 @@
 
 #include <linux/types.h>
 #include <linux/string.h>
+#include <linux/tipc.h>
 #include <asm/byteorder.h>
 
 #ifndef __KERNEL__
@@ -154,15 +155,6 @@
 #define TIPC_TLV_NAME_TBL_QUERY	25	/* struct tipc_name_table_query */
 #define TIPC_TLV_PORT_REF	26	/* 32-bit port reference */
 
-/*
- * Maximum sizes of TIPC bearer-related names (including terminating NUL)
- */
-
-#define TIPC_MAX_MEDIA_NAME	16	/* format = media */
-#define TIPC_MAX_IF_NAME	16	/* format = interface */
-#define TIPC_MAX_BEARER_NAME	32	/* format = media:interface */
-#define TIPC_MAX_LINK_NAME	60	/* format = Z.C.N:interface-Z.C.N:interface */
-
 /*
  * Link priority limits (min, default, max, media default)
  */
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 3b86a74cb31f..1f938f3dba4b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -438,3 +438,30 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 	rcu_read_unlock();
 	return buf;
 }
+
+/**
+ * tipc_node_get_linkname - get the name of a link
+ *
+ * @bearer_id: id of the bearer
+ * @node: peer node address
+ * @linkname: link name output buffer
+ *
+ * Returns 0 on success
+ */
+int tipc_node_get_linkname(u32 bearer_id, u32 addr, char *linkname, size_t len)
+{
+	struct tipc_link *link;
+	struct tipc_node *node = tipc_node_find(addr);
+
+	if ((bearer_id > MAX_BEARERS) || !node)
+		return -EINVAL;
+	tipc_node_lock(node);
+	link = node->links[bearer_id];
+	if (link) {
+		strncpy(linkname, link->name, len);
+		tipc_node_unlock(node);
+		return 0;
+	}
+	tipc_node_unlock(node);
+	return -EINVAL;
+}
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 7cbb8cec1a93..411b19114064 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -118,6 +118,7 @@ int tipc_node_active_links(struct tipc_node *n_ptr);
 int tipc_node_is_up(struct tipc_node *n_ptr);
 struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space);
 struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space);
+int tipc_node_get_linkname(u32 bearer_id, u32 node, char *linkname, size_t len);
 
 static inline void tipc_node_lock(struct tipc_node *n_ptr)
 {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3c0256962f7d..3f9912f87d0d 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -36,6 +36,7 @@
 
 #include "core.h"
 #include "port.h"
+#include "node.h"
 
 #include <linux/export.h>
 
@@ -1905,6 +1906,28 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
 	return put_user(sizeof(value), ol);
 }
 
+int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg)
+{
+	struct tipc_sioc_ln_req lnr;
+	void __user *argp = (void __user *)arg;
+
+	switch (cmd) {
+	case SIOCGETLINKNAME:
+		if (copy_from_user(&lnr, argp, sizeof(lnr)))
+			return -EFAULT;
+		if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer,
+					    lnr.linkname, TIPC_MAX_LINK_NAME)) {
+			if (copy_to_user(argp, &lnr, sizeof(lnr)))
+				return -EFAULT;
+			return 0;
+		}
+		return -EADDRNOTAVAIL;
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+
 /* Protocol switches for the various types of TIPC sockets */
 
 static const struct proto_ops msg_ops = {
@@ -1917,7 +1940,7 @@ static const struct proto_ops msg_ops = {
 	.accept		= sock_no_accept,
 	.getname	= tipc_getname,
 	.poll		= tipc_poll,
-	.ioctl		= sock_no_ioctl,
+	.ioctl		= tipc_ioctl,
 	.listen		= sock_no_listen,
 	.shutdown	= tipc_shutdown,
 	.setsockopt	= tipc_setsockopt,
@@ -1938,7 +1961,7 @@ static const struct proto_ops packet_ops = {
 	.accept		= tipc_accept,
 	.getname	= tipc_getname,
 	.poll		= tipc_poll,
-	.ioctl		= sock_no_ioctl,
+	.ioctl		= tipc_ioctl,
 	.listen		= tipc_listen,
 	.shutdown	= tipc_shutdown,
 	.setsockopt	= tipc_setsockopt,
@@ -1959,7 +1982,7 @@ static const struct proto_ops stream_ops = {
 	.accept		= tipc_accept,
 	.getname	= tipc_getname,
 	.poll		= tipc_poll,
-	.ioctl		= sock_no_ioctl,
+	.ioctl		= tipc_ioctl,
 	.listen		= tipc_listen,
 	.shutdown	= tipc_shutdown,
 	.setsockopt	= tipc_setsockopt,
-- 
cgit v1.2.3-71-gd317


From 4af619ae2ccf47a2b3a108e1926736484721370f Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Thu, 24 Apr 2014 19:09:05 +0200
Subject: at86rf230: use irq_get_trigger_type

This patch removes the platform data for the irq_type. We use instead
the irq_get_trigger_type function to get these flags which should
already configured by the interrupt controller.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ieee802154/at86rf230.c | 28 ++++++++--------------------
 include/linux/spi/at86rf230.h      | 14 --------------
 2 files changed, 8 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index e36f194673a4..17b9c9aea9be 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
@@ -970,8 +971,7 @@ static int at86rf230_irq_polarity(struct at86rf230_local *lp, int pol)
 
 static int at86rf230_hw_init(struct at86rf230_local *lp)
 {
-	struct at86rf230_platform_data *pdata = lp->spi->dev.platform_data;
-	int rc, irq_pol;
+	int rc, irq_pol, irq_type;
 	u8 status;
 	u8 csma_seed[2];
 
@@ -983,8 +983,9 @@ static int at86rf230_hw_init(struct at86rf230_local *lp)
 	if (rc)
 		return rc;
 
+	irq_type = irq_get_trigger_type(lp->spi->irq);
 	/* configure irq polarity, defaults to high active */
-	if (pdata->irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW))
+	if (irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW))
 		irq_pol = IRQ_ACTIVE_LOW;
 	else
 		irq_pol = IRQ_ACTIVE_HIGH;
@@ -1032,7 +1033,6 @@ static struct at86rf230_platform_data *
 at86rf230_get_pdata(struct spi_device *spi)
 {
 	struct at86rf230_platform_data *pdata;
-	const char *irq_type;
 
 	if (!IS_ENABLED(CONFIG_OF) || !spi->dev.of_node)
 		return spi->dev.platform_data;
@@ -1044,19 +1044,6 @@ at86rf230_get_pdata(struct spi_device *spi)
 	pdata->rstn = of_get_named_gpio(spi->dev.of_node, "reset-gpio", 0);
 	pdata->slp_tr = of_get_named_gpio(spi->dev.of_node, "sleep-gpio", 0);
 
-	pdata->irq_type = IRQF_TRIGGER_RISING;
-	of_property_read_string(spi->dev.of_node, "irq-type", &irq_type);
-	if (!strcmp(irq_type, "level-high"))
-		pdata->irq_type = IRQF_TRIGGER_HIGH;
-	else if (!strcmp(irq_type, "level-low"))
-		pdata->irq_type = IRQF_TRIGGER_LOW;
-	else if (!strcmp(irq_type, "edge-rising"))
-		pdata->irq_type = IRQF_TRIGGER_RISING;
-	else if (!strcmp(irq_type, "edge-falling"))
-		pdata->irq_type = IRQF_TRIGGER_FALLING;
-	else
-		dev_warn(&spi->dev, "wrong irq-type specified using edge-rising\n");
-
 	spi->dev.platform_data = pdata;
 done:
 	return pdata;
@@ -1071,7 +1058,7 @@ static int at86rf230_probe(struct spi_device *spi)
 	u8 part = 0, version = 0, status;
 	irq_handler_t irq_handler;
 	work_func_t irq_worker;
-	int rc;
+	int rc, irq_type;
 	const char *chip;
 	struct ieee802154_ops *ops = NULL;
 
@@ -1176,7 +1163,8 @@ static int at86rf230_probe(struct spi_device *spi)
 	dev->extra_tx_headroom = 0;
 	dev->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AACK;
 
-	if (pdata->irq_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) {
+	irq_type = irq_get_trigger_type(spi->irq);
+	if (irq_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) {
 		irq_worker = at86rf230_irqwork;
 		irq_handler = at86rf230_isr;
 	} else {
@@ -1203,7 +1191,7 @@ static int at86rf230_probe(struct spi_device *spi)
 		goto err_hw_init;
 
 	rc = request_irq(spi->irq, irq_handler,
-			 IRQF_SHARED | pdata->irq_type,
+			 IRQF_SHARED | irq_type,
 			 dev_name(&spi->dev), lp);
 	if (rc)
 		goto err_hw_init;
diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h
index aa327a8105ad..b2b1afbb3202 100644
--- a/include/linux/spi/at86rf230.h
+++ b/include/linux/spi/at86rf230.h
@@ -26,20 +26,6 @@ struct at86rf230_platform_data {
 	int rstn;
 	int slp_tr;
 	int dig2;
-
-	/* Setting the irq_type will configure the driver to request
-	 * the platform irq trigger type according to the given value
-	 * and configure the interrupt polarity of the device to the
-	 * corresponding polarity.
-	 *
-	 * Allowed values are: IRQF_TRIGGER_RISING, IRQF_TRIGGER_FALLING,
-	 *                     IRQF_TRIGGER_HIGH and IRQF_TRIGGER_LOW
-	 *
-	 * Setting it to 0, the driver does not touch the trigger type
-	 * configuration of the interrupt and sets the interrupt polarity
-	 * of the device to high active (the default value).
-	 */
-	int irq_type;
 };
 
 #endif
-- 
cgit v1.2.3-71-gd317


From eb11022dca4eb22def72d5d4e3140caa357b34e1 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Fri, 25 Apr 2014 10:35:07 +0100
Subject: FDDI: Reformat <linux/if_fddi.h> for 8-character tabs

Some of our FDDI support code has been apparently written with an
assumption that tabs are 4-character wide.  In preparation to the next
change this update reformats <linux/if_fddi.h> so that it stays within 79
columns and otherwise renders correctly with 8-character tabs.  No
functional change.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_fddi.h | 90 ++++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_fddi.h b/include/uapi/linux/if_fddi.h
index 0d36909c3aef..1086cd9f6754 100644
--- a/include/uapi/linux/if_fddi.h
+++ b/include/uapi/linux/if_fddi.h
@@ -30,74 +30,76 @@
  *  Define max and min legal sizes.  The frame sizes do not include
  *  4 byte FCS/CRC (frame check sequence).
  */
-#define FDDI_K_ALEN			6		/* Octets in one FDDI address */
-#define FDDI_K_8022_HLEN	16		/* Total octets in 802.2 header */
-#define FDDI_K_SNAP_HLEN	21		/* Total octets in 802.2 SNAP header */
-#define FDDI_K_8022_ZLEN	16		/* Min octets in 802.2 frame sans FCS */
-#define FDDI_K_SNAP_ZLEN	21		/* Min octets in 802.2 SNAP frame sans FCS */
+#define FDDI_K_ALEN		6	/* Octets in one FDDI address */
+#define FDDI_K_8022_HLEN	16	/* Total octets in 802.2 header */
+#define FDDI_K_SNAP_HLEN	21	/* Total octets in 802.2 SNAP header */
+#define FDDI_K_8022_ZLEN	16	/* Min octets in 802.2 frame sans
+					   FCS */
+#define FDDI_K_SNAP_ZLEN	21	/* Min octets in 802.2 SNAP frame sans
+					   FCS */
 #define FDDI_K_8022_DLEN	4475	/* Max octets in 802.2 payload */
 #define FDDI_K_SNAP_DLEN	4470	/* Max octets in 802.2 SNAP payload */
-#define FDDI_K_LLC_ZLEN		13		/* Min octets in LLC frame sans FCS */
+#define FDDI_K_LLC_ZLEN		13	/* Min octets in LLC frame sans FCS */
 #define FDDI_K_LLC_LEN		4491	/* Max octets in LLC frame sans FCS */
+#define FDDI_K_OUI_LEN		3	/* Octets in OUI in 802.2 SNAP
+					   header */
 
 /* Define FDDI Frame Control (FC) Byte values */
-#define FDDI_FC_K_VOID					0x00	
-#define FDDI_FC_K_NON_RESTRICTED_TOKEN	0x80	
-#define FDDI_FC_K_RESTRICTED_TOKEN		0xC0	
-#define FDDI_FC_K_SMT_MIN				0x41
-#define FDDI_FC_K_SMT_MAX		   		0x4F
-#define FDDI_FC_K_MAC_MIN				0xC1
-#define FDDI_FC_K_MAC_MAX		  		0xCF	
-#define FDDI_FC_K_ASYNC_LLC_MIN			0x50
-#define FDDI_FC_K_ASYNC_LLC_DEF			0x54
-#define FDDI_FC_K_ASYNC_LLC_MAX			0x5F
-#define FDDI_FC_K_SYNC_LLC_MIN			0xD0
-#define FDDI_FC_K_SYNC_LLC_MAX			0xD7
-#define FDDI_FC_K_IMPLEMENTOR_MIN		0x60
-#define FDDI_FC_K_IMPLEMENTOR_MAX  		0x6F
-#define FDDI_FC_K_RESERVED_MIN			0x70
-#define FDDI_FC_K_RESERVED_MAX			0x7F
+#define FDDI_FC_K_VOID			0x00
+#define FDDI_FC_K_NON_RESTRICTED_TOKEN	0x80
+#define FDDI_FC_K_RESTRICTED_TOKEN	0xC0
+#define FDDI_FC_K_SMT_MIN		0x41
+#define FDDI_FC_K_SMT_MAX		0x4F
+#define FDDI_FC_K_MAC_MIN		0xC1
+#define FDDI_FC_K_MAC_MAX		0xCF
+#define FDDI_FC_K_ASYNC_LLC_MIN		0x50
+#define FDDI_FC_K_ASYNC_LLC_DEF		0x54
+#define FDDI_FC_K_ASYNC_LLC_MAX		0x5F
+#define FDDI_FC_K_SYNC_LLC_MIN		0xD0
+#define FDDI_FC_K_SYNC_LLC_MAX		0xD7
+#define FDDI_FC_K_IMPLEMENTOR_MIN	0x60
+#define FDDI_FC_K_IMPLEMENTOR_MAX	0x6F
+#define FDDI_FC_K_RESERVED_MIN		0x70
+#define FDDI_FC_K_RESERVED_MAX		0x7F
 
 /* Define LLC and SNAP constants */
-#define FDDI_EXTENDED_SAP	0xAA
+#define FDDI_EXTENDED_SAP		0xAA
 #define FDDI_UI_CMD			0x03
 
 /* Define 802.2 Type 1 header */
 struct fddi_8022_1_hdr {
-	__u8	dsap;					/* destination service access point */
-	__u8	ssap;					/* source service access point */
-	__u8	ctrl;					/* control byte #1 */
+	__u8	dsap;			/* destination service access point */
+	__u8	ssap;			/* source service access point */
+	__u8	ctrl;			/* control byte #1 */
 } __attribute__((packed));
 
 /* Define 802.2 Type 2 header */
 struct fddi_8022_2_hdr {
-	__u8	dsap;					/* destination service access point */
-	__u8	ssap;					/* source service access point */
-	__u8	ctrl_1;					/* control byte #1 */
-	__u8	ctrl_2;					/* control byte #2 */
+	__u8	dsap;			/* destination service access point */
+	__u8	ssap;			/* source service access point */
+	__u8	ctrl_1;			/* control byte #1 */
+	__u8	ctrl_2;			/* control byte #2 */
 } __attribute__((packed));
 
 /* Define 802.2 SNAP header */
-#define FDDI_K_OUI_LEN	3
 struct fddi_snap_hdr {
-	__u8	dsap;					/* always 0xAA */
-	__u8	ssap;					/* always 0xAA */
-	__u8	ctrl;					/* always 0x03 */
+	__u8	dsap;			/* always 0xAA */
+	__u8	ssap;			/* always 0xAA */
+	__u8	ctrl;			/* always 0x03 */
 	__u8	oui[FDDI_K_OUI_LEN];	/* organizational universal id */
-	__be16	ethertype;				/* packet type ID field */
+	__be16	ethertype;		/* packet type ID field */
 } __attribute__((packed));
 
 /* Define FDDI LLC frame header */
 struct fddihdr {
-	__u8	fc;						/* frame control */
-	__u8	daddr[FDDI_K_ALEN];		/* destination address */
-	__u8	saddr[FDDI_K_ALEN];		/* source address */
-	union
-		{
-		struct fddi_8022_1_hdr		llc_8022_1;
-		struct fddi_8022_2_hdr		llc_8022_2;
-		struct fddi_snap_hdr		llc_snap;
-		} hdr;
+	__u8	fc;			/* frame control */
+	__u8	daddr[FDDI_K_ALEN];	/* destination address */
+	__u8	saddr[FDDI_K_ALEN];	/* source address */
+	union {
+		struct fddi_8022_1_hdr	llc_8022_1;
+		struct fddi_8022_2_hdr	llc_8022_2;
+		struct fddi_snap_hdr	llc_snap;
+	} hdr;
 } __attribute__((packed));
 
 
-- 
cgit v1.2.3-71-gd317


From e2dcdfe95c0bd67e37db6057edd9c4ee1f1c7b17 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 28 Apr 2014 11:15:08 +0930
Subject: virtio: virtio_break_device() to mark all virtqueues broken.

Good for post-apocalyptic scenarios, like S/390 hotplug.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 15 +++++++++++++++
 include/linux/virtio.h       |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 1e443629f76d..4d08f45a9c29 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -865,4 +865,19 @@ bool virtqueue_is_broken(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
 
+/*
+ * This should prevent the device from being used, allowing drivers to
+ * recover.  You may need to grab appropriate locks to flush.
+ */
+void virtio_break_device(struct virtio_device *dev)
+{
+	struct virtqueue *_vq;
+
+	list_for_each_entry(_vq, &dev->vqs, list) {
+		struct vring_virtqueue *vq = to_vvq(_vq);
+		vq->broken = true;
+	}
+}
+EXPORT_SYMBOL_GPL(virtio_break_device);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index e4abb84199be..b46671e28de2 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -106,6 +106,8 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev)
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
 
+void virtio_break_device(struct virtio_device *dev);
+
 /**
  * virtio_driver - operations for a virtio I/O driver
  * @driver: underlying device driver (populate name and owner).
-- 
cgit v1.2.3-71-gd317


From 51e158c12aca3c9ac63988611a97c05109b14dc9 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 28 Apr 2014 11:34:33 +0930
Subject: param: hand arguments after -- straight to init

The kernel passes any args it doesn't need through to init, except it
assumes anything containing '.' belongs to the kernel (for a module).
This change means all users can clearly distinguish which arguments
are for init.

For example, the kernel uses debug ("dee-bug") to mean log everything to
the console, where systemd uses the debug from the Scandinavian "day-boog"
meaning "fail to boot".  If a future versions uses argv[] instead of
reading /proc/cmdline, this confusion will be avoided.

eg: test 'FOO="this is --foo"' -- 'systemd.debug="true true true"'

Gives:
argv[0] = '/debug-init'
argv[1] = 'test'
argv[2] = 'systemd.debug=true true true'
envp[0] = 'HOME=/'
envp[1] = 'TERM=linux'
envp[2] = 'FOO=this is --foo'

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h |  2 +-
 init/main.c                 | 33 +++++++++++++++++++++++++++++----
 kernel/module.c             | 12 +++++++++---
 kernel/params.c             | 25 ++++++++++++++-----------
 4 files changed, 53 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 204a67743804..b1990c5524e1 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -321,7 +321,7 @@ extern bool parameq(const char *name1, const char *name2);
 extern bool parameqn(const char *name1, const char *name2, size_t n);
 
 /* Called on module insert or kernel boot */
-extern int parse_args(const char *name,
+extern char *parse_args(const char *name,
 		      char *args,
 		      const struct kernel_param *params,
 		      unsigned num,
diff --git a/init/main.c b/init/main.c
index 9c7fd4c9249f..e9d458b5d77b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -252,6 +252,27 @@ static int __init repair_env_string(char *param, char *val, const char *unused)
 	return 0;
 }
 
+/* Anything after -- gets handed straight to init. */
+static int __init set_init_arg(char *param, char *val, const char *unused)
+{
+	unsigned int i;
+
+	if (panic_later)
+		return 0;
+
+	repair_env_string(param, val, unused);
+
+	for (i = 0; argv_init[i]; i++) {
+		if (i == MAX_INIT_ARGS) {
+			panic_later = "init";
+			panic_param = param;
+			return 0;
+		}
+	}
+	argv_init[i] = param;
+	return 0;
+}
+
 /*
  * Unknown boot options get handed to init, unless they look like
  * unused parameters (modprobe will find them in /proc/cmdline).
@@ -478,7 +499,7 @@ static void __init mm_init(void)
 
 asmlinkage void __init start_kernel(void)
 {
-	char * command_line;
+	char * command_line, *after_dashes;
 	extern const struct kernel_param __start___param[], __stop___param[];
 
 	/*
@@ -519,9 +540,13 @@ asmlinkage void __init start_kernel(void)
 
 	pr_notice("Kernel command line: %s\n", boot_command_line);
 	parse_early_param();
-	parse_args("Booting kernel", static_command_line, __start___param,
-		   __stop___param - __start___param,
-		   -1, -1, &unknown_bootoption);
+	after_dashes = parse_args("Booting kernel",
+				  static_command_line, __start___param,
+				  __stop___param - __start___param,
+				  -1, -1, &unknown_bootoption);
+	if (after_dashes)
+		parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
+			   set_init_arg);
 
 	jump_label_init();
 
diff --git a/kernel/module.c b/kernel/module.c
index 11869408f79b..66e4e0d260a9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3193,6 +3193,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
 {
 	struct module *mod;
 	long err;
+	char *after_dashes;
 
 	err = module_sig_check(info);
 	if (err)
@@ -3277,10 +3278,15 @@ static int load_module(struct load_info *info, const char __user *uargs,
 		goto ddebug_cleanup;
 
 	/* Module is ready to execute: parsing args may do that. */
-	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-			 -32768, 32767, unknown_module_param_cb);
-	if (err < 0)
+	after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
+				  -32768, 32767, unknown_module_param_cb);
+	if (IS_ERR(after_dashes)) {
+		err = PTR_ERR(after_dashes);
 		goto bug_cleanup;
+	} else if (after_dashes) {
+		pr_warn("%s: parameters '%s' after `--' ignored\n",
+		       mod->name, after_dashes);
+	}
 
 	/* Link in to syfs. */
 	err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
diff --git a/kernel/params.c b/kernel/params.c
index b00142e7f3ba..1e52ca233fd9 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -177,13 +177,13 @@ static char *next_arg(char *args, char **param, char **val)
 }
 
 /* Args looks like "foo=bar,bar2 baz=fuz wiz". */
-int parse_args(const char *doing,
-	       char *args,
-	       const struct kernel_param *params,
-	       unsigned num,
-	       s16 min_level,
-	       s16 max_level,
-	       int (*unknown)(char *param, char *val, const char *doing))
+char *parse_args(const char *doing,
+		 char *args,
+		 const struct kernel_param *params,
+		 unsigned num,
+		 s16 min_level,
+		 s16 max_level,
+		 int (*unknown)(char *param, char *val, const char *doing))
 {
 	char *param, *val;
 
@@ -198,6 +198,9 @@ int parse_args(const char *doing,
 		int irq_was_disabled;
 
 		args = next_arg(args, &param, &val);
+		/* Stop at -- */
+		if (!val && strcmp(param, "--") == 0)
+			return args;
 		irq_was_disabled = irqs_disabled();
 		ret = parse_one(param, val, doing, params, num,
 				min_level, max_level, unknown);
@@ -208,22 +211,22 @@ int parse_args(const char *doing,
 		switch (ret) {
 		case -ENOENT:
 			pr_err("%s: Unknown parameter `%s'\n", doing, param);
-			return ret;
+			return ERR_PTR(ret);
 		case -ENOSPC:
 			pr_err("%s: `%s' too large for parameter `%s'\n",
 			       doing, val ?: "", param);
-			return ret;
+			return ERR_PTR(ret);
 		case 0:
 			break;
 		default:
 			pr_err("%s: `%s' invalid for parameter `%s'\n",
 			       doing, val ?: "", param);
-			return ret;
+			return ERR_PTR(ret);
 		}
 	}
 
 	/* All parsed OK. */
-	return 0;
+	return NULL;
 }
 
 /* Lazy bastard, eh? */
-- 
cgit v1.2.3-71-gd317


From 2f7ef2f8790f5bf53db4fc6b2310943139285827 Mon Sep 17 00:00:00 2001
From: Cong Wang <cwang@twopensource.com>
Date: Fri, 25 Apr 2014 13:54:06 -0700
Subject: sched, cls: check if we could overwrite actions when changing a
 filter

When actions are attached to a filter, they are a part of the filter
itself, so when changing a filter we should allow to overwrite the actions
inside as well.

In my specific case, when I tried to _append_ a new action to an existing
filter which already has an action, I got EEXIST since kernel refused
to overwrite the existing one in kernel.

This patch checks if we are changing the filter checking NLM_F_CREATE flag
(Sigh, filters don't use NLM_F_REPLACE...) and then passes the boolean down
to actions. This fixes the problem above.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h     |  2 +-
 include/net/sch_generic.h |  2 +-
 net/sched/cls_api.c       |  9 +++++----
 net/sched/cls_basic.c     | 10 +++++-----
 net/sched/cls_bpf.c       | 10 +++++-----
 net/sched/cls_cgroup.c    |  4 ++--
 net/sched/cls_flow.c      |  4 ++--
 net/sched/cls_fw.c        | 10 +++++-----
 net/sched/cls_route.c     | 11 ++++++-----
 net/sched/cls_rsvp.h      |  4 ++--
 net/sched/cls_tcindex.c   |  8 ++++----
 net/sched/cls_u32.c       | 10 +++++-----
 12 files changed, 43 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a2441fb1428f..6da46dcf1049 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -136,7 +136,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
-		      struct tcf_exts *exts);
+		      struct tcf_exts *exts, bool ovr);
 void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts);
 void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
 		     struct tcf_exts *src);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d062f81c692f..624f9857c83e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -199,7 +199,7 @@ struct tcf_proto_ops {
 	int			(*change)(struct net *net, struct sk_buff *,
 					struct tcf_proto*, unsigned long,
 					u32 handle, struct nlattr **,
-					unsigned long *);
+					unsigned long *, bool);
 	int			(*delete)(struct tcf_proto*, unsigned long);
 	void			(*walk)(struct tcf_proto*, struct tcf_walker *arg);
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 29a30a14c315..6786130a4f59 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -317,7 +317,8 @@ replay:
 		}
 	}
 
-	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh);
+	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
+			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
 	if (err == 0) {
 		if (tp_created) {
 			spin_lock_bh(root_lock);
@@ -504,7 +505,7 @@ void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
 EXPORT_SYMBOL(tcf_exts_destroy);
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
-		  struct nlattr *rate_tlv, struct tcf_exts *exts)
+		  struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	{
@@ -513,7 +514,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		INIT_LIST_HEAD(&exts->actions);
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
-						"police", TCA_ACT_NOREPLACE,
+						"police", ovr,
 						TCA_ACT_BIND);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
@@ -523,7 +524,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		} else if (exts->action && tb[exts->action]) {
 			int err;
 			err = tcf_action_init(net, tb[exts->action], rate_tlv,
-					      NULL, TCA_ACT_NOREPLACE,
+					      NULL, ovr,
 					      TCA_ACT_BIND, &exts->actions);
 			if (err)
 				return err;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index e98ca99c202b..0ae1813e3e90 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -130,14 +130,14 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
 static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 			   struct basic_filter *f, unsigned long base,
 			   struct nlattr **tb,
-			   struct nlattr *est)
+			   struct nlattr *est, bool ovr)
 {
 	int err;
 	struct tcf_exts e;
 	struct tcf_ematch_tree t;
 
 	tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 	if (err < 0)
 		return err;
 
@@ -161,7 +161,7 @@ errout:
 
 static int basic_change(struct net *net, struct sk_buff *in_skb,
 			struct tcf_proto *tp, unsigned long base, u32 handle,
-			struct nlattr **tca, unsigned long *arg)
+			struct nlattr **tca, unsigned long *arg, bool ovr)
 {
 	int err;
 	struct basic_head *head = tp->root;
@@ -179,7 +179,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 	if (f != NULL) {
 		if (handle && f->handle != handle)
 			return -EINVAL;
-		return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]);
+		return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
 	}
 
 	err = -ENOBUFS;
@@ -206,7 +206,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 		f->handle = head->hgenerator;
 	}
 
-	err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE]);
+	err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr);
 	if (err < 0)
 		goto errout;
 
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 8e3cf49118e3..16186965af97 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -156,7 +156,7 @@ static void cls_bpf_put(struct tcf_proto *tp, unsigned long f)
 static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 				   struct cls_bpf_prog *prog,
 				   unsigned long base, struct nlattr **tb,
-				   struct nlattr *est)
+				   struct nlattr *est, bool ovr)
 {
 	struct sock_filter *bpf_ops, *bpf_old;
 	struct tcf_exts exts;
@@ -170,7 +170,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 		return -EINVAL;
 
 	tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
-	ret = tcf_exts_validate(net, tp, tb, est, &exts);
+	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
 	if (ret < 0)
 		return ret;
 
@@ -242,7 +242,7 @@ static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 			  struct tcf_proto *tp, unsigned long base,
 			  u32 handle, struct nlattr **tca,
-			  unsigned long *arg)
+			  unsigned long *arg, bool ovr)
 {
 	struct cls_bpf_head *head = tp->root;
 	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg;
@@ -260,7 +260,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 		if (handle && prog->handle != handle)
 			return -EINVAL;
 		return cls_bpf_modify_existing(net, tp, prog, base, tb,
-					       tca[TCA_RATE]);
+					       tca[TCA_RATE], ovr);
 	}
 
 	prog = kzalloc(sizeof(*prog), GFP_KERNEL);
@@ -277,7 +277,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 		goto errout;
 	}
 
-	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE]);
+	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
 	if (ret < 0)
 		goto errout;
 
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 8e2158ab551c..cacf01bd04f0 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -83,7 +83,7 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 			     struct tcf_proto *tp, unsigned long base,
 			     u32 handle, struct nlattr **tca,
-			     unsigned long *arg)
+			     unsigned long *arg, bool ovr)
 {
 	struct nlattr *tb[TCA_CGROUP_MAX + 1];
 	struct cls_cgroup_head *head = tp->root;
@@ -119,7 +119,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 		return err;
 
 	tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 257029c54332..35be16f7c192 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -349,7 +349,7 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
 static int flow_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       unsigned long *arg)
+		       unsigned long *arg, bool ovr)
 {
 	struct flow_head *head = tp->root;
 	struct flow_filter *f;
@@ -393,7 +393,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	}
 
 	tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 63a3ce75c02e..861b03ccfed0 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -169,7 +169,7 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
 
 static int
 fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
-	struct nlattr **tb, struct nlattr **tca, unsigned long base)
+	struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr)
 {
 	struct fw_head *head = tp->root;
 	struct tcf_exts e;
@@ -177,7 +177,7 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
 	int err;
 
 	tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
 	if (err < 0)
 		return err;
 
@@ -218,7 +218,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle,
 		     struct nlattr **tca,
-		     unsigned long *arg)
+		     unsigned long *arg, bool ovr)
 {
 	struct fw_head *head = tp->root;
 	struct fw_filter *f = (struct fw_filter *) *arg;
@@ -236,7 +236,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 	if (f != NULL) {
 		if (f->id != handle && handle)
 			return -EINVAL;
-		return fw_change_attrs(net, tp, f, tb, tca, base);
+		return fw_change_attrs(net, tp, f, tb, tca, base, ovr);
 	}
 
 	if (!handle)
@@ -264,7 +264,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 	tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
 	f->id = handle;
 
-	err = fw_change_attrs(net, tp, f, tb, tca, base);
+	err = fw_change_attrs(net, tp, f, tb, tca, base, ovr);
 	if (err < 0)
 		goto errout;
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 1ad3068f2ce1..dd9fc2523c76 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -333,7 +333,8 @@ static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = {
 static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 			    unsigned long base, struct route4_filter *f,
 			    u32 handle, struct route4_head *head,
-			    struct nlattr **tb, struct nlattr *est, int new)
+			    struct nlattr **tb, struct nlattr *est, int new,
+			    bool ovr)
 {
 	int err;
 	u32 id = 0, to = 0, nhandle = 0x8000;
@@ -343,7 +344,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 	struct tcf_exts e;
 
 	tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 	if (err < 0)
 		return err;
 
@@ -428,7 +429,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
-		       unsigned long *arg)
+		       unsigned long *arg, bool ovr)
 {
 	struct route4_head *head = tp->root;
 	struct route4_filter *f, *f1, **fp;
@@ -455,7 +456,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 			old_handle = f->handle;
 
 		err = route4_set_parms(net, tp, base, f, handle, head, tb,
-			tca[TCA_RATE], 0);
+			tca[TCA_RATE], 0, ovr);
 		if (err < 0)
 			return err;
 
@@ -479,7 +480,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 
 	tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
 	err = route4_set_parms(net, tp, base, f, handle, head, tb,
-		tca[TCA_RATE], 1);
+		tca[TCA_RATE], 1, ovr);
 	if (err < 0)
 		goto errout;
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 19f8e5dfa8bd..1020e233a5d6 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -415,7 +415,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
-		       unsigned long *arg)
+		       unsigned long *arg, bool ovr)
 {
 	struct rsvp_head *data = tp->root;
 	struct rsvp_filter *f, **fp;
@@ -436,7 +436,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 		return err;
 
 	tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index eed8404443d8..d11d0a4fbe34 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -192,7 +192,7 @@ static int
 tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		  u32 handle, struct tcindex_data *p,
 		  struct tcindex_filter_result *r, struct nlattr **tb,
-		 struct nlattr *est)
+		  struct nlattr *est, bool ovr)
 {
 	int err, balloc = 0;
 	struct tcindex_filter_result new_filter_result, *old_r = r;
@@ -202,7 +202,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	struct tcf_exts e;
 
 	tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 	if (err < 0)
 		return err;
 
@@ -331,7 +331,7 @@ errout:
 static int
 tcindex_change(struct net *net, struct sk_buff *in_skb,
 	       struct tcf_proto *tp, unsigned long base, u32 handle,
-	       struct nlattr **tca, unsigned long *arg)
+	       struct nlattr **tca, unsigned long *arg, bool ovr)
 {
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -351,7 +351,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
 		return err;
 
 	return tcindex_set_parms(net, tp, base, handle, p, r, tb,
-				 tca[TCA_RATE]);
+				 tca[TCA_RATE], ovr);
 }
 
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 84c28daff848..c39b583ace32 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -486,13 +486,13 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
 static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 			 unsigned long base, struct tc_u_hnode *ht,
 			 struct tc_u_knode *n, struct nlattr **tb,
-			 struct nlattr *est)
+			 struct nlattr *est, bool ovr)
 {
 	int err;
 	struct tcf_exts e;
 
 	tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
 	if (err < 0)
 		return err;
 
@@ -545,7 +545,7 @@ errout:
 static int u32_change(struct net *net, struct sk_buff *in_skb,
 		      struct tcf_proto *tp, unsigned long base, u32 handle,
 		      struct nlattr **tca,
-		      unsigned long *arg)
+		      unsigned long *arg, bool ovr)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *ht;
@@ -569,7 +569,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 			return -EINVAL;
 
 		return u32_set_parms(net, tp, base, n->ht_up, n, tb,
-				     tca[TCA_RATE]);
+				     tca[TCA_RATE], ovr);
 	}
 
 	if (tb[TCA_U32_DIVISOR]) {
@@ -656,7 +656,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 	}
 #endif
 
-	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE]);
+	err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
 	if (err == 0) {
 		struct tc_u_knode **ins;
 		for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
-- 
cgit v1.2.3-71-gd317


From e16821bcfb364b0c41142db275dc74b39fa42c30 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@qca.qualcomm.com>
Date: Mon, 28 Apr 2014 11:22:08 +0300
Subject: cfg80211: Dynamic channel bandwidth changes in AP mode

This extends NL80211_CMD_SET_CHANNEL to allow dynamic channel bandwidth
changes in AP mode (including P2P GO) during a lifetime of the BSS. This
can be used to implement, e.g., HT 20/40 MHz co-existence rules on the
2.4 GHz band.

Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  8 ++++++++
 include/uapi/linux/nl80211.h |  4 ++++
 net/wireless/nl80211.c       | 40 ++++++++++++++++++++++++++++------------
 net/wireless/rdev-ops.h      | 13 +++++++++++++
 net/wireless/trace.h         | 18 ++++++++++++++++++
 5 files changed, 71 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c98cf08538b9..7eae46ccec01 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2290,6 +2290,10 @@ struct cfg80211_qos_map {
  * @channel_switch: initiate channel-switch procedure (with CSA)
  *
  * @set_qos_map: Set QoS mapping information to the driver
+ *
+ * @set_ap_chanwidth: Set the AP (including P2P GO) mode channel width for the
+ *	given interface This is used e.g. for dynamic HT 20/40 MHz channel width
+ *	changes during the lifetime of the BSS.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -2533,9 +2537,13 @@ struct cfg80211_ops {
 	int	(*channel_switch)(struct wiphy *wiphy,
 				  struct net_device *dev,
 				  struct cfg80211_csa_settings *params);
+
 	int     (*set_qos_map)(struct wiphy *wiphy,
 			       struct net_device *dev,
 			       struct cfg80211_qos_map *qos_map);
+
+	int	(*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev,
+				    struct cfg80211_chan_def *chandef);
 };
 
 /*
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 0592032ff160..406010d4def0 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3929,6 +3929,9 @@ enum nl80211_ap_sme_features {
  *	interface. An active monitor interface behaves like a normal monitor
  *	interface, but gets added to the driver. It ensures that incoming
  *	unicast packets directed at the configured interface address get ACKed.
+ * @NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE: This driver supports dynamic
+ *	channel bandwidth change (e.g., HT 20 <-> 40 MHz channel) during the
+ *	lifetime of a BSS.
  */
 enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS			= 1 << 0,
@@ -3949,6 +3952,7 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_FULL_AP_CLIENT_STATE		= 1 << 15,
 	NL80211_FEATURE_USERSPACE_MPM			= 1 << 16,
 	NL80211_FEATURE_ACTIVE_MONITOR			= 1 << 17,
+	NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE	= 1 << 18,
 };
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ca75f60041d2..0f1b18f209d6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1928,18 +1928,20 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
 }
 
 static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
-				 struct wireless_dev *wdev,
+				 struct net_device *dev,
 				 struct genl_info *info)
 {
 	struct cfg80211_chan_def chandef;
 	int result;
 	enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR;
+	struct wireless_dev *wdev = NULL;
 
-	if (wdev)
-		iftype = wdev->iftype;
-
+	if (dev)
+		wdev = dev->ieee80211_ptr;
 	if (!nl80211_can_set_dev_channel(wdev))
 		return -EOPNOTSUPP;
+	if (wdev)
+		iftype = wdev->iftype;
 
 	result = nl80211_parse_chandef(rdev, info, &chandef);
 	if (result)
@@ -1948,14 +1950,27 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
 	switch (iftype) {
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_P2P_GO:
-		if (wdev->beacon_interval) {
-			result = -EBUSY;
-			break;
-		}
 		if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, iftype)) {
 			result = -EINVAL;
 			break;
 		}
+		if (wdev->beacon_interval) {
+			if (!dev || !rdev->ops->set_ap_chanwidth ||
+			    !(rdev->wiphy.features &
+			      NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) {
+				result = -EBUSY;
+				break;
+			}
+
+			/* Only allow dynamic channel width changes */
+			if (chandef.chan != wdev->preset_chandef.chan) {
+				result = -EBUSY;
+				break;
+			}
+			result = rdev_set_ap_chanwidth(rdev, dev, &chandef);
+			if (result)
+				break;
+		}
 		wdev->preset_chandef = chandef;
 		result = 0;
 		break;
@@ -1977,7 +1992,7 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *netdev = info->user_ptr[1];
 
-	return __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info);
+	return __nl80211_set_channel(rdev, netdev, info);
 }
 
 static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info)
@@ -2099,9 +2114,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
-		result = __nl80211_set_channel(rdev,
-				nl80211_can_set_dev_channel(wdev) ? wdev : NULL,
-				info);
+		result = __nl80211_set_channel(
+			rdev,
+			nl80211_can_set_dev_channel(wdev) ? netdev : NULL,
+			info);
 		if (result)
 			return result;
 	}
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 74d97d33c938..00cdf73ba6c4 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -950,4 +950,17 @@ static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev,
 	return ret;
 }
 
+static inline int
+rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev,
+		      struct net_device *dev, struct cfg80211_chan_def *chandef)
+{
+	int ret;
+
+	trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, chandef);
+	ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, chandef);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+
+	return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 47b499f8f54d..f3c13ff4d04c 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1919,6 +1919,24 @@ TRACE_EVENT(rdev_set_qos_map,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->num_des)
 );
 
+TRACE_EVENT(rdev_set_ap_chanwidth,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_chan_def *chandef),
+	TP_ARGS(wiphy, netdev, chandef),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		CHAN_DEF_ENTRY
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		CHAN_DEF_ASSIGN(chandef);
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT,
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG)
+);
+
 /*************************************************************
  *	     cfg80211 exported functions traces		     *
  *************************************************************/
-- 
cgit v1.2.3-71-gd317


From 73dbd77d88e6974d2e30c239cef67e5370c2b7c5 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 29 Apr 2014 11:44:36 +0200
Subject: drm: Fixup flip-work kerneldoc

Describe the fifo parameter. It seems like kerneldoc doesn't properly
handle fields defined using a macro, so it will end up complaining about
this anyway and not generate the documentation for it either. At least
the kerneldoc is now complete.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 include/drm/drm_flip_work.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drm_flip_work.h b/include/drm/drm_flip_work.h
index 35c776ae7d3b..9eed34dcd6af 100644
--- a/include/drm/drm_flip_work.h
+++ b/include/drm/drm_flip_work.h
@@ -57,6 +57,7 @@ typedef void (*drm_flip_func_t)(struct drm_flip_work *work, void *val);
  * @count: number of committed items
  * @func: callback fxn called for each committed item
  * @worker: worker which calls @func
+ * @fifo: queue of committed items
  */
 struct drm_flip_work {
 	const char *name;
-- 
cgit v1.2.3-71-gd317


From 683399eddb9fff742b1a14c5a5d03e12bfc0afff Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Sun, 20 Apr 2014 18:57:36 -0600
Subject: netfilter: nfnetlink_acct: Adding quota support to accounting
 framework

nfacct objects already support accounting at the byte and packet
level.  As such it is a natural extension to add the possiblity to
define a ceiling limit for both metrics.

All the support for quotas itself is added to nfnetlink acctounting
framework to stay coherent with current accounting object management.
Quota limit checks are implemented in xt_nfacct filter where
statistic collection is already done.

Pablo Neira Ayuso has also contributed to this feature.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink_acct.h      |  8 ++-
 include/uapi/linux/netfilter/nfnetlink.h      |  2 +
 include/uapi/linux/netfilter/nfnetlink_acct.h |  9 +++
 net/netfilter/nfnetlink_acct.c                | 85 +++++++++++++++++++++++++++
 net/netfilter/xt_nfacct.c                     |  5 +-
 5 files changed, 107 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
index b2e85e59f760..6ec975748742 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -3,11 +3,17 @@
 
 #include <uapi/linux/netfilter/nfnetlink_acct.h>
 
+enum {
+	NFACCT_NO_QUOTA		= -1,
+	NFACCT_UNDERQUOTA,
+	NFACCT_OVERQUOTA,
+};
 
 struct nf_acct;
 
 struct nf_acct *nfnl_acct_find_get(const char *filter_name);
 void nfnl_acct_put(struct nf_acct *acct);
 void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
-
+extern int nfnl_acct_overquota(const struct sk_buff *skb,
+			      struct nf_acct *nfacct);
 #endif /* _NFNL_ACCT_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 596ddd45253c..354a7e5e50f2 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -20,6 +20,8 @@ enum nfnetlink_groups {
 #define NFNLGRP_CONNTRACK_EXP_DESTROY	NFNLGRP_CONNTRACK_EXP_DESTROY
 	NFNLGRP_NFTABLES,
 #define NFNLGRP_NFTABLES                NFNLGRP_NFTABLES
+	NFNLGRP_ACCT_QUOTA,
+#define NFNLGRP_ACCT_QUOTA		NFNLGRP_ACCT_QUOTA
 	__NFNLGRP_MAX,
 };
 #define NFNLGRP_MAX	(__NFNLGRP_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
index c7b6269e760b..51404ec19022 100644
--- a/include/uapi/linux/netfilter/nfnetlink_acct.h
+++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
@@ -10,15 +10,24 @@ enum nfnl_acct_msg_types {
 	NFNL_MSG_ACCT_GET,
 	NFNL_MSG_ACCT_GET_CTRZERO,
 	NFNL_MSG_ACCT_DEL,
+	NFNL_MSG_ACCT_OVERQUOTA,
 	NFNL_MSG_ACCT_MAX
 };
 
+enum nfnl_acct_flags {
+	NFACCT_F_QUOTA_PKTS	= (1 << 0),
+	NFACCT_F_QUOTA_BYTES	= (1 << 1),
+	NFACCT_F_OVERQUOTA	= (1 << 2), /* can't be set from userspace */
+};
+
 enum nfnl_acct_type {
 	NFACCT_UNSPEC,
 	NFACCT_NAME,
 	NFACCT_PKTS,
 	NFACCT_BYTES,
 	NFACCT_USE,
+	NFACCT_FLAGS,
+	NFACCT_QUOTA,
 	__NFACCT_MAX
 };
 #define NFACCT_MAX (__NFACCT_MAX - 1)
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c7b6d466a662..70e86bbb3637 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -32,18 +32,24 @@ static LIST_HEAD(nfnl_acct_list);
 struct nf_acct {
 	atomic64_t		pkts;
 	atomic64_t		bytes;
+	unsigned long		flags;
 	struct list_head	head;
 	atomic_t		refcnt;
 	char			name[NFACCT_NAME_MAX];
 	struct rcu_head		rcu_head;
+	char			data[0];
 };
 
+#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
+
 static int
 nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
 {
 	struct nf_acct *nfacct, *matching = NULL;
 	char *acct_name;
+	unsigned int size = 0;
+	u32 flags = 0;
 
 	if (!tb[NFACCT_NAME])
 		return -EINVAL;
@@ -68,15 +74,39 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 			/* reset counters if you request a replacement. */
 			atomic64_set(&matching->pkts, 0);
 			atomic64_set(&matching->bytes, 0);
+			smp_mb__before_clear_bit();
+			/* reset overquota flag if quota is enabled. */
+			if ((matching->flags & NFACCT_F_QUOTA))
+				clear_bit(NFACCT_F_OVERQUOTA, &matching->flags);
 			return 0;
 		}
 		return -EBUSY;
 	}
 
 	nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
+	if (tb[NFACCT_FLAGS]) {
+		flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
+		if (flags & ~NFACCT_F_QUOTA)
+			return -EOPNOTSUPP;
+		if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
+			return -EINVAL;
+		if (flags & NFACCT_F_OVERQUOTA)
+			return -EINVAL;
+
+		size += sizeof(u64);
+	}
+
+	nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
 	if (nfacct == NULL)
 		return -ENOMEM;
 
+	if (flags & NFACCT_F_QUOTA) {
+		u64 *quota = (u64 *)nfacct->data;
+
+		*quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
+		nfacct->flags = flags;
+	}
+
 	strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
 
 	if (tb[NFACCT_BYTES]) {
@@ -117,6 +147,9 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
 		pkts = atomic64_xchg(&acct->pkts, 0);
 		bytes = atomic64_xchg(&acct->bytes, 0);
+		smp_mb__before_clear_bit();
+		if (acct->flags & NFACCT_F_QUOTA)
+			clear_bit(NFACCT_F_OVERQUOTA, &acct->flags);
 	} else {
 		pkts = atomic64_read(&acct->pkts);
 		bytes = atomic64_read(&acct->bytes);
@@ -125,7 +158,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	    nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) ||
 	    nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
 		goto nla_put_failure;
+	if (acct->flags & NFACCT_F_QUOTA) {
+		u64 *quota = (u64 *)acct->data;
 
+		if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) ||
+		    nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota)))
+			goto nla_put_failure;
+	}
 	nlmsg_end(skb, nlh);
 	return skb->len;
 
@@ -270,6 +309,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
 	[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
 	[NFACCT_BYTES] = { .type = NLA_U64 },
 	[NFACCT_PKTS] = { .type = NLA_U64 },
+	[NFACCT_FLAGS] = { .type = NLA_U32 },
+	[NFACCT_QUOTA] = { .type = NLA_U64 },
 };
 
 static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
@@ -336,6 +377,50 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_update);
 
+static void nfnl_overquota_report(struct nf_acct *nfacct)
+{
+	int ret;
+	struct sk_buff *skb;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (skb == NULL)
+		return;
+
+	ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
+				  nfacct);
+	if (ret <= 0) {
+		kfree_skb(skb);
+		return;
+	}
+	netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
+			  GFP_ATOMIC);
+}
+
+int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
+{
+	u64 now;
+	u64 *quota;
+	int ret = NFACCT_UNDERQUOTA;
+
+	/* no place here if we don't have a quota */
+	if (!(nfacct->flags & NFACCT_F_QUOTA))
+		return NFACCT_NO_QUOTA;
+
+	quota = (u64 *)nfacct->data;
+	now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ?
+	       atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes);
+
+	ret = now > *quota;
+
+	if (now >= *quota &&
+	    !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) {
+		nfnl_overquota_report(nfacct);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
+
 static int __init nfnl_acct_init(void)
 {
 	int ret;
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index b3be0ef21f19..8c646ed9c921 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -21,11 +21,14 @@ MODULE_ALIAS("ip6t_nfacct");
 
 static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
+	int overquota;
 	const struct xt_nfacct_match_info *info = par->targinfo;
 
 	nfnl_acct_update(skb, info->nfacct);
 
-	return true;
+	overquota = nfnl_acct_overquota(skb, info->nfacct);
+
+	return overquota == NFACCT_UNDERQUOTA ? false : true;
 }
 
 static int
-- 
cgit v1.2.3-71-gd317


From f768e5bdefe1ec9adbf7a116dfb156b73cacb582 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 28 Apr 2014 21:09:50 +0200
Subject: netfilter: add helper for adding nat extension

Reduce copy-past a bit by adding a common helper.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat.h          |  2 ++
 net/ipv4/netfilter/iptable_nat.c        | 14 +++-----------
 net/ipv4/netfilter/nft_chain_nat_ipv4.c | 12 +++---------
 net/ipv6/netfilter/ip6table_nat.c       | 14 +++-----------
 net/ipv6/netfilter/nft_chain_nat_ipv6.c | 12 +++---------
 net/netfilter/nf_nat_core.c             | 24 ++++++++++++++++--------
 6 files changed, 30 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 07eaaf604092..a71dd333ac68 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -48,6 +48,8 @@ unsigned int nf_nat_setup_info(struct nf_conn *ct,
 extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct,
 					      unsigned int hooknum);
 
+struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct);
+
 /* Is this tuple already taken? (not by us)*/
 int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 		      const struct nf_conn *ignored_conntrack);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ee2886126e3d..f1787c04a4dd 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -91,17 +91,9 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
 	if (nf_ct_is_untracked(ct))
 		return NF_ACCEPT;
 
-	nat = nfct_nat(ct);
-	if (!nat) {
-		/* NAT module was loaded late. */
-		if (nf_ct_is_confirmed(ct))
-			return NF_ACCEPT;
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL) {
-			pr_debug("failed to add NAT extension\n");
-			return NF_ACCEPT;
-		}
-	}
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
 
 	switch (ctinfo) {
 	case IP_CT_RELATED:
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index b5b256d45e67..3964157d826c 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -48,15 +48,9 @@ static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
 
 	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
 
-	nat = nfct_nat(ct);
-	if (nat == NULL) {
-		/* Conntrack module was loaded late, can't add extension. */
-		if (nf_ct_is_confirmed(ct))
-			return NF_ACCEPT;
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL)
-			return NF_ACCEPT;
-	}
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
 
 	switch (ctinfo) {
 	case IP_CT_RELATED:
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 84c7f33d0cf8..387d8b8fc18d 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -90,17 +90,9 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
 	if (nf_ct_is_untracked(ct))
 		return NF_ACCEPT;
 
-	nat = nfct_nat(ct);
-	if (!nat) {
-		/* NAT module was loaded late. */
-		if (nf_ct_is_confirmed(ct))
-			return NF_ACCEPT;
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL) {
-			pr_debug("failed to add NAT extension\n");
-			return NF_ACCEPT;
-		}
-	}
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
 
 	switch (ctinfo) {
 	case IP_CT_RELATED:
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 9c3297a768fd..d189fcb437fe 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -47,15 +47,9 @@ static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
 	if (ct == NULL || nf_ct_is_untracked(ct))
 		return NF_ACCEPT;
 
-	nat = nfct_nat(ct);
-	if (nat == NULL) {
-		/* Conntrack module was loaded late, can't add extension. */
-		if (nf_ct_is_confirmed(ct))
-			return NF_ACCEPT;
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL)
-			return NF_ACCEPT;
-	}
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
 
 	switch (ctinfo) {
 	case IP_CT_RELATED:
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 52ca952b802c..09096a670c45 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -358,6 +358,19 @@ out:
 	rcu_read_unlock();
 }
 
+struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
+{
+	struct nf_conn_nat *nat = nfct_nat(ct);
+	if (nat)
+		return nat;
+
+	if (!nf_ct_is_confirmed(ct))
+		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+
+	return nat;
+}
+EXPORT_SYMBOL_GPL(nf_ct_nat_ext_add);
+
 unsigned int
 nf_nat_setup_info(struct nf_conn *ct,
 		  const struct nf_nat_range *range,
@@ -368,14 +381,9 @@ nf_nat_setup_info(struct nf_conn *ct,
 	struct nf_conn_nat *nat;
 
 	/* nat helper or nfctnetlink also setup binding */
-	nat = nfct_nat(ct);
-	if (!nat) {
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL) {
-			pr_debug("failed to add NAT extension\n");
-			return NF_ACCEPT;
-		}
-	}
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat == NULL)
+		return NF_ACCEPT;
 
 	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
 		     maniptype == NF_NAT_MANIP_DST);
-- 
cgit v1.2.3-71-gd317


From 9cd4360de6090a6daf7fbe024e34953f2ae60ef2 Mon Sep 17 00:00:00 2001
From: Srikanth Thokala <sthokal@xilinx.com>
Date: Wed, 23 Apr 2014 20:23:26 +0530
Subject: dma: Add Xilinx AXI Video Direct Memory Access Engine driver support

This is the driver for the AXI Video Direct Memory Access (AXI
VDMA) core, which is a soft Xilinx IP core that provides high-
bandwidth direct memory access between memory and AXI4-Stream
type video target peripherals. The core provides efficient two
dimensional DMA operations with independent asynchronous read
and write channel operation.

This module works on Zynq (ARM Based SoC) and Microblaze platforms.

Signed-off-by: Srikanth Thokala <sthokal@xilinx.com>
Acked-by: Jassi Brar <jassisinghbrar@gmail.com>
Reviewed-by: Levente Kurusa <levex@linux.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/Kconfig              |   14 +
 drivers/dma/Makefile             |    1 +
 drivers/dma/xilinx/Makefile      |    1 +
 drivers/dma/xilinx/xilinx_vdma.c | 1379 ++++++++++++++++++++++++++++++++++++++
 include/linux/amba/xilinx_dma.h  |   47 ++
 5 files changed, 1442 insertions(+)
 create mode 100644 drivers/dma/xilinx/Makefile
 create mode 100644 drivers/dma/xilinx/xilinx_vdma.c
 create mode 100644 include/linux/amba/xilinx_dma.h

(limited to 'include')

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 5c5863842de9..b30b7ed89fb2 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -361,6 +361,20 @@ config FSL_EDMA
 	  multiplexing capability for DMA request sources(slot).
 	  This module can be found on Freescale Vybrid and LS-1 SoCs.
 
+config XILINX_VDMA
+	tristate "Xilinx AXI VDMA Engine"
+	depends on (ARCH_ZYNQ || MICROBLAZE)
+	select DMA_ENGINE
+	help
+	  Enable support for Xilinx AXI VDMA Soft IP.
+
+	  This engine provides high-bandwidth direct memory access
+	  between memory and AXI4-Stream video type target
+	  peripherals including peripherals which support AXI4-
+	  Stream Video Protocol.  It has two stream interfaces/
+	  channels, Memory Mapped to Stream (MM2S) and Stream to
+	  Memory Mapped (S2MM) for the data transfers.
+
 config DMA_ENGINE
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 5150c82c9caf..c779e1eb2db2 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -46,3 +46,4 @@ obj-$(CONFIG_K3_DMA) += k3dma.o
 obj-$(CONFIG_MOXART_DMA) += moxart-dma.o
 obj-$(CONFIG_FSL_EDMA) += fsl-edma.o
 obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o
+obj-y += xilinx/
diff --git a/drivers/dma/xilinx/Makefile b/drivers/dma/xilinx/Makefile
new file mode 100644
index 000000000000..3c4e9f2fea28
--- /dev/null
+++ b/drivers/dma/xilinx/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_XILINX_VDMA) += xilinx_vdma.o
diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c
new file mode 100644
index 000000000000..42a13e8d4607
--- /dev/null
+++ b/drivers/dma/xilinx/xilinx_vdma.c
@@ -0,0 +1,1379 @@
+/*
+ * DMA driver for Xilinx Video DMA Engine
+ *
+ * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
+ *
+ * Based on the Freescale DMA driver.
+ *
+ * Description:
+ * The AXI Video Direct Memory Access (AXI VDMA) core is a soft Xilinx IP
+ * core that provides high-bandwidth direct memory access between memory
+ * and AXI4-Stream type video target peripherals. The core provides efficient
+ * two dimensional DMA operations with independent asynchronous read (S2MM)
+ * and write (MM2S) channel operation. It can be configured to have either
+ * one channel or two channels. If configured as two channels, one is to
+ * transmit to the video device (MM2S) and another is to receive from the
+ * video device (S2MM). Initialization, status, interrupt and management
+ * registers are accessed through an AXI4-Lite slave interface.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/amba/xilinx_dma.h>
+#include <linux/bitops.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+
+#include "../dmaengine.h"
+
+/* Register/Descriptor Offsets */
+#define XILINX_VDMA_MM2S_CTRL_OFFSET		0x0000
+#define XILINX_VDMA_S2MM_CTRL_OFFSET		0x0030
+#define XILINX_VDMA_MM2S_DESC_OFFSET		0x0050
+#define XILINX_VDMA_S2MM_DESC_OFFSET		0x00a0
+
+/* Control Registers */
+#define XILINX_VDMA_REG_DMACR			0x0000
+#define XILINX_VDMA_DMACR_DELAY_MAX		0xff
+#define XILINX_VDMA_DMACR_DELAY_SHIFT		24
+#define XILINX_VDMA_DMACR_FRAME_COUNT_MAX	0xff
+#define XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT	16
+#define XILINX_VDMA_DMACR_ERR_IRQ		BIT(14)
+#define XILINX_VDMA_DMACR_DLY_CNT_IRQ		BIT(13)
+#define XILINX_VDMA_DMACR_FRM_CNT_IRQ		BIT(12)
+#define XILINX_VDMA_DMACR_MASTER_SHIFT		8
+#define XILINX_VDMA_DMACR_FSYNCSRC_SHIFT	5
+#define XILINX_VDMA_DMACR_FRAMECNT_EN		BIT(4)
+#define XILINX_VDMA_DMACR_GENLOCK_EN		BIT(3)
+#define XILINX_VDMA_DMACR_RESET			BIT(2)
+#define XILINX_VDMA_DMACR_CIRC_EN		BIT(1)
+#define XILINX_VDMA_DMACR_RUNSTOP		BIT(0)
+#define XILINX_VDMA_DMACR_FSYNCSRC_MASK		GENMASK(6, 5)
+
+#define XILINX_VDMA_REG_DMASR			0x0004
+#define XILINX_VDMA_DMASR_EOL_LATE_ERR		BIT(15)
+#define XILINX_VDMA_DMASR_ERR_IRQ		BIT(14)
+#define XILINX_VDMA_DMASR_DLY_CNT_IRQ		BIT(13)
+#define XILINX_VDMA_DMASR_FRM_CNT_IRQ		BIT(12)
+#define XILINX_VDMA_DMASR_SOF_LATE_ERR		BIT(11)
+#define XILINX_VDMA_DMASR_SG_DEC_ERR		BIT(10)
+#define XILINX_VDMA_DMASR_SG_SLV_ERR		BIT(9)
+#define XILINX_VDMA_DMASR_EOF_EARLY_ERR		BIT(8)
+#define XILINX_VDMA_DMASR_SOF_EARLY_ERR		BIT(7)
+#define XILINX_VDMA_DMASR_DMA_DEC_ERR		BIT(6)
+#define XILINX_VDMA_DMASR_DMA_SLAVE_ERR		BIT(5)
+#define XILINX_VDMA_DMASR_DMA_INT_ERR		BIT(4)
+#define XILINX_VDMA_DMASR_IDLE			BIT(1)
+#define XILINX_VDMA_DMASR_HALTED		BIT(0)
+#define XILINX_VDMA_DMASR_DELAY_MASK		GENMASK(31, 24)
+#define XILINX_VDMA_DMASR_FRAME_COUNT_MASK	GENMASK(23, 16)
+
+#define XILINX_VDMA_REG_CURDESC			0x0008
+#define XILINX_VDMA_REG_TAILDESC		0x0010
+#define XILINX_VDMA_REG_REG_INDEX		0x0014
+#define XILINX_VDMA_REG_FRMSTORE		0x0018
+#define XILINX_VDMA_REG_THRESHOLD		0x001c
+#define XILINX_VDMA_REG_FRMPTR_STS		0x0024
+#define XILINX_VDMA_REG_PARK_PTR		0x0028
+#define XILINX_VDMA_PARK_PTR_WR_REF_SHIFT	8
+#define XILINX_VDMA_PARK_PTR_RD_REF_SHIFT	0
+#define XILINX_VDMA_REG_VDMA_VERSION		0x002c
+
+/* Register Direct Mode Registers */
+#define XILINX_VDMA_REG_VSIZE			0x0000
+#define XILINX_VDMA_REG_HSIZE			0x0004
+
+#define XILINX_VDMA_REG_FRMDLY_STRIDE		0x0008
+#define XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT	24
+#define XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT	0
+
+#define XILINX_VDMA_REG_START_ADDRESS(n)	(0x000c + 4 * (n))
+
+/* HW specific definitions */
+#define XILINX_VDMA_MAX_CHANS_PER_DEVICE	0x2
+
+#define XILINX_VDMA_DMAXR_ALL_IRQ_MASK	\
+		(XILINX_VDMA_DMASR_FRM_CNT_IRQ | \
+		 XILINX_VDMA_DMASR_DLY_CNT_IRQ | \
+		 XILINX_VDMA_DMASR_ERR_IRQ)
+
+#define XILINX_VDMA_DMASR_ALL_ERR_MASK	\
+		(XILINX_VDMA_DMASR_EOL_LATE_ERR | \
+		 XILINX_VDMA_DMASR_SOF_LATE_ERR | \
+		 XILINX_VDMA_DMASR_SG_DEC_ERR | \
+		 XILINX_VDMA_DMASR_SG_SLV_ERR | \
+		 XILINX_VDMA_DMASR_EOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_SOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_DMA_DEC_ERR | \
+		 XILINX_VDMA_DMASR_DMA_SLAVE_ERR | \
+		 XILINX_VDMA_DMASR_DMA_INT_ERR)
+
+/*
+ * Recoverable errors are DMA Internal error, SOF Early, EOF Early
+ * and SOF Late. They are only recoverable when C_FLUSH_ON_FSYNC
+ * is enabled in the h/w system.
+ */
+#define XILINX_VDMA_DMASR_ERR_RECOVER_MASK	\
+		(XILINX_VDMA_DMASR_SOF_LATE_ERR | \
+		 XILINX_VDMA_DMASR_EOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_SOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_DMA_INT_ERR)
+
+/* Axi VDMA Flush on Fsync bits */
+#define XILINX_VDMA_FLUSH_S2MM		3
+#define XILINX_VDMA_FLUSH_MM2S		2
+#define XILINX_VDMA_FLUSH_BOTH		1
+
+/* Delay loop counter to prevent hardware failure */
+#define XILINX_VDMA_LOOP_COUNT		1000000
+
+/**
+ * struct xilinx_vdma_desc_hw - Hardware Descriptor
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @pad1: Reserved @0x04
+ * @buf_addr: Buffer address @0x08
+ * @pad2: Reserved @0x0C
+ * @vsize: Vertical Size @0x10
+ * @hsize: Horizontal Size @0x14
+ * @stride: Number of bytes between the first
+ *	    pixels of each horizontal line @0x18
+ */
+struct xilinx_vdma_desc_hw {
+	u32 next_desc;
+	u32 pad1;
+	u32 buf_addr;
+	u32 pad2;
+	u32 vsize;
+	u32 hsize;
+	u32 stride;
+} __aligned(64);
+
+/**
+ * struct xilinx_vdma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_vdma_tx_segment {
+	struct xilinx_vdma_desc_hw hw;
+	struct list_head node;
+	dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_vdma_tx_descriptor - Per Transaction structure
+ * @async_tx: Async transaction descriptor
+ * @segments: TX segments list
+ * @node: Node in the channel descriptors list
+ */
+struct xilinx_vdma_tx_descriptor {
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head segments;
+	struct list_head node;
+};
+
+/**
+ * struct xilinx_vdma_chan - Driver specific VDMA channel structure
+ * @xdev: Driver specific device structure
+ * @ctrl_offset: Control registers offset
+ * @desc_offset: TX descriptor registers offset
+ * @lock: Descriptor operation lock
+ * @pending_list: Descriptors waiting
+ * @active_desc: Active descriptor
+ * @allocated_desc: Allocated descriptor
+ * @done_list: Complete descriptors
+ * @common: DMA common channel
+ * @desc_pool: Descriptors pool
+ * @dev: The dma device
+ * @irq: Channel IRQ
+ * @id: Channel ID
+ * @direction: Transfer direction
+ * @num_frms: Number of frames
+ * @has_sg: Support scatter transfers
+ * @genlock: Support genlock mode
+ * @err: Channel has errors
+ * @tasklet: Cleanup work after irq
+ * @config: Device configuration info
+ * @flush_on_fsync: Flush on Frame sync
+ */
+struct xilinx_vdma_chan {
+	struct xilinx_vdma_device *xdev;
+	u32 ctrl_offset;
+	u32 desc_offset;
+	spinlock_t lock;
+	struct list_head pending_list;
+	struct xilinx_vdma_tx_descriptor *active_desc;
+	struct xilinx_vdma_tx_descriptor *allocated_desc;
+	struct list_head done_list;
+	struct dma_chan common;
+	struct dma_pool *desc_pool;
+	struct device *dev;
+	int irq;
+	int id;
+	enum dma_transfer_direction direction;
+	int num_frms;
+	bool has_sg;
+	bool genlock;
+	bool err;
+	struct tasklet_struct tasklet;
+	struct xilinx_vdma_config config;
+	bool flush_on_fsync;
+};
+
+/**
+ * struct xilinx_vdma_device - VDMA device structure
+ * @regs: I/O mapped base address
+ * @dev: Device Structure
+ * @common: DMA device structure
+ * @chan: Driver specific VDMA channel
+ * @has_sg: Specifies whether Scatter-Gather is present or not
+ * @flush_on_fsync: Flush on frame sync
+ */
+struct xilinx_vdma_device {
+	void __iomem *regs;
+	struct device *dev;
+	struct dma_device common;
+	struct xilinx_vdma_chan *chan[XILINX_VDMA_MAX_CHANS_PER_DEVICE];
+	bool has_sg;
+	u32 flush_on_fsync;
+};
+
+/* Macros */
+#define to_xilinx_chan(chan) \
+	container_of(chan, struct xilinx_vdma_chan, common)
+#define to_vdma_tx_descriptor(tx) \
+	container_of(tx, struct xilinx_vdma_tx_descriptor, async_tx)
+
+/* IO accessors */
+static inline u32 vdma_read(struct xilinx_vdma_chan *chan, u32 reg)
+{
+	return ioread32(chan->xdev->regs + reg);
+}
+
+static inline void vdma_write(struct xilinx_vdma_chan *chan, u32 reg, u32 value)
+{
+	iowrite32(value, chan->xdev->regs + reg);
+}
+
+static inline void vdma_desc_write(struct xilinx_vdma_chan *chan, u32 reg,
+				   u32 value)
+{
+	vdma_write(chan, chan->desc_offset + reg, value);
+}
+
+static inline u32 vdma_ctrl_read(struct xilinx_vdma_chan *chan, u32 reg)
+{
+	return vdma_read(chan, chan->ctrl_offset + reg);
+}
+
+static inline void vdma_ctrl_write(struct xilinx_vdma_chan *chan, u32 reg,
+				   u32 value)
+{
+	vdma_write(chan, chan->ctrl_offset + reg, value);
+}
+
+static inline void vdma_ctrl_clr(struct xilinx_vdma_chan *chan, u32 reg,
+				 u32 clr)
+{
+	vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) & ~clr);
+}
+
+static inline void vdma_ctrl_set(struct xilinx_vdma_chan *chan, u32 reg,
+				 u32 set)
+{
+	vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) | set);
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors and segments alloc and free
+ */
+
+/**
+ * xilinx_vdma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_vdma_tx_segment *
+xilinx_vdma_alloc_tx_segment(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_tx_segment *segment;
+	dma_addr_t phys;
+
+	segment = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &phys);
+	if (!segment)
+		return NULL;
+
+	memset(segment, 0, sizeof(*segment));
+	segment->phys = phys;
+
+	return segment;
+}
+
+/**
+ * xilinx_vdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific VDMA channel
+ * @segment: VDMA transaction segment
+ */
+static void xilinx_vdma_free_tx_segment(struct xilinx_vdma_chan *chan,
+					struct xilinx_vdma_tx_segment *segment)
+{
+	dma_pool_free(chan->desc_pool, segment, segment->phys);
+}
+
+/**
+ * xilinx_vdma_tx_descriptor - Allocate transaction descriptor
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: The allocated descriptor on success and NULL on failure.
+ */
+static struct xilinx_vdma_tx_descriptor *
+xilinx_vdma_alloc_tx_descriptor(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_tx_descriptor *desc;
+	unsigned long flags;
+
+	if (chan->allocated_desc)
+		return chan->allocated_desc;
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return NULL;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->allocated_desc = desc;
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	INIT_LIST_HEAD(&desc->segments);
+
+	return desc;
+}
+
+/**
+ * xilinx_vdma_free_tx_descriptor - Free transaction descriptor
+ * @chan: Driver specific VDMA channel
+ * @desc: VDMA transaction descriptor
+ */
+static void
+xilinx_vdma_free_tx_descriptor(struct xilinx_vdma_chan *chan,
+			       struct xilinx_vdma_tx_descriptor *desc)
+{
+	struct xilinx_vdma_tx_segment *segment, *next;
+
+	if (!desc)
+		return;
+
+	list_for_each_entry_safe(segment, next, &desc->segments, node) {
+		list_del(&segment->node);
+		xilinx_vdma_free_tx_segment(chan, segment);
+	}
+
+	kfree(desc);
+}
+
+/* Required functions */
+
+/**
+ * xilinx_vdma_free_desc_list - Free descriptors list
+ * @chan: Driver specific VDMA channel
+ * @list: List to parse and delete the descriptor
+ */
+static void xilinx_vdma_free_desc_list(struct xilinx_vdma_chan *chan,
+					struct list_head *list)
+{
+	struct xilinx_vdma_tx_descriptor *desc, *next;
+
+	list_for_each_entry_safe(desc, next, list, node) {
+		list_del(&desc->node);
+		xilinx_vdma_free_tx_descriptor(chan, desc);
+	}
+}
+
+/**
+ * xilinx_vdma_free_descriptors - Free channel descriptors
+ * @chan: Driver specific VDMA channel
+ */
+static void xilinx_vdma_free_descriptors(struct xilinx_vdma_chan *chan)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	xilinx_vdma_free_desc_list(chan, &chan->pending_list);
+	xilinx_vdma_free_desc_list(chan, &chan->done_list);
+
+	xilinx_vdma_free_tx_descriptor(chan, chan->active_desc);
+	chan->active_desc = NULL;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_vdma_free_chan_resources - Free channel resources
+ * @dchan: DMA channel
+ */
+static void xilinx_vdma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+
+	dev_dbg(chan->dev, "Free all channel resources.\n");
+
+	xilinx_vdma_free_descriptors(chan);
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+}
+
+/**
+ * xilinx_vdma_chan_desc_cleanup - Clean channel descriptors
+ * @chan: Driver specific VDMA channel
+ */
+static void xilinx_vdma_chan_desc_cleanup(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_tx_descriptor *desc, *next;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, next, &chan->done_list, node) {
+		dma_async_tx_callback callback;
+		void *callback_param;
+
+		/* Remove from the list of running transactions */
+		list_del(&desc->node);
+
+		/* Run the link descriptor callback function */
+		callback = desc->async_tx.callback;
+		callback_param = desc->async_tx.callback_param;
+		if (callback) {
+			spin_unlock_irqrestore(&chan->lock, flags);
+			callback(callback_param);
+			spin_lock_irqsave(&chan->lock, flags);
+		}
+
+		/* Run any dependencies, then free the descriptor */
+		dma_run_dependencies(&desc->async_tx);
+		xilinx_vdma_free_tx_descriptor(chan, desc);
+	}
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_vdma_do_tasklet - Schedule completion tasklet
+ * @data: Pointer to the Xilinx VDMA channel structure
+ */
+static void xilinx_vdma_do_tasklet(unsigned long data)
+{
+	struct xilinx_vdma_chan *chan = (struct xilinx_vdma_chan *)data;
+
+	xilinx_vdma_chan_desc_cleanup(chan);
+}
+
+/**
+ * xilinx_vdma_alloc_chan_resources - Allocate channel resources
+ * @dchan: DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+
+	/* Has this channel already been allocated? */
+	if (chan->desc_pool)
+		return 0;
+
+	/*
+	 * We need the descriptor to be aligned to 64bytes
+	 * for meeting Xilinx VDMA specification requirement.
+	 */
+	chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool",
+				chan->dev,
+				sizeof(struct xilinx_vdma_tx_segment),
+				__alignof__(struct xilinx_vdma_tx_segment), 0);
+	if (!chan->desc_pool) {
+		dev_err(chan->dev,
+			"unable to allocate channel %d descriptor pool\n",
+			chan->id);
+		return -ENOMEM;
+	}
+
+	dma_cookie_init(dchan);
+	return 0;
+}
+
+/**
+ * xilinx_vdma_tx_status - Get VDMA transaction status
+ * @dchan: DMA channel
+ * @cookie: Transaction identifier
+ * @txstate: Transaction state
+ *
+ * Return: DMA transaction status
+ */
+static enum dma_status xilinx_vdma_tx_status(struct dma_chan *dchan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(dchan, cookie, txstate);
+}
+
+/**
+ * xilinx_vdma_is_running - Check if VDMA channel is running
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: '1' if running, '0' if not.
+ */
+static bool xilinx_vdma_is_running(struct xilinx_vdma_chan *chan)
+{
+	return !(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
+		 XILINX_VDMA_DMASR_HALTED) &&
+		(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) &
+		 XILINX_VDMA_DMACR_RUNSTOP);
+}
+
+/**
+ * xilinx_vdma_is_idle - Check if VDMA channel is idle
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: '1' if idle, '0' if not.
+ */
+static bool xilinx_vdma_is_idle(struct xilinx_vdma_chan *chan)
+{
+	return vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
+		XILINX_VDMA_DMASR_IDLE;
+}
+
+/**
+ * xilinx_vdma_halt - Halt VDMA channel
+ * @chan: Driver specific VDMA channel
+ */
+static void xilinx_vdma_halt(struct xilinx_vdma_chan *chan)
+{
+	int loop = XILINX_VDMA_LOOP_COUNT;
+
+	vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP);
+
+	/* Wait for the hardware to halt */
+	do {
+		if (vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
+		    XILINX_VDMA_DMASR_HALTED)
+			break;
+	} while (loop--);
+
+	if (!loop) {
+		dev_err(chan->dev, "Cannot stop channel %p: %x\n",
+			chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+		chan->err = true;
+	}
+
+	return;
+}
+
+/**
+ * xilinx_vdma_start - Start VDMA channel
+ * @chan: Driver specific VDMA channel
+ */
+static void xilinx_vdma_start(struct xilinx_vdma_chan *chan)
+{
+	int loop = XILINX_VDMA_LOOP_COUNT;
+
+	vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP);
+
+	/* Wait for the hardware to start */
+	do {
+		if (!(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
+		      XILINX_VDMA_DMASR_HALTED))
+			break;
+	} while (loop--);
+
+	if (!loop) {
+		dev_err(chan->dev, "Cannot start channel %p: %x\n",
+			chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+
+		chan->err = true;
+	}
+
+	return;
+}
+
+/**
+ * xilinx_vdma_start_transfer - Starts VDMA transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_config *config = &chan->config;
+	struct xilinx_vdma_tx_descriptor *desc;
+	unsigned long flags;
+	u32 reg;
+	struct xilinx_vdma_tx_segment *head, *tail = NULL;
+
+	if (chan->err)
+		return;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	/* There's already an active descriptor, bail out. */
+	if (chan->active_desc)
+		goto out_unlock;
+
+	if (list_empty(&chan->pending_list))
+		goto out_unlock;
+
+	desc = list_first_entry(&chan->pending_list,
+				struct xilinx_vdma_tx_descriptor, node);
+
+	/* If it is SG mode and hardware is busy, cannot submit */
+	if (chan->has_sg && xilinx_vdma_is_running(chan) &&
+	    !xilinx_vdma_is_idle(chan)) {
+		dev_dbg(chan->dev, "DMA controller still busy\n");
+		goto out_unlock;
+	}
+
+	/*
+	 * If hardware is idle, then all descriptors on the running lists are
+	 * done, start new transfers
+	 */
+	if (chan->has_sg) {
+		head = list_first_entry(&desc->segments,
+					struct xilinx_vdma_tx_segment, node);
+		tail = list_entry(desc->segments.prev,
+				  struct xilinx_vdma_tx_segment, node);
+
+		vdma_ctrl_write(chan, XILINX_VDMA_REG_CURDESC, head->phys);
+	}
+
+	/* Configure the hardware using info in the config structure */
+	reg = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR);
+
+	if (config->frm_cnt_en)
+		reg |= XILINX_VDMA_DMACR_FRAMECNT_EN;
+	else
+		reg &= ~XILINX_VDMA_DMACR_FRAMECNT_EN;
+
+	/*
+	 * With SG, start with circular mode, so that BDs can be fetched.
+	 * In direct register mode, if not parking, enable circular mode
+	 */
+	if (chan->has_sg || !config->park)
+		reg |= XILINX_VDMA_DMACR_CIRC_EN;
+
+	if (config->park)
+		reg &= ~XILINX_VDMA_DMACR_CIRC_EN;
+
+	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, reg);
+
+	if (config->park && (config->park_frm >= 0) &&
+			(config->park_frm < chan->num_frms)) {
+		if (chan->direction == DMA_MEM_TO_DEV)
+			vdma_write(chan, XILINX_VDMA_REG_PARK_PTR,
+				config->park_frm <<
+					XILINX_VDMA_PARK_PTR_RD_REF_SHIFT);
+		else
+			vdma_write(chan, XILINX_VDMA_REG_PARK_PTR,
+				config->park_frm <<
+					XILINX_VDMA_PARK_PTR_WR_REF_SHIFT);
+	}
+
+	/* Start the hardware */
+	xilinx_vdma_start(chan);
+
+	if (chan->err)
+		goto out_unlock;
+
+	/* Start the transfer */
+	if (chan->has_sg) {
+		vdma_ctrl_write(chan, XILINX_VDMA_REG_TAILDESC, tail->phys);
+	} else {
+		struct xilinx_vdma_tx_segment *segment, *last = NULL;
+		int i = 0;
+
+		list_for_each_entry(segment, &desc->segments, node) {
+			vdma_desc_write(chan,
+					XILINX_VDMA_REG_START_ADDRESS(i++),
+					segment->hw.buf_addr);
+			last = segment;
+		}
+
+		if (!last)
+			goto out_unlock;
+
+		/* HW expects these parameters to be same for one transaction */
+		vdma_desc_write(chan, XILINX_VDMA_REG_HSIZE, last->hw.hsize);
+		vdma_desc_write(chan, XILINX_VDMA_REG_FRMDLY_STRIDE,
+				last->hw.stride);
+		vdma_desc_write(chan, XILINX_VDMA_REG_VSIZE, last->hw.vsize);
+	}
+
+	list_del(&desc->node);
+	chan->active_desc = desc;
+
+out_unlock:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_vdma_issue_pending - Issue pending transactions
+ * @dchan: DMA channel
+ */
+static void xilinx_vdma_issue_pending(struct dma_chan *dchan)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+
+	xilinx_vdma_start_transfer(chan);
+}
+
+/**
+ * xilinx_vdma_complete_descriptor - Mark the active descriptor as complete
+ * @chan : xilinx DMA channel
+ *
+ * CONTEXT: hardirq
+ */
+static void xilinx_vdma_complete_descriptor(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_tx_descriptor *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	desc = chan->active_desc;
+	if (!desc) {
+		dev_dbg(chan->dev, "no running descriptors\n");
+		goto out_unlock;
+	}
+
+	dma_cookie_complete(&desc->async_tx);
+	list_add_tail(&desc->node, &chan->done_list);
+
+	chan->active_desc = NULL;
+
+out_unlock:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_vdma_reset - Reset VDMA channel
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_reset(struct xilinx_vdma_chan *chan)
+{
+	int loop = XILINX_VDMA_LOOP_COUNT;
+	u32 tmp;
+
+	vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RESET);
+
+	tmp = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) &
+		XILINX_VDMA_DMACR_RESET;
+
+	/* Wait for the hardware to finish reset */
+	do {
+		tmp = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) &
+			XILINX_VDMA_DMACR_RESET;
+	} while (loop-- && tmp);
+
+	if (!loop) {
+		dev_err(chan->dev, "reset timeout, cr %x, sr %x\n",
+			vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR),
+			vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+		return -ETIMEDOUT;
+	}
+
+	chan->err = false;
+
+	return 0;
+}
+
+/**
+ * xilinx_vdma_chan_reset - Reset VDMA channel and enable interrupts
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_chan_reset(struct xilinx_vdma_chan *chan)
+{
+	int err;
+
+	/* Reset VDMA */
+	err = xilinx_vdma_reset(chan);
+	if (err)
+		return err;
+
+	/* Enable interrupts */
+	vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR,
+		      XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+
+	return 0;
+}
+
+/**
+ * xilinx_vdma_irq_handler - VDMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the Xilinx VDMA channel structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data)
+{
+	struct xilinx_vdma_chan *chan = data;
+	u32 status;
+
+	/* Read the status and ack the interrupts. */
+	status = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR);
+	if (!(status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK))
+		return IRQ_NONE;
+
+	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR,
+			status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+
+	if (status & XILINX_VDMA_DMASR_ERR_IRQ) {
+		/*
+		 * An error occurred. If C_FLUSH_ON_FSYNC is enabled and the
+		 * error is recoverable, ignore it. Otherwise flag the error.
+		 *
+		 * Only recoverable errors can be cleared in the DMASR register,
+		 * make sure not to write to other error bits to 1.
+		 */
+		u32 errors = status & XILINX_VDMA_DMASR_ALL_ERR_MASK;
+		vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR,
+				errors & XILINX_VDMA_DMASR_ERR_RECOVER_MASK);
+
+		if (!chan->flush_on_fsync ||
+		    (errors & ~XILINX_VDMA_DMASR_ERR_RECOVER_MASK)) {
+			dev_err(chan->dev,
+				"Channel %p has errors %x, cdr %x tdr %x\n",
+				chan, errors,
+				vdma_ctrl_read(chan, XILINX_VDMA_REG_CURDESC),
+				vdma_ctrl_read(chan, XILINX_VDMA_REG_TAILDESC));
+			chan->err = true;
+		}
+	}
+
+	if (status & XILINX_VDMA_DMASR_DLY_CNT_IRQ) {
+		/*
+		 * Device takes too long to do the transfer when user requires
+		 * responsiveness.
+		 */
+		dev_dbg(chan->dev, "Inter-packet latency too long\n");
+	}
+
+	if (status & XILINX_VDMA_DMASR_FRM_CNT_IRQ) {
+		xilinx_vdma_complete_descriptor(chan);
+		xilinx_vdma_start_transfer(chan);
+	}
+
+	tasklet_schedule(&chan->tasklet);
+	return IRQ_HANDLED;
+}
+
+/**
+ * xilinx_vdma_tx_submit - Submit DMA transaction
+ * @tx: Async transaction descriptor
+ *
+ * Return: cookie value on success and failure value on error
+ */
+static dma_cookie_t xilinx_vdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct xilinx_vdma_tx_descriptor *desc = to_vdma_tx_descriptor(tx);
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(tx->chan);
+	dma_cookie_t cookie;
+	unsigned long flags;
+	int err;
+
+	if (chan->err) {
+		/*
+		 * If reset fails, need to hard reset the system.
+		 * Channel is no longer functional
+		 */
+		err = xilinx_vdma_chan_reset(chan);
+		if (err < 0)
+			return err;
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	cookie = dma_cookie_assign(tx);
+
+	/* Append the transaction to the pending transactions queue. */
+	list_add_tail(&desc->node, &chan->pending_list);
+
+	/* Free the allocated desc */
+	chan->allocated_desc = NULL;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return cookie;
+}
+
+/**
+ * xilinx_vdma_dma_prep_interleaved - prepare a descriptor for a
+ *	DMA_SLAVE transaction
+ * @dchan: DMA channel
+ * @xt: Interleaved template pointer
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan,
+				 struct dma_interleaved_template *xt,
+				 unsigned long flags)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_vdma_tx_descriptor *desc;
+	struct xilinx_vdma_tx_segment *segment, *prev = NULL;
+	struct xilinx_vdma_desc_hw *hw;
+
+	if (!is_slave_direction(xt->dir))
+		return NULL;
+
+	if (!xt->numf || !xt->sgl[0].size)
+		return NULL;
+
+	/* Allocate a transaction descriptor. */
+	desc = xilinx_vdma_alloc_tx_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = xilinx_vdma_tx_submit;
+	async_tx_ack(&desc->async_tx);
+
+	/* Allocate the link descriptor from DMA pool */
+	segment = xilinx_vdma_alloc_tx_segment(chan);
+	if (!segment)
+		goto error;
+
+	/* Fill in the hardware descriptor */
+	hw = &segment->hw;
+	hw->vsize = xt->numf;
+	hw->hsize = xt->sgl[0].size;
+	hw->stride = xt->sgl[0].icg <<
+			XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT;
+	hw->stride |= chan->config.frm_dly <<
+			XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT;
+
+	if (xt->dir != DMA_MEM_TO_DEV)
+		hw->buf_addr = xt->dst_start;
+	else
+		hw->buf_addr = xt->src_start;
+
+	/* Link the previous next descriptor to current */
+	prev = list_last_entry(&desc->segments,
+				struct xilinx_vdma_tx_segment, node);
+	prev->hw.next_desc = segment->phys;
+
+	/* Insert the segment into the descriptor segments list. */
+	list_add_tail(&segment->node, &desc->segments);
+
+	prev = segment;
+
+	/* Link the last hardware descriptor with the first. */
+	segment = list_first_entry(&desc->segments,
+				   struct xilinx_vdma_tx_segment, node);
+	prev->hw.next_desc = segment->phys;
+
+	return &desc->async_tx;
+
+error:
+	xilinx_vdma_free_tx_descriptor(chan, desc);
+	return NULL;
+}
+
+/**
+ * xilinx_vdma_terminate_all - Halt the channel and free descriptors
+ * @chan: Driver specific VDMA Channel pointer
+ */
+static void xilinx_vdma_terminate_all(struct xilinx_vdma_chan *chan)
+{
+	/* Halt the DMA engine */
+	xilinx_vdma_halt(chan);
+
+	/* Remove and free all of the descriptors in the lists */
+	xilinx_vdma_free_descriptors(chan);
+}
+
+/**
+ * xilinx_vdma_channel_set_config - Configure VDMA channel
+ * Run-time configuration for Axi VDMA, supports:
+ * . halt the channel
+ * . configure interrupt coalescing and inter-packet delay threshold
+ * . start/stop parking
+ * . enable genlock
+ *
+ * @dchan: DMA channel
+ * @cfg: VDMA device configuration pointer
+ *
+ * Return: '0' on success and failure value on error
+ */
+int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
+					struct xilinx_vdma_config *cfg)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+	u32 dmacr;
+
+	if (cfg->reset)
+		return xilinx_vdma_chan_reset(chan);
+
+	dmacr = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR);
+
+	chan->config.frm_dly = cfg->frm_dly;
+	chan->config.park = cfg->park;
+
+	/* genlock settings */
+	chan->config.gen_lock = cfg->gen_lock;
+	chan->config.master = cfg->master;
+
+	if (cfg->gen_lock && chan->genlock) {
+		dmacr |= XILINX_VDMA_DMACR_GENLOCK_EN;
+		dmacr |= cfg->master << XILINX_VDMA_DMACR_MASTER_SHIFT;
+	}
+
+	chan->config.frm_cnt_en = cfg->frm_cnt_en;
+	if (cfg->park)
+		chan->config.park_frm = cfg->park_frm;
+	else
+		chan->config.park_frm = -1;
+
+	chan->config.coalesc = cfg->coalesc;
+	chan->config.delay = cfg->delay;
+
+	if (cfg->coalesc <= XILINX_VDMA_DMACR_FRAME_COUNT_MAX) {
+		dmacr |= cfg->coalesc << XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT;
+		chan->config.coalesc = cfg->coalesc;
+	}
+
+	if (cfg->delay <= XILINX_VDMA_DMACR_DELAY_MAX) {
+		dmacr |= cfg->delay << XILINX_VDMA_DMACR_DELAY_SHIFT;
+		chan->config.delay = cfg->delay;
+	}
+
+	/* FSync Source selection */
+	dmacr &= ~XILINX_VDMA_DMACR_FSYNCSRC_MASK;
+	dmacr |= cfg->ext_fsync << XILINX_VDMA_DMACR_FSYNCSRC_SHIFT;
+
+	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, dmacr);
+
+	return 0;
+}
+EXPORT_SYMBOL(xilinx_vdma_channel_set_config);
+
+/**
+ * xilinx_vdma_device_control - Configure DMA channel of the device
+ * @dchan: DMA Channel pointer
+ * @cmd: DMA control command
+ * @arg: Channel configuration
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_device_control(struct dma_chan *dchan,
+				      enum dma_ctrl_cmd cmd, unsigned long arg)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+
+	if (cmd != DMA_TERMINATE_ALL)
+		return -ENXIO;
+
+	xilinx_vdma_terminate_all(chan);
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+/**
+ * xilinx_vdma_chan_remove - Per Channel remove function
+ * @chan: Driver specific VDMA channel
+ */
+static void xilinx_vdma_chan_remove(struct xilinx_vdma_chan *chan)
+{
+	/* Disable all interrupts */
+	vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR,
+		      XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+
+	if (chan->irq > 0)
+		free_irq(chan->irq, chan);
+
+	tasklet_kill(&chan->tasklet);
+
+	list_del(&chan->common.device_node);
+}
+
+/**
+ * xilinx_vdma_chan_probe - Per Channel Probing
+ * It get channel features from the device tree entry and
+ * initialize special channel handling routines
+ *
+ * @xdev: Driver specific device structure
+ * @node: Device node
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev,
+				  struct device_node *node)
+{
+	struct xilinx_vdma_chan *chan;
+	bool has_dre = false;
+	u32 value, width;
+	int err;
+
+	/* Allocate and initialize the channel structure */
+	chan = devm_kzalloc(xdev->dev, sizeof(*chan), GFP_KERNEL);
+	if (!chan)
+		return -ENOMEM;
+
+	chan->dev = xdev->dev;
+	chan->xdev = xdev;
+	chan->has_sg = xdev->has_sg;
+
+	spin_lock_init(&chan->lock);
+	INIT_LIST_HEAD(&chan->pending_list);
+	INIT_LIST_HEAD(&chan->done_list);
+
+	/* Retrieve the channel properties from the device tree */
+	has_dre = of_property_read_bool(node, "xlnx,include-dre");
+
+	chan->genlock = of_property_read_bool(node, "xlnx,genlock-mode");
+
+	err = of_property_read_u32(node, "xlnx,datawidth", &value);
+	if (err) {
+		dev_err(xdev->dev, "missing xlnx,datawidth property\n");
+		return err;
+	}
+	width = value >> 3; /* Convert bits to bytes */
+
+	/* If data width is greater than 8 bytes, DRE is not in hw */
+	if (width > 8)
+		has_dre = false;
+
+	if (!has_dre)
+		xdev->common.copy_align = fls(width - 1);
+
+	if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel")) {
+		chan->direction = DMA_MEM_TO_DEV;
+		chan->id = 0;
+
+		chan->ctrl_offset = XILINX_VDMA_MM2S_CTRL_OFFSET;
+		chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET;
+
+		if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH ||
+		    xdev->flush_on_fsync == XILINX_VDMA_FLUSH_MM2S)
+			chan->flush_on_fsync = true;
+	} else if (of_device_is_compatible(node,
+					    "xlnx,axi-vdma-s2mm-channel")) {
+		chan->direction = DMA_DEV_TO_MEM;
+		chan->id = 1;
+
+		chan->ctrl_offset = XILINX_VDMA_S2MM_CTRL_OFFSET;
+		chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET;
+
+		if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH ||
+		    xdev->flush_on_fsync == XILINX_VDMA_FLUSH_S2MM)
+			chan->flush_on_fsync = true;
+	} else {
+		dev_err(xdev->dev, "Invalid channel compatible node\n");
+		return -EINVAL;
+	}
+
+	/* Request the interrupt */
+	chan->irq = irq_of_parse_and_map(node, 0);
+	err = request_irq(chan->irq, xilinx_vdma_irq_handler, IRQF_SHARED,
+			  "xilinx-vdma-controller", chan);
+	if (err) {
+		dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq);
+		return err;
+	}
+
+	/* Initialize the tasklet */
+	tasklet_init(&chan->tasklet, xilinx_vdma_do_tasklet,
+			(unsigned long)chan);
+
+	/*
+	 * Initialize the DMA channel and add it to the DMA engine channels
+	 * list.
+	 */
+	chan->common.device = &xdev->common;
+
+	list_add_tail(&chan->common.device_node, &xdev->common.channels);
+	xdev->chan[chan->id] = chan;
+
+	/* Reset the channel */
+	err = xilinx_vdma_chan_reset(chan);
+	if (err < 0) {
+		dev_err(xdev->dev, "Reset channel failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * of_dma_xilinx_xlate - Translation function
+ * @dma_spec: Pointer to DMA specifier as found in the device tree
+ * @ofdma: Pointer to DMA controller data
+ *
+ * Return: DMA channel pointer on success and NULL on error
+ */
+static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
+						struct of_dma *ofdma)
+{
+	struct xilinx_vdma_device *xdev = ofdma->of_dma_data;
+	int chan_id = dma_spec->args[0];
+
+	if (chan_id >= XILINX_VDMA_MAX_CHANS_PER_DEVICE)
+		return NULL;
+
+	return dma_get_slave_channel(&xdev->chan[chan_id]->common);
+}
+
+/**
+ * xilinx_vdma_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct xilinx_vdma_device *xdev;
+	struct device_node *child;
+	struct resource *io;
+	u32 num_frames;
+	int i, err;
+
+	/* Allocate and initialize the DMA engine structure */
+	xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL);
+	if (!xdev)
+		return -ENOMEM;
+
+	xdev->dev = &pdev->dev;
+
+	/* Request and map I/O memory */
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	xdev->regs = devm_ioremap_resource(&pdev->dev, io);
+	if (IS_ERR(xdev->regs))
+		return PTR_ERR(xdev->regs);
+
+	/* Retrieve the DMA engine properties from the device tree */
+	xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg");
+
+	err = of_property_read_u32(node, "xlnx,num-fstores", &num_frames);
+	if (err < 0) {
+		dev_err(xdev->dev, "missing xlnx,num-fstores property\n");
+		return err;
+	}
+
+	err = of_property_read_u32(node, "xlnx,flush-fsync",
+					&xdev->flush_on_fsync);
+	if (err < 0)
+		dev_warn(xdev->dev, "missing xlnx,flush-fsync property\n");
+
+	/* Initialize the DMA engine */
+	xdev->common.dev = &pdev->dev;
+
+	INIT_LIST_HEAD(&xdev->common.channels);
+	dma_cap_set(DMA_SLAVE, xdev->common.cap_mask);
+	dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask);
+
+	xdev->common.device_alloc_chan_resources =
+				xilinx_vdma_alloc_chan_resources;
+	xdev->common.device_free_chan_resources =
+				xilinx_vdma_free_chan_resources;
+	xdev->common.device_prep_interleaved_dma =
+				xilinx_vdma_dma_prep_interleaved;
+	xdev->common.device_control = xilinx_vdma_device_control;
+	xdev->common.device_tx_status = xilinx_vdma_tx_status;
+	xdev->common.device_issue_pending = xilinx_vdma_issue_pending;
+
+	platform_set_drvdata(pdev, xdev);
+
+	/* Initialize the channels */
+	for_each_child_of_node(node, child) {
+		err = xilinx_vdma_chan_probe(xdev, child);
+		if (err < 0)
+			goto error;
+	}
+
+	for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++)
+		if (xdev->chan[i])
+			xdev->chan[i]->num_frms = num_frames;
+
+	/* Register the DMA engine with the core */
+	dma_async_device_register(&xdev->common);
+
+	err = of_dma_controller_register(node, of_dma_xilinx_xlate,
+					 xdev);
+	if (err < 0) {
+		dev_err(&pdev->dev, "Unable to register DMA to DT\n");
+		dma_async_device_unregister(&xdev->common);
+		goto error;
+	}
+
+	dev_info(&pdev->dev, "Xilinx AXI VDMA Engine Driver Probed!!\n");
+
+	return 0;
+
+error:
+	for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++)
+		if (xdev->chan[i])
+			xilinx_vdma_chan_remove(xdev->chan[i]);
+
+	return err;
+}
+
+/**
+ * xilinx_vdma_remove - Driver remove function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: Always '0'
+ */
+static int xilinx_vdma_remove(struct platform_device *pdev)
+{
+	struct xilinx_vdma_device *xdev = platform_get_drvdata(pdev);
+	int i;
+
+	of_dma_controller_free(pdev->dev.of_node);
+
+	dma_async_device_unregister(&xdev->common);
+
+	for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++)
+		if (xdev->chan[i])
+			xilinx_vdma_chan_remove(xdev->chan[i]);
+
+	return 0;
+}
+
+static const struct of_device_id xilinx_vdma_of_ids[] = {
+	{ .compatible = "xlnx,axi-vdma-1.00.a",},
+	{}
+};
+
+static struct platform_driver xilinx_vdma_driver = {
+	.driver = {
+		.name = "xilinx-vdma",
+		.owner = THIS_MODULE,
+		.of_match_table = xilinx_vdma_of_ids,
+	},
+	.probe = xilinx_vdma_probe,
+	.remove = xilinx_vdma_remove,
+};
+
+module_platform_driver(xilinx_vdma_driver);
+
+MODULE_AUTHOR("Xilinx, Inc.");
+MODULE_DESCRIPTION("Xilinx VDMA driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/amba/xilinx_dma.h b/include/linux/amba/xilinx_dma.h
new file mode 100644
index 000000000000..34b98f276ed0
--- /dev/null
+++ b/include/linux/amba/xilinx_dma.h
@@ -0,0 +1,47 @@
+/*
+ * Xilinx DMA Engine drivers support header file
+ *
+ * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __DMA_XILINX_DMA_H
+#define __DMA_XILINX_DMA_H
+
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+
+/**
+ * struct xilinx_vdma_config - VDMA Configuration structure
+ * @frm_dly: Frame delay
+ * @gen_lock: Whether in gen-lock mode
+ * @master: Master that it syncs to
+ * @frm_cnt_en: Enable frame count enable
+ * @park: Whether wants to park
+ * @park_frm: Frame to park on
+ * @coalesc: Interrupt coalescing threshold
+ * @delay: Delay counter
+ * @reset: Reset Channel
+ * @ext_fsync: External Frame Sync source
+ */
+struct xilinx_vdma_config {
+	int frm_dly;
+	int gen_lock;
+	int master;
+	int frm_cnt_en;
+	int park;
+	int park_frm;
+	int coalesc;
+	int delay;
+	int reset;
+	int ext_fsync;
+};
+
+int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
+					struct xilinx_vdma_config *cfg);
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 7fa857ed041537ee6cbc7ee4ab0204a1231cfcb9 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 28 Apr 2014 11:14:27 -0700
Subject: net: dsa: add ds_to_priv

DSA drivers have a trick which consists in allocating "priv_size" more
bytes to account for the DSA driver private context. Add a helper
function to access that private context instead of open-coding it in
drivers with (void *)(ds + 1).

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 7828ebf99ee1..6efce384451e 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -181,6 +181,11 @@ struct dsa_switch_driver {
 void register_switch_driver(struct dsa_switch_driver *type);
 void unregister_switch_driver(struct dsa_switch_driver *type);
 
+static inline void *ds_to_priv(struct dsa_switch *ds)
+{
+	return (void *)(ds + 1);
+}
+
 /*
  * The original DSA tag format and some other tag formats have no
  * ethertype, which means that we need to add a little hack to the
-- 
cgit v1.2.3-71-gd317


From 5c98631cca574ac6255885cf372f6bcf9dcfd483 Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Tue, 29 Apr 2014 11:57:34 +0900
Subject: net: ipv6: Introduce ip6_sk_dst_hoplimit.

This replaces 6 identical code snippets with a call to a new
static inline function.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h   |  5 -----
 include/net/ipv6.h       | 19 +++++++++++++++++++
 net/ipv6/icmp.c          | 14 ++------------
 net/ipv6/ip6_flowlabel.c |  1 -
 net/ipv6/ping.c          |  7 +------
 net/ipv6/raw.c           | 10 ++--------
 net/ipv6/udp.c           | 10 ++--------
 net/l2tp/l2tp_ip6.c      | 10 ++--------
 8 files changed, 28 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 933a9f22a05f..f679877bb601 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -306,11 +306,6 @@ static inline void addrconf_addr_solict_mult(const struct in6_addr *addr,
 		      htonl(0xFF000000) | addr->s6_addr32[3]);
 }
 
-static inline bool ipv6_addr_is_multicast(const struct in6_addr *addr)
-{
-	return (addr->s6_addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000);
-}
-
 static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr)
 {
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d640925bc454..5b40ad297b8c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -583,6 +583,11 @@ static inline bool ipv6_addr_orchid(const struct in6_addr *a)
 	return (a->s6_addr32[0] & htonl(0xfffffff0)) == htonl(0x20010010);
 }
 
+static inline bool ipv6_addr_is_multicast(const struct in6_addr *addr)
+{
+	return (addr->s6_addr32[0] & htonl(0xFF000000)) == htonl(0xFF000000);
+}
+
 static inline void ipv6_addr_set_v4mapped(const __be32 addr,
 					  struct in6_addr *v4mapped)
 {
@@ -664,6 +669,20 @@ void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt);
 
 int ip6_dst_hoplimit(struct dst_entry *dst);
 
+static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
+				      struct dst_entry *dst)
+{
+	int hlimit;
+
+	if (ipv6_addr_is_multicast(&fl6->daddr))
+		hlimit = np->mcast_hops;
+	else
+		hlimit = np->hop_limit;
+	if (hlimit < 0)
+		hlimit = ip6_dst_hoplimit(dst);
+	return hlimit;
+}
+
 /*
  *	Header manipulation
  */
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 7b326529e6a2..3b0905b77127 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -493,12 +493,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	if (IS_ERR(dst))
 		goto out;
 
-	if (ipv6_addr_is_multicast(&fl6.daddr))
-		hlimit = np->mcast_hops;
-	else
-		hlimit = np->hop_limit;
-	if (hlimit < 0)
-		hlimit = ip6_dst_hoplimit(dst);
+	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	msg.skb = skb;
 	msg.offset = skb_network_offset(skb);
@@ -593,12 +588,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	if (IS_ERR(dst))
 		goto out;
 
-	if (ipv6_addr_is_multicast(&fl6.daddr))
-		hlimit = np->mcast_hops;
-	else
-		hlimit = np->hop_limit;
-	if (hlimit < 0)
-		hlimit = ip6_dst_hoplimit(dst);
+	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	idev = __in6_dev_get(skb->dev);
 
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 0961b5ef866d..4052694c6f2c 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -26,7 +26,6 @@
 #include <net/sock.h>
 
 #include <net/ipv6.h>
-#include <net/addrconf.h>
 #include <net/rawv6.h>
 #include <net/transp_v6.h>
 
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index bda74291c3e0..a2a1d80dfe0c 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -168,12 +168,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	pfh.wcheck = 0;
 	pfh.family = AF_INET6;
 
-	if (ipv6_addr_is_multicast(&fl6.daddr))
-		hlimit = np->mcast_hops;
-	else
-		hlimit = np->hop_limit;
-	if (hlimit < 0)
-		hlimit = ip6_dst_hoplimit(dst);
+	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	lock_sock(sk);
 	err = ip6_append_data(sk, ping_getfrag, &pfh, len,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 1f29996e368a..dddfb5fa2b7a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -873,14 +873,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		err = PTR_ERR(dst);
 		goto out;
 	}
-	if (hlimit < 0) {
-		if (ipv6_addr_is_multicast(&fl6.daddr))
-			hlimit = np->mcast_hops;
-		else
-			hlimit = np->hop_limit;
-		if (hlimit < 0)
-			hlimit = ip6_dst_hoplimit(dst);
-	}
+	if (hlimit < 0)
+		hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	if (tclass < 0)
 		tclass = np->tclass;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1e586d92260e..d8d6ca04bb9d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1232,14 +1232,8 @@ do_udp_sendmsg:
 		goto out;
 	}
 
-	if (hlimit < 0) {
-		if (ipv6_addr_is_multicast(&fl6.daddr))
-			hlimit = np->mcast_hops;
-		else
-			hlimit = np->hop_limit;
-		if (hlimit < 0)
-			hlimit = ip6_dst_hoplimit(dst);
-	}
+	if (hlimit < 0)
+		hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	if (tclass < 0)
 		tclass = np->tclass;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 7704ea9502fd..e472d44a3b91 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -605,14 +605,8 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		goto out;
 	}
 
-	if (hlimit < 0) {
-		if (ipv6_addr_is_multicast(&fl6.daddr))
-			hlimit = np->mcast_hops;
-		else
-			hlimit = np->hop_limit;
-		if (hlimit < 0)
-			hlimit = ip6_dst_hoplimit(dst);
-	}
+	if (hlimit < 0)
+		hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 
 	if (tclass < 0)
 		tclass = np->tclass;
-- 
cgit v1.2.3-71-gd317


From b87577b7c768683736eea28f70779e8c75b4df62 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 1 May 2014 09:26:53 +1000
Subject: drm: try harder to avoid regression when merging mode bits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For QXL hw we really want the bits to be replaced as we change
the preferred mode on the fly, and the same goes for virgl when
I get to it, however the original fix for this seems to have caused
a wierd regression on Intel G33 that in a stunning display of failure
at opposition to his normal self, Daniel failed to diagnose.

So we are left doing this, ugly ugly ugly ugly, Daniel you fixed
that G33 yet?, ugly, ugly.

Tested-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_modes.c         |  9 ++++--
 drivers/gpu/drm/drm_probe_helper.c  | 64 +++++++++++++++++++++++++------------
 drivers/gpu/drm/qxl/qxl_display.c   |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c |  2 +-
 include/drm/drm_crtc_helper.h       |  4 +++
 include/drm/drm_modes.h             |  2 +-
 6 files changed, 57 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 8b410576fce4..bedf1894e17e 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1013,6 +1013,7 @@ EXPORT_SYMBOL(drm_mode_sort);
 /**
  * drm_mode_connector_list_update - update the mode list for the connector
  * @connector: the connector to update
+ * @merge_type_bits: whether to merge or overright type bits.
  *
  * This moves the modes from the @connector probed_modes list
  * to the actual mode list. It compares the probed mode against the current
@@ -1021,7 +1022,8 @@ EXPORT_SYMBOL(drm_mode_sort);
  * This is just a helper functions doesn't validate any modes itself and also
  * doesn't prune any invalid modes. Callers need to do that themselves.
  */
-void drm_mode_connector_list_update(struct drm_connector *connector)
+void drm_mode_connector_list_update(struct drm_connector *connector,
+				    bool merge_type_bits)
 {
 	struct drm_display_mode *mode;
 	struct drm_display_mode *pmode, *pt;
@@ -1039,7 +1041,10 @@ void drm_mode_connector_list_update(struct drm_connector *connector)
 				/* if equal delete the probed mode */
 				mode->status = pmode->status;
 				/* Merge type bits together */
-				mode->type |= pmode->type;
+				if (merge_type_bits)
+					mode->type |= pmode->type;
+				else
+					mode->type = pmode->type;
 				list_del(&pmode->head);
 				drm_mode_destroy(connector->dev, pmode);
 				break;
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index e70f54d4a581..8afdd0998a8c 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -82,26 +82,8 @@ static void drm_mode_validate_flag(struct drm_connector *connector,
 	return;
 }
 
-/**
- * drm_helper_probe_single_connector_modes - get complete set of display modes
- * @connector: connector to probe
- * @maxX: max width for modes
- * @maxY: max height for modes
- *
- * Based on the helper callbacks implemented by @connector try to detect all
- * valid modes.  Modes will first be added to the connector's probed_modes list,
- * then culled (based on validity and the @maxX, @maxY parameters) and put into
- * the normal modes list.
- *
- * Intended to be use as a generic implementation of the ->fill_modes()
- * @connector vfunc for drivers that use the crtc helpers for output mode
- * filtering and detection.
- *
- * Returns:
- * The number of modes found on @connector.
- */
-int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
-					    uint32_t maxX, uint32_t maxY)
+static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connector *connector,
+							      uint32_t maxX, uint32_t maxY, bool merge_type_bits)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_display_mode *mode;
@@ -155,7 +137,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
 	if (count == 0)
 		goto prune;
 
-	drm_mode_connector_list_update(connector);
+	drm_mode_connector_list_update(connector, merge_type_bits);
 
 	if (maxX && maxY)
 		drm_mode_validate_size(dev, &connector->modes, maxX, maxY);
@@ -194,8 +176,48 @@ prune:
 
 	return count;
 }
+
+/**
+ * drm_helper_probe_single_connector_modes - get complete set of display modes
+ * @connector: connector to probe
+ * @maxX: max width for modes
+ * @maxY: max height for modes
+ *
+ * Based on the helper callbacks implemented by @connector try to detect all
+ * valid modes.  Modes will first be added to the connector's probed_modes list,
+ * then culled (based on validity and the @maxX, @maxY parameters) and put into
+ * the normal modes list.
+ *
+ * Intended to be use as a generic implementation of the ->fill_modes()
+ * @connector vfunc for drivers that use the crtc helpers for output mode
+ * filtering and detection.
+ *
+ * Returns:
+ * The number of modes found on @connector.
+ */
+int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
+					    uint32_t maxX, uint32_t maxY)
+{
+	return drm_helper_probe_single_connector_modes_merge_bits(connector, maxX, maxY, true);
+}
 EXPORT_SYMBOL(drm_helper_probe_single_connector_modes);
 
+/**
+ * drm_helper_probe_single_connector_modes_nomerge - get complete set of display modes
+ * @connector: connector to probe
+ * @maxX: max width for modes
+ * @maxY: max height for modes
+ *
+ * This operates like drm_hehlper_probe_single_connector_modes except it
+ * replaces the mode bits instead of merging them for preferred modes.
+ */
+int drm_helper_probe_single_connector_modes_nomerge(struct drm_connector *connector,
+					    uint32_t maxX, uint32_t maxY)
+{
+	return drm_helper_probe_single_connector_modes_merge_bits(connector, maxX, maxY, false);
+}
+EXPORT_SYMBOL(drm_helper_probe_single_connector_modes_nomerge);
+
 /**
  * drm_kms_helper_hotplug_event - fire off KMS hotplug events
  * @dev: drm_device whose connector state changed
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 41bdd174657e..3ab9072d3623 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -841,7 +841,7 @@ static const struct drm_connector_funcs qxl_connector_funcs = {
 	.save = qxl_conn_save,
 	.restore = qxl_conn_restore,
 	.detect = qxl_conn_detect,
-	.fill_modes = drm_helper_probe_single_connector_modes,
+	.fill_modes = drm_helper_probe_single_connector_modes_nomerge,
 	.set_property = qxl_conn_set_property,
 	.destroy = qxl_conn_destroy,
 };
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index a2dde5ad8138..e7199b454ca0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -2001,7 +2001,7 @@ int vmw_du_connector_fill_modes(struct drm_connector *connector,
 	if (du->pref_mode)
 		list_move(&du->pref_mode->head, &connector->probed_modes);
 
-	drm_mode_connector_list_update(connector);
+	drm_mode_connector_list_update(connector, true);
 
 	return 1;
 }
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index 36a5febac2a6..f51c8393e9a8 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -165,6 +165,10 @@ extern void drm_helper_resume_force_mode(struct drm_device *dev);
 extern int drm_helper_probe_single_connector_modes(struct drm_connector
 						   *connector, uint32_t maxX,
 						   uint32_t maxY);
+extern int drm_helper_probe_single_connector_modes_nomerge(struct drm_connector
+							   *connector,
+							   uint32_t maxX,
+							   uint32_t maxY);
 extern void drm_kms_helper_poll_init(struct drm_device *dev);
 extern void drm_kms_helper_poll_fini(struct drm_device *dev);
 extern bool drm_helper_hpd_irq_event(struct drm_device *dev);
diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h
index 2dbbf9976669..91d0582f924e 100644
--- a/include/drm/drm_modes.h
+++ b/include/drm/drm_modes.h
@@ -223,7 +223,7 @@ void drm_mode_validate_size(struct drm_device *dev,
 void drm_mode_prune_invalid(struct drm_device *dev,
 			    struct list_head *mode_list, bool verbose);
 void drm_mode_sort(struct list_head *mode_list);
-void drm_mode_connector_list_update(struct drm_connector *connector);
+void drm_mode_connector_list_update(struct drm_connector *connector, bool merge_type_bits);
 
 /* parsing cmdline modes */
 bool
-- 
cgit v1.2.3-71-gd317


From dfbb85cab5f0819d0424a3637b03e7892704fa42 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 2 Apr 2014 20:17:00 -0700
Subject: DMA: shdma: add cyclic transfer support

This patch add cyclic transfer support and enables dmaengine_prep_dma_cyclic()

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
[reflown changelog for readablity]
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/sh/shdma-base.c | 72 ++++++++++++++++++++++++++++++++++++++++-----
 include/linux/shdma-base.h  |  1 +
 2 files changed, 66 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index 6786ecbd5ed4..974794cdb6ed 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -304,6 +304,7 @@ static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all)
 	dma_async_tx_callback callback = NULL;
 	void *param = NULL;
 	unsigned long flags;
+	LIST_HEAD(cyclic_list);
 
 	spin_lock_irqsave(&schan->chan_lock, flags);
 	list_for_each_entry_safe(desc, _desc, &schan->ld_queue, node) {
@@ -369,10 +370,16 @@ static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all)
 		if (((desc->mark == DESC_COMPLETED ||
 		      desc->mark == DESC_WAITING) &&
 		     async_tx_test_ack(&desc->async_tx)) || all) {
-			/* Remove from ld_queue list */
-			desc->mark = DESC_IDLE;
 
-			list_move(&desc->node, &schan->ld_free);
+			if (all || !desc->cyclic) {
+				/* Remove from ld_queue list */
+				desc->mark = DESC_IDLE;
+				list_move(&desc->node, &schan->ld_free);
+			} else {
+				/* reuse as cyclic */
+				desc->mark = DESC_SUBMITTED;
+				list_move_tail(&desc->node, &cyclic_list);
+			}
 
 			if (list_empty(&schan->ld_queue)) {
 				dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
@@ -389,6 +396,8 @@ static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all)
 		 */
 		schan->dma_chan.completed_cookie = schan->dma_chan.cookie;
 
+	list_splice_tail(&cyclic_list, &schan->ld_queue);
+
 	spin_unlock_irqrestore(&schan->chan_lock, flags);
 
 	if (callback)
@@ -521,7 +530,7 @@ static struct shdma_desc *shdma_add_desc(struct shdma_chan *schan,
  */
 static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan,
 	struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
-	enum dma_transfer_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags, bool cyclic)
 {
 	struct scatterlist *sg;
 	struct shdma_desc *first = NULL, *new = NULL /* compiler... */;
@@ -569,7 +578,11 @@ static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan,
 			if (!new)
 				goto err_get_desc;
 
-			new->chunks = chunks--;
+			new->cyclic = cyclic;
+			if (cyclic)
+				new->chunks = 1;
+			else
+				new->chunks = chunks--;
 			list_add_tail(&new->node, &tx_list);
 		} while (len);
 	}
@@ -612,7 +625,8 @@ static struct dma_async_tx_descriptor *shdma_prep_memcpy(
 	sg_dma_address(&sg) = dma_src;
 	sg_dma_len(&sg) = len;
 
-	return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM, flags);
+	return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
+			     flags, false);
 }
 
 static struct dma_async_tx_descriptor *shdma_prep_slave_sg(
@@ -640,7 +654,50 @@ static struct dma_async_tx_descriptor *shdma_prep_slave_sg(
 	slave_addr = ops->slave_addr(schan);
 
 	return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
-			      direction, flags);
+			     direction, flags, false);
+}
+
+struct dma_async_tx_descriptor *shdma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	unsigned int sg_len = buf_len / period_len;
+	int slave_id = schan->slave_id;
+	dma_addr_t slave_addr;
+	struct scatterlist sgl[sg_len];
+	int i;
+
+	if (!chan)
+		return NULL;
+
+	BUG_ON(!schan->desc_num);
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (slave_id < 0 || (buf_len < period_len)) {
+		dev_warn(schan->dev,
+			"%s: bad parameter: buf_len=%d, period_len=%d, id=%d\n",
+			__func__, buf_len, period_len, slave_id);
+		return NULL;
+	}
+
+	slave_addr = ops->slave_addr(schan);
+
+	sg_init_table(sgl, sg_len);
+	for (i = 0; i < sg_len; i++) {
+		dma_addr_t src = buf_addr + (period_len * i);
+
+		sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
+			    offset_in_page(src));
+		sg_dma_address(&sgl[i]) = src;
+		sg_dma_len(&sgl[i]) = period_len;
+	}
+
+	return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
+			     direction, flags, true);
 }
 
 static int shdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
@@ -915,6 +972,7 @@ int shdma_init(struct device *dev, struct shdma_dev *sdev,
 
 	/* Compulsory for DMA_SLAVE fields */
 	dma_dev->device_prep_slave_sg = shdma_prep_slave_sg;
+	dma_dev->device_prep_dma_cyclic = shdma_prep_dma_cyclic;
 	dma_dev->device_control = shdma_control;
 
 	dma_dev->dev = dev;
diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h
index f92c0a43c54c..abdf1f229dc3 100644
--- a/include/linux/shdma-base.h
+++ b/include/linux/shdma-base.h
@@ -54,6 +54,7 @@ struct shdma_desc {
 	dma_cookie_t cookie;
 	int chunks;
 	int mark;
+	bool cyclic;			/* used as cyclic transfer */
 };
 
 struct shdma_chan {
-- 
cgit v1.2.3-71-gd317


From 6d48f44b7b2af67b33c1ae5994b8f642685c8bc8 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Wed, 30 Apr 2014 15:23:33 +0300
Subject: mdio_bus: implement devm_mdiobus_alloc/devm_mdiobus_free

Add a resource managed devm_mdiobus_alloc[_size]()/devm_mdiobus_free()
to automatically clean up MDIO bus alocations made by MDIO drivers,
thus leading to simplified MDIO drivers code.

Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Acked-and-tested-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/driver-model/devres.txt |  5 +++
 drivers/net/phy/mdio_bus.c            | 67 +++++++++++++++++++++++++++++++++++
 include/linux/phy.h                   |  7 ++++
 3 files changed, 79 insertions(+)

(limited to 'include')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 4f7897e99cba..c74e04494ade 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -308,3 +308,8 @@ SLAVE DMA ENGINE
 
 SPI
   devm_spi_register_master()
+
+MDIO
+  devm_mdiobus_alloc()
+  devm_mdiobus_alloc_size()
+  devm_mdiobus_free()
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 76f54b32a120..68a9a3867c0f 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -69,6 +69,73 @@ struct mii_bus *mdiobus_alloc_size(size_t size)
 }
 EXPORT_SYMBOL(mdiobus_alloc_size);
 
+static void _devm_mdiobus_free(struct device *dev, void *res)
+{
+	mdiobus_free(*(struct mii_bus **)res);
+}
+
+static int devm_mdiobus_match(struct device *dev, void *res, void *data)
+{
+	struct mii_bus **r = res;
+
+	if (WARN_ON(!r || !*r))
+		return 0;
+
+	return *r == data;
+}
+
+/**
+ * devm_mdiobus_alloc_size - Resource-managed mdiobus_alloc_size()
+ * @dev:		Device to allocate mii_bus for
+ * @sizeof_priv:	Space to allocate for private structure.
+ *
+ * Managed mdiobus_alloc_size. mii_bus allocated with this function is
+ * automatically freed on driver detach.
+ *
+ * If an mii_bus allocated with this function needs to be freed separately,
+ * devm_mdiobus_free() must be used.
+ *
+ * RETURNS:
+ * Pointer to allocated mii_bus on success, NULL on failure.
+ */
+struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv)
+{
+	struct mii_bus **ptr, *bus;
+
+	ptr = devres_alloc(_devm_mdiobus_free, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	/* use raw alloc_dr for kmalloc caller tracing */
+	bus = mdiobus_alloc_size(sizeof_priv);
+	if (bus) {
+		*ptr = bus;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return bus;
+}
+EXPORT_SYMBOL_GPL(devm_mdiobus_alloc);
+
+/**
+ * devm_mdiobus_free - Resource-managed mdiobus_free()
+ * @dev:		Device this mii_bus belongs to
+ * @bus:		the mii_bus associated with the device
+ *
+ * Free mii_bus allocated with devm_mdiobus_alloc_size().
+ */
+void devm_mdiobus_free(struct device *dev, struct mii_bus *bus)
+{
+	int rc;
+
+	rc = devres_release(dev, _devm_mdiobus_free,
+			    devm_mdiobus_match, bus);
+	WARN_ON(rc);
+}
+EXPORT_SYMBOL_GPL(devm_mdiobus_free);
+
 /**
  * mdiobus_release - mii_bus device release callback
  * @d: the target struct device that contains the mii_bus
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 51d15f684e7e..864ddafad8cc 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -198,6 +198,13 @@ static inline struct mii_bus *mdiobus_alloc(void)
 int mdiobus_register(struct mii_bus *bus);
 void mdiobus_unregister(struct mii_bus *bus);
 void mdiobus_free(struct mii_bus *bus);
+struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv);
+static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev)
+{
+	return devm_mdiobus_alloc_size(dev, 0);
+}
+
+void devm_mdiobus_free(struct device *dev, struct mii_bus *bus);
 struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
 int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
 int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
-- 
cgit v1.2.3-71-gd317


From e114a710aa5058c0ba4aa1dfb105132aefeb5e04 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 30 Apr 2014 11:58:13 -0700
Subject: tcp: fix cwnd limited checking to improve congestion control

Yuchung discovered tcp_is_cwnd_limited() was returning false in
slow start phase even if the application filled the socket write queue.

All congestion modules take into account tcp_is_cwnd_limited()
before increasing cwnd, so this behavior limits slow start from
probing the bandwidth at full speed.

The problem is that even if write queue is full (aka we are _not_
application limited), cwnd can be under utilized if TSO should auto
defer or TCP Small queues decided to hold packets.

So the in_flight can be kept to smaller value, and we can get to the
point tcp_is_cwnd_limited() returns false.

With TCP Small Queues and FQ/pacing, this issue is more visible.

We fix this by having tcp_cwnd_validate(), which is supposed to track
such things, take into account unsent_segs, the number of segs that we
are not sending at the moment due to TSO or TSQ, but intend to send
real soon. Then when we are cwnd-limited, remember this fact while we
are processing the window of ACKs that comes back.

For example, suppose we have a brand new connection with cwnd=10; we
are in slow start, and we send a flight of 9 packets. By the time we
have received ACKs for all 9 packets we want our cwnd to be 18.
We implement this by setting tp->lsnd_pending to 9, and
considering ourselves to be cwnd-limited while cwnd is less than
twice tp->lsnd_pending (2*9 -> 18).

This makes tcp_is_cwnd_limited() more understandable, by removing
the GSO/TSO kludge, that tried to work around the issue.

Note the in_flight parameter can be removed in a followup cleanup
patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 +
 include/net/tcp.h     | 22 +++++++++++++++++++++-
 net/ipv4/tcp_cong.c   | 20 --------------------
 net/ipv4/tcp_output.c | 21 ++++++++++++++-------
 4 files changed, 36 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 239946868142..4e37c71ecd74 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -230,6 +230,7 @@ struct tcp_sock {
 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
+	u32	lsnd_pending;	/* packets inflight or unsent since last xmit */
 	u32	prior_cwnd;	/* Congestion window at start of Recovery. */
 	u32	prr_delivered;	/* Number of newly delivered packets to
 				 * receiver in Recovery. */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 163d2b467d78..a9fe7bc4f4bb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -974,7 +974,27 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
 {
 	return tp->snd_una + tp->snd_wnd;
 }
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);
+
+/* We follow the spirit of RFC2861 to validate cwnd but implement a more
+ * flexible approach. The RFC suggests cwnd should not be raised unless
+ * it was fully used previously. But we allow cwnd to grow as long as the
+ * application has used half the cwnd.
+ * Example :
+ *    cwnd is 10 (IW10), but application sends 9 frames.
+ *    We allow cwnd to reach 18 when all frames are ACKed.
+ * This check is safe because it's as aggressive as slow start which already
+ * risks 100% overshoot. The advantage is that we discourage application to
+ * either send more filler packets or data to artificially blow up the cwnd
+ * usage, and allow application-limited process to probe bw more aggressively.
+ *
+ * TODO: remove in_flight once we can fix all callers, and their callers...
+ */
+static inline bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	return tp->snd_cwnd < 2 * tp->lsnd_pending;
+}
 
 static inline void tcp_check_probe_timer(struct sock *sk)
 {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2b9464c93b88..a93b41ba05ff 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	return err;
 }
 
-/* RFC2861 Check whether we are limited by application or congestion window
- * This is the inverse of cwnd check in tcp_tso_should_defer
- */
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 left;
-
-	if (in_flight >= tp->snd_cwnd)
-		return true;
-
-	left = tp->snd_cwnd - in_flight;
-	if (sk_can_gso(sk) &&
-	    left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
-	    left < tp->xmit_size_goal_segs)
-		return true;
-	return left <= tcp_max_tso_deferred_mss(tp);
-}
-EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
-
 /* Slow start is used when congestion window is no greater than the slow start
  * threshold. We base on RFC2581 and also handle stretch ACKs properly.
  * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 20847de991ea..f9181a133462 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1402,12 +1402,13 @@ static void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-/* Congestion window validation. (RFC2861) */
-static void tcp_cwnd_validate(struct sock *sk)
+static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tp->packets_out >= tp->snd_cwnd) {
+	tp->lsnd_pending = tp->packets_out + unsent_segs;
+
+	if (tcp_is_cwnd_limited(sk, 0)) {
 		/* Network is feed fully. */
 		tp->snd_cwnd_used = 0;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -1880,7 +1881,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	unsigned int tso_segs, sent_pkts;
+	unsigned int tso_segs, sent_pkts, unsent_segs = 0;
 	int cwnd_quota;
 	int result;
 
@@ -1924,7 +1925,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 				break;
 		} else {
 			if (!push_one && tcp_tso_should_defer(sk, skb))
-				break;
+				goto compute_unsent_segs;
 		}
 
 		/* TCP Small Queues :
@@ -1949,8 +1950,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			 * there is no smp_mb__after_set_bit() yet
 			 */
 			smp_mb__after_clear_bit();
-			if (atomic_read(&sk->sk_wmem_alloc) > limit)
+			if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+				u32 unsent_bytes;
+
+compute_unsent_segs:
+				unsent_bytes = tp->write_seq - tp->snd_nxt;
+				unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
 				break;
+			}
 		}
 
 		limit = mss_now;
@@ -1990,7 +1997,7 @@ repair:
 		/* Send one loss probe per tail loss episode. */
 		if (push_one != 2)
 			tcp_schedule_loss_probe(sk);
-		tcp_cwnd_validate(sk);
+		tcp_cwnd_validate(sk, unsent_segs);
 		return false;
 	}
 	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
-- 
cgit v1.2.3-71-gd317


From 249015515fe3fc9818d86cb5c83bbc92505ad7dc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 2 May 2014 21:18:05 -0700
Subject: tcp: remove in_flight parameter from cong_avoid() methods

Commit e114a710aa505 ("tcp: fix cwnd limited checking to improve
congestion control") obsoleted in_flight parameter from
tcp_is_cwnd_limited() and its callers.

This patch does the removal as promised.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h        | 8 +++-----
 net/ipv4/tcp_bic.c       | 5 ++---
 net/ipv4/tcp_cong.c      | 4 ++--
 net/ipv4/tcp_cubic.c     | 5 ++---
 net/ipv4/tcp_highspeed.c | 4 ++--
 net/ipv4/tcp_htcp.c      | 4 ++--
 net/ipv4/tcp_hybla.c     | 7 +++----
 net/ipv4/tcp_illinois.c  | 5 ++---
 net/ipv4/tcp_input.c     | 9 ++++-----
 net/ipv4/tcp_lp.c        | 5 ++---
 net/ipv4/tcp_output.c    | 2 +-
 net/ipv4/tcp_scalable.c  | 5 ++---
 net/ipv4/tcp_vegas.c     | 7 +++----
 net/ipv4/tcp_veno.c      | 9 ++++-----
 net/ipv4/tcp_yeah.c      | 5 ++---
 15 files changed, 36 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a9fe7bc4f4bb..3c9418456640 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -796,7 +796,7 @@ struct tcp_congestion_ops {
 	/* return slow start threshold (required) */
 	u32 (*ssthresh)(struct sock *sk);
 	/* do new cwnd calculation (required) */
-	void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
+	void (*cong_avoid)(struct sock *sk, u32 ack, u32 acked);
 	/* call before changing ca_state (optional) */
 	void (*set_state)(struct sock *sk, u8 new_state);
 	/* call when cwnd event occurs (optional) */
@@ -828,7 +828,7 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
 
 extern struct tcp_congestion_ops tcp_init_congestion_ops;
 u32 tcp_reno_ssthresh(struct sock *sk);
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight);
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;
 
 static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
@@ -986,10 +986,8 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
  * risks 100% overshoot. The advantage is that we discourage application to
  * either send more filler packets or data to artificially blow up the cwnd
  * usage, and allow application-limited process to probe bw more aggressively.
- *
- * TODO: remove in_flight once we can fix all callers, and their callers...
  */
-static inline bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
+static inline bool tcp_is_cwnd_limited(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 821846fb0a7e..d5de69bc04f5 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -140,13 +140,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 		ca->cnt = 1;
 }
 
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			      u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index a93b41ba05ff..7b09d8b49fa5 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -317,11 +317,11 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
 /* This is Jacobson's slow start and congestion avoidance.
  * SIGCOMM '88, p. 328.
  */
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	/* In "safe" area, increase. */
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 8bf224516ba2..ba2a4f3a6a1e 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -304,13 +304,12 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 		ca->cnt = 1;
 }
 
-static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			      u32 in_flight)
+static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct bictcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh) {
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 8b9e7bad77c0..1c4908280d92 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -109,12 +109,12 @@ static void hstcp_init(struct sock *sk)
 	tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
 }
 
-static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hstcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 4a194acfd923..031361311a8b 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -227,12 +227,12 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
 	return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
 }
 
-static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index a15a799bf768..d8f8f05a4951 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -87,8 +87,7 @@ static inline u32 hybla_fraction(u32 odds)
  *     o Give cwnd a new value based on the model proposed
  *     o remember increments <1
  */
-static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			     u32 in_flight)
+static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct hybla *ca = inet_csk_ca(sk);
@@ -101,11 +100,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 		ca->minrtt_us = tp->srtt_us;
 	}
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (!ca->hybla_en) {
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 		return;
 	}
 
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 863d105e3015..5999b3972e64 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -255,8 +255,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state)
 /*
  * Increase window in response to successful acknowledgment.
  */
-static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				    u32 in_flight)
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct illinois *ca = inet_csk_ca(sk);
@@ -265,7 +264,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 		update_params(sk);
 
 	/* RFC2861 only increase cwnd if fully utilized */
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	/* In slow start */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6efed134ab63..350b2072f0ab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2938,10 +2938,11 @@ static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
 		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
 }
 
-static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	icsk->icsk_ca_ops->cong_avoid(sk, ack, acked, in_flight);
+
+	icsk->icsk_ca_ops->cong_avoid(sk, ack, acked);
 	tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
 }
 
@@ -3364,7 +3365,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
-	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
@@ -3397,7 +3397,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		flag |= FLAG_SND_UNA_ADVANCED;
 
 	prior_fackets = tp->fackets_out;
-	prior_in_flight = tcp_packets_in_flight(tp);
 
 	/* ts_recent update must be made after we are sure that the packet
 	 * is in window.
@@ -3452,7 +3451,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	/* Advance cwnd if state allows */
 	if (tcp_may_raise_cwnd(sk, flag))
-		tcp_cong_avoid(sk, ack, acked, prior_in_flight);
+		tcp_cong_avoid(sk, ack, acked);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index c9aecae31327..1e70fa8fa793 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -115,13 +115,12 @@ static void tcp_lp_init(struct sock *sk)
  * Will only call newReno CA when away from inference.
  * From TCP-LP's paper, this will be handled in additive increasement.
  */
-static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-			      u32 in_flight)
+static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct lp *lp = inet_csk_ca(sk);
 
 	if (!(lp->flag & LP_WITHIN_INF))
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 }
 
 /**
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f9181a133462..89277a34f2c9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1408,7 +1408,7 @@ static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
 
 	tp->lsnd_pending = tp->packets_out + unsent_segs;
 
-	if (tcp_is_cwnd_limited(sk, 0)) {
+	if (tcp_is_cwnd_limited(sk)) {
 		/* Network is feed fully. */
 		tp->snd_cwnd_used = 0;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 0ac50836da4d..8250949b8853 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -15,12 +15,11 @@
 #define TCP_SCALABLE_AI_CNT	50U
 #define TCP_SCALABLE_MD_SCALE	3
 
-static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				    u32 in_flight)
+static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 48539fff6357..9a5e05f27f4f 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -163,14 +163,13 @@ static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
 	return  min(tp->snd_ssthresh, tp->snd_cwnd-1);
 }
 
-static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				 u32 in_flight)
+static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct vegas *vegas = inet_csk_ca(sk);
 
 	if (!vegas->doing_vegas_now) {
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 		return;
 	}
 
@@ -195,7 +194,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 			/* We don't have enough RTT samples to do the Vegas
 			 * calculation, so we'll behave like Reno.
 			 */
-			tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+			tcp_reno_cong_avoid(sk, ack, acked);
 		} else {
 			u32 rtt, diff;
 			u64 target_cwnd;
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 1b8e28fcd7e1..27b9825753d1 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -114,19 +114,18 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 		tcp_veno_init(sk);
 }
 
-static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				u32 in_flight)
+static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct veno *veno = inet_csk_ca(sk);
 
 	if (!veno->doing_veno_now) {
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 		return;
 	}
 
 	/* limited by applications */
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	/* We do the Veno calculations only if we got enough rtt samples */
@@ -134,7 +133,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 		/* We don't have enough rtt samples to do the Veno
 		 * calculation, so we'll behave like Reno.
 		 */
-		tcp_reno_cong_avoid(sk, ack, acked, in_flight);
+		tcp_reno_cong_avoid(sk, ack, acked);
 	} else {
 		u64 target_cwnd;
 		u32 rtt;
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 5ede0e727945..599b79b8eac0 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -69,13 +69,12 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us)
 	tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us);
 }
 
-static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked,
-				u32 in_flight)
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct yeah *yeah = inet_csk_ca(sk);
 
-	if (!tcp_is_cwnd_limited(sk, in_flight))
+	if (!tcp_is_cwnd_limited(sk))
 		return;
 
 	if (tp->snd_cwnd <= tp->snd_ssthresh)
-- 
cgit v1.2.3-71-gd317


From 69dfa00ccb72a37f3810687ca110e5a8154c6eed Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 4 May 2014 15:09:13 -0400
Subject: cgroup: make flags and subsys_masks unsigned int

There's no reason to use atomic bitops for cgroup_subsys_state->flags,
cgroup_root->flags and various subsys_masks.  This patch updates those
to use bitwise and/or operations instead and converts them form
unsigned long to unsigned int.

This makes the fields occupy (marginally) smaller space and makes it
clear that they don't require atomicity.

This patch doesn't cause any behavior difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  8 ++++----
 kernel/cgroup.c        | 37 ++++++++++++++++++-------------------
 2 files changed, 22 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 4b38e2d6110d..c6c703f2486b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -62,7 +62,7 @@ struct cgroup_subsys_state {
 	/* the parent css */
 	struct cgroup_subsys_state *parent;
 
-	unsigned long flags;
+	unsigned int flags;
 
 	/* percpu_ref killing and RCU release */
 	struct rcu_head rcu_head;
@@ -185,7 +185,7 @@ struct cgroup {
 	u64 serial_nr;
 
 	/* the bitmask of subsystems enabled on the child cgroups */
-	unsigned long child_subsys_mask;
+	unsigned int child_subsys_mask;
 
 	/* Private pointers for each registered subsystem */
 	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
@@ -312,7 +312,7 @@ struct cgroup_root {
 	struct kernfs_root *kf_root;
 
 	/* The bitmask of subsystems attached to this hierarchy */
-	unsigned long subsys_mask;
+	unsigned int subsys_mask;
 
 	/* Unique id for this hierarchy. */
 	int hierarchy_id;
@@ -327,7 +327,7 @@ struct cgroup_root {
 	struct list_head root_list;
 
 	/* Hierarchy-specific flags */
-	unsigned long flags;
+	unsigned int flags;
 
 	/* IDs for cgroups in this hierarchy */
 	struct idr cgroup_idr;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3873267c9ee3..21667f396a1e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -181,7 +181,7 @@ static struct cftype cgroup_base_files[];
 
 static void cgroup_put(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
-			     unsigned long ss_mask);
+			     unsigned int ss_mask);
 static void cgroup_destroy_css_killed(struct cgroup *cgrp);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
@@ -963,7 +963,7 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
  * update of a tasks cgroup pointer by cgroup_attach_task()
  */
 
-static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask);
 static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
 static const struct file_operations proc_cgroupstats_operations;
 
@@ -1079,7 +1079,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
  * @cgrp: target cgroup
  * @subsys_mask: mask of the subsystem ids whose files should be removed
  */
-static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask)
+static void cgroup_clear_dir(struct cgroup *cgrp, unsigned int subsys_mask)
 {
 	struct cgroup_subsys *ss;
 	int i;
@@ -1087,15 +1087,14 @@ static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask)
 	for_each_subsys(ss, i) {
 		struct cftype *cfts;
 
-		if (!test_bit(i, &subsys_mask))
+		if (!(subsys_mask & (1 << i)))
 			continue;
 		list_for_each_entry(cfts, &ss->cfts, node)
 			cgroup_addrm_files(cgrp, cfts, false);
 	}
 }
 
-static int rebind_subsystems(struct cgroup_root *dst_root,
-			     unsigned long ss_mask)
+static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
 {
 	struct cgroup_subsys *ss;
 	int ssid, i, ret;
@@ -1128,7 +1127,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 		 * Just warn about it and continue.
 		 */
 		if (cgrp_dfl_root_visible) {
-			pr_warn("failed to create files (%d) while rebinding 0x%lx to default root\n",
+			pr_warn("failed to create files (%d) while rebinding 0x%x to default root\n",
 				ret, ss_mask);
 			pr_warn("you may retry by moving them to a different hierarchy and unbinding\n");
 		}
@@ -1214,8 +1213,8 @@ static int cgroup_show_options(struct seq_file *seq,
 }
 
 struct cgroup_sb_opts {
-	unsigned long subsys_mask;
-	unsigned long flags;
+	unsigned int subsys_mask;
+	unsigned int flags;
 	char *release_agent;
 	bool cpuset_clone_children;
 	char *name;
@@ -1227,12 +1226,12 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 {
 	char *token, *o = data;
 	bool all_ss = false, one_ss = false;
-	unsigned long mask = (unsigned long)-1;
+	unsigned int mask = -1U;
 	struct cgroup_subsys *ss;
 	int i;
 
 #ifdef CONFIG_CPUSETS
-	mask = ~(1UL << cpuset_cgrp_id);
+	mask = ~(1U << cpuset_cgrp_id);
 #endif
 
 	memset(opts, 0, sizeof(*opts));
@@ -1313,7 +1312,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 			/* Mutually exclusive option 'all' + subsystem name */
 			if (all_ss)
 				return -EINVAL;
-			set_bit(i, &opts->subsys_mask);
+			opts->subsys_mask |= (1 << i);
 			one_ss = true;
 
 			break;
@@ -1342,7 +1341,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 		if (all_ss || (!one_ss && !opts->none && !opts->name))
 			for_each_subsys(ss, i)
 				if (!ss->disabled)
-					set_bit(i, &opts->subsys_mask);
+					opts->subsys_mask |= (1 << i);
 
 		/*
 		 * We either have to specify by name or by subsystems. (So
@@ -1373,7 +1372,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	int ret = 0;
 	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
 	struct cgroup_sb_opts opts;
-	unsigned long added_mask, removed_mask;
+	unsigned int added_mask, removed_mask;
 
 	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
 		pr_err("sane_behavior: remount is not allowed\n");
@@ -1398,7 +1397,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	/* Don't allow flags or name to change at remount */
 	if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
 	    (opts.name && strcmp(opts.name, root->name))) {
-		pr_err("option or name mismatch, new: 0x%lx \"%s\", old: 0x%lx \"%s\"\n",
+		pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
 		       opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "",
 		       root->flags & CGRP_ROOT_OPTION_MASK, root->name);
 		ret = -EINVAL;
@@ -1522,7 +1521,7 @@ static void init_cgroup_root(struct cgroup_root *root,
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
 }
 
-static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask)
+static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 {
 	LIST_HEAD(tmp_links);
 	struct cgroup *root_cgrp = &root->cgrp;
@@ -2507,7 +2506,7 @@ out_finish:
 static int cgroup_subtree_control_write(struct cgroup_subsys_state *dummy_css,
 					struct cftype *cft, char *buffer)
 {
-	unsigned long enable_req = 0, disable_req = 0, enable, disable;
+	unsigned int enable_req = 0, disable_req = 0, enable, disable;
 	struct cgroup *cgrp = dummy_css->cgroup, *child;
 	struct cgroup_subsys *ss;
 	char *tok, *p;
@@ -3998,7 +3997,7 @@ static struct cftype cgroup_base_files[] = {
  *
  * On failure, no file is added.
  */
-static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask)
 {
 	struct cgroup_subsys *ss;
 	int i, ret = 0;
@@ -4007,7 +4006,7 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
 	for_each_subsys(ss, i) {
 		struct cftype *cfts;
 
-		if (!test_bit(i, &subsys_mask))
+		if (!(subsys_mask & (1 << i)))
 			continue;
 
 		list_for_each_entry(cfts, &ss->cfts, node) {
-- 
cgit v1.2.3-71-gd317


From 7d699ddb2b181a2c76e5ea18b1bdf102c4bebe4b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 4 May 2014 15:09:13 -0400
Subject: cgroup, memcg: allocate cgroup ID from 1

Currently, cgroup->id is allocated from 0, which is always assigned to
the root cgroup; unfortunately, memcg wants to use ID 0 to indicate
invalid IDs and ends up incrementing all IDs by one.

It's reasonable to reserve 0 for special purposes.  This patch updates
cgroup core so that ID 0 is not used and the root cgroups get ID 1.
The ID incrementing is removed form memcg.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 4 ++--
 kernel/cgroup.c        | 4 ++--
 mm/memcontrol.c        | 8 ++------
 3 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c6c703f2486b..793f70a48820 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -144,8 +144,8 @@ struct cgroup {
 	/*
 	 * idr allocated in-hierarchy ID.
 	 *
-	 * The ID of the root cgroup is always 0, and a new cgroup
-	 * will be assigned with a smallest available ID.
+	 * ID 0 is not used, the ID of the root cgroup is always 1, and a
+	 * new cgroup will be assigned with a smallest available ID.
 	 *
 	 * Allocating/Removing ID must be protected by cgroup_mutex.
 	 */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 21667f396a1e..3fa0463e74bb 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1531,7 +1531,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 	lockdep_assert_held(&cgroup_tree_mutex);
 	lockdep_assert_held(&cgroup_mutex);
 
-	ret = idr_alloc(&root->cgroup_idr, root_cgrp, 0, 1, GFP_KERNEL);
+	ret = idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
 	if (ret < 0)
 		goto out;
 	root_cgrp->id = ret;
@@ -4225,7 +4225,7 @@ static long cgroup_create(struct cgroup *parent, const char *name,
 	 * Temporarily set the pointer to NULL, so idr_find() won't return
 	 * a half-baked cgroup.
 	 */
-	cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
+	cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
 	if (cgrp->id < 0) {
 		err = -ENOMEM;
 		goto err_unlock;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 29501f040568..1d0b29715b73 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -527,18 +527,14 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
-	/*
-	 * The ID of the root cgroup is 0, but memcg treat 0 as an
-	 * invalid ID, so we return (cgroup_id + 1).
-	 */
-	return memcg->css.cgroup->id + 1;
+	return memcg->css.cgroup->id;
 }
 
 static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 {
 	struct cgroup_subsys_state *css;
 
-	css = css_from_id(id - 1, &memory_cgrp_subsys);
+	css = css_from_id(id, &memory_cgrp_subsys);
 	return mem_cgroup_from_css(css);
 }
 
-- 
cgit v1.2.3-71-gd317


From 15a4c835e4ed3e60dd68727cd1907e3dd89563f4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 4 May 2014 15:09:14 -0400
Subject: cgroup, memcg: implement css->id and convert css_from_id() to use it

Until now, cgroup->id has been used to identify all the associated
csses and css_from_id() takes cgroup ID and returns the matching css
by looking up the cgroup and then dereferencing the css associated
with it; however, now that the lifetimes of cgroup and css are
separate, this is incorrect and breaks on the unified hierarchy when a
controller is disabled and enabled back again before the previous
instance is released.

This patch adds css->id which is a subsystem-unique ID and converts
css_from_id() to look up by the new css->id instead.  memcg is the
only user of css_from_id() and also converted to use css->id instead.

For traditional hierarchies, this shouldn't make any functional
difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jianyu Zhan <nasa4836@gmail.com>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  9 ++++++++
 kernel/cgroup.c        | 59 ++++++++++++++++++++++++++++++++------------------
 mm/memcontrol.c        |  4 ++--
 3 files changed, 49 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 793f70a48820..2dfabb3b749a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -62,6 +62,12 @@ struct cgroup_subsys_state {
 	/* the parent css */
 	struct cgroup_subsys_state *parent;
 
+	/*
+	 * Subsys-unique ID.  0 is unused and root is always 1.  The
+	 * matching css can be looked up using css_from_id().
+	 */
+	int id;
+
 	unsigned int flags;
 
 	/* percpu_ref killing and RCU release */
@@ -655,6 +661,9 @@ struct cgroup_subsys {
 	/* link to parent, protected by cgroup_lock() */
 	struct cgroup_root *root;
 
+	/* idr for css->id */
+	struct idr css_idr;
+
 	/*
 	 * List of cftypes.  Each entry is the first entry of an array
 	 * terminated by zero length name.
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f1c98c527b2d..a1a20e8c973a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -100,8 +100,8 @@ static DECLARE_RWSEM(css_set_rwsem);
 #endif
 
 /*
- * Protects cgroup_idr so that IDs can be released without grabbing
- * cgroup_mutex.
+ * Protects cgroup_idr and css_idr so that IDs can be released without
+ * grabbing cgroup_mutex.
  */
 static DEFINE_SPINLOCK(cgroup_idr_lock);
 
@@ -1089,12 +1089,6 @@ static void cgroup_put(struct cgroup *cgrp)
 	if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp)))
 		return;
 
-	/*
-	 * XXX: cgrp->id is only used to look up css's.  As cgroup and
-	 * css's lifetimes will be decoupled, it should be made
-	 * per-subsystem and moved to css->id so that lookups are
-	 * successful until the target css is released.
-	 */
 	cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
 	cgrp->id = -1;
 
@@ -4104,8 +4098,11 @@ static void css_release(struct percpu_ref *ref)
 {
 	struct cgroup_subsys_state *css =
 		container_of(ref, struct cgroup_subsys_state, refcnt);
+	struct cgroup_subsys *ss = css->ss;
+
+	RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
+	cgroup_idr_remove(&ss->css_idr, css->id);
 
-	RCU_INIT_POINTER(css->cgroup->subsys[css->ss->id], NULL);
 	call_rcu(&css->rcu_head, css_free_rcu_fn);
 }
 
@@ -4195,9 +4192,17 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 	if (err)
 		goto err_free_css;
 
+	err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_NOWAIT);
+	if (err < 0)
+		goto err_free_percpu_ref;
+	css->id = err;
+
 	err = cgroup_populate_dir(cgrp, 1 << ss->id);
 	if (err)
-		goto err_free_percpu_ref;
+		goto err_free_id;
+
+	/* @css is ready to be brought online now, make it visible */
+	cgroup_idr_replace(&ss->css_idr, css, css->id);
 
 	err = online_css(css);
 	if (err)
@@ -4216,6 +4221,8 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 
 err_clear_dir:
 	cgroup_clear_dir(css->cgroup, 1 << css->ss->id);
+err_free_id:
+	cgroup_idr_remove(&ss->css_idr, css->id);
 err_free_percpu_ref:
 	percpu_ref_cancel_init(&css->refcnt);
 err_free_css:
@@ -4642,7 +4649,7 @@ static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
 	.rename			= cgroup_rename,
 };
 
-static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
+static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
 {
 	struct cgroup_subsys_state *css;
 
@@ -4651,6 +4658,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	mutex_lock(&cgroup_tree_mutex);
 	mutex_lock(&cgroup_mutex);
 
+	idr_init(&ss->css_idr);
 	INIT_LIST_HEAD(&ss->cfts);
 
 	/* Create the root cgroup state for this subsystem */
@@ -4659,6 +4667,13 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	/* We don't handle early failures gracefully */
 	BUG_ON(IS_ERR(css));
 	init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
+	if (early) {
+		/* idr_alloc() can't be called safely during early init */
+		css->id = 1;
+	} else {
+		css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
+		BUG_ON(css->id < 0);
+	}
 
 	/* Update the init_css_set to contain a subsys
 	 * pointer to this state - since the subsystem is
@@ -4709,7 +4724,7 @@ int __init cgroup_init_early(void)
 		ss->name = cgroup_subsys_name[i];
 
 		if (ss->early_init)
-			cgroup_init_subsys(ss);
+			cgroup_init_subsys(ss, true);
 	}
 	return 0;
 }
@@ -4741,8 +4756,16 @@ int __init cgroup_init(void)
 	mutex_unlock(&cgroup_tree_mutex);
 
 	for_each_subsys(ss, ssid) {
-		if (!ss->early_init)
-			cgroup_init_subsys(ss);
+		if (ss->early_init) {
+			struct cgroup_subsys_state *css =
+				init_css_set.subsys[ss->id];
+
+			css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
+						   GFP_KERNEL);
+			BUG_ON(css->id < 0);
+		} else {
+			cgroup_init_subsys(ss, false);
+		}
 
 		list_add_tail(&init_css_set.e_cset_node[ssid],
 			      &cgrp_dfl_root.cgrp.e_csets[ssid]);
@@ -5196,14 +5219,8 @@ struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
  */
 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
 {
-	struct cgroup *cgrp;
-
 	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	cgrp = idr_find(&ss->root->cgroup_idr, id);
-	if (cgrp)
-		return cgroup_css(cgrp, ss);
-	return NULL;
+	return idr_find(&ss->css_idr, id);
 }
 
 #ifdef CONFIG_CGROUP_DEBUG
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1d0b29715b73..c3f82f69ef58 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -527,7 +527,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
-	return memcg->css.cgroup->id;
+	return memcg->css.id;
 }
 
 static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
@@ -6401,7 +6401,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
 
-	if (css->cgroup->id > MEM_CGROUP_ID_MAX)
+	if (css->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
 
 	if (!parent)
-- 
cgit v1.2.3-71-gd317


From 5bcfedf06f7fdf9efcf65dc11198e9012f7530f4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 1 May 2014 18:34:18 +0200
Subject: net: filter: simplify label names from jump-table

This patch simplifies label naming for the BPF jump-table.
When we define labels via DL(), we just concatenate/textify
the combination of instruction opcode which consists of the
class, subclass, word size, target register and so on. Each
time we leave BPF_ prefix intact, so that e.g. the preprocessor
generates a label BPF_ALU_BPF_ADD_BPF_X for DL(BPF_ALU, BPF_ADD,
BPF_X) whereas a label name of ALU_ADD_X is much more easy
to grasp. Pure cleanup only.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |   3 +
 net/core/filter.c      | 308 ++++++++++++++++++++++++-------------------------
 2 files changed, 157 insertions(+), 154 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 759abf78dd61..b042d1db127f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -37,6 +37,9 @@
 #define BPF_CALL	0x80	/* function call */
 #define BPF_EXIT	0x90	/* function return */
 
+/* Placeholder/dummy for 0 */
+#define BPF_0		0
+
 /* BPF has 10 general purpose 64-bit registers and stack frame. */
 #define MAX_BPF_REG	11
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 7c4db3dd3d1e..a1784e95b049 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -156,94 +156,94 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
 	static const void *jumptable[256] = {
 		[0 ... 255] = &&default_label,
 		/* Now overwrite non-defaults ... */
-#define DL(A, B, C)	[A|B|C] = &&A##_##B##_##C
-		DL(BPF_ALU, BPF_ADD, BPF_X),
-		DL(BPF_ALU, BPF_ADD, BPF_K),
-		DL(BPF_ALU, BPF_SUB, BPF_X),
-		DL(BPF_ALU, BPF_SUB, BPF_K),
-		DL(BPF_ALU, BPF_AND, BPF_X),
-		DL(BPF_ALU, BPF_AND, BPF_K),
-		DL(BPF_ALU, BPF_OR, BPF_X),
-		DL(BPF_ALU, BPF_OR, BPF_K),
-		DL(BPF_ALU, BPF_LSH, BPF_X),
-		DL(BPF_ALU, BPF_LSH, BPF_K),
-		DL(BPF_ALU, BPF_RSH, BPF_X),
-		DL(BPF_ALU, BPF_RSH, BPF_K),
-		DL(BPF_ALU, BPF_XOR, BPF_X),
-		DL(BPF_ALU, BPF_XOR, BPF_K),
-		DL(BPF_ALU, BPF_MUL, BPF_X),
-		DL(BPF_ALU, BPF_MUL, BPF_K),
-		DL(BPF_ALU, BPF_MOV, BPF_X),
-		DL(BPF_ALU, BPF_MOV, BPF_K),
-		DL(BPF_ALU, BPF_DIV, BPF_X),
-		DL(BPF_ALU, BPF_DIV, BPF_K),
-		DL(BPF_ALU, BPF_MOD, BPF_X),
-		DL(BPF_ALU, BPF_MOD, BPF_K),
-		DL(BPF_ALU, BPF_NEG, 0),
-		DL(BPF_ALU, BPF_END, BPF_TO_BE),
-		DL(BPF_ALU, BPF_END, BPF_TO_LE),
-		DL(BPF_ALU64, BPF_ADD, BPF_X),
-		DL(BPF_ALU64, BPF_ADD, BPF_K),
-		DL(BPF_ALU64, BPF_SUB, BPF_X),
-		DL(BPF_ALU64, BPF_SUB, BPF_K),
-		DL(BPF_ALU64, BPF_AND, BPF_X),
-		DL(BPF_ALU64, BPF_AND, BPF_K),
-		DL(BPF_ALU64, BPF_OR, BPF_X),
-		DL(BPF_ALU64, BPF_OR, BPF_K),
-		DL(BPF_ALU64, BPF_LSH, BPF_X),
-		DL(BPF_ALU64, BPF_LSH, BPF_K),
-		DL(BPF_ALU64, BPF_RSH, BPF_X),
-		DL(BPF_ALU64, BPF_RSH, BPF_K),
-		DL(BPF_ALU64, BPF_XOR, BPF_X),
-		DL(BPF_ALU64, BPF_XOR, BPF_K),
-		DL(BPF_ALU64, BPF_MUL, BPF_X),
-		DL(BPF_ALU64, BPF_MUL, BPF_K),
-		DL(BPF_ALU64, BPF_MOV, BPF_X),
-		DL(BPF_ALU64, BPF_MOV, BPF_K),
-		DL(BPF_ALU64, BPF_ARSH, BPF_X),
-		DL(BPF_ALU64, BPF_ARSH, BPF_K),
-		DL(BPF_ALU64, BPF_DIV, BPF_X),
-		DL(BPF_ALU64, BPF_DIV, BPF_K),
-		DL(BPF_ALU64, BPF_MOD, BPF_X),
-		DL(BPF_ALU64, BPF_MOD, BPF_K),
-		DL(BPF_ALU64, BPF_NEG, 0),
-		DL(BPF_JMP, BPF_CALL, 0),
-		DL(BPF_JMP, BPF_JA, 0),
-		DL(BPF_JMP, BPF_JEQ, BPF_X),
-		DL(BPF_JMP, BPF_JEQ, BPF_K),
-		DL(BPF_JMP, BPF_JNE, BPF_X),
-		DL(BPF_JMP, BPF_JNE, BPF_K),
-		DL(BPF_JMP, BPF_JGT, BPF_X),
-		DL(BPF_JMP, BPF_JGT, BPF_K),
-		DL(BPF_JMP, BPF_JGE, BPF_X),
-		DL(BPF_JMP, BPF_JGE, BPF_K),
-		DL(BPF_JMP, BPF_JSGT, BPF_X),
-		DL(BPF_JMP, BPF_JSGT, BPF_K),
-		DL(BPF_JMP, BPF_JSGE, BPF_X),
-		DL(BPF_JMP, BPF_JSGE, BPF_K),
-		DL(BPF_JMP, BPF_JSET, BPF_X),
-		DL(BPF_JMP, BPF_JSET, BPF_K),
-		DL(BPF_JMP, BPF_EXIT, 0),
-		DL(BPF_STX, BPF_MEM, BPF_B),
-		DL(BPF_STX, BPF_MEM, BPF_H),
-		DL(BPF_STX, BPF_MEM, BPF_W),
-		DL(BPF_STX, BPF_MEM, BPF_DW),
-		DL(BPF_STX, BPF_XADD, BPF_W),
-		DL(BPF_STX, BPF_XADD, BPF_DW),
-		DL(BPF_ST, BPF_MEM, BPF_B),
-		DL(BPF_ST, BPF_MEM, BPF_H),
-		DL(BPF_ST, BPF_MEM, BPF_W),
-		DL(BPF_ST, BPF_MEM, BPF_DW),
-		DL(BPF_LDX, BPF_MEM, BPF_B),
-		DL(BPF_LDX, BPF_MEM, BPF_H),
-		DL(BPF_LDX, BPF_MEM, BPF_W),
-		DL(BPF_LDX, BPF_MEM, BPF_DW),
-		DL(BPF_LD, BPF_ABS, BPF_W),
-		DL(BPF_LD, BPF_ABS, BPF_H),
-		DL(BPF_LD, BPF_ABS, BPF_B),
-		DL(BPF_LD, BPF_IND, BPF_W),
-		DL(BPF_LD, BPF_IND, BPF_H),
-		DL(BPF_LD, BPF_IND, BPF_B),
+#define DL(A, B, C)	[BPF_##A|BPF_##B|BPF_##C] = &&A##_##B##_##C
+		DL(ALU, ADD, X),
+		DL(ALU, ADD, K),
+		DL(ALU, SUB, X),
+		DL(ALU, SUB, K),
+		DL(ALU, AND, X),
+		DL(ALU, AND, K),
+		DL(ALU, OR, X),
+		DL(ALU, OR, K),
+		DL(ALU, LSH, X),
+		DL(ALU, LSH, K),
+		DL(ALU, RSH, X),
+		DL(ALU, RSH, K),
+		DL(ALU, XOR, X),
+		DL(ALU, XOR, K),
+		DL(ALU, MUL, X),
+		DL(ALU, MUL, K),
+		DL(ALU, MOV, X),
+		DL(ALU, MOV, K),
+		DL(ALU, DIV, X),
+		DL(ALU, DIV, K),
+		DL(ALU, MOD, X),
+		DL(ALU, MOD, K),
+		DL(ALU, NEG, 0),
+		DL(ALU, END, TO_BE),
+		DL(ALU, END, TO_LE),
+		DL(ALU64, ADD, X),
+		DL(ALU64, ADD, K),
+		DL(ALU64, SUB, X),
+		DL(ALU64, SUB, K),
+		DL(ALU64, AND, X),
+		DL(ALU64, AND, K),
+		DL(ALU64, OR, X),
+		DL(ALU64, OR, K),
+		DL(ALU64, LSH, X),
+		DL(ALU64, LSH, K),
+		DL(ALU64, RSH, X),
+		DL(ALU64, RSH, K),
+		DL(ALU64, XOR, X),
+		DL(ALU64, XOR, K),
+		DL(ALU64, MUL, X),
+		DL(ALU64, MUL, K),
+		DL(ALU64, MOV, X),
+		DL(ALU64, MOV, K),
+		DL(ALU64, ARSH, X),
+		DL(ALU64, ARSH, K),
+		DL(ALU64, DIV, X),
+		DL(ALU64, DIV, K),
+		DL(ALU64, MOD, X),
+		DL(ALU64, MOD, K),
+		DL(ALU64, NEG, 0),
+		DL(JMP, CALL, 0),
+		DL(JMP, JA, 0),
+		DL(JMP, JEQ, X),
+		DL(JMP, JEQ, K),
+		DL(JMP, JNE, X),
+		DL(JMP, JNE, K),
+		DL(JMP, JGT, X),
+		DL(JMP, JGT, K),
+		DL(JMP, JGE, X),
+		DL(JMP, JGE, K),
+		DL(JMP, JSGT, X),
+		DL(JMP, JSGT, K),
+		DL(JMP, JSGE, X),
+		DL(JMP, JSGE, K),
+		DL(JMP, JSET, X),
+		DL(JMP, JSET, K),
+		DL(JMP, EXIT, 0),
+		DL(STX, MEM, B),
+		DL(STX, MEM, H),
+		DL(STX, MEM, W),
+		DL(STX, MEM, DW),
+		DL(STX, XADD, W),
+		DL(STX, XADD, DW),
+		DL(ST, MEM, B),
+		DL(ST, MEM, H),
+		DL(ST, MEM, W),
+		DL(ST, MEM, DW),
+		DL(LDX, MEM, B),
+		DL(LDX, MEM, H),
+		DL(LDX, MEM, W),
+		DL(LDX, MEM, DW),
+		DL(LD, ABS, W),
+		DL(LD, ABS, H),
+		DL(LD, ABS, B),
+		DL(LD, IND, W),
+		DL(LD, IND, H),
+		DL(LD, IND, B),
 #undef DL
 	};
 
@@ -257,93 +257,93 @@ select_insn:
 
 	/* ALU */
 #define ALU(OPCODE, OP)			\
-	BPF_ALU64_##OPCODE##_BPF_X:	\
+	ALU64_##OPCODE##_X:		\
 		A = A OP X;		\
 		CONT;			\
-	BPF_ALU_##OPCODE##_BPF_X:	\
+	ALU_##OPCODE##_X:		\
 		A = (u32) A OP (u32) X;	\
 		CONT;			\
-	BPF_ALU64_##OPCODE##_BPF_K:	\
+	ALU64_##OPCODE##_K:		\
 		A = A OP K;		\
 		CONT;			\
-	BPF_ALU_##OPCODE##_BPF_K:	\
+	ALU_##OPCODE##_K:		\
 		A = (u32) A OP (u32) K;	\
 		CONT;
 
-	ALU(BPF_ADD,  +)
-	ALU(BPF_SUB,  -)
-	ALU(BPF_AND,  &)
-	ALU(BPF_OR,   |)
-	ALU(BPF_LSH, <<)
-	ALU(BPF_RSH, >>)
-	ALU(BPF_XOR,  ^)
-	ALU(BPF_MUL,  *)
+	ALU(ADD,  +)
+	ALU(SUB,  -)
+	ALU(AND,  &)
+	ALU(OR,   |)
+	ALU(LSH, <<)
+	ALU(RSH, >>)
+	ALU(XOR,  ^)
+	ALU(MUL,  *)
 #undef ALU
-	BPF_ALU_BPF_NEG_0:
+	ALU_NEG_0:
 		A = (u32) -A;
 		CONT;
-	BPF_ALU64_BPF_NEG_0:
+	ALU64_NEG_0:
 		A = -A;
 		CONT;
-	BPF_ALU_BPF_MOV_BPF_X:
+	ALU_MOV_X:
 		A = (u32) X;
 		CONT;
-	BPF_ALU_BPF_MOV_BPF_K:
+	ALU_MOV_K:
 		A = (u32) K;
 		CONT;
-	BPF_ALU64_BPF_MOV_BPF_X:
+	ALU64_MOV_X:
 		A = X;
 		CONT;
-	BPF_ALU64_BPF_MOV_BPF_K:
+	ALU64_MOV_K:
 		A = K;
 		CONT;
-	BPF_ALU64_BPF_ARSH_BPF_X:
+	ALU64_ARSH_X:
 		(*(s64 *) &A) >>= X;
 		CONT;
-	BPF_ALU64_BPF_ARSH_BPF_K:
+	ALU64_ARSH_K:
 		(*(s64 *) &A) >>= K;
 		CONT;
-	BPF_ALU64_BPF_MOD_BPF_X:
+	ALU64_MOD_X:
 		if (unlikely(X == 0))
 			return 0;
 		tmp = A;
 		A = do_div(tmp, X);
 		CONT;
-	BPF_ALU_BPF_MOD_BPF_X:
+	ALU_MOD_X:
 		if (unlikely(X == 0))
 			return 0;
 		tmp = (u32) A;
 		A = do_div(tmp, (u32) X);
 		CONT;
-	BPF_ALU64_BPF_MOD_BPF_K:
+	ALU64_MOD_K:
 		tmp = A;
 		A = do_div(tmp, K);
 		CONT;
-	BPF_ALU_BPF_MOD_BPF_K:
+	ALU_MOD_K:
 		tmp = (u32) A;
 		A = do_div(tmp, (u32) K);
 		CONT;
-	BPF_ALU64_BPF_DIV_BPF_X:
+	ALU64_DIV_X:
 		if (unlikely(X == 0))
 			return 0;
 		do_div(A, X);
 		CONT;
-	BPF_ALU_BPF_DIV_BPF_X:
+	ALU_DIV_X:
 		if (unlikely(X == 0))
 			return 0;
 		tmp = (u32) A;
 		do_div(tmp, (u32) X);
 		A = (u32) tmp;
 		CONT;
-	BPF_ALU64_BPF_DIV_BPF_K:
+	ALU64_DIV_K:
 		do_div(A, K);
 		CONT;
-	BPF_ALU_BPF_DIV_BPF_K:
+	ALU_DIV_K:
 		tmp = (u32) A;
 		do_div(tmp, (u32) K);
 		A = (u32) tmp;
 		CONT;
-	BPF_ALU_BPF_END_BPF_TO_BE:
+	ALU_END_TO_BE:
 		switch (K) {
 		case 16:
 			A = (__force u16) cpu_to_be16(A);
@@ -356,7 +356,7 @@ select_insn:
 			break;
 		}
 		CONT;
-	BPF_ALU_BPF_END_BPF_TO_LE:
+	ALU_END_TO_LE:
 		switch (K) {
 		case 16:
 			A = (__force u16) cpu_to_le16(A);
@@ -371,7 +371,7 @@ select_insn:
 		CONT;
 
 	/* CALL */
-	BPF_JMP_BPF_CALL_0:
+	JMP_CALL_0:
 		/* Function call scratches R1-R5 registers, preserves R6-R9,
 		 * and stores return value into R0.
 		 */
@@ -380,122 +380,122 @@ select_insn:
 		CONT;
 
 	/* JMP */
-	BPF_JMP_BPF_JA_0:
+	JMP_JA_0:
 		insn += insn->off;
 		CONT;
-	BPF_JMP_BPF_JEQ_BPF_X:
+	JMP_JEQ_X:
 		if (A == X) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JEQ_BPF_K:
+	JMP_JEQ_K:
 		if (A == K) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JNE_BPF_X:
+	JMP_JNE_X:
 		if (A != X) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JNE_BPF_K:
+	JMP_JNE_K:
 		if (A != K) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGT_BPF_X:
+	JMP_JGT_X:
 		if (A > X) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGT_BPF_K:
+	JMP_JGT_K:
 		if (A > K) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGE_BPF_X:
+	JMP_JGE_X:
 		if (A >= X) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGE_BPF_K:
+	JMP_JGE_K:
 		if (A >= K) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGT_BPF_X:
-		if (((s64)A) > ((s64)X)) {
+	JMP_JSGT_X:
+		if (((s64) A) > ((s64) X)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGT_BPF_K:
-		if (((s64)A) > ((s64)K)) {
+	JMP_JSGT_K:
+		if (((s64) A) > ((s64) K)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGE_BPF_X:
-		if (((s64)A) >= ((s64)X)) {
+	JMP_JSGE_X:
+		if (((s64) A) >= ((s64) X)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGE_BPF_K:
-		if (((s64)A) >= ((s64)K)) {
+	JMP_JSGE_K:
+		if (((s64) A) >= ((s64) K)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSET_BPF_X:
+	JMP_JSET_X:
 		if (A & X) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSET_BPF_K:
+	JMP_JSET_K:
 		if (A & K) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_EXIT_0:
+	JMP_EXIT_0:
 		return R0;
 
 	/* STX and ST and LDX*/
 #define LDST(SIZEOP, SIZE)					\
-	BPF_STX_BPF_MEM_##SIZEOP:				\
+	STX_MEM_##SIZEOP:					\
 		*(SIZE *)(unsigned long) (A + insn->off) = X;	\
 		CONT;						\
-	BPF_ST_BPF_MEM_##SIZEOP:				\
+	ST_MEM_##SIZEOP:					\
 		*(SIZE *)(unsigned long) (A + insn->off) = K;	\
 		CONT;						\
-	BPF_LDX_BPF_MEM_##SIZEOP:				\
+	LDX_MEM_##SIZEOP:					\
 		A = *(SIZE *)(unsigned long) (X + insn->off);	\
 		CONT;
 
-	LDST(BPF_B,   u8)
-	LDST(BPF_H,  u16)
-	LDST(BPF_W,  u32)
-	LDST(BPF_DW, u64)
+	LDST(B,   u8)
+	LDST(H,  u16)
+	LDST(W,  u32)
+	LDST(DW, u64)
 #undef LDST
-	BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */
+	STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */
 		atomic_add((u32) X, (atomic_t *)(unsigned long)
 			   (A + insn->off));
 		CONT;
-	BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
+	STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
 		atomic64_add((u64) X, (atomic64_t *)(unsigned long)
 			     (A + insn->off));
 		CONT;
-	BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
+	LD_ABS_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
 		off = K;
 load_word:
 		/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
@@ -524,7 +524,7 @@ load_word:
 			CONT;
 		}
 		return 0;
-	BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
+	LD_ABS_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
 		off = K;
 load_half:
 		ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
@@ -533,7 +533,7 @@ load_half:
 			CONT;
 		}
 		return 0;
-	BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */
+	LD_ABS_B: /* R0 = *(u8 *) (ctx + K) */
 		off = K;
 load_byte:
 		ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
@@ -542,13 +542,13 @@ load_byte:
 			CONT;
 		}
 		return 0;
-	BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
+	LD_IND_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
 		off = K + X;
 		goto load_word;
-	BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
+	LD_IND_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
 		off = K + X;
 		goto load_half;
-	BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */
+	LD_IND_B: /* R0 = *(u8 *) (skb->data + X + K) */
 		off = K + X;
 		goto load_byte;
 
-- 
cgit v1.2.3-71-gd317


From 30743837dd204d2b04fd4e9d3db78cc7b118c81a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 1 May 2014 18:34:19 +0200
Subject: net: filter: make register naming more comprehensible

The current code is a bit hard to parse on which registers can be used,
how they are mapped and all play together. It makes much more sense to
define this a bit more clearly so that the code is a bit more intuitive.
This patch cleans this up, and makes naming a bit more consistent among
the code. This also allows for moving some of the defines into the header
file. Clearing of A and X registers in __sk_run_filter() do not get a
particular register name assigned as they have not an 'official' function,
but rather just result from the concrete initial mapping of old BPF
programs. Since for BPF helper functions for BPF_CALL we already use
small letters, so be consistent here as well. No functional changes.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  44 ++++++++--
 net/core/filter.c      | 215 +++++++++++++++++++++++++------------------------
 2 files changed, 145 insertions(+), 114 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index b042d1db127f..ed1efab10b8f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -40,16 +40,47 @@
 /* Placeholder/dummy for 0 */
 #define BPF_0		0
 
+/* Register numbers */
+enum {
+	BPF_REG_0 = 0,
+	BPF_REG_1,
+	BPF_REG_2,
+	BPF_REG_3,
+	BPF_REG_4,
+	BPF_REG_5,
+	BPF_REG_6,
+	BPF_REG_7,
+	BPF_REG_8,
+	BPF_REG_9,
+	BPF_REG_10,
+	__MAX_BPF_REG,
+};
+
 /* BPF has 10 general purpose 64-bit registers and stack frame. */
-#define MAX_BPF_REG	11
+#define MAX_BPF_REG	__MAX_BPF_REG
+
+/* ArgX, context and stack frame pointer register positions. Note,
+ * Arg1, Arg2, Arg3, etc are used as argument mappings of function
+ * calls in BPF_CALL instruction.
+ */
+#define BPF_REG_ARG1	BPF_REG_1
+#define BPF_REG_ARG2	BPF_REG_2
+#define BPF_REG_ARG3	BPF_REG_3
+#define BPF_REG_ARG4	BPF_REG_4
+#define BPF_REG_ARG5	BPF_REG_5
+#define BPF_REG_CTX	BPF_REG_6
+#define BPF_REG_FP	BPF_REG_10
+
+/* Additional register mappings for converted user programs. */
+#define BPF_REG_A	BPF_REG_0
+#define BPF_REG_X	BPF_REG_7
+#define BPF_REG_TMP	BPF_REG_8
 
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
-/* Arg1, context and stack frame pointer register positions. */
-#define ARG1_REG	1
-#define CTX_REG		6
-#define FP_REG		10
+/* Macro to invoke filter function. */
+#define SK_RUN_FILTER(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
 
 struct sock_filter_int {
 	__u8	code;		/* opcode */
@@ -100,9 +131,6 @@ static inline unsigned int sk_filter_size(unsigned int proglen)
 #define sk_filter_proglen(fprog)			\
 		(fprog->len * sizeof(fprog->filter[0]))
 
-#define SK_RUN_FILTER(filter, ctx)			\
-		(*filter->bpf_func)(ctx, filter->insnsi)
-
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
 u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
diff --git a/net/core/filter.c b/net/core/filter.c
index a1784e95b049..c93ca8d66f37 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -45,6 +45,27 @@
 #include <linux/seccomp.h>
 #include <linux/if_vlan.h>
 
+/* Registers */
+#define R0	regs[BPF_REG_0]
+#define R1	regs[BPF_REG_1]
+#define R2	regs[BPF_REG_2]
+#define R3	regs[BPF_REG_3]
+#define R4	regs[BPF_REG_4]
+#define R5	regs[BPF_REG_5]
+#define R6	regs[BPF_REG_6]
+#define R7	regs[BPF_REG_7]
+#define R8	regs[BPF_REG_8]
+#define R9	regs[BPF_REG_9]
+#define R10	regs[BPF_REG_10]
+
+/* Named registers */
+#define A	regs[insn->a_reg]
+#define X	regs[insn->x_reg]
+#define FP	regs[BPF_REG_FP]
+#define ARG1	regs[BPF_REG_ARG1]
+#define CTX	regs[BPF_REG_CTX]
+#define K	insn->imm
+
 /* No hurry in this branch
  *
  * Exported for the bpf jit load helper.
@@ -122,13 +143,6 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	return 0;
 }
 
-/* Register mappings for user programs. */
-#define A_REG		0
-#define X_REG		7
-#define TMP_REG		8
-#define ARG2_REG	2
-#define ARG3_REG	3
-
 /**
  *	__sk_run_filter - run a filter on a given context
  *	@ctx: buffer to run the filter on
@@ -142,17 +156,6 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
 {
 	u64 stack[MAX_BPF_STACK / sizeof(u64)];
 	u64 regs[MAX_BPF_REG], tmp;
-	void *ptr;
-	int off;
-
-#define K  insn->imm
-#define A  regs[insn->a_reg]
-#define X  regs[insn->x_reg]
-#define R0 regs[0]
-
-#define CONT	 ({insn++; goto select_insn; })
-#define CONT_JMP ({insn++; goto select_insn; })
-
 	static const void *jumptable[256] = {
 		[0 ... 255] = &&default_label,
 		/* Now overwrite non-defaults ... */
@@ -246,11 +249,18 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
 		DL(LD, IND, B),
 #undef DL
 	};
+	void *ptr;
+	int off;
 
-	regs[FP_REG]  = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
-	regs[ARG1_REG] = (u64) (unsigned long) ctx;
-	regs[A_REG] = 0;
-	regs[X_REG] = 0;
+#define CONT	 ({ insn++; goto select_insn; })
+#define CONT_JMP ({ insn++; goto select_insn; })
+
+	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
+	ARG1 = (u64) (unsigned long) ctx;
+
+	/* Register for user BPF programs need to be reset first. */
+	regs[BPF_REG_A] = 0;
+	regs[BPF_REG_X] = 0;
 
 select_insn:
 	goto *jumptable[insn->code];
@@ -375,8 +385,7 @@ select_insn:
 		/* Function call scratches R1-R5 registers, preserves R6-R9,
 		 * and stores return value into R0.
 		 */
-		R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3],
-						   regs[4], regs[5]);
+		R0 = (__bpf_call_base + insn->imm)(R1, R2, R3, R4, R5);
 		CONT;
 
 	/* JMP */
@@ -500,7 +509,7 @@ select_insn:
 load_word:
 		/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
 		 * appearing in the programs where ctx == skb. All programs
-		 * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter()
+		 * keep 'ctx' in regs[BPF_REG_CTX] == R6, sk_convert_filter()
 		 * saves it in R6, internal BPF verifier will check that
 		 * R6 == ctx.
 		 *
@@ -556,13 +565,6 @@ load_byte:
 		/* If we ever reach this, we have a bug somewhere. */
 		WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
 		return 0;
-#undef CONT_JMP
-#undef CONT
-
-#undef R0
-#undef X
-#undef A
-#undef K
 }
 
 u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
@@ -594,14 +596,14 @@ static unsigned int pkt_type_offset(void)
 	return -1;
 }
 
-static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
 
 	return __skb_get_poff(skb);
 }
 
-static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
 	struct nlattr *nla;
@@ -612,17 +614,17 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
 	if (skb->len < sizeof(struct nlattr))
 		return 0;
 
-	if (A > skb->len - sizeof(struct nlattr))
+	if (a > skb->len - sizeof(struct nlattr))
 		return 0;
 
-	nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X);
+	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
 	if (nla)
 		return (void *) nla - (void *) skb->data;
 
 	return 0;
 }
 
-static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
 	struct nlattr *nla;
@@ -633,27 +635,27 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
 	if (skb->len < sizeof(struct nlattr))
 		return 0;
 
-	if (A > skb->len - sizeof(struct nlattr))
+	if (a > skb->len - sizeof(struct nlattr))
 		return 0;
 
-	nla = (struct nlattr *) &skb->data[A];
-	if (nla->nla_len > skb->len - A)
+	nla = (struct nlattr *) &skb->data[a];
+	if (nla->nla_len > skb->len - a)
 		return 0;
 
-	nla = nla_find_nested(nla, X);
+	nla = nla_find_nested(nla, x);
 	if (nla)
 		return (void *) nla - (void *) skb->data;
 
 	return 0;
 }
 
-static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	return raw_smp_processor_id();
 }
 
 /* note that this only generates 32-bit random numbers */
-static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	return (u64)prandom_u32();
 }
@@ -668,28 +670,28 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 
 		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, protocol);
 		insn++;
 
 		/* A = ntohs(A) [emitting a nop or swap16] */
 		insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
-		insn->a_reg = A_REG;
+		insn->a_reg = BPF_REG_A;
 		insn->imm = 16;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
 		insn->code = BPF_LDX | BPF_MEM | BPF_B;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = pkt_type_offset();
 		if (insn->off < 0)
 			return false;
 		insn++;
 
 		insn->code = BPF_ALU | BPF_AND | BPF_K;
-		insn->a_reg = A_REG;
+		insn->a_reg = BPF_REG_A;
 		insn->imm = PKT_TYPE_MAX;
 		break;
 
@@ -699,13 +701,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 			insn->code = BPF_LDX | BPF_MEM | BPF_DW;
 		else
 			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = TMP_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_TMP;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, dev);
 		insn++;
 
 		insn->code = BPF_JMP | BPF_JNE | BPF_K;
-		insn->a_reg = TMP_REG;
+		insn->a_reg = BPF_REG_TMP;
 		insn->imm = 0;
 		insn->off = 1;
 		insn++;
@@ -716,8 +718,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
 
-		insn->a_reg = A_REG;
-		insn->x_reg = TMP_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_TMP;
 
 		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
 			insn->code = BPF_LDX | BPF_MEM | BPF_W;
@@ -732,8 +734,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 
 		insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, mark);
 		break;
 
@@ -741,8 +743,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 
 		insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, hash);
 		break;
 
@@ -750,8 +752,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
 
 		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, queue_mapping);
 		break;
 
@@ -760,8 +762,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 
 		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, vlan_tci);
 		insn++;
 
@@ -769,16 +771,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 
 		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
 			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = ~VLAN_TAG_PRESENT;
 		} else {
 			insn->code = BPF_ALU | BPF_RSH | BPF_K;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = 12;
 			insn++;
 
 			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = 1;
 		}
 		break;
@@ -790,20 +792,20 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_RANDOM:
 		/* arg1 = ctx */
 		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = ARG1_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_ARG1;
+		insn->x_reg = BPF_REG_CTX;
 		insn++;
 
 		/* arg2 = A */
 		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = ARG2_REG;
-		insn->x_reg = A_REG;
+		insn->a_reg = BPF_REG_ARG2;
+		insn->x_reg = BPF_REG_A;
 		insn++;
 
 		/* arg3 = X */
 		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = ARG3_REG;
-		insn->x_reg = X_REG;
+		insn->a_reg = BPF_REG_ARG3;
+		insn->x_reg = BPF_REG_X;
 		insn++;
 
 		/* Emit call(ctx, arg2=A, arg3=X) */
@@ -829,8 +831,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 
 	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
 		insn->code = BPF_ALU | BPF_XOR | BPF_X;
-		insn->a_reg = A_REG;
-		insn->x_reg = X_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_X;
 		break;
 
 	default:
@@ -880,7 +882,7 @@ int sk_convert_filter(struct sock_filter *prog, int len,
 	u8 bpf_src;
 
 	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
-	BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG);
+	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
 
 	if (len <= 0 || len >= BPF_MAXINSNS)
 		return -EINVAL;
@@ -897,8 +899,8 @@ do_pass:
 
 	if (new_insn) {
 		new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		new_insn->a_reg = CTX_REG;
-		new_insn->x_reg = ARG1_REG;
+		new_insn->a_reg = BPF_REG_CTX;
+		new_insn->x_reg = BPF_REG_ARG1;
 	}
 	new_insn++;
 
@@ -948,8 +950,8 @@ do_pass:
 				break;
 
 			insn->code = fp->code;
-			insn->a_reg = A_REG;
-			insn->x_reg = X_REG;
+			insn->a_reg = BPF_REG_A;
+			insn->x_reg = BPF_REG_X;
 			insn->imm = fp->k;
 			break;
 
@@ -983,16 +985,16 @@ do_pass:
 				 * in compare insn.
 				 */
 				insn->code = BPF_ALU | BPF_MOV | BPF_K;
-				insn->a_reg = TMP_REG;
+				insn->a_reg = BPF_REG_TMP;
 				insn->imm = fp->k;
 				insn++;
 
-				insn->a_reg = A_REG;
-				insn->x_reg = TMP_REG;
+				insn->a_reg = BPF_REG_A;
+				insn->x_reg = BPF_REG_TMP;
 				bpf_src = BPF_X;
 			} else {
-				insn->a_reg = A_REG;
-				insn->x_reg = X_REG;
+				insn->a_reg = BPF_REG_A;
+				insn->x_reg = BPF_REG_X;
 				insn->imm = fp->k;
 				bpf_src = BPF_SRC(fp->code);
 			}
@@ -1027,33 +1029,33 @@ do_pass:
 		/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
 		case BPF_LDX | BPF_MSH | BPF_B:
 			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = TMP_REG;
-			insn->x_reg = A_REG;
+			insn->a_reg = BPF_REG_TMP;
+			insn->x_reg = BPF_REG_A;
 			insn++;
 
 			insn->code = BPF_LD | BPF_ABS | BPF_B;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = fp->k;
 			insn++;
 
 			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = 0xf;
 			insn++;
 
 			insn->code = BPF_ALU | BPF_LSH | BPF_K;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = 2;
 			insn++;
 
 			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = X_REG;
-			insn->x_reg = A_REG;
+			insn->a_reg = BPF_REG_X;
+			insn->x_reg = BPF_REG_A;
 			insn++;
 
 			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = A_REG;
-			insn->x_reg = TMP_REG;
+			insn->a_reg = BPF_REG_A;
+			insn->x_reg = BPF_REG_TMP;
 			break;
 
 		/* RET_K, RET_A are remaped into 2 insns. */
@@ -1063,7 +1065,7 @@ do_pass:
 				     (BPF_RVAL(fp->code) == BPF_K ?
 				      BPF_K : BPF_X);
 			insn->a_reg = 0;
-			insn->x_reg = A_REG;
+			insn->x_reg = BPF_REG_A;
 			insn->imm = fp->k;
 			insn++;
 
@@ -1074,8 +1076,9 @@ do_pass:
 		case BPF_ST:
 		case BPF_STX:
 			insn->code = BPF_STX | BPF_MEM | BPF_W;
-			insn->a_reg = FP_REG;
-			insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG;
+			insn->a_reg = BPF_REG_FP;
+			insn->x_reg = fp->code == BPF_ST ?
+				      BPF_REG_A : BPF_REG_X;
 			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
 			break;
 
@@ -1084,8 +1087,8 @@ do_pass:
 		case BPF_LDX | BPF_MEM:
 			insn->code = BPF_LDX | BPF_MEM | BPF_W;
 			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      A_REG : X_REG;
-			insn->x_reg = FP_REG;
+				      BPF_REG_A : BPF_REG_X;
+			insn->x_reg = BPF_REG_FP;
 			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
 			break;
 
@@ -1094,22 +1097,22 @@ do_pass:
 		case BPF_LDX | BPF_IMM:
 			insn->code = BPF_ALU | BPF_MOV | BPF_K;
 			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      A_REG : X_REG;
+				      BPF_REG_A : BPF_REG_X;
 			insn->imm = fp->k;
 			break;
 
 		/* X = A */
 		case BPF_MISC | BPF_TAX:
 			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = X_REG;
-			insn->x_reg = A_REG;
+			insn->a_reg = BPF_REG_X;
+			insn->x_reg = BPF_REG_A;
 			break;
 
 		/* A = X */
 		case BPF_MISC | BPF_TXA:
 			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = A_REG;
-			insn->x_reg = X_REG;
+			insn->a_reg = BPF_REG_A;
+			insn->x_reg = BPF_REG_X;
 			break;
 
 		/* A = skb->len or X = skb->len */
@@ -1117,16 +1120,16 @@ do_pass:
 		case BPF_LDX | BPF_W | BPF_LEN:
 			insn->code = BPF_LDX | BPF_MEM | BPF_W;
 			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      A_REG : X_REG;
-			insn->x_reg = CTX_REG;
+				      BPF_REG_A : BPF_REG_X;
+			insn->x_reg = BPF_REG_CTX;
 			insn->off = offsetof(struct sk_buff, len);
 			break;
 
 		/* access seccomp_data fields */
 		case BPF_LDX | BPF_ABS | BPF_W:
 			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->a_reg = A_REG;
-			insn->x_reg = CTX_REG;
+			insn->a_reg = BPF_REG_A;
+			insn->x_reg = BPF_REG_CTX;
 			insn->off = fp->k;
 			break;
 
-- 
cgit v1.2.3-71-gd317


From fd3c269f8ff940cc0fbb3b7f7e84c0572f6f759a Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Thu, 17 Apr 2014 10:37:35 +0800
Subject: drm/i915: Split the BDW device definition to prepare for dual BSD
 rings on BDW GT3

Based on the hardware spec, the BDW GT3 has the different configuration
with the BDW GT1/GT2. So split the BDW device info definition.
This is to do the preparation for adding the Dual BSD rings on BDW GT3 machine.

V1->V2: Follow Daniel's comment to pay attention to the stolen check for BDW
in kernel/early-quirks.c

Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.c | 26 ++++++++++++++++++++++++--
 include/drm/i915_pciids.h       | 22 +++++++++++++++++-----
 2 files changed, 41 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f2ca3e929cd7..9f47aab7915f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -279,6 +279,26 @@ static const struct intel_device_info intel_broadwell_m_info = {
 	GEN_DEFAULT_PIPEOFFSETS,
 };
 
+static const struct intel_device_info intel_broadwell_gt3d_info = {
+	.gen = 8, .num_pipes = 3,
+	.need_gfx_hws = 1, .has_hotplug = 1,
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+	.has_llc = 1,
+	.has_ddi = 1,
+	.has_fbc = 1,
+	GEN_DEFAULT_PIPEOFFSETS,
+};
+
+static const struct intel_device_info intel_broadwell_gt3m_info = {
+	.gen = 8, .is_mobile = 1, .num_pipes = 3,
+	.need_gfx_hws = 1, .has_hotplug = 1,
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+	.has_llc = 1,
+	.has_ddi = 1,
+	.has_fbc = 1,
+	GEN_DEFAULT_PIPEOFFSETS,
+};
+
 /*
  * Make sure any device matches here are from most specific to most
  * general.  For example, since the Quanta match is based on the subsystem
@@ -311,8 +331,10 @@ static const struct intel_device_info intel_broadwell_m_info = {
 	INTEL_HSW_M_IDS(&intel_haswell_m_info), \
 	INTEL_VLV_M_IDS(&intel_valleyview_m_info),	\
 	INTEL_VLV_D_IDS(&intel_valleyview_d_info),	\
-	INTEL_BDW_M_IDS(&intel_broadwell_m_info),	\
-	INTEL_BDW_D_IDS(&intel_broadwell_d_info)
+	INTEL_BDW_GT12M_IDS(&intel_broadwell_m_info),	\
+	INTEL_BDW_GT12D_IDS(&intel_broadwell_d_info),	\
+	INTEL_BDW_GT3M_IDS(&intel_broadwell_gt3m_info),	\
+	INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info)
 
 static const struct pci_device_id pciidlist[] = {		/* aka */
 	INTEL_PCI_IDS,
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 940ece4934ba..24f3cad045db 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -223,14 +223,26 @@
 	_INTEL_BDW_D(gt, 0x160A, info), /* Server */ \
 	_INTEL_BDW_D(gt, 0x160D, info) /* Workstation */
 
-#define INTEL_BDW_M_IDS(info) \
+#define INTEL_BDW_GT12M_IDS(info) \
 	_INTEL_BDW_M_IDS(1, info), \
-	_INTEL_BDW_M_IDS(2, info), \
-	_INTEL_BDW_M_IDS(3, info)
+	_INTEL_BDW_M_IDS(2, info)
 
-#define INTEL_BDW_D_IDS(info) \
+#define INTEL_BDW_GT12D_IDS(info) \
 	_INTEL_BDW_D_IDS(1, info), \
-	_INTEL_BDW_D_IDS(2, info), \
+	_INTEL_BDW_D_IDS(2, info)
+
+#define INTEL_BDW_GT3M_IDS(info) \
+	_INTEL_BDW_M_IDS(3, info)
+
+#define INTEL_BDW_GT3D_IDS(info) \
 	_INTEL_BDW_D_IDS(3, info)
 
+#define INTEL_BDW_M_IDS(info) \
+	INTEL_BDW_GT12M_IDS(info), \
+	INTEL_BDW_GT3M_IDS(info)
+
+#define INTEL_BDW_D_IDS(info) \
+	INTEL_BDW_GT12D_IDS(info), \
+	INTEL_BDW_GT3D_IDS(info)
+
 #endif /* _I915_PCIIDS_H */
-- 
cgit v1.2.3-71-gd317


From 9da93f9b7cdf8ab28da6b364cdc1fafc8670b4dc Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Mon, 5 May 2014 17:25:50 +1000
Subject: xfs: fix Q_XQUOTARM ioctl

The Q_XQUOTARM quotactl was not working properly, because
we weren't passing around proper flags.  The xfs_fs_set_xstate()
ioctl handler used the same flags for Q_XQUOTAON/OFF as
well as for Q_XQUOTARM, but Q_XQUOTAON/OFF look for
XFS_UQUOTA_ACCT, XFS_UQUOTA_ENFD, XFS_GQUOTA_ACCT etc,
i.e. quota type + state, while Q_XQUOTARM looks only for
the type of quota, i.e. XFS_DQ_USER, XFS_DQ_GROUP etc.

Unfortunately these flag spaces overlap a bit, so we
got semi-random results for Q_XQUOTARM; i.e. the value
for XFS_DQ_USER == XFS_UQUOTA_ACCT, etc.  yeargh.

Add a new quotactl op vector specifically for the QUOTARM
operation, since it operates with a different flag space.

This has been broken more or less forever, AFAICT.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Acked-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/quota/quota.c      | 14 +++++++++++++-
 fs/xfs/xfs_quotaops.c | 29 +++++++++++++++++++++++++----
 include/linux/quota.h |  1 +
 3 files changed, 39 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 2b363e23f36e..ff3f0b3cfdb3 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -278,6 +278,17 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
 	return ret;
 }
 
+static int quota_rmxquota(struct super_block *sb, void __user *addr)
+{
+	__u32 flags;
+
+	if (copy_from_user(&flags, addr, sizeof(flags)))
+		return -EFAULT;
+	if (!sb->s_qcop->rm_xquota)
+		return -ENOSYS;
+	return sb->s_qcop->rm_xquota(sb, flags);
+}
+
 /* Copy parameters and call proper function */
 static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 		       void __user *addr, struct path *path)
@@ -316,8 +327,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 		return sb->s_qcop->quota_sync(sb, type);
 	case Q_XQUOTAON:
 	case Q_XQUOTAOFF:
-	case Q_XQUOTARM:
 		return quota_setxstate(sb, cmd, addr);
+	case Q_XQUOTARM:
+		return quota_rmxquota(sb, addr);
 	case Q_XGETQSTAT:
 		return quota_getxstate(sb, addr);
 	case Q_XGETQSTATV:
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index af33cafe69b6..2ad1b9822e92 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -100,15 +100,35 @@ xfs_fs_set_xstate(
 		if (!XFS_IS_QUOTA_ON(mp))
 			return -EINVAL;
 		return -xfs_qm_scall_quotaoff(mp, flags);
-	case Q_XQUOTARM:
-		if (XFS_IS_QUOTA_ON(mp))
-			return -EINVAL;
-		return -xfs_qm_scall_trunc_qfiles(mp, flags);
 	}
 
 	return -EINVAL;
 }
 
+STATIC int
+xfs_fs_rm_xquota(
+	struct super_block	*sb,
+	unsigned int		uflags)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+	unsigned int		flags = 0;
+	
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+
+	if (XFS_IS_QUOTA_ON(mp))
+		return -EINVAL;
+
+	if (uflags & FS_USER_QUOTA)
+		flags |= XFS_DQ_USER;
+	if (uflags & FS_GROUP_QUOTA)
+		flags |= XFS_DQ_GROUP;
+	if (uflags & FS_USER_QUOTA)
+		flags |= XFS_DQ_PROJ;
+
+	return -xfs_qm_scall_trunc_qfiles(mp, flags);
+}	
+
 STATIC int
 xfs_fs_get_dqblk(
 	struct super_block	*sb,
@@ -149,6 +169,7 @@ const struct quotactl_ops xfs_quotactl_operations = {
 	.get_xstatev		= xfs_fs_get_xstatev,
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
+	.rm_xquota		= xfs_fs_rm_xquota,
 	.get_dqblk		= xfs_fs_get_dqblk,
 	.set_dqblk		= xfs_fs_set_dqblk,
 };
diff --git a/include/linux/quota.h b/include/linux/quota.h
index cc7494a35429..0f3c5d38da1f 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -329,6 +329,7 @@ struct quotactl_ops {
 	int (*get_xstate)(struct super_block *, struct fs_quota_stat *);
 	int (*set_xstate)(struct super_block *, unsigned int, int);
 	int (*get_xstatev)(struct super_block *, struct fs_quota_statv *);
+	int (*rm_xquota)(struct super_block *, unsigned int);
 };
 
 struct quota_format_type {
-- 
cgit v1.2.3-71-gd317


From 0c4972ccaa27620fe4281ac5c8c536978a563345 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Thu, 1 May 2014 10:17:27 +0300
Subject: mac80211: set an external flag for TDLS stations

Expose a new tdls flag for the public ieee80211_sta struct.
This can be used in some rate control decisions.

Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 ++
 net/mac80211/cfg.c     | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 451c1bf00df9..bdb4a7cbab31 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1374,6 +1374,7 @@ struct ieee80211_sta_rates {
  *	the station moves to associated state.
  * @smps_mode: current SMPS mode (off, static or dynamic)
  * @rates: rate control selection table
+ * @tdls: indicates whether the STA is a TDLS peer
  */
 struct ieee80211_sta {
 	u32 supp_rates[IEEE80211_NUM_BANDS];
@@ -1388,6 +1389,7 @@ struct ieee80211_sta {
 	enum ieee80211_sta_rx_bandwidth bandwidth;
 	enum ieee80211_smps_mode smps_mode;
 	struct ieee80211_sta_rates __rcu *rates;
+	bool tdls;
 
 	/* must be last */
 	u8 drv_priv[0] __aligned(sizeof(void *));
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7b8d3cf89574..d8b236633ca3 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1459,6 +1459,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 	if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) {
 		sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
 		sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+	} else {
+		sta->sta.tdls = true;
 	}
 
 	err = sta_apply_parameters(local, sta, params);
-- 
cgit v1.2.3-71-gd317


From 8757ad65d30f009fe0beeb2d70d3cd834cb998f2 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Fri, 11 Apr 2014 10:37:39 -0400
Subject: NVMe: Update copyright headers

Make the copyright dates accurate and remove the final paragraph that
includes the address of the FSF.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme-core.c | 4 ----
 drivers/block/nvme-scsi.c | 6 +-----
 include/linux/nvme.h      | 6 +-----
 include/uapi/linux/nvme.h | 6 +-----
 4 files changed, 3 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 7c64fa756cce..eacd64e36be3 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #include <linux/nvme.h>
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 2c3f5be06da1..da3b252dea6f 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -1,6 +1,6 @@
 /*
  * NVM Express device driver
- * Copyright (c) 2011, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 /*
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index a50173ca1d72..cfd084cab22b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1,6 +1,6 @@
 /*
  * Definitions for the NVM Express interface
- * Copyright (c) 2011-2013, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #ifndef _LINUX_NVME_H
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index 096fe1c6f83d..ad9014e49693 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -1,6 +1,6 @@
 /*
  * Definitions for the NVM Express interface
- * Copyright (c) 2011-2013, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #ifndef _UAPI_LINUX_NVME_H
-- 
cgit v1.2.3-71-gd317


From a7d2ce2832d84e0182585f63bf96ca7323b3aee7 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 29 Apr 2014 11:41:28 -0600
Subject: NVMe: Configure support for block flush

This configures an nvme request_queue as flush capable if the device
has a volatile write cache present.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme-core.c | 3 +++
 include/linux/nvme.h      | 1 +
 include/uapi/linux/nvme.h | 1 +
 3 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 025dd4cad4a6..e7c4fdb6a651 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1897,6 +1897,8 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	if (dev->max_hw_sectors)
 		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
+	if (dev->vwc & NVME_CTRL_VWC_PRESENT)
+		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
 
 	disk->major = nvme_major;
 	disk->first_minor = 0;
@@ -2201,6 +2203,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	nn = le32_to_cpup(&ctrl->nn);
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
+	dev->vwc = ctrl->vwc;
 	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
 	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
 	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index cfd084cab22b..6266373d3147 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -99,6 +99,7 @@ struct nvme_dev {
 	u32 stripe_size;
 	u16 oncs;
 	u16 abort_limit;
+	u8 vwc;
 	u8 initialized;
 };
 
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index ad9014e49693..f090336d5bad 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -73,6 +73,7 @@ enum {
 	NVME_CTRL_ONCS_COMPARE			= 1 << 0,
 	NVME_CTRL_ONCS_WRITE_UNCORRECTABLE	= 1 << 1,
 	NVME_CTRL_ONCS_DSM			= 1 << 2,
+	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 };
 
 struct nvme_lbaf {
-- 
cgit v1.2.3-71-gd317


From 53562be74bd06bbe74d2acf3caca5398f8eeb160 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 29 Apr 2014 11:41:29 -0600
Subject: NVMe: Flush with data support

It is possible a filesystem may send a flush flagged bio with write
data. There is no such composite NVMe command, so the driver sends flush
and write separately.

The device is allowed to execute these commands in any order, so it was
possible the driver ends the bio after the write completes, but while the
flush is still active. We don't want to let a filesystem believe flush
succeeded before it really has; this could cause data corruption on a
power loss between these events. To fix, this patch splits the flush
and write into chained bios.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme-core.c | 44 ++++++++++++++++++++++++--------------------
 include/linux/nvme.h      |  1 -
 2 files changed, 24 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index e7c4fdb6a651..cd8a8bc711cc 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -197,16 +197,13 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
 #define CMD_CTX_CANCELLED	(0x30C + CMD_CTX_BASE)
 #define CMD_CTX_COMPLETED	(0x310 + CMD_CTX_BASE)
 #define CMD_CTX_INVALID		(0x314 + CMD_CTX_BASE)
-#define CMD_CTX_FLUSH		(0x318 + CMD_CTX_BASE)
-#define CMD_CTX_ABORT		(0x31C + CMD_CTX_BASE)
+#define CMD_CTX_ABORT		(0x318 + CMD_CTX_BASE)
 
 static void special_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
 	if (ctx == CMD_CTX_CANCELLED)
 		return;
-	if (ctx == CMD_CTX_FLUSH)
-		return;
 	if (ctx == CMD_CTX_ABORT) {
 		++nvmeq->dev->abort_limit;
 		return;
@@ -629,16 +626,6 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	return 0;
 }
 
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns)
-{
-	int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH,
-					special_completion, NVME_IO_TIMEOUT);
-	if (unlikely(cmdid < 0))
-		return cmdid;
-
-	return nvme_submit_flush(nvmeq, ns, cmdid);
-}
-
 static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 {
 	struct bio *bio = iod->private;
@@ -654,7 +641,7 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 
 	if (bio->bi_rw & REQ_DISCARD)
 		return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
-	if ((bio->bi_rw & REQ_FLUSH) && !iod->nents)
+	if (bio->bi_rw & REQ_FLUSH)
 		return nvme_submit_flush(nvmeq, ns, cmdid);
 
 	control = 0;
@@ -688,6 +675,26 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 	return 0;
 }
 
+static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio)
+{
+	struct bio *split = bio_clone(bio, GFP_ATOMIC);
+	if (!split)
+		return -ENOMEM;
+
+	split->bi_iter.bi_size = 0;
+	split->bi_phys_segments = 0;
+	bio->bi_rw &= ~REQ_FLUSH;
+	bio_chain(split, bio);
+
+	if (!waitqueue_active(&nvmeq->sq_full))
+		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
+	bio_list_add(&nvmeq->sq_cong, split);
+	bio_list_add(&nvmeq->sq_cong, bio);
+	wake_up_process(nvme_thread);
+
+	return 0;
+}
+
 /*
  * Called with local interrupts disabled and the q_lock held.  May not sleep.
  */
@@ -698,11 +705,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	int psegs = bio_phys_segments(ns->queue, bio);
 	int result;
 
-	if ((bio->bi_rw & REQ_FLUSH) && psegs) {
-		result = nvme_submit_flush_data(nvmeq, ns);
-		if (result)
-			return result;
-	}
+	if ((bio->bi_rw & REQ_FLUSH) && psegs)
+		return nvme_split_flush_data(nvmeq, bio);
 
 	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
 	if (!iod)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 6266373d3147..1813cfdb7e80 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -156,7 +156,6 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 			struct nvme_iod *iod);
 int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *);
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
 int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
 							u32 *result);
 int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
-- 
cgit v1.2.3-71-gd317


From 07064c6e022ba8dc0c86ce12f7851a1de24e04fc Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 2 May 2014 16:28:03 -0700
Subject: net: Allow csum_add to be provided in arch

csum_add is really nothing more then add-with-carry which
can be implemented efficiently in some architectures.
Allow architecture to define this protected by HAVE_ARCH_CSUM_ADD.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/checksum.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/checksum.h b/include/net/checksum.h
index a28f4e0f6251..87cb1903640d 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -57,12 +57,14 @@ static __inline__ __wsum csum_and_copy_to_user
 }
 #endif
 
+#ifndef HAVE_ARCH_CSUM_ADD
 static inline __wsum csum_add(__wsum csum, __wsum addend)
 {
 	u32 res = (__force u32)csum;
 	res += (__force u32)addend;
 	return (__force __wsum)(res + (res < (__force u32)addend));
 }
+#endif
 
 static inline __wsum csum_sub(__wsum csum, __wsum addend)
 {
-- 
cgit v1.2.3-71-gd317


From 76ba0aae673075c77a8b775e9133c8e8b1a44563 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 2 May 2014 16:29:18 -0700
Subject: net: Generalize checksum_init functions

Create a general __skb_checksum_validate function (actually a
macro) to subsume the various checksum_init functions. This
function can either init the checksum, or do the full validation
(logically checksum_init+skb_check_complete)-- a flag specifies
if full vaidation is performed. Also, there is a flag to the function
to indicate that zero checksums are allowed (to support optional
UDP checksums).

Added several stub functions for calling __skb_checksum_validate.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 08074a810164..3ca0dda5a42e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2741,6 +2741,99 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
 	       0 : __skb_checksum_complete(skb);
 }
 
+/* Check if we need to perform checksum complete validation.
+ *
+ * Returns true if checksum complete is needed, false otherwise
+ * (either checksum is unnecessary or zero checksum is allowed).
+ */
+static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
+						  bool zero_okay,
+						  __sum16 check)
+{
+	if (skb_csum_unnecessary(skb)) {
+		return false;
+	} else if (zero_okay && !check) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		return false;
+	}
+
+	return true;
+}
+
+/* For small packets <= CHECKSUM_BREAK peform checksum complete directly
+ * in checksum_init.
+ */
+#define CHECKSUM_BREAK 76
+
+/* Validate (init) checksum based on checksum complete.
+ *
+ * Return values:
+ *   0: checksum is validated or try to in skb_checksum_complete. In the latter
+ *	case the ip_summed will not be CHECKSUM_UNNECESSARY and the pseudo
+ *	checksum is stored in skb->csum for use in __skb_checksum_complete
+ *   non-zero: value of invalid checksum
+ *
+ */
+static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
+						       bool complete,
+						       __wsum psum)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		if (!csum_fold(csum_add(psum, skb->csum))) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			return 0;
+		}
+	}
+
+	skb->csum = psum;
+
+	if (complete || skb->len <= CHECKSUM_BREAK)
+		return __skb_checksum_complete(skb);
+
+	return 0;
+}
+
+static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	return 0;
+}
+
+/* Perform checksum validate (init). Note that this is a macro since we only
+ * want to calculate the pseudo header which is an input function if necessary.
+ * First we try to validate without any computation (checksum unnecessary) and
+ * then calculate based on checksum complete calling the function to compute
+ * pseudo header.
+ *
+ * Return values:
+ *   0: checksum is validated or try to in skb_checksum_complete
+ *   non-zero: value of invalid checksum
+ */
+#define __skb_checksum_validate(skb, proto, complete,			\
+				zero_okay, check, compute_pseudo)	\
+({									\
+	__sum16 __ret = 0;						\
+	if (__skb_checksum_validate_needed(skb, zero_okay, check))	\
+		__ret = __skb_checksum_validate_complete(skb,		\
+				complete, compute_pseudo(skb, proto));	\
+	__ret;								\
+})
+
+#define skb_checksum_init(skb, proto, compute_pseudo)			\
+	__skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo)
+
+#define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo)	\
+	__skb_checksum_validate(skb, proto, false, true, check, compute_pseudo)
+
+#define skb_checksum_validate(skb, proto, compute_pseudo)		\
+	__skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo)
+
+#define skb_checksum_validate_zero_check(skb, proto, check,		\
+					 compute_pseudo)		\
+	__skb_checksum_validate_(skb, proto, true, true, check, compute_pseudo)
+
+#define skb_checksum_simple_validate(skb)				\
+	__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 void nf_conntrack_destroy(struct nf_conntrack *nfct);
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
-- 
cgit v1.2.3-71-gd317


From ed70fcfcee953a76028bfc3f963d2167c2990020 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 2 May 2014 16:29:38 -0700
Subject: net: Call skb_checksum_init in IPv4

Call skb_checksum_init instead of private functions.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h    |  6 ++++++
 net/ipv4/tcp_ipv4.c | 25 ++-----------------------
 net/ipv4/udp.c      | 19 ++-----------------
 3 files changed, 10 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 3ec2b0fb9d83..1988cefdbb70 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -342,6 +342,12 @@ static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *d
 		__ip_select_ident(iph, dst, more);
 }
 
+static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	return csum_tcpudp_nofold(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+				  skb->len, proto, 0);
+}
+
 /*
  *	Map a multicast IP onto multicast MAC for type ethernet.
  */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 438f3b95143d..ad166dcc278f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1744,28 +1744,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	return sk;
 }
 
-static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
-{
-	const struct iphdr *iph = ip_hdr(skb);
-
-	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v4_check(skb->len, iph->saddr,
-				  iph->daddr, skb->csum)) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			return 0;
-		}
-	}
-
-	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-				       skb->len, IPPROTO_TCP, 0);
-
-	if (skb->len <= 76) {
-		return __skb_checksum_complete(skb);
-	}
-	return 0;
-}
-
-
 /* The socket must have it's spinlock held when we get
  * here.
  *
@@ -1960,7 +1938,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	 * Packet length and doff are validated by header prediction,
 	 * provided case of th->doff==0 is eliminated.
 	 * So, we defer the checks. */
-	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
+
+	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
 		goto csum_error;
 
 	th = tcp_hdr(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4468e1adc094..f2d05d7be743 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1672,7 +1672,6 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 				 int proto)
 {
-	const struct iphdr *iph;
 	int err;
 
 	UDP_SKB_CB(skb)->partial_cov = 0;
@@ -1684,22 +1683,8 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 			return err;
 	}
 
-	iph = ip_hdr(skb);
-	if (uh->check == 0) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
-				      proto, skb->csum))
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-	}
-	if (!skb_csum_unnecessary(skb))
-		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-					       skb->len, proto, 0);
-	/* Probably, we should checksum udp header (it should be in cache
-	 * in any case) and data in tiny packets (< rx copybreak).
-	 */
-
-	return 0;
+	return skb_checksum_init_zero_check(skb, proto, uh->check,
+					    inet_compute_pseudo);
 }
 
 /*
-- 
cgit v1.2.3-71-gd317


From e4f45b7f40bdaade5ef8f45e7c6daed4c909fdf5 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 2 May 2014 16:29:51 -0700
Subject: net: Call skb_checksum_init in IPv6

Call skb_checksum_init instead of private functions.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_checksum.h |  7 +++++++
 net/ipv6/ip6_checksum.c    | 11 +----------
 net/ipv6/tcp_ipv6.c        | 21 +--------------------
 3 files changed, 9 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h
index 9e3c540c1b11..8ac5c21f8456 100644
--- a/include/net/ip6_checksum.h
+++ b/include/net/ip6_checksum.h
@@ -41,6 +41,13 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 			__wsum csum);
 #endif
 
+static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	return ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+					    &ipv6_hdr(skb)->daddr,
+					    skb->len, proto, 0));
+}
+
 static __inline__ __sum16 tcp_v6_check(int len,
 				   const struct in6_addr *saddr,
 				   const struct in6_addr *daddr,
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index ee7a97f510cb..c69fe37b8104 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -84,16 +84,7 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
 			       &ipv6_hdr(skb)->daddr, ntohs(uh->dest));
 		return 1;
 	}
-	if (skb->ip_summed == CHECKSUM_COMPLETE &&
-	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
-			     skb->len, proto, skb->csum))
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	if (!skb_csum_unnecessary(skb))
-		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-							 &ipv6_hdr(skb)->daddr,
-							 skb->len, proto, 0));
-
-	return 0;
+	return skb_checksum_init(skb, IPPROTO_UDP, ip6_compute_pseudo);
 }
 EXPORT_SYMBOL(udp6_csum_init);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e289830ed6e3..7fa67439f4d6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1294,25 +1294,6 @@ out:
 	return NULL;
 }
 
-static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
-{
-	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
-				  &ipv6_hdr(skb)->daddr, skb->csum)) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			return 0;
-		}
-	}
-
-	skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
-					      &ipv6_hdr(skb)->saddr,
-					      &ipv6_hdr(skb)->daddr, 0));
-
-	if (skb->len <= 76)
-		return __skb_checksum_complete(skb);
-	return 0;
-}
-
 /* The socket must have it's spinlock held when we get
  * here.
  *
@@ -1486,7 +1467,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, th->doff*4))
 		goto discard_it;
 
-	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
+	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
 		goto csum_error;
 
 	th = tcp_hdr(skb);
-- 
cgit v1.2.3-71-gd317


From 86aae6c7b577654b7293374973985a153e0c147e Mon Sep 17 00:00:00 2001
From: Libor Pechacek <lpechacek@suse.cz>
Date: Tue, 29 Apr 2014 20:38:34 +0200
Subject: Bluetooth: Convert RFCOMM spinlocks into mutexes

Enabling CONFIG_DEBUG_ATOMIC_SLEEP has shown that some rfcomm functions
acquiring spinlocks call sleeping locks further in the chain.  Converting
the offending spinlocks into mutexes makes sleeping safe.

Signed-off-by: Libor Pechacek <lpechacek@suse.cz>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/rfcomm.h |  6 +++---
 net/bluetooth/rfcomm/core.c    |  2 +-
 net/bluetooth/rfcomm/tty.c     | 20 ++++++++++----------
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 2611cc389d7d..578b83127af1 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -173,7 +173,7 @@ struct rfcomm_dlc {
 	struct sk_buff_head   tx_queue;
 	struct timer_list     timer;
 
-	spinlock_t    lock;
+	struct mutex  lock;
 	unsigned long state;
 	unsigned long flags;
 	atomic_t      refcnt;
@@ -244,8 +244,8 @@ int  rfcomm_dlc_get_modem_status(struct rfcomm_dlc *d, u8 *v24_sig);
 void rfcomm_dlc_accept(struct rfcomm_dlc *d);
 struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel);
 
-#define rfcomm_dlc_lock(d)     spin_lock(&d->lock)
-#define rfcomm_dlc_unlock(d)   spin_unlock(&d->lock)
+#define rfcomm_dlc_lock(d)     mutex_lock(&d->lock)
+#define rfcomm_dlc_unlock(d)   mutex_unlock(&d->lock)
 
 static inline void rfcomm_dlc_hold(struct rfcomm_dlc *d)
 {
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 633cceeb943e..05c609b12a1d 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -307,7 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
 	setup_timer(&d->timer, rfcomm_dlc_timeout, (unsigned long)d);
 
 	skb_queue_head_init(&d->tx_queue);
-	spin_lock_init(&d->lock);
+	mutex_init(&d->lock);
 	atomic_set(&d->refcnt, 1);
 
 	rfcomm_dlc_clear_state(d);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 403ec09f480a..8e385a0ae60e 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -70,7 +70,7 @@ struct rfcomm_dev {
 };
 
 static LIST_HEAD(rfcomm_dev_list);
-static DEFINE_SPINLOCK(rfcomm_dev_lock);
+static DEFINE_MUTEX(rfcomm_dev_lock);
 
 static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb);
 static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err);
@@ -96,9 +96,9 @@ static void rfcomm_dev_destruct(struct tty_port *port)
 	if (dev->tty_dev)
 		tty_unregister_device(rfcomm_tty_driver, dev->id);
 
-	spin_lock(&rfcomm_dev_lock);
+	mutex_lock(&rfcomm_dev_lock);
 	list_del(&dev->list);
-	spin_unlock(&rfcomm_dev_lock);
+	mutex_unlock(&rfcomm_dev_lock);
 
 	kfree(dev);
 
@@ -161,14 +161,14 @@ static struct rfcomm_dev *rfcomm_dev_get(int id)
 {
 	struct rfcomm_dev *dev;
 
-	spin_lock(&rfcomm_dev_lock);
+	mutex_lock(&rfcomm_dev_lock);
 
 	dev = __rfcomm_dev_lookup(id);
 
 	if (dev && !tty_port_get(&dev->port))
 		dev = NULL;
 
-	spin_unlock(&rfcomm_dev_lock);
+	mutex_unlock(&rfcomm_dev_lock);
 
 	return dev;
 }
@@ -224,7 +224,7 @@ static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
-	spin_lock(&rfcomm_dev_lock);
+	mutex_lock(&rfcomm_dev_lock);
 
 	if (req->dev_id < 0) {
 		dev->id = 0;
@@ -305,11 +305,11 @@ static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
 	   holds reference to this module. */
 	__module_get(THIS_MODULE);
 
-	spin_unlock(&rfcomm_dev_lock);
+	mutex_unlock(&rfcomm_dev_lock);
 	return dev;
 
 out:
-	spin_unlock(&rfcomm_dev_lock);
+	mutex_unlock(&rfcomm_dev_lock);
 	kfree(dev);
 	return ERR_PTR(err);
 }
@@ -524,7 +524,7 @@ static int rfcomm_get_dev_list(void __user *arg)
 
 	di = dl->dev_info;
 
-	spin_lock(&rfcomm_dev_lock);
+	mutex_lock(&rfcomm_dev_lock);
 
 	list_for_each_entry(dev, &rfcomm_dev_list, list) {
 		if (!tty_port_get(&dev->port))
@@ -540,7 +540,7 @@ static int rfcomm_get_dev_list(void __user *arg)
 			break;
 	}
 
-	spin_unlock(&rfcomm_dev_lock);
+	mutex_unlock(&rfcomm_dev_lock);
 
 	dl->dev_num = n;
 	size = sizeof(*dl) + n * sizeof(*di);
-- 
cgit v1.2.3-71-gd317


From bdffd893a0e9c431304142d12d9a0a21d365c502 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Tue, 29 Apr 2014 14:17:40 -0500
Subject: tracing: Replace __get_cpu_var uses with this_cpu_ptr

Replace uses of &__get_cpu_var for address calculation with this_cpu_ptr.

Link: http://lkml.kernel.org/p/alpine.DEB.2.10.1404291415560.18364@gentwo.org

Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kprobes.h | 2 +-
 kernel/trace/ftrace.c   | 4 ++--
 kernel/trace/trace.c    | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 925eaf28fca9..7bd2ad01e39c 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -355,7 +355,7 @@ static inline void reset_current_kprobe(void)
 
 static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void)
 {
-	return (&__get_cpu_var(kprobe_ctlblk));
+	return this_cpu_ptr(&kprobe_ctlblk);
 }
 
 int register_kprobe(struct kprobe *p);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9eb1aa03a18d..38e5cf73b9ae 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -822,7 +822,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
 
 	local_irq_save(flags);
 
-	stat = &__get_cpu_var(ftrace_profile_stats);
+	stat = this_cpu_ptr(&ftrace_profile_stats);
 	if (!stat->hash || !ftrace_profile_enabled)
 		goto out;
 
@@ -853,7 +853,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	stat = &__get_cpu_var(ftrace_profile_stats);
+	stat = this_cpu_ptr(&ftrace_profile_stats);
 	if (!stat->hash || !ftrace_profile_enabled)
 		goto out;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4c392c8238bf..05431696b10c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1726,7 +1726,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	 */
 	barrier();
 	if (use_stack == 1) {
-		trace.entries		= &__get_cpu_var(ftrace_stack).calls[0];
+		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
 
 		if (regs)
-- 
cgit v1.2.3-71-gd317


From 59af6928d2099479c0bc2ef3f66cc7b33998120a Mon Sep 17 00:00:00 2001
From: Michal Kazior <michal.kazior@tieto.com>
Date: Wed, 9 Apr 2014 15:10:59 +0200
Subject: mac80211: fix CSA tx queue stopping

It was possible for tx queues to be stuck stopped
if AP CSA finalization failed. In that case
neither stop_ap nor do_stop woke the queues up.
This means it was impossible to perform tx at all
until driver was reloaded or a successful CSA was
performed later.

It was possible to solve this in a simpler manner
however this is more robust and future proof
(having multi-vif CSA in mind).

New sdata->csa_block_tx is introduced to keep
track of which interfaces requested tx to be
blocked for CSA. This is required because mac80211
stops all tx queues for that purpose. This means
queues must be awoken only when last tx-blocking
CSA interface is finished.

It is still possible to have tx queues stopped
after CSA failure but as soon as offending
interfaces are stopped from userspace (stop_ap or
ifdown) tx queues are woken up properly.

Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     |  4 ++-
 net/mac80211/cfg.c         | 77 ++++++++++++++++++++++++++++++++++++++--------
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/iface.c       |  7 +++++
 net/mac80211/mlme.c        | 37 ++++++++++++++++------
 5 files changed, 104 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index bdb4a7cbab31..3541c48a97cd 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1113,7 +1113,9 @@ enum ieee80211_vif_flags {
  * @addr: address of this interface
  * @p2p: indicates whether this AP or STA interface is a p2p
  *	interface, i.e. a GO or p2p-sta respectively
- * @csa_active: marks whether a channel switch is going on
+ * @csa_active: marks whether a channel switch is going on. Internally it is
+ *	write-protected by sdata_lock and local->mtx so holding either is fine
+ *	for read access.
  * @driver_flags: flags/capabilities the driver has for this interface,
  *	these need to be set (or cleared) when the interface is added
  *	or, if supported by the driver, the interface type is changed
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 19a7e6ff45d3..f789c3198af4 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1084,6 +1084,31 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
 	return 0;
 }
 
+bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	lockdep_assert_held(&local->mtx);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+		if (!ieee80211_sdata_running(sdata))
+			continue;
+
+		if (!sdata->vif.csa_active)
+			continue;
+
+		if (!sdata->csa_block_tx)
+			continue;
+
+		rcu_read_unlock();
+		return true;
+	}
+	rcu_read_unlock();
+
+	return false;
+}
+
 static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1101,7 +1126,14 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
 	old_probe_resp = sdata_dereference(sdata->u.ap.probe_resp, sdata);
 
 	/* abort any running channel switch */
+	mutex_lock(&local->mtx);
 	sdata->vif.csa_active = false;
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+	mutex_unlock(&local->mtx);
+
 	kfree(sdata->u.ap.next_beacon);
 	sdata->u.ap.next_beacon = NULL;
 
@@ -3027,11 +3059,10 @@ static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
 	int err, changed = 0;
 
 	sdata_assert_lock(sdata);
+	lockdep_assert_held(&local->mtx);
 
-	mutex_lock(&local->mtx);
 	sdata->radar_required = sdata->csa_radar_required;
 	err = ieee80211_vif_change_channel(sdata, &changed);
-	mutex_unlock(&local->mtx);
 	if (WARN_ON(err < 0))
 		return;
 
@@ -3072,11 +3103,12 @@ static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
 
 	ieee80211_bss_info_change_notify(sdata, changed);
 
-	ieee80211_wake_queues_by_reason(&sdata->local->hw,
+	cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef);
+
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
 					IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_CSA);
-
-	cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef);
 }
 
 void ieee80211_csa_finalize_work(struct work_struct *work)
@@ -3084,8 +3116,11 @@ void ieee80211_csa_finalize_work(struct work_struct *work)
 	struct ieee80211_sub_if_data *sdata =
 		container_of(work, struct ieee80211_sub_if_data,
 			     csa_finalize_work);
+	struct ieee80211_local *local = sdata->local;
 
 	sdata_lock(sdata);
+	mutex_lock(&local->mtx);
+
 	/* AP might have been stopped while waiting for the lock. */
 	if (!sdata->vif.csa_active)
 		goto unlock;
@@ -3096,6 +3131,7 @@ void ieee80211_csa_finalize_work(struct work_struct *work)
 	ieee80211_csa_finalize(sdata);
 
 unlock:
+	mutex_unlock(&local->mtx);
 	sdata_unlock(sdata);
 }
 
@@ -3222,8 +3258,8 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
 	return 0;
 }
 
-int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
-			     struct cfg80211_csa_settings *params)
+int __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+			       struct cfg80211_csa_settings *params)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
@@ -3232,6 +3268,7 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 	int err, num_chanctx, changed = 0;
 
 	sdata_assert_lock(sdata);
+	lockdep_assert_held(&local->mtx);
 
 	if (!list_empty(&local->roc_list) || local->scanning)
 		return -EBUSY;
@@ -3274,15 +3311,15 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 		return err;
 
 	sdata->csa_radar_required = params->radar_required;
-
-	if (params->block_tx)
-		ieee80211_stop_queues_by_reason(&local->hw,
-				IEEE80211_MAX_QUEUE_MAP,
-				IEEE80211_QUEUE_STOP_REASON_CSA);
-
 	sdata->csa_chandef = params->chandef;
+	sdata->csa_block_tx = params->block_tx;
 	sdata->vif.csa_active = true;
 
+	if (sdata->csa_block_tx)
+		ieee80211_stop_queues_by_reason(&local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+
 	if (changed) {
 		ieee80211_bss_info_change_notify(sdata, changed);
 		drv_channel_switch_beacon(sdata, &params->chandef);
@@ -3294,6 +3331,20 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 	return 0;
 }
 
+int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+			     struct cfg80211_csa_settings *params)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	int err;
+
+	mutex_lock(&local->mtx);
+	err = __ieee80211_channel_switch(wiphy, dev, params);
+	mutex_unlock(&local->mtx);
+
+	return err;
+}
+
 static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 			     struct cfg80211_mgmt_tx_params *params,
 			     u64 *cookie)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f86d06aaf54b..f4ba0c4a4314 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -756,6 +756,7 @@ struct ieee80211_sub_if_data {
 	int csa_counter_offset_beacon;
 	int csa_counter_offset_presp;
 	bool csa_radar_required;
+	bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
 	struct cfg80211_chan_def csa_chandef;
 
 	struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */
@@ -1472,6 +1473,7 @@ void ieee80211_sw_roc_work(struct work_struct *work);
 void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
 
 /* channel switch handling */
+bool ieee80211_csa_needs_block_tx(struct ieee80211_local *local);
 void ieee80211_csa_finalize_work(struct work_struct *work);
 int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 			     struct cfg80211_csa_settings *params);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7fff3dcaac43..79fc98815da8 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -838,8 +838,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 
 	cancel_work_sync(&sdata->recalc_smps);
 	sdata_lock(sdata);
+	mutex_lock(&local->mtx);
 	sdata->vif.csa_active = false;
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+	mutex_unlock(&local->mtx);
 	sdata_unlock(sdata);
+
 	cancel_work_sync(&sdata->csa_finalize_work);
 
 	cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 488826f188a7..d68e73cbdcd6 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -975,16 +975,20 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 	/* XXX: shouldn't really modify cfg80211-owned data! */
 	ifmgd->associated->channel = sdata->csa_chandef.chan;
 
+	ieee80211_bss_info_change_notify(sdata, changed);
+
+	mutex_lock(&local->mtx);
+	sdata->vif.csa_active = false;
 	/* XXX: wait for a beacon first? */
-	ieee80211_wake_queues_by_reason(&local->hw,
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
 					IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_CSA);
+	mutex_unlock(&local->mtx);
 
-	ieee80211_bss_info_change_notify(sdata, changed);
-
- out:
-	sdata->vif.csa_active = false;
 	ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
+
+out:
 	sdata_unlock(sdata);
 }
 
@@ -1100,12 +1104,16 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	mutex_unlock(&local->chanctx_mtx);
 
 	sdata->csa_chandef = csa_ie.chandef;
+
+	mutex_lock(&local->mtx);
 	sdata->vif.csa_active = true;
+	sdata->csa_block_tx = csa_ie.mode;
 
-	if (csa_ie.mode)
+	if (sdata->csa_block_tx)
 		ieee80211_stop_queues_by_reason(&local->hw,
-				IEEE80211_MAX_QUEUE_MAP,
-				IEEE80211_QUEUE_STOP_REASON_CSA);
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+	mutex_unlock(&local->mtx);
 
 	if (local->ops->channel_switch) {
 		/* use driver's channel switch callback */
@@ -1817,6 +1825,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	ifmgd->flags = 0;
 	mutex_lock(&local->mtx);
 	ieee80211_vif_release_channel(sdata);
+
+	sdata->vif.csa_active = false;
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
 	mutex_unlock(&local->mtx);
 
 	sdata->encrypt_headroom = IEEE80211_ENCRYPT_HEADROOM;
@@ -2045,6 +2059,7 @@ EXPORT_SYMBOL(ieee80211_ap_probereq_get);
 
 static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 {
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
 
@@ -2058,10 +2073,14 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 			       WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
 			       true, frame_buf);
 	ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
+
+	mutex_lock(&local->mtx);
 	sdata->vif.csa_active = false;
-	ieee80211_wake_queues_by_reason(&sdata->local->hw,
+	if (!ieee80211_csa_needs_block_tx(local))
+		ieee80211_wake_queues_by_reason(&local->hw,
 					IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_CSA);
+	mutex_unlock(&local->mtx);
 
 	cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf,
 			      IEEE80211_DEAUTH_FRAME_LEN);
-- 
cgit v1.2.3-71-gd317


From f04c22033c25f71617ac62bcfe75698baa17a0b8 Mon Sep 17 00:00:00 2001
From: Michal Kazior <michal.kazior@tieto.com>
Date: Wed, 9 Apr 2014 15:11:01 +0200
Subject: cfg80211: export interface stopping function

This exports a new cfg80211_stop_iface() function.

This is intended for driver internal interface
combination management and channel switching.

Due to locking issues (it re-enters driver) the
call is asynchronous and uses cfg80211 event
list/worker.

Signed-off-by: Michal Kazior <michal.kazior@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 15 +++++++++++++++
 net/wireless/ap.c      |  4 ++--
 net/wireless/core.c    | 43 ++++++++++++++++++++++++++++++++++++-------
 net/wireless/core.h    |  7 +++++++
 net/wireless/mesh.c    |  4 ++--
 net/wireless/trace.h   | 15 +++++++++++++++
 net/wireless/util.c    |  3 +++
 7 files changed, 80 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7eae46ccec01..0631230b01eb 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4756,6 +4756,21 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
 					    void *data),
 			       void *data);
 
+/*
+ * cfg80211_stop_iface - trigger interface disconnection
+ *
+ * @wiphy: the wiphy
+ * @wdev: wireless device
+ * @gfp: context flags
+ *
+ * Trigger interface to be stopped as if AP was stopped, IBSS/mesh left, STA
+ * disconnected.
+ *
+ * Note: This doesn't need any locks and is asynchronous.
+ */
+void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev,
+			 gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 3e02ade508d8..bdad1f951561 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -6,8 +6,8 @@
 #include "rdev-ops.h"
 
 
-static int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
-			      struct net_device *dev, bool notify)
+int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
+		       struct net_device *dev, bool notify)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index f509da4d9be9..7e023b74f009 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -792,23 +792,23 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
 		rdev->num_running_monitor_ifaces += num;
 }
 
-void cfg80211_leave(struct cfg80211_registered_device *rdev,
-		    struct wireless_dev *wdev)
+void __cfg80211_leave(struct cfg80211_registered_device *rdev,
+		      struct wireless_dev *wdev)
 {
 	struct net_device *dev = wdev->netdev;
 
 	ASSERT_RTNL();
+	ASSERT_WDEV_LOCK(wdev);
 
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_ADHOC:
-		cfg80211_leave_ibss(rdev, dev, true);
+		__cfg80211_leave_ibss(rdev, dev, true);
 		break;
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_STATION:
 		if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev)
 			__cfg80211_stop_sched_scan(rdev, false);
 
-		wdev_lock(wdev);
 #ifdef CONFIG_CFG80211_WEXT
 		kfree(wdev->wext.ie);
 		wdev->wext.ie = NULL;
@@ -817,20 +817,49 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
 #endif
 		cfg80211_disconnect(rdev, dev,
 				    WLAN_REASON_DEAUTH_LEAVING, true);
-		wdev_unlock(wdev);
 		break;
 	case NL80211_IFTYPE_MESH_POINT:
-		cfg80211_leave_mesh(rdev, dev);
+		__cfg80211_leave_mesh(rdev, dev);
 		break;
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_P2P_GO:
-		cfg80211_stop_ap(rdev, dev, true);
+		__cfg80211_stop_ap(rdev, dev, true);
 		break;
 	default:
 		break;
 	}
 }
 
+void cfg80211_leave(struct cfg80211_registered_device *rdev,
+		    struct wireless_dev *wdev)
+{
+	wdev_lock(wdev);
+	__cfg80211_leave(rdev, wdev);
+	wdev_unlock(wdev);
+}
+
+void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev,
+			 gfp_t gfp)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	struct cfg80211_event *ev;
+	unsigned long flags;
+
+	trace_cfg80211_stop_iface(wiphy, wdev);
+
+	ev = kzalloc(sizeof(*ev), gfp);
+	if (!ev)
+		return;
+
+	ev->type = EVENT_STOPPED;
+
+	spin_lock_irqsave(&wdev->event_lock, flags);
+	list_add_tail(&ev->list, &wdev->event_list);
+	spin_unlock_irqrestore(&wdev->event_lock, flags);
+	queue_work(cfg80211_wq, &rdev->event_work);
+}
+EXPORT_SYMBOL(cfg80211_stop_iface);
+
 static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 					 unsigned long state, void *ptr)
 {
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 681b8fa4355b..e9afbf10e756 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -185,6 +185,7 @@ enum cfg80211_event_type {
 	EVENT_ROAMED,
 	EVENT_DISCONNECTED,
 	EVENT_IBSS_JOINED,
+	EVENT_STOPPED,
 };
 
 struct cfg80211_event {
@@ -281,6 +282,8 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
 		       struct net_device *dev,
 		       struct mesh_setup *setup,
 		       const struct mesh_config *conf);
+int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+			  struct net_device *dev);
 int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
 			struct net_device *dev);
 int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
@@ -288,6 +291,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
 			      struct cfg80211_chan_def *chandef);
 
 /* AP */
+int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
+		       struct net_device *dev, bool notify);
 int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
 		     struct net_device *dev, bool notify);
 
@@ -441,6 +446,8 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
 void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
 			       enum nl80211_iftype iftype, int num);
 
+void __cfg80211_leave(struct cfg80211_registered_device *rdev,
+		      struct wireless_dev *wdev);
 void cfg80211_leave(struct cfg80211_registered_device *rdev,
 		    struct wireless_dev *wdev);
 
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 3ddfb7cd335e..092300b30c37 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -238,8 +238,8 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
 	return 0;
 }
 
-static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
-				 struct net_device *dev)
+int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
+			  struct net_device *dev)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index f3c13ff4d04c..cdfbb00e1b37 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2636,6 +2636,21 @@ TRACE_EVENT(cfg80211_ft_event,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap))
 );
 
+TRACE_EVENT(cfg80211_stop_iface,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+	TP_ARGS(wiphy, wdev),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT,
+		  WIPHY_PR_ARG, WDEV_PR_ARG)
+);
+
 #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7c47fa07b276..a756429b3a0a 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -839,6 +839,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
 			__cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid,
 					       ev->ij.channel);
 			break;
+		case EVENT_STOPPED:
+			__cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
+			break;
 		}
 		wdev_unlock(wdev);
 
-- 
cgit v1.2.3-71-gd317


From e7c24607b5d68a4cdc56e09d70a3c8bae5f0519f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 10 Apr 2014 20:54:51 -0400
Subject: kill iov_iter_copy_from_user()

all callers can use copy_page_from_iter() and it actually simplifies
them.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ceph/file.c         |  3 +--
 fs/cifs/file.c         |  7 +++----
 include/linux/uio.h    |  2 --
 mm/iov_iter.c          | 27 ---------------------------
 mm/process_vm_access.c |  6 +-----
 5 files changed, 5 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 88a6df4cbe6d..ef9115e4a6fa 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -737,13 +737,12 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
 		left = len;
 		for (n = 0; n < num_pages; n++) {
 			size_t plen = min_t(size_t, left, PAGE_SIZE);
-			ret = iov_iter_copy_from_user(pages[n], &i, 0, plen);
+			ret = copy_page_from_iter(pages[n], 0, plen, &i);
 			if (ret != plen) {
 				ret = -EFAULT;
 				break;
 			}
 			left -= ret;
-			iov_iter_advance(&i, ret);
 		}
 
 		if (ret < 0) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5ed03e0b8b40..2900d150654e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2444,11 +2444,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
 
 		save_len = cur_len;
 		for (i = 0; i < nr_pages; i++) {
-			bytes = min_t(const size_t, cur_len, PAGE_SIZE);
-			copied = iov_iter_copy_from_user(wdata->pages[i], &it,
-							 0, bytes);
+			bytes = min_t(size_t, cur_len, PAGE_SIZE);
+			copied = copy_page_from_iter(wdata->pages[i], 0, bytes,
+						     &it);
 			cur_len -= copied;
-			iov_iter_advance(&it, copied);
 			/*
 			 * If we didn't copy as much as we expected, then that
 			 * may mean we trod into an unmapped area. Stop copying
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 199bcc34241b..abbe83ded630 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -62,8 +62,6 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
 
 size_t iov_iter_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes);
-size_t iov_iter_copy_from_user(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 10e46cd721de..22ec1ef068a8 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -129,33 +129,6 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 }
 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
-/*
- * This has the same sideeffects and return value as
- * iov_iter_copy_from_user_atomic().
- * The difference is that it attempts to resolve faults.
- * Page must not be locked.
- */
-size_t iov_iter_copy_from_user(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes)
-{
-	char *kaddr;
-	size_t copied;
-
-	kaddr = kmap(page);
-	if (likely(i->nr_segs == 1)) {
-		int left;
-		char __user *buf = i->iov->iov_base + i->iov_offset;
-		left = __copy_from_user(kaddr + offset, buf, bytes);
-		copied = bytes - left;
-	} else {
-		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
-						i->iov, i->iov_offset, bytes);
-	}
-	kunmap(page);
-	return copied;
-}
-EXPORT_SYMBOL(iov_iter_copy_from_user);
-
 void iov_iter_advance(struct iov_iter *i, size_t bytes)
 {
 	BUG_ON(i->count < bytes);
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 8505c9262b35..f32b1fbbfe69 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -46,11 +46,7 @@ static int process_vm_rw_pages(struct page **pages,
 			copy = len;
 
 		if (vm_write) {
-			if (copy > iov_iter_count(iter))
-				copy = iov_iter_count(iter);
-			copied = iov_iter_copy_from_user(page, iter,
-					offset, copy);
-			iov_iter_advance(iter, copied);
+			copied = copy_page_from_iter(page, offset, copy, iter);
 			set_page_dirty_lock(page);
 		} else {
 			copied = copy_page_to_iter(page, offset, copy, iter);
-- 
cgit v1.2.3-71-gd317


From f8579f8673b7ecdb7a81d5d5bb1d981093d9aa94 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 3 Mar 2014 22:03:20 -0500
Subject: generic_file_direct_write(): switch to iov_iter

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/file.c    |  6 ++----
 fs/fuse/file.c     |  6 ++----
 fs/ocfs2/file.c    |  6 +++---
 fs/xfs/xfs_file.c  |  5 +++--
 include/linux/fs.h |  4 ++--
 mm/filemap.c       | 15 +++++++--------
 6 files changed, 19 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ae6af072b635..9fe20c2052af 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1669,15 +1669,13 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 	loff_t endbyte;
 	int err;
 
-	written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
-					    count, ocount);
+	iov_iter_init(&i, iov, nr_segs, count, 0);
+	written = generic_file_direct_write(iocb, &i, pos, count, ocount);
 
 	if (written < 0 || written == count)
 		return written;
 
 	pos += written;
-	count -= written;
-	iov_iter_init(&i, iov, nr_segs, count, written);
 	written_buffered = __btrfs_buffered_write(file, &i, pos);
 	if (written_buffered < 0) {
 		err = written_buffered;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 96d513e01a5d..126deb5d0a9c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1235,15 +1235,13 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		goto out;
 
 	if (file->f_flags & O_DIRECT) {
-		written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 
-						    count, ocount);
+		iov_iter_init(&i, iov, nr_segs, count, 0);
+		written = generic_file_direct_write(iocb, &i, pos, count, ocount);
 		if (written < 0 || written == count)
 			goto out;
 
 		pos += written;
-		count -= written;
 
-		iov_iter_init(&i, iov, nr_segs, count, written);
 		written_buffered = fuse_perform_write(file, mapping, &i, pos);
 		if (written_buffered < 0) {
 			err = written_buffered;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8970dcf74de5..d6d78c2aa96e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2251,6 +2251,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	int full_coherency = !(osb->s_mount_opt &
 			       OCFS2_MOUNT_COHERENCY_BUFFERED);
 	int unaligned_dio = 0;
+	struct iov_iter from;
 
 	trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
 		(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2365,16 +2366,15 @@ relock:
 	if (ret)
 		goto out_dio;
 
+	iov_iter_init(&from, iov, nr_segs, count, 0);
 	if (direct_io) {
-		written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
+		written = generic_file_direct_write(iocb, &from, *ppos,
 						    count, ocount);
 		if (written < 0) {
 			ret = written;
 			goto out_dio;
 		}
 	} else {
-		struct iov_iter from;
-		iov_iter_init(&from, iov, nr_segs, count, 0);
 		current->backing_dev_info = file->f_mapping->backing_dev_info;
 		written = generic_perform_write(file, &from, *ppos);
 		if (likely(written >= 0))
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 951a2321ee01..8617497867c7 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -641,6 +641,7 @@ xfs_file_dio_aio_write(
 	int			iolock;
 	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
 					mp->m_rtdev_targp : mp->m_ddev_targp;
+	struct iov_iter		from;
 
 	/* DIO must be aligned to device logical sector size */
 	if ((pos | count) & target->bt_logical_sectormask)
@@ -698,8 +699,8 @@ xfs_file_dio_aio_write(
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-	ret = generic_file_direct_write(iocb, iovp,
-			&nr_segs, pos, count, ocount);
+	iov_iter_init(&from, iovp, nr_segs, count, 0);
+	ret = generic_file_direct_write(iocb, &from, pos, count, ocount);
 
 out:
 	xfs_rw_iunlock(ip, iolock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 878031227c57..262f96e579b8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2407,8 +2407,8 @@ int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isbl
 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
-		unsigned long *, loff_t, size_t, size_t);
+extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *,
+		loff_t, size_t, size_t);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
diff --git a/mm/filemap.c b/mm/filemap.c
index 000a220e2a41..a840890ed39f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2385,9 +2385,8 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
 EXPORT_SYMBOL(pagecache_write_end);
 
 ssize_t
-generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long *nr_segs, loff_t pos,
-		size_t count, size_t ocount)
+generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
+		loff_t pos, size_t count, size_t ocount)
 {
 	struct file	*file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -2397,9 +2396,9 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	pgoff_t		end;
 
 	if (count != ocount)
-		*nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
+		from->nr_segs = iov_shorten((struct iovec *)from->iov, from->nr_segs, count);
 
-	write_len = iov_length(iov, *nr_segs);
+	write_len = iov_length(from->iov, from->nr_segs);
 	end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
 
 	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
@@ -2426,7 +2425,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 		}
 	}
 
-	written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
+	written = mapping->a_ops->direct_IO(WRITE, iocb, from->iov, pos, from->nr_segs);
 
 	/*
 	 * Finally, try again to invalidate clean pages which might have been
@@ -2443,6 +2442,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 
 	if (written > 0) {
 		pos += written;
+		iov_iter_advance(from, written);
 		if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
 			i_size_write(inode, pos);
 			mark_inode_dirty(inode);
@@ -2645,11 +2645,10 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		loff_t endbyte;
 
-		written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
+		written = generic_file_direct_write(iocb, &from, pos,
 							count, ocount);
 		if (written < 0 || written == count)
 			goto out;
-		iov_iter_advance(&from, written);
 
 		/*
 		 * direct-io write to a hole: fall through to buffered I/O
-- 
cgit v1.2.3-71-gd317


From cb66a7a1f149ff705fa37cad6d1252b046e0ad4f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 4 Mar 2014 15:24:06 -0500
Subject: kill generic_segment_checks()

all callers of ->aio_read() and ->aio_write() have iov/nr_segs already
checked - generic_segment_checks() done after that is just an odd way
to spell iov_length().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/lustre/lustre/llite/file.c | 10 ++----
 fs/btrfs/file.c                            |  7 +---
 fs/ceph/file.c                             | 13 ++------
 fs/fuse/file.c                             |  7 +---
 fs/ntfs/file.c                             |  5 +--
 fs/ocfs2/file.c                            |  7 +---
 fs/xfs/xfs_file.c                          |  9 ++---
 include/linux/fs.h                         |  2 --
 mm/filemap.c                               | 53 ++----------------------------
 mm/shmem.c                                 |  7 ++--
 10 files changed, 16 insertions(+), 104 deletions(-)

(limited to 'include')

diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index 8e844a6371e0..220bd8390a84 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -1180,9 +1180,7 @@ static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t	     result;
 	int		 refcheck;
 
-	result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-	if (result)
-		return result;
+	count = iov_length(iov, nr_segs);
 
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
@@ -1235,14 +1233,10 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct lu_env      *env;
 	struct vvp_io_args *args;
-	size_t	      count = 0;
+	size_t	      count = iov_length(iov, nr_segs);
 	ssize_t	     result;
 	int		 refcheck;
 
-	result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-	if (result)
-		return result;
-
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
 		return PTR_ERR(env);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1dafe0701daf..a0a94a30d85a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1726,12 +1726,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
 	mutex_lock(&inode->i_mutex);
 
-	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-	if (err) {
-		mutex_unlock(&inode->i_mutex);
-		goto out;
-	}
-	count = ocount;
+	count = ocount = iov_length(iov, nr_segs);
 
 	current->backing_dev_info = inode->i_mapping->backing_dev_info;
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ef9115e4a6fa..21a56c27b74c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -828,12 +828,8 @@ again:
 		     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
 		     ceph_cap_string(got));
 
-		if (!read) {
-			ret = generic_segment_checks(iov, &nr_segs,
-							&len, VERIFY_WRITE);
-			if (ret)
-				goto out;
-		}
+		if (!read)
+			len = iov_length(iov, nr_segs);
 
 		iov_iter_init(&i, iov, nr_segs, len, read);
 
@@ -855,7 +851,6 @@ again:
 
 		ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
 	}
-out:
 	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
 	ceph_put_cap_refs(ci, got);
@@ -911,9 +906,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
 	mutex_lock(&inode->i_mutex);
 
-	err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-	if (err)
-		goto out;
+	count = iov_length(iov, nr_segs);
 
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = file->f_mapping->backing_dev_info;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 126deb5d0a9c..9c7f346879e7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1208,12 +1208,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
 	WARN_ON(iocb->ki_pos != pos);
 
-	ocount = 0;
-	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-	if (err)
-		return err;
-
-	count = ocount;
+	count = ocount = iov_length(iov, nr_segs);
 	mutex_lock(&inode->i_mutex);
 
 	/* We can write back this queue in page reclaim */
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index db9bd8a31725..b6fa457d8d01 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2091,10 +2091,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
 	size_t count;		/* after file limit checks */
 	ssize_t written, err;
 
-	count = 0;
-	err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-	if (err)
-		return err;
+	count = iov_length(iov, nr_segs);
 	pos = *ppos;
 	/* We can write back this queue in page reclaim. */
 	current->backing_dev_info = mapping->backing_dev_info;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index d6d78c2aa96e..d33c4ced0baf 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2355,12 +2355,7 @@ relock:
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb, rw_level);
 
-	ret = generic_segment_checks(iov, &nr_segs, &ocount,
-				     VERIFY_READ);
-	if (ret)
-		goto out_dio;
-
-	count = ocount;
+	count = ocount = iov_length(iov, nr_segs);
 	ret = generic_write_checks(file, ppos, &count,
 				   S_ISBLK(inode->i_mode));
 	if (ret)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8617497867c7..f0f8084a67be 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -253,9 +253,7 @@ xfs_file_aio_read(
 	if (file->f_mode & FMODE_NOCMTIME)
 		ioflags |= IO_INVIS;
 
-	ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
-	if (ret < 0)
-		return ret;
+	size = iov_length(iovp, nr_segs);
 
 	if (unlikely(ioflags & IO_ISDIRECT)) {
 		xfs_buftarg_t	*target =
@@ -777,10 +775,7 @@ xfs_file_aio_write(
 
 	BUG_ON(iocb->ki_pos != pos);
 
-	ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
-	if (ret)
-		return ret;
-
+	ocount = iov_length(iovp, nr_segs);
 	if (ocount == 0)
 		return 0;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 262f96e579b8..796de742fe4a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2412,8 +2412,6 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *,
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-extern int generic_segment_checks(const struct iovec *iov,
-		unsigned long *nr_segs, size_t *count, int access_flags);
 
 /* fs/block_dev.c */
 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/mm/filemap.c b/mm/filemap.c
index a840890ed39f..7c1417b0bd7b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1663,45 +1663,6 @@ out:
 	return written ? written : error;
 }
 
-/*
- * Performs necessary checks before doing a write
- * @iov:	io vector request
- * @nr_segs:	number of segments in the iovec
- * @count:	number of bytes to write
- * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE
- *
- * Adjust number of segments and amount of bytes to write (nr_segs should be
- * properly initialized first). Returns appropriate error code that caller
- * should return or zero in case that write should be allowed.
- */
-int generic_segment_checks(const struct iovec *iov,
-			unsigned long *nr_segs, size_t *count, int access_flags)
-{
-	unsigned long   seg;
-	size_t cnt = 0;
-	for (seg = 0; seg < *nr_segs; seg++) {
-		const struct iovec *iv = &iov[seg];
-
-		/*
-		 * If any segment has a negative length, or the cumulative
-		 * length ever wraps negative then return -EINVAL.
-		 */
-		cnt += iv->iov_len;
-		if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
-			return -EINVAL;
-		if (access_ok(access_flags, iv->iov_base, iv->iov_len))
-			continue;
-		if (seg == 0)
-			return -EFAULT;
-		*nr_segs = seg;
-		cnt -= iv->iov_len;	/* This segment is no good */
-		break;
-	}
-	*count = cnt;
-	return 0;
-}
-EXPORT_SYMBOL(generic_segment_checks);
-
 /**
  * generic_file_aio_read - generic filesystem read routine
  * @iocb:	kernel I/O control block
@@ -1717,15 +1678,12 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
 	struct file *filp = iocb->ki_filp;
-	ssize_t retval;
+	ssize_t retval = 0;
 	size_t count;
 	loff_t *ppos = &iocb->ki_pos;
 	struct iov_iter i;
 
-	count = 0;
-	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-	if (retval)
-		return retval;
+	count = iov_length(iov, nr_segs);
 	iov_iter_init(&i, iov, nr_segs, count, 0);
 
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
@@ -2615,12 +2573,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t		status;
 	struct iov_iter from;
 
-	ocount = 0;
-	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-	if (err)
-		return err;
-
-	count = ocount;
+	count = ocount = iov_length(iov, nr_segs);
 
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
diff --git a/mm/shmem.c b/mm/shmem.c
index 9f70e02111c6..2a93e625adaf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1412,14 +1412,11 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb,
 	unsigned long offset;
 	enum sgp_type sgp = SGP_READ;
 	int error = 0;
-	ssize_t retval;
-	size_t count;
+	ssize_t retval = 0;
+	size_t count = iov_length(iov, nr_segs);
 	loff_t *ppos = &iocb->ki_pos;
 	struct iov_iter iter;
 
-	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-	if (retval)
-		return retval;
 	iov_iter_init(&iter, iov, nr_segs, count, 0);
 
 	/*
-- 
cgit v1.2.3-71-gd317


From d8d3d94b80aa1a1c0ca75c58b8abdc7356f38418 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 4 Mar 2014 21:27:34 -0500
Subject: pass iov_iter to ->direct_IO()

unmodified, for now

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking          |  3 +--
 Documentation/filesystems/vfs.txt          |  3 +--
 drivers/staging/lustre/lustre/llite/rw26.c | 17 ++++++++---------
 fs/9p/vfs_addr.c                           |  5 ++---
 fs/block_dev.c                             |  9 +++++----
 fs/btrfs/inode.c                           | 12 ++++++------
 fs/ceph/addr.c                             |  4 ++--
 fs/cifs/file.c                             |  4 ++--
 fs/exofs/inode.c                           |  2 +-
 fs/ext2/inode.c                            | 11 ++++++-----
 fs/ext3/inode.c                            | 16 +++++++---------
 fs/ext4/inode.c                            | 11 +++++------
 fs/f2fs/data.c                             |  8 ++++----
 fs/fat/inode.c                             | 13 +++++++------
 fs/fuse/file.c                             | 10 +++++-----
 fs/gfs2/aops.c                             | 11 +++++------
 fs/hfs/inode.c                             |  8 ++++----
 fs/hfsplus/inode.c                         |  6 +++---
 fs/jfs/inode.c                             |  8 ++++----
 fs/nfs/direct.c                            |  8 ++++----
 fs/nilfs2/inode.c                          | 10 +++++-----
 fs/ocfs2/aops.c                            |  7 +++----
 fs/reiserfs/inode.c                        |  9 ++++-----
 fs/udf/file.c                              |  4 ++--
 fs/udf/inode.c                             |  8 ++++----
 fs/xfs/xfs_aops.c                          | 15 +++++++--------
 include/linux/fs.h                         |  3 +--
 include/linux/nfs_fs.h                     |  3 +--
 mm/filemap.c                               |  9 ++++-----
 mm/page_io.c                               |  6 ++++--
 30 files changed, 117 insertions(+), 126 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index eba790134253..9b0d5a33c8bf 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -196,8 +196,7 @@ prototypes:
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
-	int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs);
+	int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
 				unsigned long *);
 	int (*migratepage)(struct address_space *, struct page *, struct page *);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 617f6d70c077..1846374a5add 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -589,8 +589,7 @@ struct address_space_operations {
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
-	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs);
+	ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
 	struct page* (*get_xip_page)(struct address_space *, sector_t,
 			int);
 	/* migrate the contents of a page to the specified target */
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 7e3e0967993b..66e05c6f4d27 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -363,15 +363,14 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
 #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \
 		      ~(DT_MAX_BRW_SIZE - 1))
 static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
-			       const struct iovec *iov, loff_t file_offset,
-			       unsigned long nr_segs)
+			       struct iov_iter *iter, loff_t file_offset)
 {
 	struct lu_env *env;
 	struct cl_io *io;
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	struct ccc_object *obj = cl_inode2ccc(inode);
-	long count = iov_length(iov, nr_segs);
+	long count = iov_length(iter->iov, iter->nr_segs);
 	long tot_bytes = 0, result = 0;
 	struct ll_inode_info *lli = ll_i2info(inode);
 	unsigned long seg = 0;
@@ -392,9 +391,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 	       MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
 
 	/* Check that all user buffers are aligned as well */
-	for (seg = 0; seg < nr_segs; seg++) {
-		if (((unsigned long)iov[seg].iov_base & ~CFS_PAGE_MASK) ||
-		    (iov[seg].iov_len & ~CFS_PAGE_MASK))
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		if (((unsigned long)iter->iov[seg].iov_base & ~CFS_PAGE_MASK) ||
+		    (iter->iov[seg].iov_len & ~CFS_PAGE_MASK))
 			return -EINVAL;
 	}
 
@@ -411,9 +410,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 		mutex_lock(&inode->i_mutex);
 
 	LASSERT(obj->cob_transient_pages == 0);
-	for (seg = 0; seg < nr_segs; seg++) {
-		long iov_left = iov[seg].iov_len;
-		unsigned long user_addr = (unsigned long)iov[seg].iov_base;
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		long iov_left = iter->iov[seg].iov_len;
+		unsigned long user_addr = (unsigned long)iter->iov[seg].iov_base;
 
 		if (rw == READ) {
 			if (file_offset >= i_size_read(inode))
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index c71e88602ff4..cc1cfae726b3 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -259,8 +259,7 @@ static int v9fs_launder_page(struct page *page)
  *
  */
 static ssize_t
-v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-	       loff_t pos, unsigned long nr_segs)
+v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
 	/*
 	 * FIXME
@@ -269,7 +268,7 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	 */
 	p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n",
 		 iocb->ki_filp->f_path.dentry->d_name.name,
-		 (long long)pos, nr_segs);
+		 (long long)pos, iter->nr_segs);
 
 	return -EINVAL;
 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 552a8d13bc32..938fc707d769 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -165,14 +165,15 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
 }
 
 static ssize_t
-blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs)
+blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+			loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 
-	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
-				    nr_segs, blkdev_get_block, NULL, NULL, 0);
+	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter->iov,
+				    offset, iter->nr_segs, blkdev_get_block,
+				    NULL, NULL, 0);
 }
 
 int __sync_blockdev(struct block_device *bdev, int wait)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5f805bc944fa..30a6cc51f32c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7433,8 +7433,7 @@ out:
 }
 
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
-			const struct iovec *iov, loff_t offset,
-			unsigned long nr_segs)
+			struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -7444,8 +7443,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 	bool relock = false;
 	ssize_t ret;
 
-	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-			    offset, nr_segs))
+	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter->iov,
+			    offset, iter->nr_segs))
 		return 0;
 
 	atomic_inc(&inode->i_dio_count);
@@ -7457,7 +7456,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 	 * we need to flush the dirty pages again to make absolutely sure
 	 * that any outstanding dirty pages are on disk.
 	 */
-	count = iov_length(iov, nr_segs);
+	count = iov_length(iter->iov, iter->nr_segs);
 	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
 		     &BTRFS_I(inode)->runtime_flags))
 		filemap_fdatawrite_range(inode->i_mapping, offset, count);
@@ -7484,7 +7483,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 
 	ret = __blockdev_direct_IO(rw, iocb, inode,
 			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
-			iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+			iter->iov, offset, iter->nr_segs,
+			btrfs_get_blocks_direct, NULL,
 			btrfs_submit_direct, flags);
 	if (rw & WRITE) {
 		if (ret < 0 && ret != -EIOCBQUEUED)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b53278c9fd97..342ca5e423f9 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1187,8 +1187,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
  * never get called.
  */
 static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
-			      const struct iovec *iov,
-			      loff_t pos, unsigned long nr_segs)
+			      struct iov_iter *iter,
+			      loff_t pos)
 {
 	WARN_ON(1);
 	return -EINVAL;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2900d150654e..a4ccc39e6c11 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3702,8 +3702,8 @@ void cifs_oplock_break(struct work_struct *work)
  * Direct IO is not yet supported in the cached mode. 
  */
 static ssize_t
-cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov,
-               loff_t pos, unsigned long nr_segs)
+cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
+               loff_t pos)
 {
         /*
          * FIXME
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index d1c244d67667..3f9cafd73931 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -964,7 +964,7 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset,
 
  /* TODO: Should be easy enough to do proprly */
 static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	return 0;
 }
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b1d2a4675d42..47fbe760a7f8 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -850,18 +850,19 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
 }
 
 static ssize_t
-ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs)
+ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+			loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 ext2_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, ext2_get_block);
 	if (ret < 0 && (rw & WRITE))
-		ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
+		ext2_write_failed(mapping, offset +
+				  iov_length(iter->iov, iter->nr_segs));
 	return ret;
 }
 
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f5157d0d1b43..7a5c501dc31b 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1820,8 +1820,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
  * VFS code falls back into buffered path in that case so we are safe.
  */
 static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
-			const struct iovec *iov, loff_t offset,
-			unsigned long nr_segs)
+			struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -1829,10 +1828,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 	handle_t *handle;
 	ssize_t ret;
 	int orphan = 0;
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_length(iter->iov, iter->nr_segs);
 	int retries = 0;
 
-	trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+	trace_ext3_direct_IO_enter(inode, offset, count, rw);
 
 	if (rw == WRITE) {
 		loff_t final_size = offset + count;
@@ -1856,15 +1855,15 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 	}
 
 retry:
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 ext3_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, ext3_get_block);
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again.
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + count;
 
 		if (end > isize)
 			ext3_truncate_failed_direct_write(inode);
@@ -1909,8 +1908,7 @@ retry:
 			ret = err;
 	}
 out:
-	trace_ext3_direct_IO_exit(inode, offset,
-				iov_length(iov, nr_segs), rw, ret);
+	trace_ext3_direct_IO_exit(inode, offset, count, rw, ret);
 	return ret;
 }
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d7b7462a0e13..f51db730da39 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3222,8 +3222,7 @@ retake_lock:
 }
 
 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
-			      const struct iovec *iov, loff_t offset,
-			      unsigned long nr_segs)
+			      struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -3239,13 +3238,13 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
 	if (ext4_has_inline_data(inode))
 		return 0;
 
-	trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+	trace_ext4_direct_IO_enter(inode, offset, iov_length(iter->iov, iter->nr_segs), rw);
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+		ret = ext4_ext_direct_IO(rw, iocb, iter->iov, offset, iter->nr_segs);
 	else
-		ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+		ret = ext4_ind_direct_IO(rw, iocb, iter->iov, offset, iter->nr_segs);
 	trace_ext4_direct_IO_exit(inode, offset,
-				iov_length(iov, nr_segs), rw, ret);
+				iov_length(iter->iov, iter->nr_segs), rw, ret);
 	return ret;
 }
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45abd60e2bff..3a6ef121c095 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1010,7 +1010,7 @@ static int check_direct_IO(struct inode *inode, int rw,
 }
 
 static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -1019,11 +1019,11 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
 	if (f2fs_has_inline_data(inode))
 		return 0;
 
-	if (check_direct_IO(inode, rw, iov, offset, nr_segs))
+	if (check_direct_IO(inode, rw, iter->iov, offset, iter->nr_segs))
 		return 0;
 
-	return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-							get_data_block);
+	return blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				  iter->nr_segs, get_data_block);
 }
 
 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index b3361fe2bcb5..d5237a199055 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -185,8 +185,8 @@ static int fat_write_end(struct file *file, struct address_space *mapping,
 }
 
 static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
-			     const struct iovec *iov,
-			     loff_t offset, unsigned long nr_segs)
+			     struct iov_iter *iter,
+			     loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -203,7 +203,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
 		 *
 		 * Return 0, and fallback to normal buffered write.
 		 */
-		loff_t size = offset + iov_length(iov, nr_segs);
+		loff_t size = offset + iov_length(iter->iov, iter->nr_segs);
 		if (MSDOS_I(inode)->mmu_private < size)
 			return 0;
 	}
@@ -212,10 +212,11 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
 	 * FAT need to use the DIO_LOCKING for avoiding the race
 	 * condition of fat_get_block() and ->truncate().
 	 */
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 fat_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, fat_get_block);
 	if (ret < 0 && (rw & WRITE))
-		fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
+		fat_write_failed(mapping, offset +
+			         iov_length(iter->iov, iter->nr_segs));
 
 	return ret;
 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9c7f346879e7..17d96f36df15 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2890,8 +2890,8 @@ static inline loff_t fuse_round_up(loff_t off)
 }
 
 static ssize_t
-fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs)
+fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+			loff_t offset)
 {
 	ssize_t ret = 0;
 	struct file *file = iocb->ki_filp;
@@ -2900,7 +2900,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	loff_t pos = 0;
 	struct inode *inode;
 	loff_t i_size;
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_length(iter->iov, iter->nr_segs);
 	struct fuse_io_priv *io;
 
 	pos = offset;
@@ -2944,9 +2944,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 		io->async = false;
 
 	if (rw == WRITE)
-		ret = __fuse_direct_write(io, iov, nr_segs, &pos);
+		ret = __fuse_direct_write(io, iter->iov, iter->nr_segs, &pos);
 	else
-		ret = __fuse_direct_read(io, iov, nr_segs, &pos, count);
+		ret = __fuse_direct_read(io, iter->iov, iter->nr_segs, &pos, count);
 
 	if (io->async) {
 		fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ce62dcac90b6..e84ddaa42104 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1041,8 +1041,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
 
 
 static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
-			      const struct iovec *iov, loff_t offset,
-			      unsigned long nr_segs)
+			      struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -1082,7 +1081,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 	 */
 	if (mapping->nrpages) {
 		loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
-		loff_t len = iov_length(iov, nr_segs);
+		loff_t len = iov_length(iter->iov, iter->nr_segs);
 		loff_t end = PAGE_ALIGN(offset + len) - 1;
 
 		rv = 0;
@@ -1097,9 +1096,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 			truncate_inode_pages_range(mapping, lstart, end);
 	}
 
-	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				  offset, nr_segs, gfs2_get_block_direct,
-				  NULL, NULL, 0);
+	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
+				  iter->iov, offset, iter->nr_segs,
+				  gfs2_get_block_direct, NULL, NULL, 0);
 out:
 	gfs2_glock_dq(&gh);
 	gfs2_holder_uninit(&gh);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9e2fecd62f62..09cff13528c5 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -125,15 +125,15 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
 }
 
 static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file_inode(file)->i_mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 hfs_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, hfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
@@ -141,7 +141,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + iov_length(iter->iov, iter->nr_segs);
 
 		if (end > isize)
 			hfs_write_failed(mapping, end);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index a4f45bd88a63..7f894a5b5eaf 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -123,14 +123,14 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
 }
 
 static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file_inode(file)->i_mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs,
 				 hfsplus_get_block);
 
 	/*
@@ -139,7 +139,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + iov_length(iter->iov, iter->nr_segs);
 
 		if (end > isize)
 			hfsplus_write_failed(mapping, end);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 6f8fe72c2a7a..7052744d5107 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -331,15 +331,15 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
 }
 
 static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
-	const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+	struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 jfs_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, jfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
@@ -347,7 +347,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + iov_length(iter->iov, iter->nr_segs);
 
 		if (end > isize)
 			jfs_write_failed(mapping, end);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b8797ae6831f..e9cde3935001 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -121,20 +121,20 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
  * shunt off direct read and write requests before the VFS gets them,
  * so this method is only ever called for swap.
  */
-ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
 #ifndef CONFIG_NFS_SWAP
 	dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
-			iocb->ki_filp, (long long) pos, nr_segs);
+			iocb->ki_filp, (long long) pos, iter->nr_segs);
 
 	return -EINVAL;
 #else
 	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
 
 	if (rw == READ || rw == KERNEL_READ)
-		return nfs_file_direct_read(iocb, iov, nr_segs, pos,
+		return nfs_file_direct_read(iocb, iter->iov, iter->nr_segs, pos,
 				rw == READ ? true : false);
-	return nfs_file_direct_write(iocb, iov, nr_segs, pos,
+	return nfs_file_direct_write(iocb, iter->iov, iter->nr_segs, pos,
 				rw == WRITE ? true : false);
 #endif /* CONFIG_NFS_SWAP */
 }
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b9c5726120e3..1c0e8fedc095 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -298,8 +298,8 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
 }
 
 static ssize_t
-nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-		loff_t offset, unsigned long nr_segs)
+nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+		loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -310,8 +310,8 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 		return 0;
 
 	/* Needs synchronization with the cleaner */
-	size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				  nilfs_get_block);
+	size = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				  iter->nr_segs, nilfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
@@ -319,7 +319,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	 */
 	if (unlikely((rw & WRITE) && size < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + iov_length(iter->iov, iter->nr_segs);
 
 		if (end > isize)
 			nilfs_write_failed(mapping, end);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d310d12a9adc..799fd0afcb35 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -599,9 +599,8 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
 
 static ssize_t ocfs2_direct_IO(int rw,
 			       struct kiocb *iocb,
-			       const struct iovec *iov,
-			       loff_t offset,
-			       unsigned long nr_segs)
+			       struct iov_iter *iter,
+			       loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file)->i_mapping->host;
@@ -618,7 +617,7 @@ static ssize_t ocfs2_direct_IO(int rw,
 		return 0;
 
 	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
-				    iov, offset, nr_segs,
+				    iter->iov, offset, iter->nr_segs,
 				    ocfs2_direct_IO_get_blocks,
 				    ocfs2_dio_end_io, NULL, 0);
 }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index bc8b8009897d..17bf4c41a509 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3083,15 +3083,14 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
 /* We thank Mingming Cao for helping us understand in great detail what
    to do in this section of the code. */
 static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
-				  const struct iovec *iov, loff_t offset,
-				  unsigned long nr_segs)
+				  struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				  reiserfs_get_blocks_direct_io);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, reiserfs_get_blocks_direct_io);
 
 	/*
 	 * In case of error extending write may have instantiated a few
@@ -3099,7 +3098,7 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + iov_length(iter->iov, iter->nr_segs);
 
 		if ((end > isize) && inode_newsize_ok(inode, isize) == 0) {
 			truncate_setsize(inode, isize);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index d2c170f8b035..ade886401658 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -119,8 +119,8 @@ static int udf_adinicb_write_end(struct file *file,
 }
 
 static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
-				     const struct iovec *iov,
-				     loff_t offset, unsigned long nr_segs)
+				     struct iov_iter *iter,
+				     loff_t offset)
 {
 	/* Fallback to buffered I/O. */
 	return 0;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 5d643706212f..5b184c7f7dcb 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -217,18 +217,18 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
 }
 
 static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
-			     const struct iovec *iov,
-			     loff_t offset, unsigned long nr_segs)
+			     struct iov_iter *iter,
+			     loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs,
 				  udf_get_block);
 	if (unlikely(ret < 0 && (rw & WRITE)))
-		udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
+		udf_write_failed(mapping, offset + iov_length(iter->iov, iter->nr_segs));
 	return ret;
 }
 
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0479c32c5eb1..330d7b1c4be3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1449,9 +1449,8 @@ STATIC ssize_t
 xfs_vm_direct_IO(
 	int			rw,
 	struct kiocb		*iocb,
-	const struct iovec	*iov,
-	loff_t			offset,
-	unsigned long		nr_segs)
+	struct iov_iter		*iter,
+	loff_t			offset)
 {
 	struct inode		*inode = iocb->ki_filp->f_mapping->host;
 	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
@@ -1459,7 +1458,7 @@ xfs_vm_direct_IO(
 	ssize_t			ret;
 
 	if (rw & WRITE) {
-		size_t size = iov_length(iov, nr_segs);
+		size_t size = iov_length(iter->iov, iter->nr_segs);
 
 		/*
 		 * We cannot preallocate a size update transaction here as we
@@ -1471,16 +1470,16 @@ xfs_vm_direct_IO(
 		if (offset + size > XFS_I(inode)->i_d.di_size)
 			ioend->io_isdirect = 1;
 
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-					    offset, nr_segs,
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov,
+					    offset, iter->nr_segs,
 					    xfs_get_blocks_direct,
 					    xfs_end_io_direct_write, NULL,
 					    DIO_ASYNC_EXTEND);
 		if (ret != -EIOCBQUEUED && iocb->private)
 			goto out_destroy_ioend;
 	} else {
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-					    offset, nr_segs,
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov,
+					    offset, iter->nr_segs,
 					    xfs_get_blocks_direct,
 					    NULL, NULL, 0);
 	}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 796de742fe4a..399a338c92b5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -343,8 +343,7 @@ struct address_space_operations {
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, gfp_t);
 	void (*freepage)(struct page *);
-	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs);
+	ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
 						void **, unsigned long *);
 	/*
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fa6918b0f829..5a0d78ec739d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -459,8 +459,7 @@ extern int nfs3_removexattr (struct dentry *, const char *name);
 /*
  * linux/fs/nfs/direct.c
  */
-extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
-			unsigned long);
+extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
 			const struct iovec *iov, unsigned long nr_segs,
 			loff_t pos, bool uio);
diff --git a/mm/filemap.c b/mm/filemap.c
index 7c1417b0bd7b..139641274f1e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1699,10 +1699,9 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 		size = i_size_read(inode);
 		retval = filemap_write_and_wait_range(mapping, pos,
 					pos + iov_length(iov, nr_segs) - 1);
-		if (!retval) {
-			retval = mapping->a_ops->direct_IO(READ, iocb,
-							   iov, pos, nr_segs);
-		}
+		if (!retval)
+			retval = mapping->a_ops->direct_IO(READ, iocb, &i, pos);
+
 		if (retval > 0) {
 			*ppos = pos + retval;
 			count -= retval;
@@ -2383,7 +2382,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 		}
 	}
 
-	written = mapping->a_ops->direct_IO(WRITE, iocb, from->iov, pos, from->nr_segs);
+	written = mapping->a_ops->direct_IO(WRITE, iocb, from, pos);
 
 	/*
 	 * Finally, try again to invalidate clean pages which might have been
diff --git a/mm/page_io.c b/mm/page_io.c
index 7c59ef681381..0ed0644c73db 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -263,16 +263,18 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 			.iov_base = kmap(page),
 			.iov_len  = PAGE_SIZE,
 		};
+		struct iov_iter from;
 
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
 		kiocb.ki_nbytes = PAGE_SIZE;
+		iov_iter_init(&from, &iov, 1, PAGE_SIZE, 0);
 
 		set_page_writeback(page);
 		unlock_page(page);
 		ret = mapping->a_ops->direct_IO(KERNEL_WRITE,
-						&kiocb, &iov,
-						kiocb.ki_pos, 1);
+						&kiocb, &from,
+						kiocb.ki_pos);
 		kunmap(page);
 		if (ret == PAGE_SIZE) {
 			count_vm_event(PSWPOUT);
-- 
cgit v1.2.3-71-gd317


From 619d30b4b8c488042b4a720ca79dccc346d1a516 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 4 Mar 2014 21:53:33 -0500
Subject: convert the guts of nfs_direct_IO() to iov_iter

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/direct.c        | 46 +++++++++++++++++++++-------------------------
 fs/nfs/file.c          | 18 ++++++++++++------
 include/linux/nfs_fs.h |  4 ++--
 3 files changed, 35 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e9cde3935001..21723149668b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -132,9 +132,9 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
 	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
 
 	if (rw == READ || rw == KERNEL_READ)
-		return nfs_file_direct_read(iocb, iter->iov, iter->nr_segs, pos,
+		return nfs_file_direct_read(iocb, iter, pos,
 				rw == READ ? true : false);
-	return nfs_file_direct_write(iocb, iter->iov, iter->nr_segs, pos,
+	return nfs_file_direct_write(iocb, iter, pos,
 				rw == WRITE ? true : false);
 #endif /* CONFIG_NFS_SWAP */
 }
@@ -414,8 +414,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 }
 
 static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
-					      const struct iovec *iov,
-					      unsigned long nr_segs,
+					      struct iov_iter *iter,
 					      loff_t pos, bool uio)
 {
 	struct nfs_pageio_descriptor desc;
@@ -430,8 +429,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	desc.pg_dreq = dreq;
 	atomic_inc(&inode->i_dio_count);
 
-	for (seg = 0; seg < nr_segs; seg++) {
-		const struct iovec *vec = &iov[seg];
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		const struct iovec *vec = &iter->iov[seg];
 		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
 		if (result < 0)
 			break;
@@ -461,8 +460,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 /**
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
- * @iov: vector of user buffers into which to read data
- * @nr_segs: size of iov vector
+ * @iter: vector of user buffers into which to read data
  * @pos: byte offset in file where reading starts
  *
  * We use this function for direct reads instead of calling
@@ -479,8 +477,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
  * client must read the updated atime from the server back into its
  * cache.
  */
-ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+				loff_t pos, bool uio)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -490,7 +488,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t result = -EINVAL;
 	size_t count;
 
-	count = iov_length(iov, nr_segs);
+	count = iov_length(iter->iov, iter->nr_segs);
 	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
 
 	dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
@@ -513,7 +511,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 		goto out_unlock;
 
 	dreq->inode = inode;
-	dreq->bytes_left = iov_length(iov, nr_segs);
+	dreq->bytes_left = count;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	l_ctx = nfs_get_lock_context(dreq->ctx);
 	if (IS_ERR(l_ctx)) {
@@ -524,8 +522,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	NFS_I(inode)->read_io += iov_length(iov, nr_segs);
-	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
+	NFS_I(inode)->read_io += count;
+	result = nfs_direct_read_schedule_iovec(dreq, iter, pos, uio);
 
 	mutex_unlock(&inode->i_mutex);
 
@@ -864,8 +862,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
 };
 
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
-					       const struct iovec *iov,
-					       unsigned long nr_segs,
+					       struct iov_iter *iter,
 					       loff_t pos, bool uio)
 {
 	struct nfs_pageio_descriptor desc;
@@ -880,9 +877,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	get_dreq(dreq);
 	atomic_inc(&inode->i_dio_count);
 
-	NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
-	for (seg = 0; seg < nr_segs; seg++) {
-		const struct iovec *vec = &iov[seg];
+	NFS_I(dreq->inode)->write_io += iov_length(iter->iov, iter->nr_segs);
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		const struct iovec *vec = &iter->iov[seg];
 		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
 		if (result < 0)
 			break;
@@ -911,8 +908,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 /**
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
- * @iov: vector of user buffers from which to write data
- * @nr_segs: size of iov vector
+ * @iter: vector of user buffers from which to write data
  * @pos: byte offset in file where writing starts
  *
  * We use this function for direct writes instead of calling
@@ -930,8 +926,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+				loff_t pos, bool uio)
 {
 	ssize_t result = -EINVAL;
 	struct file *file = iocb->ki_filp;
@@ -942,7 +938,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	loff_t end;
 	size_t count;
 
-	count = iov_length(iov, nr_segs);
+	count = iov_length(iter->iov, iter->nr_segs);
 	end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
 
 	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
@@ -993,7 +989,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
+	result = nfs_direct_write_schedule_iovec(dreq, iter, pos, uio);
 
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 284ca901fe16..3d01b152894e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -169,14 +169,18 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
+	size_t count = iov_length(iov, nr_segs);
 	ssize_t result;
+	struct iov_iter to;
+
+	iov_iter_init(&to, iov, nr_segs, count, 0);
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
-		return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
+		return nfs_file_direct_read(iocb, &to, pos, true);
 
-	dprintk("NFS: read(%pD2, %lu@%lu)\n",
+	dprintk("NFS: read(%pD2, %zu@%lu)\n",
 		iocb->ki_filp,
-		(unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
+		count, (unsigned long) pos);
 
 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
@@ -643,16 +647,18 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 	unsigned long written = 0;
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
+	struct iov_iter from;
+	iov_iter_init(&from, iov, nr_segs, count, 0);
 
 	result = nfs_key_timeout_notify(file, inode);
 	if (result)
 		return result;
 
 	if (file->f_flags & O_DIRECT)
-		return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
+		return nfs_file_direct_write(iocb, &from, pos, true);
 
-	dprintk("NFS: write(%pD2, %lu@%Ld)\n",
-		file, (unsigned long) count, (long long) pos);
+	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
+		file, count, (long long) pos);
 
 	result = -EBUSY;
 	if (IS_SWAPFILE(inode))
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 5a0d78ec739d..0a82b6fbae8a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -461,10 +461,10 @@ extern int nfs3_removexattr (struct dentry *, const char *name);
  */
 extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
-			const struct iovec *iov, unsigned long nr_segs,
+			struct iov_iter *iter,
 			loff_t pos, bool uio);
 extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
-			const struct iovec *iov, unsigned long nr_segs,
+			struct iov_iter *iter,
 			loff_t pos, bool uio);
 
 /*
-- 
cgit v1.2.3-71-gd317


From 31b140398ce56ab41646eda7f02bcb78d6a4c916 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 5 Mar 2014 01:33:16 -0500
Subject: switch {__,}blockdev_direct_IO() to iov_iter

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c      |  4 ++--
 fs/btrfs/inode.c    |  3 +--
 fs/direct-io.c      | 33 ++++++++++++++++-----------------
 fs/ext2/inode.c     |  3 +--
 fs/ext3/inode.c     |  3 +--
 fs/ext4/indirect.c  |  7 +++----
 fs/ext4/inode.c     |  4 ++--
 fs/f2fs/data.c      |  4 ++--
 fs/fat/inode.c      |  3 +--
 fs/gfs2/aops.c      |  2 +-
 fs/hfs/inode.c      |  3 +--
 fs/hfsplus/inode.c  |  2 +-
 fs/jfs/inode.c      |  3 +--
 fs/nilfs2/inode.c   |  4 ++--
 fs/ocfs2/aops.c     |  2 +-
 fs/reiserfs/inode.c |  4 ++--
 fs/udf/inode.c      |  3 +--
 fs/xfs/xfs_aops.c   | 10 ++++------
 include/linux/fs.h  | 12 ++++++------
 19 files changed, 49 insertions(+), 60 deletions(-)

(limited to 'include')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 938fc707d769..937e3011ed58 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -171,8 +171,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 
-	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter->iov,
-				    offset, iter->nr_segs, blkdev_get_block,
+	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter,
+				    offset, blkdev_get_block,
 				    NULL, NULL, 0);
 }
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c46a025d0c4b..b0b8fa0efba3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7483,8 +7483,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 
 	ret = __blockdev_direct_IO(rw, iocb, inode,
 			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
-			iter->iov, offset, iter->nr_segs,
-			btrfs_get_blocks_direct, NULL,
+			iter, offset, btrfs_get_blocks_direct, NULL,
 			btrfs_submit_direct, flags);
 	if (rw & WRITE) {
 		if (ret < 0 && ret != -EIOCBQUEUED)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 31ba0935e32e..1c677899b989 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1107,8 +1107,8 @@ static inline int drop_refcount(struct dio *dio)
  */
 static inline ssize_t
 do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset, 
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	struct block_device *bdev, struct iov_iter *iter, loff_t offset, 
+	get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags)
 {
 	int seg;
@@ -1143,9 +1143,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	}
 
 	/* Check the memory alignment.  Blocks cannot straddle pages */
-	for (seg = 0; seg < nr_segs; seg++) {
-		addr = (unsigned long)iov[seg].iov_base;
-		size = iov[seg].iov_len;
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		addr = (unsigned long)iter->iov[seg].iov_base;
+		size = iter->iov[seg].iov_len;
 		end += size;
 		if (unlikely((addr & blocksize_mask) ||
 			     (size & blocksize_mask))) {
@@ -1256,18 +1256,18 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	if (unlikely(sdio.blkfactor))
 		sdio.pages_in_io = 2;
 
-	for (seg = 0; seg < nr_segs; seg++) {
-		user_addr = (unsigned long)iov[seg].iov_base;
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		user_addr = (unsigned long)iter->iov[seg].iov_base;
 		sdio.pages_in_io +=
-			((user_addr + iov[seg].iov_len + PAGE_SIZE-1) /
+			((user_addr + iter->iov[seg].iov_len + PAGE_SIZE-1) /
 				PAGE_SIZE - user_addr / PAGE_SIZE);
 	}
 
 	blk_start_plug(&plug);
 
-	for (seg = 0; seg < nr_segs; seg++) {
-		user_addr = (unsigned long)iov[seg].iov_base;
-		sdio.size += bytes = iov[seg].iov_len;
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		user_addr = (unsigned long)iter->iov[seg].iov_base;
+		sdio.size += bytes = iter->iov[seg].iov_len;
 
 		/* Index into the first page of the first block */
 		sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
@@ -1288,7 +1288,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 
 		retval = do_direct_IO(dio, &sdio, &map_bh);
 
-		dio->result += iov[seg].iov_len -
+		dio->result += iter->iov[seg].iov_len -
 			((sdio.final_block_in_request - sdio.block_in_file) <<
 					blkbits);
 
@@ -1365,8 +1365,8 @@ out:
 
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	struct block_device *bdev, struct iov_iter *iter, loff_t offset,
+	get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags)
 {
 	/*
@@ -1381,9 +1381,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	prefetch(bdev->bd_queue);
 	prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
 
-	return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				     nr_segs, get_block, end_io,
-				     submit_io, flags);
+	return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset,
+				     get_block, end_io, submit_io, flags);
 }
 
 EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 116e809aa7cb..36d35c36311d 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -859,8 +859,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, ext2_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block);
 	if (ret < 0 && (rw & WRITE))
 		ext2_write_failed(mapping, offset + count);
 	return ret;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 8582ae2c80b0..4d32133a76c4 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1855,8 +1855,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 	}
 
 retry:
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, ext3_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block);
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again.
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 123898a6af05..8a57e9fcd1b9 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -686,14 +686,13 @@ retry:
 			goto locked;
 		}
 		ret = __blockdev_direct_IO(rw, iocb, inode,
-				 inode->i_sb->s_bdev, iter->iov,
-				 offset, iter->nr_segs,
+				 inode->i_sb->s_bdev, iter, offset,
 				 ext4_get_block, NULL, NULL, 0);
 		inode_dio_done(inode);
 	} else {
 locked:
-		ret = blockdev_direct_IO(rw, iocb, inode, iter->iov,
-				 offset, iter->nr_segs, ext4_get_block);
+		ret = blockdev_direct_IO(rw, iocb, inode, iter,
+				 offset, ext4_get_block);
 
 		if (unlikely((rw & WRITE) && ret < 0)) {
 			loff_t isize = i_size_read(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2b993579a968..e5718385a037 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3166,8 +3166,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 		dio_flags = DIO_LOCKING;
 	}
 	ret = __blockdev_direct_IO(rw, iocb, inode,
-				   inode->i_sb->s_bdev, iter->iov,
-				   offset, iter->nr_segs,
+				   inode->i_sb->s_bdev, iter,
+				   offset,
 				   get_block_func,
 				   ext4_end_io_dio,
 				   NULL,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3a6ef121c095..151488f27755 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1022,8 +1022,8 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
 	if (check_direct_IO(inode, rw, iter->iov, offset, iter->nr_segs))
 		return 0;
 
-	return blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				  iter->nr_segs, get_data_block);
+	return blockdev_direct_IO(rw, iocb, inode, iter, offset,
+				  get_data_block);
 }
 
 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 154a6f9d3189..385cce464e82 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -213,8 +213,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
 	 * FAT need to use the DIO_LOCKING for avoiding the race
 	 * condition of fat_get_block() and ->truncate().
 	 */
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, fat_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block);
 	if (ret < 0 && (rw & WRITE))
 		fat_write_failed(mapping, offset + count);
 
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 228a12d2afa9..910838951d66 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1097,7 +1097,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 	}
 
 	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
-				  iter->iov, offset, iter->nr_segs,
+				  iter, offset,
 				  gfs2_get_block_direct, NULL, NULL, 0);
 out:
 	gfs2_glock_dq(&gh);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index dc69e8f31581..f5fb09ebc850 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -133,8 +133,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, hfs_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index e6b1251af47a..76b930ff58ae 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -131,7 +131,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs,
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, 
 				 hfsplus_get_block);
 
 	/*
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 6cde5928693b..bd3df1ca3c9b 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -339,8 +339,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, jfs_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 7aaf913e8709..6252b173a465 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -311,8 +311,8 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 		return 0;
 
 	/* Needs synchronization with the cleaner */
-	size = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				  iter->nr_segs, nilfs_get_block);
+	size = blockdev_direct_IO(rw, iocb, inode, iter, offset,
+				  nilfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 799fd0afcb35..4a231a166cf8 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -617,7 +617,7 @@ static ssize_t ocfs2_direct_IO(int rw,
 		return 0;
 
 	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
-				    iter->iov, offset, iter->nr_segs,
+				    iter, offset,
 				    ocfs2_direct_IO_get_blocks,
 				    ocfs2_dio_end_io, NULL, 0);
 }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 723affe921f1..b8003e8dd1f4 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3090,8 +3090,8 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, reiserfs_get_blocks_direct_io);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
+				 reiserfs_get_blocks_direct_io);
 
 	/*
 	 * In case of error extending write may have instantiated a few
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 28984baf6194..236cd48184c2 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -226,8 +226,7 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs,
-				  udf_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block);
 	if (unlikely(ret < 0 && (rw & WRITE)))
 		udf_write_failed(mapping, offset + count);
 	return ret;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6462b3186784..08d13e395252 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1470,17 +1470,15 @@ xfs_vm_direct_IO(
 		if (offset + size > XFS_I(inode)->i_d.di_size)
 			ioend->io_isdirect = 1;
 
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov,
-					    offset, iter->nr_segs,
-					    xfs_get_blocks_direct,
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+					    offset, xfs_get_blocks_direct,
 					    xfs_end_io_direct_write, NULL,
 					    DIO_ASYNC_EXTEND);
 		if (ret != -EIOCBQUEUED && iocb->private)
 			goto out_destroy_ioend;
 	} else {
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov,
-					    offset, iter->nr_segs,
-					    xfs_get_blocks_direct,
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+					    offset, xfs_get_blocks_direct,
 					    NULL, NULL, 0);
 	}
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 399a338c92b5..946a9484844f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2474,16 +2474,16 @@ enum {
 void dio_end_io(struct bio *bio, int error);
 
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	struct block_device *bdev, struct iov_iter *iter, loff_t offset,
+	get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags);
 
 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
-		struct inode *inode, const struct iovec *iov, loff_t offset,
-		unsigned long nr_segs, get_block_t get_block)
+		struct inode *inode, struct iov_iter *iter, loff_t offset,
+		get_block_t get_block)
 {
-	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				    offset, nr_segs, get_block, NULL, NULL,
+	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iter,
+				    offset, get_block, NULL, NULL,
 				    DIO_LOCKING | DIO_SKIP_HOLES);
 }
 #endif
-- 
cgit v1.2.3-71-gd317


From 886a39115005ced8b15ab067c9c2a8d546b40a5e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 5 Mar 2014 13:50:45 -0500
Subject: new primitive: iov_iter_alignment()

returns the value aligned as badly as the worst remaining segment
in iov_iter is.  Use instead of open-coded equivalents.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/lustre/lustre/llite/rw26.c |  7 ++-----
 fs/direct-io.c                             | 27 +++++----------------------
 include/linux/uio.h                        |  2 ++
 mm/iov_iter.c                              | 25 +++++++++++++++++++++++++
 4 files changed, 34 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 38a5b580e7f0..f718585c9e08 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -391,11 +391,8 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 	       MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
 
 	/* Check that all user buffers are aligned as well */
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		if (((unsigned long)iter->iov[seg].iov_base & ~CFS_PAGE_MASK) ||
-		    (iter->iov[seg].iov_len & ~CFS_PAGE_MASK))
-			return -EINVAL;
-	}
+	if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK)
+		return -EINVAL;
 
 	env = cl_env_get(&refcheck);
 	LASSERT(!IS_ERR(env));
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1c677899b989..adfa1fb33456 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1112,19 +1112,18 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	dio_submit_t submit_io,	int flags)
 {
 	int seg;
-	size_t size;
-	unsigned long addr;
 	unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
 	unsigned blkbits = i_blkbits;
 	unsigned blocksize_mask = (1 << blkbits) - 1;
 	ssize_t retval = -EINVAL;
-	loff_t end = offset;
+	loff_t end = offset + iov_iter_count(iter);
 	struct dio *dio;
 	struct dio_submit sdio = { 0, };
 	unsigned long user_addr;
 	size_t bytes;
 	struct buffer_head map_bh = { 0, };
 	struct blk_plug plug;
+	unsigned long align = offset | iov_iter_alignment(iter);
 
 	if (rw & WRITE)
 		rw = WRITE_ODIRECT;
@@ -1134,32 +1133,16 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	 * the early prefetch in the caller enough time.
 	 */
 
-	if (offset & blocksize_mask) {
+	if (align & blocksize_mask) {
 		if (bdev)
 			blkbits = blksize_bits(bdev_logical_block_size(bdev));
 		blocksize_mask = (1 << blkbits) - 1;
-		if (offset & blocksize_mask)
+		if (align & blocksize_mask)
 			goto out;
 	}
 
-	/* Check the memory alignment.  Blocks cannot straddle pages */
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		addr = (unsigned long)iter->iov[seg].iov_base;
-		size = iter->iov[seg].iov_len;
-		end += size;
-		if (unlikely((addr & blocksize_mask) ||
-			     (size & blocksize_mask))) {
-			if (bdev)
-				blkbits = blksize_bits(
-					 bdev_logical_block_size(bdev));
-			blocksize_mask = (1 << blkbits) - 1;
-			if ((addr & blocksize_mask) || (size & blocksize_mask))
-				goto out;
-		}
-	}
-
 	/* watch out for a 0 len io from a tricksy fs */
-	if (rw == READ && end == offset)
+	if (rw == READ && !iov_iter_count(iter))
 		return 0;
 
 	dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index abbe83ded630..4ee17413fe1b 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -67,6 +67,7 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
+unsigned long iov_iter_alignment(const struct iov_iter *i);
 
 static inline void iov_iter_init(struct iov_iter *i,
 			const struct iovec *iov, unsigned long nr_segs,
@@ -88,4 +89,5 @@ static inline size_t iov_iter_count(struct iov_iter *i)
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
 
+
 #endif
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 22ec1ef068a8..2f762cc21080 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -195,3 +195,28 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 		return min(i->count, iov->iov_len - i->iov_offset);
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
+
+unsigned long iov_iter_alignment(const struct iov_iter *i)
+{
+	const struct iovec *iov = i->iov;
+	unsigned long res;
+	size_t size = i->count;
+	size_t n;
+
+	if (!size)
+		return 0;
+
+	res = (unsigned long)iov->iov_base + i->iov_offset;
+	n = iov->iov_len - i->iov_offset;
+	if (n >= size)
+		return res | size;
+	size -= n;
+	res |= n;
+	while (size > (++iov)->iov_len) {
+		res |= (unsigned long)iov->iov_base | iov->iov_len;
+		size -= iov->iov_len;
+	}
+	res |= (unsigned long)iov->iov_base | size;
+	return res;
+}
+EXPORT_SYMBOL(iov_iter_alignment);
-- 
cgit v1.2.3-71-gd317


From ed978a811ec528dbe40243605c3afab55892f722 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 5 Mar 2014 22:53:04 -0500
Subject: new helper: generic_file_read_iter()

iov_iter-using variant of generic_file_aio_read().  Some callers
converted.  Note that it's still not quite there for use as ->read_iter() -
we depend on having zero iter->iov_offset in O_DIRECT case.  Fortunately,
that's true for all converted callers (and for generic_file_aio_read() itself).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ceph/file.c     | 15 +-----------
 fs/nfs/file.c      |  2 +-
 include/linux/fs.h |  1 +
 mm/filemap.c       | 67 +++++++++++++++++++++++++++---------------------------
 4 files changed, 37 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d8f383d59449..910a3022eb27 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -833,24 +833,11 @@ again:
 		/* hmm, this isn't really async... */
 		ret = ceph_sync_read(iocb, &i, &checkeof);
 	} else {
-		/*
-		 * We can't modify the content of iov,
-		 * so we only read from beginning.
-		 *
-		 * When we switch generic_file_aio_read() to iov_iter, the
-		 * if () below will be removed -- AV
-		 */
-		if (read) {
-			iocb->ki_pos = pos;
-			len = iocb->ki_nbytes;
-			read = 0;
-			iov_iter_init(&i, iov, nr_segs, len, 0);
-		}
 		dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
 		     inode, ceph_vinop(inode), pos, (unsigned)len,
 		     ceph_cap_string(got));
 
-		ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
+		ret = generic_file_read_iter(iocb, &i);
 	}
 	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3d01b152894e..a352bc6d613f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -184,7 +184,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 
 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
-		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
+		result = generic_file_read_iter(iocb, &to);
 		if (result > 0)
 			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
 	}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 946a9484844f..d096ebc7f348 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2404,6 +2404,7 @@ extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
 		unsigned long size, pgoff_t pgoff);
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *,
diff --git a/mm/filemap.c b/mm/filemap.c
index 866f4ae8223b..a7f79e90209c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1663,55 +1663,34 @@ out:
 	return written ? written : error;
 }
 
-/**
- * generic_file_aio_read - generic filesystem read routine
- * @iocb:	kernel I/O control block
- * @iov:	io vector request
- * @nr_segs:	number of segments in the iovec
- * @pos:	current file position
- *
- * This is the "read()" routine for all filesystems
- * that can use the page cache directly.
- */
 ssize_t
-generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
+generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
-	struct file *filp = iocb->ki_filp;
+	struct file *file = iocb->ki_filp;
 	ssize_t retval = 0;
-	size_t count;
 	loff_t *ppos = &iocb->ki_pos;
-	struct iov_iter i;
-
-	count = iov_length(iov, nr_segs);
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	loff_t pos = *ppos;
 
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
-	if (filp->f_flags & O_DIRECT) {
+	if (file->f_flags & O_DIRECT) {
+		struct address_space *mapping = file->f_mapping;
+		struct inode *inode = mapping->host;
+		size_t count = iov_iter_count(iter);
 		loff_t size;
-		struct address_space *mapping;
-		struct inode *inode;
 
-		mapping = filp->f_mapping;
-		inode = mapping->host;
 		if (!count)
 			goto out; /* skip atime */
 		size = i_size_read(inode);
 		retval = filemap_write_and_wait_range(mapping, pos,
 					pos + count - 1);
 		if (!retval) {
-			struct iov_iter data = i;
+			struct iov_iter data = *iter;
 			retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos);
 		}
 
 		if (retval > 0) {
 			*ppos = pos + retval;
-			count -= retval;
-			/*
-			 * If we did a short DIO read we need to skip the
-			 * section of the iov that we've already read data into.
-			 */
-			iov_iter_advance(&i, retval);
+			iov_iter_advance(iter, retval);
 		}
 
 		/*
@@ -1722,16 +1701,38 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 		 * and return.  Otherwise fallthrough to buffered io for
 		 * the rest of the read.
 		 */
-		if (retval < 0 || !count || *ppos >= size) {
-			file_accessed(filp);
+		if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) {
+			file_accessed(file);
 			goto out;
 		}
 	}
 
-	retval = do_generic_file_read(filp, ppos, &i, retval);
+	retval = do_generic_file_read(file, ppos, iter, retval);
 out:
 	return retval;
 }
+EXPORT_SYMBOL(generic_file_read_iter);
+
+/**
+ * generic_file_aio_read - generic filesystem read routine
+ * @iocb:	kernel I/O control block
+ * @iov:	io vector request
+ * @nr_segs:	number of segments in the iovec
+ * @pos:	current file position
+ *
+ * This is the "read()" routine for all filesystems
+ * that can use the page cache directly.
+ */
+ssize_t
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
+{
+	size_t count = iov_length(iov, nr_segs);
+	struct iov_iter i;
+
+	iov_iter_init(&i, iov, nr_segs, count, 0);
+	return generic_file_read_iter(iocb, &i);
+}
 EXPORT_SYMBOL(generic_file_aio_read);
 
 #ifdef CONFIG_MMU
-- 
cgit v1.2.3-71-gd317


From 71d8e532b1549a478e6a6a8a44f309d050294d00 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 5 Mar 2014 19:28:09 -0500
Subject: start adding the tag to iov_iter

For now, just use the same thing we pass to ->direct_IO() - it's all
iovec-based at the moment.  Pass it explicitly to iov_iter_init() and
account for kvec vs. iovec in there, by the same kludge NFS ->direct_IO()
uses.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/file.c        |  2 +-
 fs/ceph/file.c         |  8 ++++----
 fs/cifs/file.c         |  4 ++--
 fs/fuse/file.c         |  6 +++---
 fs/nfs/file.c          |  4 ++--
 fs/ocfs2/file.c        |  2 +-
 fs/pipe.c              |  2 +-
 fs/splice.c            |  2 +-
 fs/xfs/xfs_file.c      |  4 ++--
 include/linux/uio.h    | 15 +++------------
 mm/filemap.c           |  4 ++--
 mm/iov_iter.c          | 15 +++++++++++++++
 mm/page_io.c           |  2 +-
 mm/process_vm_access.c |  4 ++--
 mm/shmem.c             |  2 +-
 15 files changed, 41 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a0a94a30d85a..f8cee205618a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1740,7 +1740,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		goto out;
 	}
 
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	err = file_remove_suid(file);
 	if (err) {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 910a3022eb27..5b93cadedfbe 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -582,7 +582,7 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
 		CEPH_OSD_FLAG_ONDISK |
 		CEPH_OSD_FLAG_WRITE;
 
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	while (iov_iter_count(&i) > 0) {
 		void __user *data = i.iov->iov_base + i.iov_offset;
@@ -703,7 +703,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
 		CEPH_OSD_FLAG_WRITE |
 		CEPH_OSD_FLAG_ACK;
 
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	while ((len = iov_iter_count(&i)) > 0) {
 		size_t left;
@@ -808,7 +808,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	int checkeof = 0, read = 0;
 	struct iov_iter i;
 
-	iov_iter_init(&i, iov, nr_segs, len, 0);
+	iov_iter_init(&i, READ, iov, nr_segs, len);
 
 again:
 	dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
@@ -961,7 +961,7 @@ retry_snap:
 		 * are pending vmtruncate. So write and vmtruncate
 		 * can not run at the same time
 		 */
-		iov_iter_init(&from, iov, nr_segs, count, 0);
+		iov_iter_init(&from, WRITE, iov, nr_segs, count);
 		written = generic_perform_write(file, &from, pos);
 		if (likely(written >= 0))
 			iocb->ki_pos = pos + written;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a4ccc39e6c11..15201c21ac88 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2424,7 +2424,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
 	else
 		pid = current->tgid;
 
-	iov_iter_init(&it, iov, nr_segs, len, 0);
+	iov_iter_init(&it, WRITE, iov, nr_segs, len);
 	do {
 		size_t save_len;
 
@@ -2854,7 +2854,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
 	if (!len)
 		return 0;
 
-	iov_iter_init(&to, iov, nr_segs, len, 0);
+	iov_iter_init(&to, READ, iov, nr_segs, len);
 
 	INIT_LIST_HEAD(&rdata_list);
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index fc54d04a41e2..4a5519ca253f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1217,7 +1217,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
 	if (err)
 		goto out;
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	if (count == 0)
 		goto out;
@@ -1386,7 +1386,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
 	struct fuse_req *req;
 	struct iov_iter ii;
 
-	iov_iter_init(&ii, iov, nr_segs, count, 0);
+	iov_iter_init(&ii, write ? WRITE : READ, iov, nr_segs, count);
 
 	if (io->async)
 		req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii));
@@ -2367,7 +2367,7 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
 	if (!bytes)
 		return 0;
 
-	iov_iter_init(&ii, iov, nr_segs, bytes, 0);
+	iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes);
 
 	while (iov_iter_count(&ii)) {
 		struct page *page = pages[page_idx++];
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index a352bc6d613f..ead8f44f7973 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -173,7 +173,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t result;
 	struct iov_iter to;
 
-	iov_iter_init(&to, iov, nr_segs, count, 0);
+	iov_iter_init(&to, READ, iov, nr_segs, count);
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
 		return nfs_file_direct_read(iocb, &to, pos, true);
@@ -648,7 +648,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
 	struct iov_iter from;
-	iov_iter_init(&from, iov, nr_segs, count, 0);
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
 
 	result = nfs_key_timeout_notify(file, inode);
 	if (result)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index d33c4ced0baf..9ce9ed7615c1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2361,7 +2361,7 @@ relock:
 	if (ret)
 		goto out_dio;
 
-	iov_iter_init(&from, iov, nr_segs, count, 0);
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
 	if (direct_io) {
 		written = generic_file_direct_write(iocb, &from, *ppos,
 						    count, ocount);
diff --git a/fs/pipe.c b/fs/pipe.c
index 034bffac3f97..cd4ccf07e772 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -287,7 +287,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 	if (unlikely(total_len == 0))
 		return 0;
 
-	iov_iter_init(&iter, iov, nr_segs, total_len, 0);
+	iov_iter_init(&iter, READ, iov, nr_segs, total_len);
 
 	do_wakeup = 0;
 	ret = 0;
diff --git a/fs/splice.c b/fs/splice.c
index 9bc07d2b53cf..f99e420744c7 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1548,7 +1548,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
 	if (ret <= 0)
 		return ret;
 
-	iov_iter_init(&iter, iov, nr_segs, count, 0);
+	iov_iter_init(&iter, READ, iov, nr_segs, count);
 
 	sd.len = 0;
 	sd.total_len = count;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f0f8084a67be..762bb3e148a6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -697,7 +697,7 @@ xfs_file_dio_aio_write(
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-	iov_iter_init(&from, iovp, nr_segs, count, 0);
+	iov_iter_init(&from, WRITE, iovp, nr_segs, count);
 	ret = generic_file_direct_write(iocb, &from, pos, count, ocount);
 
 out:
@@ -731,7 +731,7 @@ xfs_file_buffered_aio_write(
 	if (ret)
 		goto out;
 
-	iov_iter_init(&from, iovp, nr_segs, count, 0);
+	iov_iter_init(&from, WRITE, iovp, nr_segs, count);
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
 
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 4ee17413fe1b..b80bbe197d13 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -20,6 +20,7 @@ struct kvec {
 };
 
 struct iov_iter {
+	int type;
 	const struct iovec *iov;
 	unsigned long nr_segs;
 	size_t iov_offset;
@@ -68,18 +69,8 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
-
-static inline void iov_iter_init(struct iov_iter *i,
-			const struct iovec *iov, unsigned long nr_segs,
-			size_t count, size_t written)
-{
-	i->iov = iov;
-	i->nr_segs = nr_segs;
-	i->iov_offset = 0;
-	i->count = count + written;
-
-	iov_iter_advance(i, written);
-}
+void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
+			unsigned long nr_segs, size_t count);
 
 static inline size_t iov_iter_count(struct iov_iter *i)
 {
diff --git a/mm/filemap.c b/mm/filemap.c
index a7f79e90209c..3aeaf2df4135 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1730,7 +1730,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	size_t count = iov_length(iov, nr_segs);
 	struct iov_iter i;
 
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	iov_iter_init(&i, READ, iov, nr_segs, count);
 	return generic_file_read_iter(iocb, &i);
 }
 EXPORT_SYMBOL(generic_file_aio_read);
@@ -2596,7 +2596,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (err)
 		goto out;
 
-	iov_iter_init(&from, iov, nr_segs, count, 0);
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
 
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
 	if (unlikely(file->f_flags & O_DIRECT)) {
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 2f762cc21080..e2c9a2db4350 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -220,3 +220,18 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 	return res;
 }
 EXPORT_SYMBOL(iov_iter_alignment);
+
+void iov_iter_init(struct iov_iter *i, int direction,
+			const struct iovec *iov, unsigned long nr_segs,
+			size_t count)
+{
+	/* It will get better.  Eventually... */
+	if (segment_eq(get_fs(), KERNEL_DS))
+		direction |= REQ_KERNEL;
+	i->type = direction;
+	i->iov = iov;
+	i->nr_segs = nr_segs;
+	i->iov_offset = 0;
+	i->count = count;
+}
+EXPORT_SYMBOL(iov_iter_init);
diff --git a/mm/page_io.c b/mm/page_io.c
index 0ed0644c73db..313bfedb75d1 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -268,7 +268,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
 		kiocb.ki_nbytes = PAGE_SIZE;
-		iov_iter_init(&from, &iov, 1, PAGE_SIZE, 0);
+		iov_iter_init(&from, KERNEL_WRITE, &iov, 1, PAGE_SIZE);
 
 		set_page_writeback(page);
 		unlock_page(page);
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index f32b1fbbfe69..5077afcd9e11 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -274,7 +274,7 @@ static ssize_t process_vm_rw(pid_t pid,
 	if (rc <= 0)
 		goto free_iovecs;
 
-	iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
+	iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
 
 	rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
 				   iovstack_r, &iov_r);
@@ -337,7 +337,7 @@ compat_process_vm_rw(compat_pid_t pid,
 						  &iov_l);
 	if (rc <= 0)
 		goto free_iovecs;
-	iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
+	iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
 	rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
 					  UIO_FASTIOV, iovstack_r,
 					  &iov_r);
diff --git a/mm/shmem.c b/mm/shmem.c
index 2a93e625adaf..e0b76696c3f9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1417,7 +1417,7 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb,
 	loff_t *ppos = &iocb->ki_pos;
 	struct iov_iter iter;
 
-	iov_iter_init(&iter, iov, nr_segs, count, 0);
+	iov_iter_init(&iter, READ, iov, nr_segs, count);
 
 	/*
 	 * Might this read be for a stacking filesystem?  Then when reading
-- 
cgit v1.2.3-71-gd317


From 7b2c99d15559e285384c742db52316802e24b0bd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 15 Mar 2014 04:05:57 -0400
Subject: new helper: iov_iter_get_pages()

iov_iter_get_pages(iter, pages, maxsize, &start) grabs references pinning
the pages of up to maxsize of (contiguous) data from iter.  Returns the
amount of memory grabbed or -error.  In case of success, the requested
area begins at offset start in pages[0] and runs through pages[1], etc.
Less than requested amount might be returned - either because the contiguous
area in the beginning of iterator is smaller than requested, or because
the kernel failed to pin that many pages.

direct-io.c switched to using iov_iter_get_pages()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/direct-io.c      | 111 ++++++++++++++++++----------------------------------
 include/linux/uio.h |   2 +
 mm/iov_iter.c       |  27 +++++++++++++
 3 files changed, 67 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 387d91989c45..4b410d58faae 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -77,7 +77,6 @@ struct dio_submit {
 	unsigned blocks_available;	/* At block_in_file.  changes */
 	int reap_counter;		/* rate limit reaping */
 	sector_t final_block_in_request;/* doesn't change */
-	unsigned first_block_in_page;	/* doesn't change, Used only once */
 	int boundary;			/* prev block is at a boundary */
 	get_block_t *get_block;		/* block mapping function */
 	dio_submit_t *submit_io;	/* IO submition function */
@@ -98,19 +97,14 @@ struct dio_submit {
 	sector_t cur_page_block;	/* Where it starts */
 	loff_t cur_page_fs_offset;	/* Offset in file */
 
-	/*
-	 * Page fetching state. These variables belong to dio_refill_pages().
-	 */
-	int curr_page;			/* changes */
-	int total_pages;		/* doesn't change */
-	unsigned long curr_user_address;/* changes */
-
+	struct iov_iter *iter;
 	/*
 	 * Page queue.  These variables belong to dio_refill_pages() and
 	 * dio_get_page().
 	 */
 	unsigned head;			/* next page to process */
 	unsigned tail;			/* last valid page + 1 */
+	size_t from, to;
 };
 
 /* dio_state communicated between submission path and end_io */
@@ -163,15 +157,10 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio)
  */
 static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
-	int ret;
-	int nr_pages;
+	ssize_t ret;
 
-	nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES);
-	ret = get_user_pages_fast(
-		sdio->curr_user_address,		/* Where from? */
-		nr_pages,			/* How many pages? */
-		dio->rw == READ,		/* Write to memory? */
-		&dio->pages[0]);		/* Put results here */
+	ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE,
+				&sdio->from);
 
 	if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
 		struct page *page = ZERO_PAGE(0);
@@ -186,18 +175,19 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 		dio->pages[0] = page;
 		sdio->head = 0;
 		sdio->tail = 1;
-		ret = 0;
-		goto out;
+		sdio->from = 0;
+		sdio->to = PAGE_SIZE;
+		return 0;
 	}
 
 	if (ret >= 0) {
-		sdio->curr_user_address += ret * PAGE_SIZE;
-		sdio->curr_page += ret;
+		iov_iter_advance(sdio->iter, ret);
+		ret += sdio->from;
 		sdio->head = 0;
-		sdio->tail = ret;
-		ret = 0;
+		sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
+		sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1;
+		return 0;
 	}
-out:
 	return ret;	
 }
 
@@ -208,8 +198,9 @@ out:
  * L1 cache.
  */
 static inline struct page *dio_get_page(struct dio *dio,
-		struct dio_submit *sdio)
+		struct dio_submit *sdio, size_t *from, size_t *to)
 {
+	int n;
 	if (dio_pages_present(sdio) == 0) {
 		int ret;
 
@@ -218,7 +209,10 @@ static inline struct page *dio_get_page(struct dio *dio,
 			return ERR_PTR(ret);
 		BUG_ON(dio_pages_present(sdio) == 0);
 	}
-	return dio->pages[sdio->head++];
+	n = sdio->head++;
+	*from = n ? 0 : sdio->from;
+	*to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
+	return dio->pages[n];
 }
 
 /**
@@ -422,8 +416,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
  */
 static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
 {
-	while (dio_pages_present(sdio))
-		page_cache_release(dio_get_page(dio, sdio));
+	while (sdio->head < sdio->tail)
+		page_cache_release(dio->pages[sdio->head++]);
 }
 
 /*
@@ -912,23 +906,18 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 			struct buffer_head *map_bh)
 {
 	const unsigned blkbits = sdio->blkbits;
-	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
-	struct page *page;
-	unsigned block_in_page;
 	int ret = 0;
 
-	/* The I/O can start at any block offset within the first page */
-	block_in_page = sdio->first_block_in_page;
-
 	while (sdio->block_in_file < sdio->final_block_in_request) {
-		page = dio_get_page(dio, sdio);
+		struct page *page;
+		size_t from, to;
+		page = dio_get_page(dio, sdio, &from, &to);
 		if (IS_ERR(page)) {
 			ret = PTR_ERR(page);
 			goto out;
 		}
 
-		while (block_in_page < blocks_per_page) {
-			unsigned offset_in_page = block_in_page << blkbits;
+		while (from < to) {
 			unsigned this_chunk_bytes;	/* # of bytes mapped */
 			unsigned this_chunk_blocks;	/* # of blocks */
 			unsigned u;
@@ -999,10 +988,9 @@ do_holes:
 					page_cache_release(page);
 					goto out;
 				}
-				zero_user(page, block_in_page << blkbits,
-						1 << blkbits);
+				zero_user(page, from, 1 << blkbits);
 				sdio->block_in_file++;
-				block_in_page++;
+				from += 1 << blkbits;
 				dio->result += 1 << blkbits;
 				goto next_block;
 			}
@@ -1020,7 +1008,7 @@ do_holes:
 			 * can add to this page
 			 */
 			this_chunk_blocks = sdio->blocks_available;
-			u = (PAGE_SIZE - offset_in_page) >> blkbits;
+			u = (to - from) >> blkbits;
 			if (this_chunk_blocks > u)
 				this_chunk_blocks = u;
 			u = sdio->final_block_in_request - sdio->block_in_file;
@@ -1032,7 +1020,7 @@ do_holes:
 			if (this_chunk_blocks == sdio->blocks_available)
 				sdio->boundary = buffer_boundary(map_bh);
 			ret = submit_page_section(dio, sdio, page,
-						  offset_in_page,
+						  from,
 						  this_chunk_bytes,
 						  sdio->next_block_for_io,
 						  map_bh);
@@ -1043,9 +1031,9 @@ do_holes:
 			sdio->next_block_for_io += this_chunk_blocks;
 
 			sdio->block_in_file += this_chunk_blocks;
-			block_in_page += this_chunk_blocks;
+			from += this_chunk_bytes;
+			dio->result += this_chunk_bytes;
 			sdio->blocks_available -= this_chunk_blocks;
-			dio->result += this_chunk_blocks << blkbits;
 next_block:
 			BUG_ON(sdio->block_in_file > sdio->final_block_in_request);
 			if (sdio->block_in_file == sdio->final_block_in_request)
@@ -1054,7 +1042,6 @@ next_block:
 
 		/* Drop the ref which was taken in get_user_pages() */
 		page_cache_release(page);
-		block_in_page = 0;
 	}
 out:
 	return ret;
@@ -1122,7 +1109,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct dio *dio;
 	struct dio_submit sdio = { 0, };
 	unsigned long user_addr;
-	size_t bytes;
 	struct buffer_head map_bh = { 0, };
 	struct blk_plug plug;
 	unsigned long align = offset | iov_iter_alignment(iter);
@@ -1234,6 +1220,10 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	spin_lock_init(&dio->bio_lock);
 	dio->refcount = 1;
 
+	sdio.iter = iter;
+	sdio.final_block_in_request =
+		(offset + iov_iter_count(iter)) >> blkbits;
+
 	/*
 	 * In case of non-aligned buffers, we may need 2 more
 	 * pages since we need to zero out first and last block.
@@ -1250,34 +1240,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 
 	blk_start_plug(&plug);
 
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		user_addr = (unsigned long)iter->iov[seg].iov_base;
-		sdio.size += bytes = iter->iov[seg].iov_len;
-
-		/* Index into the first page of the first block */
-		sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
-		sdio.final_block_in_request = sdio.block_in_file +
-						(bytes >> blkbits);
-		/* Page fetching state */
-		sdio.head = 0;
-		sdio.tail = 0;
-		sdio.curr_page = 0;
-
-		sdio.total_pages = 0;
-		if (user_addr & (PAGE_SIZE-1)) {
-			sdio.total_pages++;
-			bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
-		}
-		sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
-		sdio.curr_user_address = user_addr;
-
-		retval = do_direct_IO(dio, &sdio, &map_bh);
-
-		if (retval) {
-			dio_cleanup(dio, &sdio);
-			break;
-		}
-	} /* end iovec loop */
+	retval = do_direct_IO(dio, &sdio, &map_bh);
+	if (retval)
+		dio_cleanup(dio, &sdio);
 
 	if (retval == -ENOTBLK) {
 		/*
diff --git a/include/linux/uio.h b/include/linux/uio.h
index b80bbe197d13..341986116d83 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -71,6 +71,8 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
+ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
+			size_t maxsize, size_t *start);
 
 static inline size_t iov_iter_count(struct iov_iter *i)
 {
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index e2c9a2db4350..45204cd5ccd8 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -235,3 +235,30 @@ void iov_iter_init(struct iov_iter *i, int direction,
 	i->count = count;
 }
 EXPORT_SYMBOL(iov_iter_init);
+
+ssize_t iov_iter_get_pages(struct iov_iter *i,
+		   struct page **pages, size_t maxsize,
+		   size_t *start)
+{
+	size_t offset = i->iov_offset;
+	const struct iovec *iov = i->iov;
+	size_t len;
+	unsigned long addr;
+	int n;
+	int res;
+
+	len = iov->iov_len - offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	addr = (unsigned long)iov->iov_base + offset;
+	len += *start = addr & (PAGE_SIZE - 1);
+	addr &= ~(PAGE_SIZE - 1);
+	n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
+	res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
+	if (unlikely(res < 0))
+		return res;
+	return (res == n ? len : res * PAGE_SIZE) - *start;
+}
+EXPORT_SYMBOL(iov_iter_get_pages);
-- 
cgit v1.2.3-71-gd317


From f67da30c1d5fc9e341bc8121708874bfd7b31e45 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 19 Mar 2014 01:16:16 -0400
Subject: new helper: iov_iter_npages()

counts the pages covered by iov_iter, up to given limit.
do_block_direct_io() and fuse_iter_npages() switched to
it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/direct-io.c      |  9 +--------
 fs/fuse/file.c      | 16 ++--------------
 include/linux/uio.h |  1 +
 mm/iov_iter.c       | 27 +++++++++++++++++++++++++++
 4 files changed, 31 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4b410d58faae..98040ba388ac 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1100,7 +1100,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags)
 {
-	int seg;
 	unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
 	unsigned blkbits = i_blkbits;
 	unsigned blocksize_mask = (1 << blkbits) - 1;
@@ -1108,7 +1107,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	loff_t end = offset + iov_iter_count(iter);
 	struct dio *dio;
 	struct dio_submit sdio = { 0, };
-	unsigned long user_addr;
 	struct buffer_head map_bh = { 0, };
 	struct blk_plug plug;
 	unsigned long align = offset | iov_iter_alignment(iter);
@@ -1231,12 +1229,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	if (unlikely(sdio.blkfactor))
 		sdio.pages_in_io = 2;
 
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		user_addr = (unsigned long)iter->iov[seg].iov_base;
-		sdio.pages_in_io +=
-			((user_addr + iter->iov[seg].iov_len + PAGE_SIZE-1) /
-				PAGE_SIZE - user_addr / PAGE_SIZE);
-	}
+	sdio.pages_in_io += iov_iter_npages(iter, INT_MAX);
 
 	blk_start_plug(&plug);
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7db564d18dc6..7026014717bc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1310,7 +1310,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 
 	while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
 		unsigned npages;
-		size_t start, end, frag_size;
+		size_t start;
 		unsigned n = req->max_pages - req->num_pages;
 		ssize_t ret = iov_iter_get_pages(ii,
 					&req->pages[req->num_pages],
@@ -1344,19 +1344,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 
 static inline int fuse_iter_npages(const struct iov_iter *ii_p)
 {
-	struct iov_iter ii = *ii_p;
-	int npages = 0;
-
-	while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
-		unsigned long user_addr = fuse_get_user_addr(&ii);
-		unsigned offset = user_addr & ~PAGE_MASK;
-		size_t frag_size = iov_iter_single_seg_count(&ii);
-
-		npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		iov_iter_advance(&ii, frag_size);
-	}
-
-	return min(npages, FUSE_MAX_PAGES_PER_REQ);
+	return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
 }
 
 ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 341986116d83..2f8825b06680 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -73,6 +73,7 @@ void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 			size_t maxsize, size_t *start);
+int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
 static inline size_t iov_iter_count(struct iov_iter *i)
 {
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 45204cd5ccd8..0b677f8f9bad 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -262,3 +262,30 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 	return (res == n ? len : res * PAGE_SIZE) - *start;
 }
 EXPORT_SYMBOL(iov_iter_get_pages);
+
+int iov_iter_npages(const struct iov_iter *i, int maxpages)
+{
+	size_t offset = i->iov_offset;
+	size_t size = i->count;
+	const struct iovec *iov = i->iov;
+	int npages = 0;
+	int n;
+
+	for (n = 0; size && n < i->nr_segs; n++, iov++) {
+		unsigned long addr = (unsigned long)iov->iov_base + offset;
+		size_t len = iov->iov_len - offset;
+		offset = 0;
+		if (unlikely(!len))	/* empty segment */
+			continue;
+		if (len > size)
+			len = size;
+		npages += (addr + len + PAGE_SIZE - 1) / PAGE_SIZE
+			  - addr / PAGE_SIZE;
+		if (npages >= maxpages)	/* don't bother going further */
+			return maxpages;
+		size -= len;
+		offset = 0;
+	}
+	return min(npages, maxpages);
+}
+EXPORT_SYMBOL(iov_iter_npages);
-- 
cgit v1.2.3-71-gd317


From 91f79c43d1b54d7154b118860d81b39bad07dfff Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 21 Mar 2014 04:58:33 -0400
Subject: new helper: iov_iter_get_pages_alloc()

same as iov_iter_get_pages(), except that pages array is allocated
(kmalloc if possible, vmalloc if that fails) and left for caller to
free.  Lustre and NFS ->direct_IO() switched to it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/lustre/lustre/llite/rw26.c |  92 ++++-----
 fs/nfs/direct.c                            | 290 +++++++++--------------------
 include/linux/uio.h                        |   2 +
 mm/iov_iter.c                              |  40 ++++
 4 files changed, 167 insertions(+), 257 deletions(-)

(limited to 'include')

diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index f718585c9e08..6b5994577b6b 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -218,14 +218,11 @@ static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
 	int i;
 
 	for (i = 0; i < npages; i++) {
-		if (pages[i] == NULL)
-			break;
 		if (do_dirty)
 			set_page_dirty_lock(pages[i]);
 		page_cache_release(pages[i]);
 	}
-
-	OBD_FREE_LARGE(pages, npages * sizeof(*pages));
+	kvfree(pages);
 }
 
 ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
@@ -370,10 +367,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	struct ccc_object *obj = cl_inode2ccc(inode);
-	long count = iov_iter_count(iter);
-	long tot_bytes = 0, result = 0;
+	ssize_t count = iov_iter_count(iter);
+	ssize_t tot_bytes = 0, result = 0;
 	struct ll_inode_info *lli = ll_i2info(inode);
-	unsigned long seg = 0;
 	long size = MAX_DIO_SIZE;
 	int refcheck;
 
@@ -407,63 +403,49 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 		mutex_lock(&inode->i_mutex);
 
 	LASSERT(obj->cob_transient_pages == 0);
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		long iov_left = iter->iov[seg].iov_len;
-		unsigned long user_addr = (unsigned long)iter->iov[seg].iov_base;
+	while (iov_iter_count(iter)) {
+		struct page **pages;
+		size_t offs;
 
+		count = min_t(size_t, iov_iter_count(iter), size);
 		if (rw == READ) {
 			if (file_offset >= i_size_read(inode))
 				break;
-			if (file_offset + iov_left > i_size_read(inode))
-				iov_left = i_size_read(inode) - file_offset;
+			if (file_offset + count > i_size_read(inode))
+				count = i_size_read(inode) - file_offset;
 		}
 
-		while (iov_left > 0) {
-			struct page **pages;
-			int page_count, max_pages = 0;
-			long bytes;
-
-			bytes = min(size, iov_left);
-			page_count = ll_get_user_pages(rw, user_addr, bytes,
-						       &pages, &max_pages);
-			if (likely(page_count > 0)) {
-				if (unlikely(page_count <  max_pages))
-					bytes = page_count << PAGE_CACHE_SHIFT;
-				result = ll_direct_IO_26_seg(env, io, rw, inode,
-							     file->f_mapping,
-							     bytes, file_offset,
-							     pages, page_count);
-				ll_free_user_pages(pages, max_pages, rw==READ);
-			} else if (page_count == 0) {
-				GOTO(out, result = -EFAULT);
-			} else {
-				result = page_count;
-			}
-			if (unlikely(result <= 0)) {
-				/* If we can't allocate a large enough buffer
-				 * for the request, shrink it to a smaller
-				 * PAGE_SIZE multiple and try again.
-				 * We should always be able to kmalloc for a
-				 * page worth of page pointers = 4MB on i386. */
-				if (result == -ENOMEM &&
-				    size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
-					   PAGE_CACHE_SIZE) {
-					size = ((((size / 2) - 1) |
-						 ~CFS_PAGE_MASK) + 1) &
-						CFS_PAGE_MASK;
-					CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
-					       size);
-					continue;
-				}
-
-				GOTO(out, result);
+		result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
+		if (likely(result > 0)) {
+			int n = (result + offs + PAGE_SIZE - 1) / PAGE_SIZE;
+			result = ll_direct_IO_26_seg(env, io, rw, inode,
+						     file->f_mapping,
+						     result, file_offset,
+						     pages, n);
+			ll_free_user_pages(pages, n, rw==READ);
+		}
+		if (unlikely(result <= 0)) {
+			/* If we can't allocate a large enough buffer
+			 * for the request, shrink it to a smaller
+			 * PAGE_SIZE multiple and try again.
+			 * We should always be able to kmalloc for a
+			 * page worth of page pointers = 4MB on i386. */
+			if (result == -ENOMEM &&
+			    size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
+				   PAGE_CACHE_SIZE) {
+				size = ((((size / 2) - 1) |
+					 ~CFS_PAGE_MASK) + 1) &
+					CFS_PAGE_MASK;
+				CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
+				       size);
+				continue;
 			}
 
-			tot_bytes += result;
-			file_offset += result;
-			iov_left -= result;
-			user_addr += result;
+			GOTO(out, result);
 		}
+		iov_iter_advance(iter, result);
+		tot_bytes += result;
+		file_offset += result;
 	}
 out:
 	LASSERT(obj->cob_transient_pages == 0);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1d34f454989e..b122fe21fea0 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -322,60 +322,37 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
  * handled automatically by nfs_direct_read_result().  Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
-						const struct iovec *iov,
-						loff_t pos, bool uio)
-{
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t rsize = NFS_SERVER(inode)->rsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
-
-	do {
-		size_t bytes;
-		int i;
 
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
+static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+					      struct iov_iter *iter,
+					      loff_t pos)
+{
+	struct nfs_pageio_descriptor desc;
+	struct inode *inode = dreq->inode;
+	ssize_t result = -EINVAL;
+	size_t requested_bytes = 0;
+	size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
 
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *),
-					  GFP_KERNEL);
-		if (!pagevec)
-			break;
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-					npages, 1, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 1, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
+	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
+			     &nfs_direct_read_completion_ops);
+	get_dreq(dreq);
+	desc.pg_dreq = dreq;
+	atomic_inc(&inode->i_dio_count);
 
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
+		size_t bytes;
+		size_t pgbase;
+		unsigned npages, i;
 
+		result = iov_iter_get_pages_alloc(iter, &pagevec, 
+						  rsize, &pgbase);
+		if (result < 0)
+			break;
+	
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
 		for (i = 0; i < npages; i++) {
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
@@ -389,55 +366,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 			}
 			req->wb_index = pos >> PAGE_SHIFT;
 			req->wb_offset = pos & ~PAGE_MASK;
-			if (!nfs_pageio_add_request(desc, req)) {
-				result = desc->pg_error;
+			if (!nfs_pageio_add_request(&desc, req)) {
+				result = desc.pg_error;
 				nfs_release_request(req);
 				break;
 			}
 			pgbase = 0;
 			bytes -= req_len;
-			started += req_len;
-			user_addr += req_len;
+			requested_bytes += req_len;
 			pos += req_len;
-			count -= req_len;
 			dreq->bytes_left -= req_len;
 		}
-		/* The nfs_page now hold references to these pages */
 		nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
-static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
-					      struct iov_iter *iter,
-					      loff_t pos, bool uio)
-{
-	struct nfs_pageio_descriptor desc;
-	struct inode *inode = dreq->inode;
-	ssize_t result = -EINVAL;
-	size_t requested_bytes = 0;
-	unsigned long seg;
-
-	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
-			     &nfs_direct_read_completion_ops);
-	get_dreq(dreq);
-	desc.pg_dreq = dreq;
-	atomic_inc(&inode->i_dio_count);
-
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		const struct iovec *vec = &iter->iov[seg];
-		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
+		kvfree(pagevec);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
-			break;
-		pos += vec->iov_len;
 	}
 
 	nfs_pageio_complete(&desc);
@@ -521,7 +464,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 		dreq->iocb = iocb;
 
 	NFS_I(inode)->read_io += count;
-	result = nfs_direct_read_schedule_iovec(dreq, iter, pos, uio);
+	result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
 
 	mutex_unlock(&inode->i_mutex);
 
@@ -677,109 +620,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
-/*
- * NB: Return the value of the first error return code.  Subsequent
- *     errors after the first one are ignored.
- */
-/*
- * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
- * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
- * bail and stop sending more writes.  Write length accounting is
- * handled automatically by nfs_direct_write_result().  Otherwise, if
- * no requests have been sent, just return an error.
- */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
-						 const struct iovec *iov,
-						 loff_t pos, bool uio)
-{
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t wsize = NFS_SERVER(inode)->wsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
-
-	do {
-		size_t bytes;
-		int i;
-
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
-
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
-		if (!pagevec)
-			break;
-
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-						npages, 0, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 0, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
-
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
-
-		for (i = 0; i < npages; i++) {
-			struct nfs_page *req;
-			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
-
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
-						 pgbase, req_len);
-			if (IS_ERR(req)) {
-				result = PTR_ERR(req);
-				break;
-			}
-			nfs_lock_request(req);
-			req->wb_index = pos >> PAGE_SHIFT;
-			req->wb_offset = pos & ~PAGE_MASK;
-			if (!nfs_pageio_add_request(desc, req)) {
-				result = desc->pg_error;
-				nfs_unlock_and_release_request(req);
-				break;
-			}
-			pgbase = 0;
-			bytes -= req_len;
-			started += req_len;
-			user_addr += req_len;
-			pos += req_len;
-			count -= req_len;
-			dreq->bytes_left -= req_len;
-		}
-		/* The nfs_page now hold references to these pages */
-		nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
 static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_direct_req *dreq = hdr->dreq;
@@ -859,15 +699,27 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
 	.completion = nfs_direct_write_completion,
 };
 
+
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ *     errors after the first one are ignored.
+ */
+/*
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes.  Write length accounting is
+ * handled automatically by nfs_direct_write_result().  Otherwise, if
+ * no requests have been sent, just return an error.
+ */
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 					       struct iov_iter *iter,
-					       loff_t pos, bool uio)
+					       loff_t pos)
 {
 	struct nfs_pageio_descriptor desc;
 	struct inode *inode = dreq->inode;
 	ssize_t result = 0;
 	size_t requested_bytes = 0;
-	unsigned long seg;
+	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
 
 	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
 			      &nfs_direct_write_completion_ops);
@@ -875,16 +727,50 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	get_dreq(dreq);
 	atomic_inc(&inode->i_dio_count);
 
-	NFS_I(dreq->inode)->write_io += iov_iter_count(iter);
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		const struct iovec *vec = &iter->iov[seg];
-		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
+	NFS_I(inode)->write_io += iov_iter_count(iter);
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
+		size_t bytes;
+		size_t pgbase;
+		unsigned npages, i;
+
+		result = iov_iter_get_pages_alloc(iter, &pagevec, 
+						  wsize, &pgbase);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
+
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
+		for (i = 0; i < npages; i++) {
+			struct nfs_page *req;
+			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+
+			req = nfs_create_request(dreq->ctx, inode,
+						 pagevec[i],
+						 pgbase, req_len);
+			if (IS_ERR(req)) {
+				result = PTR_ERR(req);
+				break;
+			}
+			nfs_lock_request(req);
+			req->wb_index = pos >> PAGE_SHIFT;
+			req->wb_offset = pos & ~PAGE_MASK;
+			if (!nfs_pageio_add_request(&desc, req)) {
+				result = desc.pg_error;
+				nfs_unlock_and_release_request(req);
+				break;
+			}
+			pgbase = 0;
+			bytes -= req_len;
+			requested_bytes += req_len;
+			pos += req_len;
+			dreq->bytes_left -= req_len;
+		}
+		nfs_direct_release_pages(pagevec, npages);
+		kvfree(pagevec);
+		if (result < 0)
 			break;
-		pos += vec->iov_len;
 	}
 	nfs_pageio_complete(&desc);
 
@@ -985,7 +871,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	result = nfs_direct_write_schedule_iovec(dreq, iter, pos, uio);
+	result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
 
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 2f8825b06680..4876e9f2a58f 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -73,6 +73,8 @@ void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 			size_t maxsize, size_t *start);
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
+			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
 static inline size_t iov_iter_count(struct iov_iter *i)
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 0b677f8f9bad..a5c691c1a283 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -1,6 +1,8 @@
 #include <linux/export.h>
 #include <linux/uio.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
@@ -263,6 +265,44 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 }
 EXPORT_SYMBOL(iov_iter_get_pages);
 
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	size_t offset = i->iov_offset;
+	const struct iovec *iov = i->iov;
+	size_t len;
+	unsigned long addr;
+	void *p;
+	int n;
+	int res;
+
+	len = iov->iov_len - offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	addr = (unsigned long)iov->iov_base + offset;
+	len += *start = addr & (PAGE_SIZE - 1);
+	addr &= ~(PAGE_SIZE - 1);
+	n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
+	
+	p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
+	if (!p)
+		p = vmalloc(n * sizeof(struct page *));
+	if (!p)
+		return -ENOMEM;
+
+	res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
+	if (unlikely(res < 0)) {
+		kvfree(p);
+		return res;
+	}
+	*pages = p;
+	return (res == n ? len : res * PAGE_SIZE) - *start;
+}
+EXPORT_SYMBOL(iov_iter_get_pages_alloc);
+
 int iov_iter_npages(const struct iov_iter *i, int maxpages)
 {
 	size_t offset = i->iov_offset;
-- 
cgit v1.2.3-71-gd317


From 0c949334a9e2581646c6ff0d1470a805b1e5be99 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 22 Mar 2014 06:51:37 -0400
Subject: iov_iter_truncate()

Now It Can Be Done(tm) - we don't need to do iov_shorten() in
generic_file_direct_write() anymore, now that all ->direct_IO()
instances are converted to proper iov_iter methods and honour
iter->count and iter->iov_offset properly.

Get rid of count/ocount arguments of generic_file_direct_write(),
while we are at it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/file.c     | 17 ++++++++---------
 fs/fuse/file.c      | 18 ++++++++----------
 fs/ocfs2/file.c     | 10 +++++-----
 fs/xfs/xfs_file.c   |  9 +++++----
 include/linux/fs.h  |  3 +--
 include/linux/uio.h |  6 ++++++
 mm/filemap.c        | 19 +++++++------------
 7 files changed, 40 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f8cee205618a..ea63a51c148c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1659,8 +1659,7 @@ again:
 
 static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 				    struct iov_iter *from,
-				    loff_t pos,
-				    size_t count, size_t ocount)
+				    loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
 	ssize_t written;
@@ -1668,9 +1667,9 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 	loff_t endbyte;
 	int err;
 
-	written = generic_file_direct_write(iocb, from, pos, count, ocount);
+	written = generic_file_direct_write(iocb, from, pos);
 
-	if (written < 0 || written == count)
+	if (written < 0 || !iov_iter_count(from))
 		return written;
 
 	pos += written;
@@ -1720,13 +1719,14 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	u64 end_pos;
 	ssize_t num_written = 0;
 	ssize_t err = 0;
-	size_t count, ocount;
+	size_t count;
 	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 	struct iov_iter i;
 
 	mutex_lock(&inode->i_mutex);
 
-	count = ocount = iov_length(iov, nr_segs);
+	count = iov_length(iov, nr_segs);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	current->backing_dev_info = inode->i_mapping->backing_dev_info;
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
@@ -1740,7 +1740,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		goto out;
 	}
 
-	iov_iter_init(&i, WRITE, iov, nr_segs, count);
+	iov_iter_truncate(&i, count);
 
 	err = file_remove_suid(file);
 	if (err) {
@@ -1783,8 +1783,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		atomic_inc(&BTRFS_I(inode)->sync_writers);
 
 	if (unlikely(file->f_flags & O_DIRECT)) {
-		num_written = __btrfs_direct_write(iocb, &i,
-						   pos, count, ocount);
+		num_written = __btrfs_direct_write(iocb, &i, pos);
 	} else {
 		num_written = __btrfs_buffered_write(file, &i, pos);
 		if (num_written > 0)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7026014717bc..66d2d5de19d2 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1188,8 +1188,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
-	size_t count = 0;
-	size_t ocount = 0;
+	size_t count;
 	ssize_t written = 0;
 	ssize_t written_buffered = 0;
 	struct inode *inode = mapping->host;
@@ -1208,7 +1207,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
 	WARN_ON(iocb->ki_pos != pos);
 
-	count = ocount = iov_length(iov, nr_segs);
+	count = iov_length(iov, nr_segs);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 	mutex_lock(&inode->i_mutex);
 
 	/* We can write back this queue in page reclaim */
@@ -1217,11 +1217,11 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
 	if (err)
 		goto out;
-	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	if (count == 0)
 		goto out;
 
+	iov_iter_truncate(&i, count);
 	err = file_remove_suid(file);
 	if (err)
 		goto out;
@@ -1231,8 +1231,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		goto out;
 
 	if (file->f_flags & O_DIRECT) {
-		written = generic_file_direct_write(iocb, &i, pos, count, ocount);
-		if (written < 0 || written == count)
+		written = generic_file_direct_write(iocb, &i, pos);
+		if (written < 0 || !iov_iter_count(&i))
 			goto out;
 
 		pos += written;
@@ -1469,8 +1469,7 @@ static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
 
 	res = generic_write_checks(file, ppos, &count, 0);
 	if (!res) {
-		if (iter->count > count)
-			iter->count = count;
+		iov_iter_truncate(iter, count);
 		res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE);
 	}
 
@@ -2896,8 +2895,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 		if (offset >= i_size)
 			return 0;
 		count = min_t(loff_t, count, fuse_round_up(i_size - offset));
-		if (iter->count > count)
-			iter->count = count;
+		iov_iter_truncate(iter, count);
 	}
 
 	io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9ce9ed7615c1..06b6a16d9776 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2241,7 +2241,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	int ret, direct_io, appending, rw_level, have_alloc_sem  = 0;
 	int can_do_direct, has_refcount = 0;
 	ssize_t written = 0;
-	size_t ocount;		/* original count */
 	size_t count;		/* after file limit checks */
 	loff_t old_size, *ppos = &iocb->ki_pos;
 	u32 old_clusters;
@@ -2253,6 +2252,9 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	int unaligned_dio = 0;
 	struct iov_iter from;
 
+	count = iov_length(iov, nr_segs);
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
+
 	trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
 		(unsigned long long)OCFS2_I(inode)->ip_blkno,
 		file->f_path.dentry->d_name.len,
@@ -2355,16 +2357,14 @@ relock:
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb, rw_level);
 
-	count = ocount = iov_length(iov, nr_segs);
 	ret = generic_write_checks(file, ppos, &count,
 				   S_ISBLK(inode->i_mode));
 	if (ret)
 		goto out_dio;
 
-	iov_iter_init(&from, WRITE, iov, nr_segs, count);
+	iov_iter_truncate(&from, count);
 	if (direct_io) {
-		written = generic_file_direct_write(iocb, &from, *ppos,
-						    count, ocount);
+		written = generic_file_direct_write(iocb, &from, *ppos);
 		if (written < 0) {
 			ret = written;
 			goto out_dio;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 762bb3e148a6..c997aa2751b2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -626,7 +626,7 @@ xfs_file_dio_aio_write(
 	const struct iovec	*iovp,
 	unsigned long		nr_segs,
 	loff_t			pos,
-	size_t			ocount)
+	size_t			count)
 {
 	struct file		*file = iocb->ki_filp;
 	struct address_space	*mapping = file->f_mapping;
@@ -634,7 +634,6 @@ xfs_file_dio_aio_write(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			ret = 0;
-	size_t			count = ocount;
 	int			unaligned_io = 0;
 	int			iolock;
 	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
@@ -645,6 +644,8 @@ xfs_file_dio_aio_write(
 	if ((pos | count) & target->bt_logical_sectormask)
 		return -XFS_ERROR(EINVAL);
 
+	iov_iter_init(&from, WRITE, iovp, nr_segs, count);
+
 	/* "unaligned" here means not aligned to a filesystem block */
 	if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
 		unaligned_io = 1;
@@ -676,6 +677,7 @@ xfs_file_dio_aio_write(
 	ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
 	if (ret)
 		goto out;
+	iov_iter_truncate(&from, count);
 
 	if (mapping->nrpages) {
 		ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@ -697,8 +699,7 @@ xfs_file_dio_aio_write(
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-	iov_iter_init(&from, WRITE, iovp, nr_segs, count);
-	ret = generic_file_direct_write(iocb, &from, pos, count, ocount);
+	ret = generic_file_direct_write(iocb, &from, pos);
 
 out:
 	xfs_rw_iunlock(ip, iolock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d096ebc7f348..8153396d19b4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2407,8 +2407,7 @@ extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsig
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *,
-		loff_t, size_t, size_t);
+extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 4876e9f2a58f..532f59d0adbb 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -82,6 +82,12 @@ static inline size_t iov_iter_count(struct iov_iter *i)
 	return i->count;
 }
 
+static inline void iov_iter_truncate(struct iov_iter *i, size_t count)
+{
+	if (i->count > count)
+		i->count = count;
+}
+
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 3aeaf2df4135..c0404b763a17 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2345,8 +2345,7 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
 EXPORT_SYMBOL(pagecache_write_end);
 
 ssize_t
-generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
-		loff_t pos, size_t count, size_t ocount)
+generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
 {
 	struct file	*file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -2356,10 +2355,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	pgoff_t		end;
 	struct iov_iter data;
 
-	if (count != ocount)
-		from->nr_segs = iov_shorten((struct iovec *)from->iov, from->nr_segs, count);
-
-	write_len = iov_length(from->iov, from->nr_segs);
+	write_len = iov_iter_count(from);
 	end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
 
 	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
@@ -2568,7 +2564,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space * mapping = file->f_mapping;
-	size_t ocount;		/* original count */
 	size_t count;		/* after file limit checks */
 	struct inode 	*inode = mapping->host;
 	loff_t		pos = iocb->ki_pos;
@@ -2577,7 +2572,8 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t		status;
 	struct iov_iter from;
 
-	count = ocount = iov_length(iov, nr_segs);
+	count = iov_length(iov, nr_segs);
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
 
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
@@ -2588,6 +2584,8 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (count == 0)
 		goto out;
 
+	iov_iter_truncate(&from, count);
+
 	err = file_remove_suid(file);
 	if (err)
 		goto out;
@@ -2596,14 +2594,11 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (err)
 		goto out;
 
-	iov_iter_init(&from, WRITE, iov, nr_segs, count);
-
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		loff_t endbyte;
 
-		written = generic_file_direct_write(iocb, &from, pos,
-							count, ocount);
+		written = generic_file_direct_write(iocb, &from, pos);
 		if (written < 0 || written == count)
 			goto out;
 
-- 
cgit v1.2.3-71-gd317


From 7f7f25e82d54870df24d415a7007fbd327da027b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 11 Feb 2014 17:49:24 -0500
Subject: replace checking for ->read/->aio_read presence with check in
 ->f_mode

Since we are about to introduce new methods (read_iter/write_iter), the
tests in a bunch of places would have to grow inconveniently.  Check
once (at open() time) and store results in ->f_mode as FMODE_CAN_READ
and FMODE_CAN_WRITE resp.  It might end up being a temporary measure -
once everything switches from ->aio_{read,write} to ->{read,write}_iter
it might make sense to return to open-coded checks.  We'll see...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/mtd/nand/nandsim.c          |  4 ++--
 drivers/usb/gadget/storage_common.c |  4 ++--
 fs/file_table.c                     |  4 ++++
 fs/open.c                           |  4 ++++
 fs/read_write.c                     | 14 +++++++-------
 include/linux/fs.h                  |  4 ++++
 6 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 42e8a770e631..4f0d83648e5a 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -575,12 +575,12 @@ static int alloc_device(struct nandsim *ns)
 		cfile = filp_open(cache_file, O_CREAT | O_RDWR | O_LARGEFILE, 0600);
 		if (IS_ERR(cfile))
 			return PTR_ERR(cfile);
-		if (!cfile->f_op->read && !cfile->f_op->aio_read) {
+		if (!(cfile->f_mode & FMODE_CAN_READ)) {
 			NS_ERR("alloc_device: cache file not readable\n");
 			err = -EINVAL;
 			goto err_close;
 		}
-		if (!cfile->f_op->write && !cfile->f_op->aio_write) {
+		if (!(cfile->f_mode & FMODE_CAN_WRITE)) {
 			NS_ERR("alloc_device: cache file not writeable\n");
 			err = -EINVAL;
 			goto err_close;
diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c
index ec20a1f50c2d..a8898df131ed 100644
--- a/drivers/usb/gadget/storage_common.c
+++ b/drivers/usb/gadget/storage_common.c
@@ -220,11 +220,11 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
 	 * If we can't read the file, it's no good.
 	 * If we can't write the file, use it read-only.
 	 */
-	if (!(filp->f_op->read || filp->f_op->aio_read)) {
+	if (!(filp->f_mode & FMODE_CAN_READ)) {
 		LINFO(curlun, "file not readable: %s\n", filename);
 		goto out;
 	}
-	if (!(filp->f_op->write || filp->f_op->aio_write))
+	if (!(filp->f_mode & FMODE_CAN_WRITE))
 		ro = 1;
 
 	size = i_size_read(inode->i_mapping->host);
diff --git a/fs/file_table.c b/fs/file_table.c
index a374f5033e97..be73cbc48c12 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -175,6 +175,10 @@ struct file *alloc_file(struct path *path, fmode_t mode,
 	file->f_path = *path;
 	file->f_inode = path->dentry->d_inode;
 	file->f_mapping = path->dentry->d_inode->i_mapping;
+	if ((mode & FMODE_READ) && likely(fop->read || fop->aio_read))
+		mode |= FMODE_CAN_READ;
+	if ((mode & FMODE_WRITE) && likely(fop->write || fop->aio_write))
+		mode |= FMODE_CAN_WRITE;
 	file->f_mode = mode;
 	file->f_op = fop;
 	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
diff --git a/fs/open.c b/fs/open.c
index 9d64679cec73..39d3d0468ee6 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -725,6 +725,10 @@ static int do_dentry_open(struct file *f,
 	}
 	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(inode);
+	if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->aio_read))
+		f->f_mode |= FMODE_CAN_READ;
+	if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->aio_write))
+		f->f_mode |= FMODE_CAN_WRITE;
 
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
diff --git a/fs/read_write.c b/fs/read_write.c
index 31c6efa43183..d29d2a361d2c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -396,7 +396,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 
 	if (!(file->f_mode & FMODE_READ))
 		return -EBADF;
-	if (!file->f_op->read && !file->f_op->aio_read)
+	if (!(file->f_mode & FMODE_CAN_READ))
 		return -EINVAL;
 	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
 		return -EFAULT;
@@ -445,7 +445,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
 	const char __user *p;
 	ssize_t ret;
 
-	if (!file->f_op->write && !file->f_op->aio_write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
 
 	old_fs = get_fs();
@@ -472,7 +472,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
-	if (!file->f_op->write && !file->f_op->aio_write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
 	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 		return -EFAULT;
@@ -785,7 +785,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 {
 	if (!(file->f_mode & FMODE_READ))
 		return -EBADF;
-	if (!file->f_op->aio_read && !file->f_op->read)
+	if (!(file->f_mode & FMODE_CAN_READ))
 		return -EINVAL;
 
 	return do_readv_writev(READ, file, vec, vlen, pos);
@@ -798,7 +798,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 {
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
-	if (!file->f_op->aio_write && !file->f_op->write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
 
 	return do_readv_writev(WRITE, file, vec, vlen, pos);
@@ -964,7 +964,7 @@ static size_t compat_readv(struct file *file,
 		goto out;
 
 	ret = -EINVAL;
-	if (!file->f_op->aio_read && !file->f_op->read)
+	if (!(file->f_mode & FMODE_CAN_READ))
 		goto out;
 
 	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
@@ -1041,7 +1041,7 @@ static size_t compat_writev(struct file *file,
 		goto out;
 
 	ret = -EINVAL;
-	if (!file->f_op->aio_write && !file->f_op->write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		goto out;
 
 	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8153396d19b4..75eb71357adf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -128,6 +128,10 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define FMODE_ATOMIC_POS	((__force fmode_t)0x8000)
 /* Write access to underlying fs */
 #define FMODE_WRITER		((__force fmode_t)0x10000)
+/* Has read method(s) */
+#define FMODE_CAN_READ          ((__force fmode_t)0x20000)
+/* Has write method(s) */
+#define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)
 
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x1000000)
-- 
cgit v1.2.3-71-gd317


From 293bc9822fa9b3c9d4b7893bcb241e085580771a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 11 Feb 2014 18:37:41 -0500
Subject: new methods: ->read_iter() and ->write_iter()

Beginning to introduce those.  Just the callers for now, and it's
clumsier than it'll eventually become; once we finish converting
aio_read and aio_write instances, the things will get nicer.

For now, these guys are in parallel to ->aio_read() and ->aio_write();
they take iocb and iov_iter, with everything in iov_iter already
validated.  File offset is passed in iocb->ki_pos, iov/nr_segs -
in iov_iter.

Main concerns in that series are stack footprint and ability to
split the damn thing cleanly.

[fix from Peter Ujfalusi <peter.ujfalusi@ti.com> folded]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 +
 Documentation/filesystems/vfs.txt | 10 ++++-
 fs/aio.c                          | 14 +++++-
 fs/file_table.c                   |  6 ++-
 fs/open.c                         |  6 ++-
 fs/read_write.c                   | 90 ++++++++++++++++++++++++++++++++++++---
 include/linux/fs.h                |  6 +++
 7 files changed, 121 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 9b0d5a33c8bf..b18dd1779029 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -430,6 +430,8 @@ prototypes:
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 1846374a5add..a1d0d7a30165 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -806,6 +806,8 @@ struct file_operations {
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -836,11 +838,15 @@ otherwise noted.
 
   read: called by read(2) and related system calls
 
-  aio_read: called by io_submit(2) and other asynchronous I/O operations
+  aio_read: vectored, possibly asynchronous read
+
+  read_iter: possibly asynchronous read with iov_iter as destination
 
   write: called by write(2) and related system calls
 
-  aio_write: called by io_submit(2) and other asynchronous I/O operations
+  aio_write: vectored, possibly asynchronous write
+
+  write_iter: possibly asynchronous write with iov_iter as source
 
   iterate: called when the VFS needs to read the directory contents
 
diff --git a/fs/aio.c b/fs/aio.c
index a0ed6c7d2cd2..56b28607c32d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1241,6 +1241,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 
 typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
 			    unsigned long, loff_t);
+typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
 
 static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
 				     int rw, char __user *buf,
@@ -1298,7 +1299,9 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
 	int rw;
 	fmode_t mode;
 	aio_rw_op *rw_op;
+	rw_iter_op *iter_op;
 	struct iovec inline_vec, *iovec = &inline_vec;
+	struct iov_iter iter;
 
 	switch (opcode) {
 	case IOCB_CMD_PREAD:
@@ -1306,6 +1309,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
 		mode	= FMODE_READ;
 		rw	= READ;
 		rw_op	= file->f_op->aio_read;
+		iter_op	= file->f_op->read_iter;
 		goto rw_common;
 
 	case IOCB_CMD_PWRITE:
@@ -1313,12 +1317,13 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
 		mode	= FMODE_WRITE;
 		rw	= WRITE;
 		rw_op	= file->f_op->aio_write;
+		iter_op	= file->f_op->write_iter;
 		goto rw_common;
 rw_common:
 		if (unlikely(!(file->f_mode & mode)))
 			return -EBADF;
 
-		if (!rw_op)
+		if (!rw_op && !iter_op)
 			return -EINVAL;
 
 		ret = (opcode == IOCB_CMD_PREADV ||
@@ -1347,7 +1352,12 @@ rw_common:
 		if (rw == WRITE)
 			file_start_write(file);
 
-		ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+		if (iter_op) {
+			iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
+			ret = iter_op(req, &iter);
+		} else {
+			ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+		}
 
 		if (rw == WRITE)
 			file_end_write(file);
diff --git a/fs/file_table.c b/fs/file_table.c
index be73cbc48c12..f8cc881fbbfb 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -175,9 +175,11 @@ struct file *alloc_file(struct path *path, fmode_t mode,
 	file->f_path = *path;
 	file->f_inode = path->dentry->d_inode;
 	file->f_mapping = path->dentry->d_inode->i_mapping;
-	if ((mode & FMODE_READ) && likely(fop->read || fop->aio_read))
+	if ((mode & FMODE_READ) &&
+	     likely(fop->read || fop->aio_read || fop->read_iter))
 		mode |= FMODE_CAN_READ;
-	if ((mode & FMODE_WRITE) && likely(fop->write || fop->aio_write))
+	if ((mode & FMODE_WRITE) &&
+	     likely(fop->write || fop->aio_write || fop->write_iter))
 		mode |= FMODE_CAN_WRITE;
 	file->f_mode = mode;
 	file->f_op = fop;
diff --git a/fs/open.c b/fs/open.c
index 39d3d0468ee6..36662d036237 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -725,9 +725,11 @@ static int do_dentry_open(struct file *f,
 	}
 	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(inode);
-	if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->aio_read))
+	if ((f->f_mode & FMODE_READ) &&
+	     likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter))
 		f->f_mode |= FMODE_CAN_READ;
-	if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->aio_write))
+	if ((f->f_mode & FMODE_WRITE) &&
+	     likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter))
 		f->f_mode |= FMODE_CAN_WRITE;
 
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
diff --git a/fs/read_write.c b/fs/read_write.c
index d29d2a361d2c..fe2f9d5e3536 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -25,6 +25,7 @@
 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
 typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
 		unsigned long, loff_t);
+typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
 
 const struct file_operations generic_ro_fops = {
 	.llseek		= generic_file_llseek,
@@ -390,6 +391,27 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
 
 EXPORT_SYMBOL(do_sync_read);
 
+ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
+{
+	struct iovec iov = { .iov_base = buf, .iov_len = len };
+	struct kiocb kiocb;
+	struct iov_iter iter;
+	ssize_t ret;
+
+	init_sync_kiocb(&kiocb, filp);
+	kiocb.ki_pos = *ppos;
+	kiocb.ki_nbytes = len;
+	iov_iter_init(&iter, READ, &iov, 1, len);
+
+	ret = filp->f_op->read_iter(&kiocb, &iter);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&kiocb);
+	*ppos = kiocb.ki_pos;
+	return ret;
+}
+
+EXPORT_SYMBOL(new_sync_read);
+
 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 {
 	ssize_t ret;
@@ -406,8 +428,10 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 		count = ret;
 		if (file->f_op->read)
 			ret = file->f_op->read(file, buf, count, pos);
-		else
+		else if (file->f_op->aio_read)
 			ret = do_sync_read(file, buf, count, pos);
+		else
+			ret = new_sync_read(file, buf, count, pos);
 		if (ret > 0) {
 			fsnotify_access(file);
 			add_rchar(current, ret);
@@ -439,6 +463,27 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 
 EXPORT_SYMBOL(do_sync_write);
 
+ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
+{
+	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
+	struct kiocb kiocb;
+	struct iov_iter iter;
+	ssize_t ret;
+
+	init_sync_kiocb(&kiocb, filp);
+	kiocb.ki_pos = *ppos;
+	kiocb.ki_nbytes = len;
+	iov_iter_init(&iter, WRITE, &iov, 1, len);
+
+	ret = filp->f_op->write_iter(&kiocb, &iter);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&kiocb);
+	*ppos = kiocb.ki_pos;
+	return ret;
+}
+
+EXPORT_SYMBOL(new_sync_write);
+
 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
 {
 	mm_segment_t old_fs;
@@ -455,8 +500,10 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
 		count =  MAX_RW_COUNT;
 	if (file->f_op->write)
 		ret = file->f_op->write(file, p, count, pos);
-	else
+	else if (file->f_op->aio_write)
 		ret = do_sync_write(file, p, count, pos);
+	else
+		ret = new_sync_write(file, p, count, pos);
 	set_fs(old_fs);
 	if (ret > 0) {
 		fsnotify_modify(file);
@@ -483,8 +530,10 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 		file_start_write(file);
 		if (file->f_op->write)
 			ret = file->f_op->write(file, buf, count, pos);
-		else
+		else if (file->f_op->aio_write)
 			ret = do_sync_write(file, buf, count, pos);
+		else
+			ret = new_sync_write(file, buf, count, pos);
 		if (ret > 0) {
 			fsnotify_modify(file);
 			add_wchar(current, ret);
@@ -601,6 +650,25 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 }
 EXPORT_SYMBOL(iov_shorten);
 
+static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov,
+		unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn)
+{
+	struct kiocb kiocb;
+	struct iov_iter iter;
+	ssize_t ret;
+
+	init_sync_kiocb(&kiocb, filp);
+	kiocb.ki_pos = *ppos;
+	kiocb.ki_nbytes = len;
+
+	iov_iter_init(&iter, rw, iov, nr_segs, len);
+	ret = fn(&kiocb, &iter);
+	if (ret == -EIOCBQUEUED)
+		ret = wait_on_sync_kiocb(&kiocb);
+	*ppos = kiocb.ki_pos;
+	return ret;
+}
+
 static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 {
@@ -738,6 +806,7 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
+	iter_fn_t iter_fn;
 
 	ret = rw_copy_check_uvector(type, uvector, nr_segs,
 				    ARRAY_SIZE(iovstack), iovstack, &iov);
@@ -753,13 +822,18 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	if (type == READ) {
 		fn = file->f_op->read;
 		fnv = file->f_op->aio_read;
+		iter_fn = file->f_op->read_iter;
 	} else {
 		fn = (io_fn_t)file->f_op->write;
 		fnv = file->f_op->aio_write;
+		iter_fn = file->f_op->write_iter;
 		file_start_write(file);
 	}
 
-	if (fnv)
+	if (iter_fn)
+		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
+						pos, iter_fn);
+	else if (fnv)
 		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 						pos, fnv);
 	else
@@ -912,6 +986,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
+	iter_fn_t iter_fn;
 
 	ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
 					       UIO_FASTIOV, iovstack, &iov);
@@ -927,13 +1002,18 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	if (type == READ) {
 		fn = file->f_op->read;
 		fnv = file->f_op->aio_read;
+		iter_fn = file->f_op->read_iter;
 	} else {
 		fn = (io_fn_t)file->f_op->write;
 		fnv = file->f_op->aio_write;
+		iter_fn = file->f_op->write_iter;
 		file_start_write(file);
 	}
 
-	if (fnv)
+	if (iter_fn)
+		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
+						pos, iter_fn);
+	else if (fnv)
 		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 						pos, fnv);
 	else
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 75eb71357adf..17535e0a4547 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1451,6 +1451,8 @@ struct block_device_operations;
 #define HAVE_COMPAT_IOCTL 1
 #define HAVE_UNLOCKED_IOCTL 1
 
+struct iov_iter;
+
 struct file_operations {
 	struct module *owner;
 	loff_t (*llseek) (struct file *, loff_t, int);
@@ -1458,6 +1460,8 @@ struct file_operations {
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -2415,6 +2419,8 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
+extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
+extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
 
 /* fs/block_dev.c */
 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
-- 
cgit v1.2.3-71-gd317


From 8174202b34c30e0c07231bf63f18ab29af634f0b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Apr 2014 03:17:43 -0400
Subject: write_iter variants of {__,}generic_file_aio_write()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_file.c        |  8 +++----
 fs/adfs/file.c          |  4 ++--
 fs/affs/file.c          |  4 ++--
 fs/bfs/file.c           |  4 ++--
 fs/ecryptfs/file.c      |  4 ++--
 fs/exofs/file.c         |  4 ++--
 fs/ext2/file.c          |  4 ++--
 fs/ext3/file.c          |  4 ++--
 fs/f2fs/file.c          |  4 ++--
 fs/fat/file.c           |  4 ++--
 fs/hfs/inode.c          |  4 ++--
 fs/hfsplus/inode.c      |  4 ++--
 fs/hostfs/hostfs_kern.c |  4 ++--
 fs/hpfs/file.c          |  4 ++--
 fs/jffs2/file.c         |  4 ++--
 fs/jfs/file.c           |  4 ++--
 fs/logfs/file.c         |  4 ++--
 fs/minix/file.c         |  4 ++--
 fs/nilfs2/file.c        |  4 ++--
 fs/omfs/file.c          |  4 ++--
 fs/ramfs/file-mmu.c     |  4 ++--
 fs/ramfs/file-nommu.c   |  4 ++--
 fs/reiserfs/file.c      |  4 ++--
 fs/sysv/file.c          |  4 ++--
 fs/ufs/file.c           |  4 ++--
 include/linux/fs.h      |  2 ++
 mm/filemap.c            | 61 ++++++++++++++++++++++++++++++-------------------
 mm/shmem.c              |  4 ++--
 mm/vmscan.c             |  2 +-
 29 files changed, 94 insertions(+), 79 deletions(-)

(limited to 'include')

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 47e0597d1e9b..b9b5f979a2ca 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -763,7 +763,7 @@ err_out:
 
 buff_write:
 	mutex_unlock(&inode->i_mutex);
-	return do_sync_write(filp, data, count, offsetp);
+	return new_sync_write(filp, data, count, offsetp);
 }
 
 /**
@@ -781,7 +781,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data,
 
 	if (filp->f_flags & O_DIRECT)
 		return v9fs_direct_write(filp, data, count, offset);
-	return do_sync_write(filp, data, count, offset);
+	return new_sync_write(filp, data, count, offset);
 }
 
 
@@ -851,7 +851,7 @@ const struct file_operations v9fs_cached_file_operations = {
 	.read = v9fs_cached_file_read,
 	.write = v9fs_cached_file_write,
 	.read_iter = generic_file_read_iter,
-	.aio_write = generic_file_aio_write,
+	.write_iter = generic_file_write_iter,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
 	.lock = v9fs_file_lock,
@@ -864,7 +864,7 @@ const struct file_operations v9fs_cached_file_operations_dotl = {
 	.read = v9fs_cached_file_read,
 	.write = v9fs_cached_file_write,
 	.read_iter = generic_file_read_iter,
-	.aio_write = generic_file_aio_write,
+	.write_iter = generic_file_write_iter,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
 	.lock = v9fs_file_lock_dotl,
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 3bfc9efa29b4..07c9edce5aa7 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -27,8 +27,8 @@ const struct file_operations adfs_file_operations = {
 	.read_iter	= generic_file_read_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= generic_file_fsync,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.splice_read	= generic_file_splice_read,
 };
 
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 982853f17afc..9df23175e28b 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -29,8 +29,8 @@ const struct file_operations affs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.open		= affs_file_open,
 	.release	= affs_file_release,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 0aa788892f93..e7f88ace1a25 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -25,8 +25,8 @@ const struct file_operations bfs_file_operations = {
 	.llseek 	= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
 };
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index b32827c917e1..db0fad3269c0 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -353,8 +353,8 @@ const struct file_operations ecryptfs_main_fops = {
 	.llseek = generic_file_llseek,
 	.read = new_sync_read,
 	.read_iter = ecryptfs_read_update_atime,
-	.write = do_sync_write,
-	.aio_write = generic_file_aio_write,
+	.write = new_sync_write,
+	.write_iter = generic_file_write_iter,
 	.iterate = ecryptfs_readdir,
 	.unlocked_ioctl = ecryptfs_unlocked_ioctl,
 #ifdef CONFIG_COMPAT
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 90d394da7471..5b7f6be5a2d5 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -68,9 +68,9 @@ static int exofs_flush(struct file *file, fl_owner_t id)
 const struct file_operations exofs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.open		= generic_file_open,
 	.release	= exofs_release_file,
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 407305072597..970c6aca15cc 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -63,9 +63,9 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 const struct file_operations ext2_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.unlocked_ioctl = ext2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext2_compat_ioctl,
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 5439d2f0141b..c833b1226d4d 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -51,9 +51,9 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
 const struct file_operations ext3_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.unlocked_ioctl	= ext3_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext3_compat_ioctl,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0e01fb0bc97c..22f4900dd8eb 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -680,9 +680,9 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 const struct file_operations f2fs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.open		= generic_file_open,
 	.mmap		= f2fs_file_mmap,
 	.fsync		= f2fs_sync_file,
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 29285e990c90..85f79a89e747 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -171,9 +171,9 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 const struct file_operations fat_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.release	= fat_file_release,
 	.unlocked_ioctl	= fat_generic_ioctl,
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 6d4055aff109..d0929bc81782 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -676,8 +676,8 @@ static const struct file_operations hfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
 	.fsync		= hfs_file_fsync,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index cccc89e47cb6..0cf786f2d046 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -343,8 +343,8 @@ static const struct file_operations hfsplus_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
 	.fsync		= hfsplus_file_fsync,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index ce0005d8ffeb..bb529f3b7f2b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -381,8 +381,8 @@ static const struct file_operations hostfs_file_fops = {
 	.read		= new_sync_read,
 	.splice_read	= generic_file_splice_read,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
-	.write		= do_sync_write,
+	.write_iter	= generic_file_write_iter,
+	.write		= new_sync_write,
 	.mmap		= generic_file_mmap,
 	.open		= hostfs_file_open,
 	.release	= hostfs_file_release,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index bacb478a4990..7f54e5f76cec 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -199,8 +199,8 @@ const struct file_operations hpfs_file_ops =
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.release	= hpfs_file_release,
 	.fsync		= hpfs_file_fsync,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 9192127d591c..64989ca9ba90 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -53,8 +53,8 @@ const struct file_operations jffs2_file_operations =
 	.open =		generic_file_open,
  	.read =		new_sync_read,
  	.read_iter =	generic_file_read_iter,
- 	.write =	do_sync_write,
- 	.aio_write =	generic_file_aio_write,
+ 	.write =	new_sync_write,
+ 	.write_iter =	generic_file_write_iter,
 	.unlocked_ioctl=jffs2_ioctl,
 	.mmap =		generic_file_readonly_mmap,
 	.fsync =	jffs2_fsync,
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index a5d8299b2208..cc744ecaf51f 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -151,10 +151,10 @@ const struct inode_operations jfs_file_inode_operations = {
 const struct file_operations jfs_file_operations = {
 	.open		= jfs_open,
 	.llseek		= generic_file_llseek,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 1ca8026dc664..8538752df2f6 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -265,14 +265,14 @@ const struct inode_operations logfs_reg_iops = {
 
 const struct file_operations logfs_reg_fops = {
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.fsync		= logfs_fsync,
 	.unlocked_ioctl	= logfs_ioctl,
 	.llseek		= generic_file_llseek,
 	.mmap		= generic_file_readonly_mmap,
 	.open		= generic_file_open,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 };
 
 const struct address_space_operations logfs_reg_aops = {
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 607b47145325..a967de085ac0 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -16,8 +16,8 @@ const struct file_operations minix_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= generic_file_fsync,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index dcb1b0e8b435..24978153c0c4 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -153,9 +153,9 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 const struct file_operations nilfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.unlocked_ioctl	= nilfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= nilfs_compat_ioctl,
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 3bf28da9f0df..902e88527fce 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -338,9 +338,9 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
 const struct file_operations omfs_file_operations = {
 	.llseek = generic_file_llseek,
 	.read = new_sync_read,
-	.write = do_sync_write,
+	.write = new_sync_write,
 	.read_iter = generic_file_read_iter,
-	.aio_write = generic_file_aio_write,
+	.write_iter = generic_file_write_iter,
 	.mmap = generic_file_mmap,
 	.fsync = generic_file_fsync,
 	.splice_read = generic_file_splice_read,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 30ffb367bc0b..6ea0b9718a9d 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -33,8 +33,8 @@
 const struct file_operations ramfs_file_operations = {
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= noop_fsync,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 416db04f8464..9ed420f8f3ca 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -39,8 +39,8 @@ const struct file_operations ramfs_file_operations = {
 	.get_unmapped_area	= ramfs_nommu_get_unmapped_area,
 	.read			= new_sync_read,
 	.read_iter		= generic_file_read_iter,
-	.write			= do_sync_write,
-	.aio_write		= generic_file_aio_write,
+	.write			= new_sync_write,
+	.write_iter		= generic_file_write_iter,
 	.fsync			= noop_fsync,
 	.splice_read		= generic_file_splice_read,
 	.splice_write		= generic_file_splice_write,
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 7592d681fd8c..7c8ecd6468db 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -236,7 +236,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
 
 const struct file_operations reiserfs_file_operations = {
 	.read = new_sync_read,
-	.write = do_sync_write,
+	.write = new_sync_write,
 	.unlocked_ioctl = reiserfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = reiserfs_compat_ioctl,
@@ -246,7 +246,7 @@ const struct file_operations reiserfs_file_operations = {
 	.release = reiserfs_file_release,
 	.fsync = reiserfs_sync_file,
 	.read_iter = generic_file_read_iter,
-	.aio_write = generic_file_aio_write,
+	.write_iter = generic_file_write_iter,
 	.splice_read = generic_file_splice_read,
 	.splice_write = generic_file_splice_write,
 	.llseek = generic_file_llseek,
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index d99be8877388..b00811c75b24 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -23,8 +23,8 @@ const struct file_operations sysv_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= generic_file_fsync,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index b6b402989e6b..c84ec010a676 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -37,8 +37,8 @@ const struct file_operations ufs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.open           = generic_file_open,
 	.fsync		= generic_file_fsync,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 17535e0a4547..4b221637f09e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2414,7 +2414,9 @@ int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isbl
 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
+extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
diff --git a/mm/filemap.c b/mm/filemap.c
index c0404b763a17..d2d9eeec8bf0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2542,10 +2542,9 @@ again:
 EXPORT_SYMBOL(generic_perform_write);
 
 /**
- * __generic_file_aio_write - write data to a file
+ * __generic_file_write_iter - write data to a file
  * @iocb:	IO state structure (file, offset, etc.)
- * @iov:	vector with data to write
- * @nr_segs:	number of segments in the vector
+ * @from:	iov_iter with data to write
  *
  * This function does all the work needed for actually writing data to a
  * file. It does all basic checks, removes SUID from the file, updates
@@ -2559,21 +2558,16 @@ EXPORT_SYMBOL(generic_perform_write);
  * A caller has to handle it. This is mainly due to the fact that we want to
  * avoid syncing under i_mutex.
  */
-ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				 unsigned long nr_segs)
+ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space * mapping = file->f_mapping;
-	size_t count;		/* after file limit checks */
 	struct inode 	*inode = mapping->host;
 	loff_t		pos = iocb->ki_pos;
 	ssize_t		written = 0;
 	ssize_t		err;
 	ssize_t		status;
-	struct iov_iter from;
-
-	count = iov_length(iov, nr_segs);
-	iov_iter_init(&from, WRITE, iov, nr_segs, count);
+	size_t		count = iov_iter_count(from);
 
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
@@ -2584,7 +2578,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (count == 0)
 		goto out;
 
-	iov_iter_truncate(&from, count);
+	iov_iter_truncate(from, count);
 
 	err = file_remove_suid(file);
 	if (err)
@@ -2598,7 +2592,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		loff_t endbyte;
 
-		written = generic_file_direct_write(iocb, &from, pos);
+		written = generic_file_direct_write(iocb, from, pos);
 		if (written < 0 || written == count)
 			goto out;
 
@@ -2609,7 +2603,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		pos += written;
 		count -= written;
 
-		status = generic_perform_write(file, &from, pos);
+		status = generic_perform_write(file, from, pos);
 		/*
 		 * If generic_perform_write() returned a synchronous error
 		 * then we want to return the number of bytes which were
@@ -2641,7 +2635,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 			 */
 		}
 	} else {
-		written = generic_perform_write(file, &from, pos);
+		written = generic_perform_write(file, from, pos);
 		if (likely(written >= 0))
 			iocb->ki_pos = pos + written;
 	}
@@ -2649,30 +2643,36 @@ out:
 	current->backing_dev_info = NULL;
 	return written ? written : err;
 }
+EXPORT_SYMBOL(__generic_file_write_iter);
+
+ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+				 unsigned long nr_segs)
+{
+	size_t count = iov_length(iov, nr_segs);
+	struct iov_iter from;
+
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
+	return __generic_file_write_iter(iocb, &from);
+}
 EXPORT_SYMBOL(__generic_file_aio_write);
 
 /**
- * generic_file_aio_write - write data to a file
+ * generic_file_write_iter - write data to a file
  * @iocb:	IO state structure
- * @iov:	vector with data to write
- * @nr_segs:	number of segments in the vector
- * @pos:	position in file where to write
+ * @from:	iov_iter with data to write
  *
- * This is a wrapper around __generic_file_aio_write() to be used by most
+ * This is a wrapper around __generic_file_write_iter() to be used by most
  * filesystems. It takes care of syncing the file in case of O_SYNC file
  * and acquires i_mutex as needed.
  */
-ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
+ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
 
-	BUG_ON(iocb->ki_pos != pos);
-
 	mutex_lock(&inode->i_mutex);
-	ret = __generic_file_aio_write(iocb, iov, nr_segs);
+	ret = __generic_file_write_iter(iocb, from);
 	mutex_unlock(&inode->i_mutex);
 
 	if (ret > 0) {
@@ -2684,6 +2684,19 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	}
 	return ret;
 }
+EXPORT_SYMBOL(generic_file_write_iter);
+
+ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
+{
+	size_t count = iov_length(iov, nr_segs);
+	struct iov_iter from;
+
+	BUG_ON(iocb->ki_pos != pos);
+
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
+	return generic_file_write_iter(iocb, &from);
+}
 EXPORT_SYMBOL(generic_file_aio_write);
 
 /**
diff --git a/mm/shmem.c b/mm/shmem.c
index edc6c7e817e9..d3e5c6fc313c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2618,9 +2618,9 @@ static const struct file_operations shmem_file_operations = {
 #ifdef CONFIG_TMPFS
 	.llseek		= shmem_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= shmem_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.fsync		= noop_fsync,
 	.splice_read	= shmem_file_splice_read,
 	.splice_write	= generic_file_splice_write,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 32c661d66a45..9c2dba6ac685 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -458,7 +458,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 	 * stalls if we need to run get_block().  We could test
 	 * PagePrivate for that.
 	 *
-	 * If this process is currently in __generic_file_aio_write() against
+	 * If this process is currently in __generic_file_write_iter() against
 	 * this page's queue, we can perform writeback even if that
 	 * will block.
 	 *
-- 
cgit v1.2.3-71-gd317


From 1456c0a87c4241d3a801651019e66983c69ad17d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Apr 2014 03:21:50 -0400
Subject: blkdev_aio_write() - turn into blkdev_write_iter()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/char/raw.c |  4 ++--
 fs/block_dev.c     | 16 ++++++----------
 include/linux/fs.h |  3 +--
 3 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index cfb607a64b85..0102dc788608 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -286,8 +286,8 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd,
 static const struct file_operations raw_fops = {
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= blkdev_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= blkdev_write_iter,
 	.fsync		= blkdev_fsync,
 	.open		= raw_open,
 	.release	= raw_release,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3d97f4a257ff..4e36b8ea8aa4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1509,28 +1509,24 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
  * Does not take i_mutex for the write and thus is not for general purpose
  * use.
  */
-ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
-			 unsigned long nr_segs, loff_t pos)
+ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct blk_plug plug;
 	ssize_t ret;
 
-	BUG_ON(iocb->ki_pos != pos);
-
 	blk_start_plug(&plug);
-	ret = __generic_file_aio_write(iocb, iov, nr_segs);
+	ret = __generic_file_write_iter(iocb, from);
 	if (ret > 0) {
 		ssize_t err;
-
-		err = generic_write_sync(file, pos, ret);
+		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
 		if (err < 0)
 			ret = err;
 	}
 	blk_finish_plug(&plug);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(blkdev_aio_write);
+EXPORT_SYMBOL_GPL(blkdev_write_iter);
 
 static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
@@ -1577,9 +1573,9 @@ const struct file_operations def_blk_fops = {
 	.release	= blkdev_close,
 	.llseek		= block_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= blkdev_read_iter,
-	.aio_write	= blkdev_aio_write,
+	.write_iter	= blkdev_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b221637f09e..1b9b6c59abdd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2425,8 +2425,7 @@ extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo
 extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
 
 /* fs/block_dev.c */
-extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos);
+extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from);
 extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
 			int datasync);
 extern void block_sync_page(struct page *page);
-- 
cgit v1.2.3-71-gd317


From a8f3550cd228b6edc5d17fce1a9af8cc7004f185 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Apr 2014 03:32:25 -0400
Subject: bury __generic_file_aio_write()

all users converted to __generic_file_write_iter() now

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h |  1 -
 mm/filemap.c       | 11 -----------
 2 files changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1b9b6c59abdd..99817c9e665e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2413,7 +2413,6 @@ extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
-extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/mm/filemap.c b/mm/filemap.c
index d2d9eeec8bf0..7dcdb9db710d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2645,17 +2645,6 @@ out:
 }
 EXPORT_SYMBOL(__generic_file_write_iter);
 
-ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				 unsigned long nr_segs)
-{
-	size_t count = iov_length(iov, nr_segs);
-	struct iov_iter from;
-
-	iov_iter_init(&from, WRITE, iov, nr_segs, count);
-	return __generic_file_write_iter(iocb, &from);
-}
-EXPORT_SYMBOL(__generic_file_aio_write);
-
 /**
  * generic_file_write_iter - write data to a file
  * @iocb:	IO state structure
-- 
cgit v1.2.3-71-gd317


From f0d1bec9d58d4c038d0ac958c9af82be6eb18045 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Apr 2014 15:05:18 -0400
Subject: new helper: copy_page_from_iter()

parallel to copy_page_to_iter().  pipe_write() switched to it (and became
->write_iter()).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c           | 129 ++++++++--------------------------------------------
 include/linux/uio.h |   2 +
 mm/iov_iter.c       |  78 +++++++++++++++++++++++++++++++
 3 files changed, 99 insertions(+), 110 deletions(-)

(limited to 'include')

diff --git a/fs/pipe.c b/fs/pipe.c
index 05ccb00cb407..21981e58e2a6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -116,50 +116,6 @@ void pipe_wait(struct pipe_inode_info *pipe)
 	pipe_lock(pipe);
 }
 
-static int
-pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
-			int atomic)
-{
-	unsigned long copy;
-
-	while (len > 0) {
-		while (!iov->iov_len)
-			iov++;
-		copy = min_t(unsigned long, len, iov->iov_len);
-
-		if (atomic) {
-			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
-				return -EFAULT;
-		} else {
-			if (copy_from_user(to, iov->iov_base, copy))
-				return -EFAULT;
-		}
-		to += copy;
-		len -= copy;
-		iov->iov_base += copy;
-		iov->iov_len -= copy;
-	}
-	return 0;
-}
-
-/*
- * Pre-fault in the user memory, so we can use atomic copies.
- */
-static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
-{
-	while (!iov->iov_len)
-		iov++;
-
-	while (len > 0) {
-		unsigned long this_len;
-
-		this_len = min_t(unsigned long, len, iov->iov_len);
-		fault_in_pages_readable(iov->iov_base, this_len);
-		len -= this_len;
-		iov++;
-	}
-}
-
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
@@ -380,24 +336,19 @@ static inline int is_packetized(struct file *file)
 }
 
 static ssize_t
-pipe_write(struct kiocb *iocb, const struct iovec *_iov,
-	    unsigned long nr_segs, loff_t ppos)
+pipe_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *filp = iocb->ki_filp;
 	struct pipe_inode_info *pipe = filp->private_data;
-	ssize_t ret;
-	int do_wakeup;
-	struct iovec *iov = (struct iovec *)_iov;
-	size_t total_len;
+	ssize_t ret = 0;
+	int do_wakeup = 0;
+	size_t total_len = iov_iter_count(from);
 	ssize_t chars;
 
-	total_len = iov_length(iov, nr_segs);
 	/* Null write succeeds. */
 	if (unlikely(total_len == 0))
 		return 0;
 
-	do_wakeup = 0;
-	ret = 0;
 	__pipe_lock(pipe);
 
 	if (!pipe->readers) {
@@ -416,38 +367,19 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 		int offset = buf->offset + buf->len;
 
 		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
-			int error, atomic = 1;
-			void *addr;
-
-			error = ops->confirm(pipe, buf);
+			int error = ops->confirm(pipe, buf);
 			if (error)
 				goto out;
 
-			iov_fault_in_pages_read(iov, chars);
-redo1:
-			if (atomic)
-				addr = kmap_atomic(buf->page);
-			else
-				addr = kmap(buf->page);
-			error = pipe_iov_copy_from_user(offset + addr, iov,
-							chars, atomic);
-			if (atomic)
-				kunmap_atomic(addr);
-			else
-				kunmap(buf->page);
-			ret = error;
-			do_wakeup = 1;
-			if (error) {
-				if (atomic) {
-					atomic = 0;
-					goto redo1;
-				}
+			ret = copy_page_from_iter(buf->page, offset, chars, from);
+			if (unlikely(ret < chars)) {
+				error = -EFAULT;
 				goto out;
 			}
+			do_wakeup = 1;
 			buf->len += chars;
-			total_len -= chars;
 			ret = chars;
-			if (!total_len)
+			if (!iov_iter_count(from))
 				goto out;
 		}
 	}
@@ -466,8 +398,7 @@ redo1:
 			int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
 			struct pipe_buffer *buf = pipe->bufs + newbuf;
 			struct page *page = pipe->tmp_page;
-			char *src;
-			int error, atomic = 1;
+			int copied;
 
 			if (!page) {
 				page = alloc_page(GFP_HIGHUSER);
@@ -483,40 +414,19 @@ redo1:
 			 * FIXME! Is this really true?
 			 */
 			do_wakeup = 1;
-			chars = PAGE_SIZE;
-			if (chars > total_len)
-				chars = total_len;
-
-			iov_fault_in_pages_read(iov, chars);
-redo2:
-			if (atomic)
-				src = kmap_atomic(page);
-			else
-				src = kmap(page);
-
-			error = pipe_iov_copy_from_user(src, iov, chars,
-							atomic);
-			if (atomic)
-				kunmap_atomic(src);
-			else
-				kunmap(page);
-
-			if (unlikely(error)) {
-				if (atomic) {
-					atomic = 0;
-					goto redo2;
-				}
+			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
+			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
 				if (!ret)
-					ret = error;
+					ret = -EFAULT;
 				break;
 			}
-			ret += chars;
+			ret += copied;
 
 			/* Insert it into the buffer array */
 			buf->page = page;
 			buf->ops = &anon_pipe_buf_ops;
 			buf->offset = 0;
-			buf->len = chars;
+			buf->len = copied;
 			buf->flags = 0;
 			if (is_packetized(filp)) {
 				buf->ops = &packet_pipe_buf_ops;
@@ -525,8 +435,7 @@ redo2:
 			pipe->nrbufs = ++bufs;
 			pipe->tmp_page = NULL;
 
-			total_len -= chars;
-			if (!total_len)
+			if (!iov_iter_count(from))
 				break;
 		}
 		if (bufs < pipe->buffers)
@@ -1040,8 +949,8 @@ const struct file_operations pipefifo_fops = {
 	.llseek		= no_llseek,
 	.read		= new_sync_read,
 	.read_iter	= pipe_read,
-	.write		= do_sync_write,
-	.aio_write	= pipe_write,
+	.write		= new_sync_write,
+	.write_iter	= pipe_write,
 	.poll		= pipe_poll,
 	.unlocked_ioctl	= pipe_ioctl,
 	.release	= pipe_release,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 532f59d0adbb..66012352d333 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -68,6 +68,8 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index a5c691c1a283..081e3273085b 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -82,6 +82,84 @@ done:
 }
 EXPORT_SYMBOL(copy_page_to_iter);
 
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	size_t skip, copy, left, wanted;
+	const struct iovec *iov;
+	char __user *buf;
+	void *kaddr, *to;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	iov = i->iov;
+	skip = i->iov_offset;
+	buf = iov->iov_base + skip;
+	copy = min(bytes, iov->iov_len - skip);
+
+	if (!fault_in_pages_readable(buf, copy)) {
+		kaddr = kmap_atomic(page);
+		to = kaddr + offset;
+
+		/* first chunk, usually the only one */
+		left = __copy_from_user_inatomic(to, buf, copy);
+		copy -= left;
+		skip += copy;
+		to += copy;
+		bytes -= copy;
+
+		while (unlikely(!left && bytes)) {
+			iov++;
+			buf = iov->iov_base;
+			copy = min(bytes, iov->iov_len);
+			left = __copy_from_user_inatomic(to, buf, copy);
+			copy -= left;
+			skip = copy;
+			to += copy;
+			bytes -= copy;
+		}
+		if (likely(!bytes)) {
+			kunmap_atomic(kaddr);
+			goto done;
+		}
+		offset = to - kaddr;
+		buf += copy;
+		kunmap_atomic(kaddr);
+		copy = min(bytes, iov->iov_len - skip);
+	}
+	/* Too bad - revert to non-atomic kmap */
+	kaddr = kmap(page);
+	to = kaddr + offset;
+	left = __copy_from_user(to, buf, copy);
+	copy -= left;
+	skip += copy;
+	to += copy;
+	bytes -= copy;
+	while (unlikely(!left && bytes)) {
+		iov++;
+		buf = iov->iov_base;
+		copy = min(bytes, iov->iov_len);
+		left = __copy_from_user(to, buf, copy);
+		copy -= left;
+		skip = copy;
+		to += copy;
+		bytes -= copy;
+	}
+	kunmap(page);
+done:
+	i->count -= wanted - bytes;
+	i->nr_segs -= iov - i->iov;
+	i->iov = iov;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+EXPORT_SYMBOL(copy_page_from_iter);
+
 static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
 {
-- 
cgit v1.2.3-71-gd317


From 2b777c9dd9ebbb2f8b6818d454cc5e6d7c1e3c8b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Apr 2014 22:31:22 -0400
Subject: ceph_sync_read: stop poking into iov_iter guts

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ceph/file.c               | 46 +++++++++++++++++---------------------------
 include/linux/ceph/libceph.h |  2 --
 net/ceph/pagevec.c           | 35 ++++-----------------------------
 3 files changed, 22 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index c9a24ba98c9a..672b0fedb17b 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -418,7 +418,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
 	struct page **pages;
 	u64 off = iocb->ki_pos;
 	int num_pages, ret;
-	size_t len = i->count;
+	size_t len = iov_iter_count(i);
 
 	dout("sync_read on file %p %llu~%u %s\n", file, off,
 	     (unsigned)len,
@@ -436,25 +436,26 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
 
 	if (file->f_flags & O_DIRECT) {
 		while (iov_iter_count(i)) {
-			void __user *data = i->iov[0].iov_base + i->iov_offset;
-			size_t len = i->iov[0].iov_len - i->iov_offset;
+			size_t start;
+			ssize_t n;
 
-			num_pages = calc_pages_for((unsigned long)data, len);
-			pages = ceph_get_direct_page_vector(data,
-							    num_pages, true);
-			if (IS_ERR(pages))
-				return PTR_ERR(pages);
+			n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start);
+			if (n < 0)
+				return n;
 
-			ret = striped_read(inode, off, len,
+			num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE;
+
+			ret = striped_read(inode, off, n,
 					   pages, num_pages, checkeof,
-					   1, (unsigned long)data & ~PAGE_MASK);
+					   1, start);
+
 			ceph_put_page_vector(pages, num_pages, true);
 
 			if (ret <= 0)
 				break;
 			off += ret;
 			iov_iter_advance(i, ret);
-			if (ret < len)
+			if (ret < n)
 				break;
 		}
 	} else {
@@ -466,25 +467,14 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
 					num_pages, checkeof, 0, 0);
 		if (ret > 0) {
 			int l, k = 0;
-			size_t left = len = ret;
+			size_t left = ret;
 
 			while (left) {
-				void __user *data = i->iov[0].iov_base
-							+ i->iov_offset;
-				l = min(i->iov[0].iov_len - i->iov_offset,
-					left);
-
-				ret = ceph_copy_page_vector_to_user(&pages[k],
-								    data, off,
-								    l);
-				if (ret > 0) {
-					iov_iter_advance(i, ret);
-					left -= ret;
-					off += ret;
-					k = calc_pages_for(iocb->ki_pos,
-							   len - left + 1) - 1;
-					BUG_ON(k >= num_pages && left);
-				} else
+				int copy = min_t(size_t, PAGE_SIZE, left);
+				l = copy_page_to_iter(pages[k++], 0, copy, i);
+				off += l;
+				left -= l;
+				if (l < copy)
 					break;
 			}
 		}
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 2f49aa4c4f7f..279b0afac1c1 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -222,8 +222,6 @@ extern void ceph_copy_to_page_vector(struct page **pages,
 extern void ceph_copy_from_page_vector(struct page **pages,
 				    void *data,
 				    loff_t off, size_t len);
-extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data,
-				    loff_t off, size_t len);
 extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
 
 
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 815a2249cfa9..555013034f7a 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -53,7 +53,10 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
 			set_page_dirty_lock(pages[i]);
 		put_page(pages[i]);
 	}
-	kfree(pages);
+	if (is_vmalloc_addr(pages))
+		vfree(pages);
+	else
+		kfree(pages);
 }
 EXPORT_SYMBOL(ceph_put_page_vector);
 
@@ -164,36 +167,6 @@ void ceph_copy_from_page_vector(struct page **pages,
 }
 EXPORT_SYMBOL(ceph_copy_from_page_vector);
 
-/*
- * copy user data from a page vector into a user pointer
- */
-int ceph_copy_page_vector_to_user(struct page **pages,
-					 void __user *data,
-					 loff_t off, size_t len)
-{
-	int i = 0;
-	int po = off & ~PAGE_CACHE_MASK;
-	int left = len;
-	int l, bad;
-
-	while (left > 0) {
-		l = min_t(int, left, PAGE_CACHE_SIZE-po);
-		bad = copy_to_user(data, page_address(pages[i]) + po, l);
-		if (bad == l)
-			return -EFAULT;
-		data += l - bad;
-		left -= l - bad;
-		if (po) {
-			po += l - bad;
-			if (po == PAGE_CACHE_SIZE)
-				po = 0;
-		}
-		i++;
-	}
-	return len;
-}
-EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
-
 /*
  * Zero an extent within a page vector.  Offset is relative to the
  * start of the first page.
-- 
cgit v1.2.3-71-gd317


From b42b15fdad3ebb790250041d1517acebb9bd56d9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Apr 2014 12:15:19 -0400
Subject: lustre: get rid of messing with iovecs

* switch to ->read_iter/->write_iter
* keep a pointer to iov_iter instead of iov/nr_segs
* do not modify iovecs; use iov_iter_truncate()/iov_iter_advance() and
a new primitive - iov_iter_reexpand() (expand previously truncated
iterator) istead.
* (racy) check for lustre VMAs intersecting with iovecs kept for now as
for_each_iov() loop.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/lustre/lustre/include/lclient.h    |  11 +--
 drivers/staging/lustre/lustre/lclient/lcommon_cl.c |  48 +---------
 drivers/staging/lustre/lustre/llite/file.c         | 106 ++++-----------------
 .../staging/lustre/lustre/llite/llite_internal.h   |   3 +-
 drivers/staging/lustre/lustre/llite/rw.c           |   3 +-
 drivers/staging/lustre/lustre/llite/vvp_io.c       |  29 +++---
 include/linux/uio.h                                |   9 ++
 7 files changed, 46 insertions(+), 163 deletions(-)

(limited to 'include')

diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h
index 827209ea6bd0..386a36c00f57 100644
--- a/drivers/staging/lustre/lustre/include/lclient.h
+++ b/drivers/staging/lustre/lustre/include/lclient.h
@@ -82,16 +82,7 @@ struct ccc_io {
 	/**
 	 * I/O vector information to or from which read/write is going.
 	 */
-	struct iovec *cui_iov;
-	unsigned long cui_nrsegs;
-	/**
-	 * Total iov count for left IO.
-	 */
-	unsigned long cui_tot_nrsegs;
-	/**
-	 * Old length for iov that was truncated partially.
-	 */
-	size_t cui_iov_olen;
+	struct iov_iter *cui_iter;
 	/**
 	 * Total size for the left IO.
 	 */
diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
index 6907a16dbbd1..a07d5156bc50 100644
--- a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
@@ -721,31 +721,12 @@ int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
 void ccc_io_update_iov(const struct lu_env *env,
 		       struct ccc_io *cio, struct cl_io *io)
 {
-	int i;
 	size_t size = io->u.ci_rw.crw_count;
 
-	cio->cui_iov_olen = 0;
-	if (!cl_is_normalio(env, io) || cio->cui_tot_nrsegs == 0)
+	if (!cl_is_normalio(env, io) || cio->cui_iter == NULL)
 		return;
 
-	for (i = 0; i < cio->cui_tot_nrsegs; i++) {
-		struct iovec *iv = &cio->cui_iov[i];
-
-		if (iv->iov_len < size)
-			size -= iv->iov_len;
-		else {
-			if (iv->iov_len > size) {
-				cio->cui_iov_olen = iv->iov_len;
-				iv->iov_len = size;
-			}
-			break;
-		}
-	}
-
-	cio->cui_nrsegs = i + 1;
-	LASSERTF(cio->cui_tot_nrsegs >= cio->cui_nrsegs,
-		 "tot_nrsegs: %lu, nrsegs: %lu\n",
-		 cio->cui_tot_nrsegs, cio->cui_nrsegs);
+	iov_iter_truncate(cio->cui_iter, size);
 }
 
 int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
@@ -776,30 +757,7 @@ void ccc_io_advance(const struct lu_env *env,
 	if (!cl_is_normalio(env, io))
 		return;
 
-	LASSERT(cio->cui_tot_nrsegs >= cio->cui_nrsegs);
-	LASSERT(cio->cui_tot_count  >= nob);
-
-	cio->cui_iov	+= cio->cui_nrsegs;
-	cio->cui_tot_nrsegs -= cio->cui_nrsegs;
-	cio->cui_tot_count  -= nob;
-
-	/* update the iov */
-	if (cio->cui_iov_olen > 0) {
-		struct iovec *iv;
-
-		cio->cui_iov--;
-		cio->cui_tot_nrsegs++;
-		iv = &cio->cui_iov[0];
-		if (io->ci_continue) {
-			iv->iov_base += iv->iov_len;
-			LASSERT(cio->cui_iov_olen > iv->iov_len);
-			iv->iov_len = cio->cui_iov_olen - iv->iov_len;
-		} else {
-			/* restore the iov_len, in case of restart io. */
-			iv->iov_len = cio->cui_iov_olen;
-		}
-		cio->cui_iov_olen = 0;
-	}
+	iov_iter_reexpand(cio->cui_iter, cio->cui_tot_count  -= nob);
 }
 
 /**
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index 220bd8390a84..3efda2540d29 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -1105,9 +1105,7 @@ restart:
 
 		switch (vio->cui_io_subtype) {
 		case IO_NORMAL:
-			cio->cui_iov = args->u.normal.via_iov;
-			cio->cui_nrsegs = args->u.normal.via_nrsegs;
-			cio->cui_tot_nrsegs = cio->cui_nrsegs;
+			cio->cui_iter = args->u.normal.via_iter;
 			cio->cui_iocb = args->u.normal.via_iocb;
 			if ((iot == CIT_WRITE) &&
 			    !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
@@ -1171,56 +1169,23 @@ out:
 	return result;
 }
 
-static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos)
+static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct lu_env      *env;
 	struct vvp_io_args *args;
-	size_t	      count = 0;
 	ssize_t	     result;
 	int		 refcheck;
 
-	count = iov_length(iov, nr_segs);
-
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
 	args = vvp_env_args(env, IO_NORMAL);
-	args->u.normal.via_iov = (struct iovec *)iov;
-	args->u.normal.via_nrsegs = nr_segs;
+	args->u.normal.via_iter = to;
 	args->u.normal.via_iocb = iocb;
 
 	result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
-				    &iocb->ki_pos, count);
-	cl_env_put(env, &refcheck);
-	return result;
-}
-
-static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
-			    loff_t *ppos)
-{
-	struct lu_env *env;
-	struct iovec  *local_iov;
-	struct kiocb  *kiocb;
-	ssize_t	result;
-	int	    refcheck;
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	local_iov = &vvp_env_info(env)->vti_local_iov;
-	kiocb = &vvp_env_info(env)->vti_kiocb;
-	local_iov->iov_base = (void __user *)buf;
-	local_iov->iov_len = count;
-	init_sync_kiocb(kiocb, file);
-	kiocb->ki_pos = *ppos;
-	kiocb->ki_nbytes = count;
-
-	result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
-	*ppos = kiocb->ki_pos;
-
+				    &iocb->ki_pos, iov_iter_count(to));
 	cl_env_put(env, &refcheck);
 	return result;
 }
@@ -1228,12 +1193,10 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
 /*
  * Write to a file (through the page cache).
  */
-static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				 unsigned long nr_segs, loff_t pos)
+static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct lu_env      *env;
 	struct vvp_io_args *args;
-	size_t	      count = iov_length(iov, nr_segs);
 	ssize_t	     result;
 	int		 refcheck;
 
@@ -1242,46 +1205,15 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		return PTR_ERR(env);
 
 	args = vvp_env_args(env, IO_NORMAL);
-	args->u.normal.via_iov = (struct iovec *)iov;
-	args->u.normal.via_nrsegs = nr_segs;
+	args->u.normal.via_iter = from;
 	args->u.normal.via_iocb = iocb;
 
 	result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
-				  &iocb->ki_pos, count);
+				  &iocb->ki_pos, iov_iter_count(from));
 	cl_env_put(env, &refcheck);
 	return result;
 }
 
-static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
-			     loff_t *ppos)
-{
-	struct lu_env *env;
-	struct iovec  *local_iov;
-	struct kiocb  *kiocb;
-	ssize_t	result;
-	int	    refcheck;
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	local_iov = &vvp_env_info(env)->vti_local_iov;
-	kiocb = &vvp_env_info(env)->vti_kiocb;
-	local_iov->iov_base = (void __user *)buf;
-	local_iov->iov_len = count;
-	init_sync_kiocb(kiocb, file);
-	kiocb->ki_pos = *ppos;
-	kiocb->ki_nbytes = count;
-
-	result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
-	*ppos = kiocb->ki_pos;
-
-	cl_env_put(env, &refcheck);
-	return result;
-}
-
-
-
 /*
  * Send file content (through pagecache) somewhere with helper
  */
@@ -3133,10 +3065,10 @@ int ll_inode_permission(struct inode *inode, int mask)
 
 /* -o localflock - only provides locally consistent flock locks */
 struct file_operations ll_file_operations = {
-	.read	   = ll_file_read,
-	.aio_read = ll_file_aio_read,
-	.write	  = ll_file_write,
-	.aio_write = ll_file_aio_write,
+	.read	   = new_sync_read,
+	.read_iter = ll_file_read_iter,
+	.write	  = new_sync_write,
+	.write_iter = ll_file_write_iter,
 	.unlocked_ioctl = ll_file_ioctl,
 	.open	   = ll_file_open,
 	.release	= ll_file_release,
@@ -3148,10 +3080,10 @@ struct file_operations ll_file_operations = {
 };
 
 struct file_operations ll_file_operations_flock = {
-	.read	   = ll_file_read,
-	.aio_read    = ll_file_aio_read,
-	.write	  = ll_file_write,
-	.aio_write   = ll_file_aio_write,
+	.read	   = new_sync_read,
+	.read_iter    = ll_file_read_iter,
+	.write	  = new_sync_write,
+	.write_iter   = ll_file_write_iter,
 	.unlocked_ioctl = ll_file_ioctl,
 	.open	   = ll_file_open,
 	.release	= ll_file_release,
@@ -3166,10 +3098,10 @@ struct file_operations ll_file_operations_flock = {
 
 /* These are for -o noflock - to return ENOSYS on flock calls */
 struct file_operations ll_file_operations_noflock = {
-	.read	   = ll_file_read,
-	.aio_read    = ll_file_aio_read,
-	.write	  = ll_file_write,
-	.aio_write   = ll_file_aio_write,
+	.read	   = new_sync_read,
+	.read_iter    = ll_file_read_iter,
+	.write	  = new_sync_write,
+	.write_iter   = ll_file_write_iter,
 	.unlocked_ioctl = ll_file_ioctl,
 	.open	   = ll_file_open,
 	.release	= ll_file_release,
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 69aba0afca41..fbb8650ead34 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -974,8 +974,7 @@ struct vvp_io_args {
 	union {
 		struct {
 			struct kiocb      *via_iocb;
-			struct iovec      *via_iov;
-			unsigned long      via_nrsegs;
+			struct iov_iter   *via_iter;
 		} normal;
 		struct {
 			struct pipe_inode_info  *via_pipe;
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index 416f7a094a6d..b345dfa599f3 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -157,8 +157,7 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
 		result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_CACHE_SIZE);
 		if (result == 0) {
 			cio->cui_fd = LUSTRE_FPRIVATE(file);
-			cio->cui_iov = NULL;
-			cio->cui_nrsegs = 0;
+			cio->cui_iter = NULL;
 			result = cl_io_iter_init(env, io);
 			if (result == 0) {
 				result = cl_io_lock(env, io);
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index c7d70091246e..cfe8c625ae64 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -211,27 +211,26 @@ static int vvp_mmap_locks(const struct lu_env *env,
 	struct cl_lock_descr   *descr = &cti->cti_descr;
 	ldlm_policy_data_t      policy;
 	unsigned long	   addr;
-	unsigned long	   seg;
 	ssize_t		 count;
 	int		     result;
+	struct iov_iter i;
+	struct iovec iov;
 
 	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
 
 	if (!cl_is_normalio(env, io))
 		return 0;
 
-	if (vio->cui_iov == NULL) /* nfs or loop back device write */
+	if (vio->cui_iter == NULL) /* nfs or loop back device write */
 		return 0;
 
 	/* No MM (e.g. NFS)? No vmas too. */
 	if (mm == NULL)
 		return 0;
 
-	for (seg = 0; seg < vio->cui_nrsegs; seg++) {
-		const struct iovec *iv = &vio->cui_iov[seg];
-
-		addr = (unsigned long)iv->iov_base;
-		count = iv->iov_len;
+	iov_for_each(iov, i, *(vio->cui_iter)) {
+		addr = (unsigned long)iov.iov_base;
+		count = iov.iov_len;
 		if (count == 0)
 			continue;
 
@@ -527,9 +526,7 @@ static int vvp_io_read_start(const struct lu_env *env,
 	switch (vio->cui_io_subtype) {
 	case IO_NORMAL:
 		LASSERT(cio->cui_iocb->ki_pos == pos);
-		result = generic_file_aio_read(cio->cui_iocb,
-					       cio->cui_iov, cio->cui_nrsegs,
-					       cio->cui_iocb->ki_pos);
+		result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter);
 		break;
 	case IO_SPLICE:
 		result = generic_file_splice_read(file, &pos,
@@ -595,12 +592,11 @@ static int vvp_io_write_start(const struct lu_env *env,
 
 	CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
 
-	if (cio->cui_iov == NULL) /* from a temp io in ll_cl_init(). */
+	if (cio->cui_iter == NULL) /* from a temp io in ll_cl_init(). */
 		result = 0;
 	else
-		result = generic_file_aio_write(cio->cui_iocb,
-						cio->cui_iov, cio->cui_nrsegs,
-						cio->cui_iocb->ki_pos);
+		result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter);
+
 	if (result > 0) {
 		if (result < cnt)
 			io->ci_continue = 0;
@@ -1162,10 +1158,9 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 		 *  results."  -- Single Unix Spec */
 		if (count == 0)
 			result = 1;
-		else {
+		else
 			cio->cui_tot_count = count;
-			cio->cui_tot_nrsegs = 0;
-		}
+
 		/* for read/write, we store the jobid in the inode, and
 		 * it'll be fetched by osc when building RPC.
 		 *
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 66012352d333..e8a109a75de1 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -90,6 +90,15 @@ static inline void iov_iter_truncate(struct iov_iter *i, size_t count)
 		i->count = count;
 }
 
+/*
+ * reexpand a previously truncated iterator; count must be no more than how much
+ * we had shrunk it.
+ */
+static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
+{
+	i->count = count;
+}
+
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
 
-- 
cgit v1.2.3-71-gd317


From 6abd232274fd652e4a57f486d14e52ffee6f72e9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Apr 2014 14:20:57 -0400
Subject: bury generic_file_aio_{read,write}

no callers left

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h |  2 --
 mm/filemap.c       | 43 ++++++++-----------------------------------
 2 files changed, 8 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 99817c9e665e..a6448849dbce 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2411,10 +2411,8 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
 		unsigned long size, pgoff_t pgoff);
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
-extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
-extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
diff --git a/mm/filemap.c b/mm/filemap.c
index 7dcdb9db710d..2f724e3cdf24 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1663,6 +1663,14 @@ out:
 	return written ? written : error;
 }
 
+/**
+ * generic_file_read_iter - generic filesystem read routine
+ * @iocb:	kernel I/O control block
+ * @iter:	destination for the data read
+ *
+ * This is the "read_iter()" routine for all filesystems
+ * that can use the page cache directly.
+ */
 ssize_t
 generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
@@ -1713,28 +1721,6 @@ out:
 }
 EXPORT_SYMBOL(generic_file_read_iter);
 
-/**
- * generic_file_aio_read - generic filesystem read routine
- * @iocb:	kernel I/O control block
- * @iov:	io vector request
- * @nr_segs:	number of segments in the iovec
- * @pos:	current file position
- *
- * This is the "read()" routine for all filesystems
- * that can use the page cache directly.
- */
-ssize_t
-generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
-{
-	size_t count = iov_length(iov, nr_segs);
-	struct iov_iter i;
-
-	iov_iter_init(&i, READ, iov, nr_segs, count);
-	return generic_file_read_iter(iocb, &i);
-}
-EXPORT_SYMBOL(generic_file_aio_read);
-
 #ifdef CONFIG_MMU
 /**
  * page_cache_read - adds requested page to the page cache if not already there
@@ -2675,19 +2661,6 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 }
 EXPORT_SYMBOL(generic_file_write_iter);
 
-ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
-{
-	size_t count = iov_length(iov, nr_segs);
-	struct iov_iter from;
-
-	BUG_ON(iocb->ki_pos != pos);
-
-	iov_iter_init(&from, WRITE, iov, nr_segs, count);
-	return generic_file_write_iter(iocb, &from);
-}
-EXPORT_SYMBOL(generic_file_aio_write);
-
 /**
  * try_to_release_page() - release old fs-specific metadata on a page
  *
-- 
cgit v1.2.3-71-gd317


From 62a8067a7f35dba2de501c9cb00e4cf36da90bc0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Apr 2014 23:12:29 -0400
Subject: bio_vec-backed iov_iter

New variant of iov_iter - ITER_BVEC in iter->type, backed with
bio_vec array instead of iovec one.  Primitives taught to deal
with such beasts, __swap_write() switched to using that kind
of iov_iter.

Note that bio_vec is just a <page, offset, length> triple - there's
nothing block-specific about it.  I've left the definition where it
was, but took it from under ifdef CONFIG_BLOCK.

Next target: ->splice_write()...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fuse/file.c            |   2 +-
 include/linux/blk_types.h |   4 +-
 include/linux/uio.h       |  14 +-
 mm/iov_iter.c             | 390 ++++++++++++++++++++++++++++++++++++++++++----
 mm/page_io.c              |  19 ++-
 5 files changed, 385 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7fbc803cf51d..b2dae9d1437c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1288,7 +1288,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 	size_t nbytes = 0;  /* # bytes already packed in req */
 
 	/* Special case for kernel I/O: can copy directly into the buffer */
-	if (ii->type & REQ_KERNEL) {
+	if (ii->type & ITER_KVEC) {
 		unsigned long user_addr = fuse_get_user_addr(ii);
 		size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index aa0eaa2d0bd8..86df13b97160 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -5,8 +5,6 @@
 #ifndef __LINUX_BLK_TYPES_H
 #define __LINUX_BLK_TYPES_H
 
-#ifdef CONFIG_BLOCK
-
 #include <linux/types.h>
 
 struct bio_set;
@@ -28,6 +26,8 @@ struct bio_vec {
 	unsigned int	bv_offset;
 };
 
+#ifdef CONFIG_BLOCK
+
 struct bvec_iter {
 	sector_t		bi_sector;	/* device address in 512 byte
 						   sectors */
diff --git a/include/linux/uio.h b/include/linux/uio.h
index e8a109a75de1..e2231e47cec1 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -19,12 +19,21 @@ struct kvec {
 	size_t iov_len;
 };
 
+enum {
+	ITER_IOVEC = 0,
+	ITER_KVEC = 2,
+	ITER_BVEC = 4,
+};
+
 struct iov_iter {
 	int type;
-	const struct iovec *iov;
-	unsigned long nr_segs;
 	size_t iov_offset;
 	size_t count;
+	union {
+		const struct iovec *iov;
+		const struct bio_vec *bvec;
+	};
+	unsigned long nr_segs;
 };
 
 /*
@@ -54,6 +63,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 }
 
 #define iov_for_each(iov, iter, start)				\
+	if (!((start).type & ITER_BVEC))			\
 	for (iter = (start);					\
 	     (iter).count &&					\
 	     ((iov = iov_iter_iovec(&(iter))), 1);		\
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index fcdaaab438b6..7b5dbd1517b5 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -4,7 +4,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
-size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
 	size_t skip, copy, left, wanted;
@@ -84,9 +84,8 @@ done:
 	i->iov_offset = skip;
 	return wanted - bytes;
 }
-EXPORT_SYMBOL(copy_page_to_iter);
 
-size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
 	size_t skip, copy, left, wanted;
@@ -166,7 +165,6 @@ done:
 	i->iov_offset = skip;
 	return wanted - bytes;
 }
-EXPORT_SYMBOL(copy_page_from_iter);
 
 static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
@@ -195,7 +193,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr,
  * were successfully copied.  If a fault is encountered then return the number of
  * bytes which were copied.
  */
-size_t iov_iter_copy_from_user_atomic(struct page *page,
+static size_t copy_from_user_atomic_iovec(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes)
 {
 	char *kaddr;
@@ -215,9 +213,8 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 
 	return copied;
 }
-EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
-void iov_iter_advance(struct iov_iter *i, size_t bytes)
+static void advance_iovec(struct iov_iter *i, size_t bytes)
 {
 	BUG_ON(i->count < bytes);
 
@@ -252,7 +249,6 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
 		i->nr_segs = nr_segs;
 	}
 }
-EXPORT_SYMBOL(iov_iter_advance);
 
 /*
  * Fault in the first iovec of the given iov_iter, to a maximum length
@@ -265,26 +261,16 @@ EXPORT_SYMBOL(iov_iter_advance);
  */
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
-	char __user *buf = i->iov->iov_base + i->iov_offset;
-	bytes = min(bytes, i->iov->iov_len - i->iov_offset);
-	return fault_in_pages_readable(buf, bytes);
+	if (!(i->type & ITER_BVEC)) {
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		bytes = min(bytes, i->iov->iov_len - i->iov_offset);
+		return fault_in_pages_readable(buf, bytes);
+	}
+	return 0;
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
-/*
- * Return the count of just the current iov_iter segment.
- */
-size_t iov_iter_single_seg_count(const struct iov_iter *i)
-{
-	const struct iovec *iov = i->iov;
-	if (i->nr_segs == 1)
-		return i->count;
-	else
-		return min(i->count, iov->iov_len - i->iov_offset);
-}
-EXPORT_SYMBOL(iov_iter_single_seg_count);
-
-unsigned long iov_iter_alignment(const struct iov_iter *i)
+static unsigned long alignment_iovec(const struct iov_iter *i)
 {
 	const struct iovec *iov = i->iov;
 	unsigned long res;
@@ -307,7 +293,6 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 	res |= (unsigned long)iov->iov_base | size;
 	return res;
 }
-EXPORT_SYMBOL(iov_iter_alignment);
 
 void iov_iter_init(struct iov_iter *i, int direction,
 			const struct iovec *iov, unsigned long nr_segs,
@@ -315,7 +300,7 @@ void iov_iter_init(struct iov_iter *i, int direction,
 {
 	/* It will get better.  Eventually... */
 	if (segment_eq(get_fs(), KERNEL_DS))
-		direction |= REQ_KERNEL;
+		direction |= ITER_KVEC;
 	i->type = direction;
 	i->iov = iov;
 	i->nr_segs = nr_segs;
@@ -324,7 +309,7 @@ void iov_iter_init(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_init);
 
-ssize_t iov_iter_get_pages(struct iov_iter *i,
+static ssize_t get_pages_iovec(struct iov_iter *i,
 		   struct page **pages, size_t maxsize,
 		   size_t *start)
 {
@@ -349,9 +334,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 		return res;
 	return (res == n ? len : res * PAGE_SIZE) - *start;
 }
-EXPORT_SYMBOL(iov_iter_get_pages);
 
-ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+static ssize_t get_pages_alloc_iovec(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   size_t *start)
 {
@@ -387,9 +371,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 	*pages = p;
 	return (res == n ? len : res * PAGE_SIZE) - *start;
 }
-EXPORT_SYMBOL(iov_iter_get_pages_alloc);
 
-int iov_iter_npages(const struct iov_iter *i, int maxpages)
+static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages)
 {
 	size_t offset = i->iov_offset;
 	size_t size = i->count;
@@ -414,4 +397,347 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 	}
 	return min(npages, maxpages);
 }
+
+static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
+{
+	char *from = kmap_atomic(page);
+	memcpy(to, from + offset, len);
+	kunmap_atomic(from);
+}
+
+static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
+{
+	char *to = kmap_atomic(page);
+	memcpy(to + offset, from, len);
+	kunmap_atomic(to);
+}
+
+static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	size_t skip, copy, wanted;
+	const struct bio_vec *bvec;
+	void *kaddr, *from;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	bvec = i->bvec;
+	skip = i->iov_offset;
+	copy = min_t(size_t, bytes, bvec->bv_len - skip);
+
+	kaddr = kmap_atomic(page);
+	from = kaddr + offset;
+	memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy);
+	skip += copy;
+	from += copy;
+	bytes -= copy;
+	while (bytes) {
+		bvec++;
+		copy = min(bytes, (size_t)bvec->bv_len);
+		memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, copy);
+		skip = copy;
+		from += copy;
+		bytes -= copy;
+	}
+	kunmap_atomic(kaddr);
+	if (skip == bvec->bv_len) {
+		bvec++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= bvec - i->bvec;
+	i->bvec = bvec;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
+static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	size_t skip, copy, wanted;
+	const struct bio_vec *bvec;
+	void *kaddr, *to;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	bvec = i->bvec;
+	skip = i->iov_offset;
+
+	kaddr = kmap_atomic(page);
+
+	to = kaddr + offset;
+
+	copy = min(bytes, bvec->bv_len - skip);
+
+	memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy);
+
+	to += copy;
+	skip += copy;
+	bytes -= copy;
+
+	while (bytes) {
+		bvec++;
+		copy = min(bytes, (size_t)bvec->bv_len);
+		memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, copy);
+		skip = copy;
+		to += copy;
+		bytes -= copy;
+	}
+	kunmap_atomic(kaddr);
+	if (skip == bvec->bv_len) {
+		bvec++;
+		skip = 0;
+	}
+	i->count -= wanted;
+	i->nr_segs -= bvec - i->bvec;
+	i->bvec = bvec;
+	i->iov_offset = skip;
+	return wanted;
+}
+
+static size_t copy_from_user_bvec(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	char *kaddr;
+	size_t left;
+	const struct bio_vec *bvec;
+	size_t base = i->iov_offset;
+
+	kaddr = kmap_atomic(page);
+	for (left = bytes, bvec = i->bvec; left; bvec++, base = 0) {
+		size_t copy = min(left, bvec->bv_len - base);
+		if (!bvec->bv_len)
+			continue;
+		memcpy_from_page(kaddr + offset, bvec->bv_page,
+				 bvec->bv_offset + base, copy);
+		offset += copy;
+		left -= copy;
+	}
+	kunmap_atomic(kaddr);
+	return bytes;
+}
+
+static void advance_bvec(struct iov_iter *i, size_t bytes)
+{
+	BUG_ON(i->count < bytes);
+
+	if (likely(i->nr_segs == 1)) {
+		i->iov_offset += bytes;
+		i->count -= bytes;
+	} else {
+		const struct bio_vec *bvec = i->bvec;
+		size_t base = i->iov_offset;
+		unsigned long nr_segs = i->nr_segs;
+
+		/*
+		 * The !iov->iov_len check ensures we skip over unlikely
+		 * zero-length segments (without overruning the iovec).
+		 */
+		while (bytes || unlikely(i->count && !bvec->bv_len)) {
+			int copy;
+
+			copy = min(bytes, bvec->bv_len - base);
+			BUG_ON(!i->count || i->count < copy);
+			i->count -= copy;
+			bytes -= copy;
+			base += copy;
+			if (bvec->bv_len == base) {
+				bvec++;
+				nr_segs--;
+				base = 0;
+			}
+		}
+		i->bvec = bvec;
+		i->iov_offset = base;
+		i->nr_segs = nr_segs;
+	}
+}
+
+static unsigned long alignment_bvec(const struct iov_iter *i)
+{
+	const struct bio_vec *bvec = i->bvec;
+	unsigned long res;
+	size_t size = i->count;
+	size_t n;
+
+	if (!size)
+		return 0;
+
+	res = bvec->bv_offset + i->iov_offset;
+	n = bvec->bv_len - i->iov_offset;
+	if (n >= size)
+		return res | size;
+	size -= n;
+	res |= n;
+	while (size > (++bvec)->bv_len) {
+		res |= bvec->bv_offset | bvec->bv_len;
+		size -= bvec->bv_len;
+	}
+	res |= bvec->bv_offset | size;
+	return res;
+}
+
+static ssize_t get_pages_bvec(struct iov_iter *i,
+		   struct page **pages, size_t maxsize,
+		   size_t *start)
+{
+	const struct bio_vec *bvec = i->bvec;
+	size_t len = bvec->bv_len - i->iov_offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	*start = bvec->bv_offset + i->iov_offset;
+
+	get_page(*pages = bvec->bv_page);
+
+	return len;
+}
+
+static ssize_t get_pages_alloc_bvec(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	const struct bio_vec *bvec = i->bvec;
+	size_t len = bvec->bv_len - i->iov_offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	*start = bvec->bv_offset + i->iov_offset;
+
+	*pages = kmalloc(sizeof(struct page *), GFP_KERNEL);
+	if (!*pages)
+		return -ENOMEM;
+
+	get_page(**pages = bvec->bv_page);
+
+	return len;
+}
+
+static int iov_iter_npages_bvec(const struct iov_iter *i, int maxpages)
+{
+	size_t offset = i->iov_offset;
+	size_t size = i->count;
+	const struct bio_vec *bvec = i->bvec;
+	int npages = 0;
+	int n;
+
+	for (n = 0; size && n < i->nr_segs; n++, bvec++) {
+		size_t len = bvec->bv_len - offset;
+		offset = 0;
+		if (unlikely(!len))	/* empty segment */
+			continue;
+		if (len > size)
+			len = size;
+		npages++;
+		if (npages >= maxpages)	/* don't bother going further */
+			return maxpages;
+		size -= len;
+		offset = 0;
+	}
+	return min(npages, maxpages);
+}
+
+size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return copy_page_to_iter_bvec(page, offset, bytes, i);
+	else
+		return copy_page_to_iter_iovec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_to_iter);
+
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return copy_page_from_iter_bvec(page, offset, bytes, i);
+	else
+		return copy_page_from_iter_iovec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_from_iter);
+
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	if (i->type & ITER_BVEC)
+		return copy_from_user_bvec(page, i, offset, bytes);
+	else
+		return copy_from_user_atomic_iovec(page, i, offset, bytes);
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
+
+void iov_iter_advance(struct iov_iter *i, size_t size)
+{
+	if (i->type & ITER_BVEC)
+		advance_bvec(i, size);
+	else
+		advance_iovec(i, size);
+}
+EXPORT_SYMBOL(iov_iter_advance);
+
+/*
+ * Return the count of just the current iov_iter segment.
+ */
+size_t iov_iter_single_seg_count(const struct iov_iter *i)
+{
+	if (i->nr_segs == 1)
+		return i->count;
+	else if (i->type & ITER_BVEC)
+		return min(i->count, i->iov->iov_len - i->iov_offset);
+	else
+		return min(i->count, i->bvec->bv_len - i->iov_offset);
+}
+EXPORT_SYMBOL(iov_iter_single_seg_count);
+
+unsigned long iov_iter_alignment(const struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return alignment_bvec(i);
+	else
+		return alignment_iovec(i);
+}
+EXPORT_SYMBOL(iov_iter_alignment);
+
+ssize_t iov_iter_get_pages(struct iov_iter *i,
+		   struct page **pages, size_t maxsize,
+		   size_t *start)
+{
+	if (i->type & ITER_BVEC)
+		return get_pages_bvec(i, pages, maxsize, start);
+	else
+		return get_pages_iovec(i, pages, maxsize, start);
+}
+EXPORT_SYMBOL(iov_iter_get_pages);
+
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	if (i->type & ITER_BVEC)
+		return get_pages_alloc_bvec(i, pages, maxsize, start);
+	else
+		return get_pages_alloc_iovec(i, pages, maxsize, start);
+}
+EXPORT_SYMBOL(iov_iter_get_pages_alloc);
+
+int iov_iter_npages(const struct iov_iter *i, int maxpages)
+{
+	if (i->type & ITER_BVEC)
+		return iov_iter_npages_bvec(i, maxpages);
+	else
+		return iov_iter_npages_iovec(i, maxpages);
+}
 EXPORT_SYMBOL(iov_iter_npages);
diff --git a/mm/page_io.c b/mm/page_io.c
index 313bfedb75d1..33bb38c4aad7 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -259,23 +259,28 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		struct kiocb kiocb;
 		struct file *swap_file = sis->swap_file;
 		struct address_space *mapping = swap_file->f_mapping;
-		struct iovec iov = {
-			.iov_base = kmap(page),
-			.iov_len  = PAGE_SIZE,
+		struct bio_vec bv = {
+			.bv_page = page,
+			.bv_len  = PAGE_SIZE,
+			.bv_offset = 0
+		};
+		struct iov_iter from = {
+			.type = ITER_BVEC | WRITE,
+			.count = PAGE_SIZE,
+			.iov_offset = 0,
+			.nr_segs = 1,
+			.bvec = &bv
 		};
-		struct iov_iter from;
 
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
 		kiocb.ki_nbytes = PAGE_SIZE;
-		iov_iter_init(&from, KERNEL_WRITE, &iov, 1, PAGE_SIZE);
 
 		set_page_writeback(page);
 		unlock_page(page);
-		ret = mapping->a_ops->direct_IO(KERNEL_WRITE,
+		ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE,
 						&kiocb, &from,
 						kiocb.ki_pos);
-		kunmap(page);
 		if (ret == PAGE_SIZE) {
 			count_vm_event(PSWPOUT);
 			ret = 0;
-- 
cgit v1.2.3-71-gd317


From 9c69de4c94fcb11db919160d5fa0b48f13d1757a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2014 19:37:13 +0200
Subject: nfsd: remove <linux/nfsd/nfsfh.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The only real user of this header is fs/nfsd/nfsfh.h, so merge the
two.  Various lockѕ source files used it to indirectly get other
sunrpc or nfs headers, so fix those up.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/clnt4xdr.c         |  2 ++
 fs/lockd/clntxdr.c          |  2 ++
 fs/lockd/svcsubs.c          |  2 +-
 fs/lockd/xdr.c              |  2 ++
 fs/nfsd/nfsd.h              |  1 +
 fs/nfsd/nfsfh.h             | 59 ++++++++++++++++++++++++++++++++++++++----
 fs/nfsd/state.h             |  1 -
 include/linux/lockd/lockd.h |  2 +-
 include/linux/nfsd/export.h |  6 ++++-
 include/linux/nfsd/nfsfh.h  | 63 ---------------------------------------------
 10 files changed, 68 insertions(+), 72 deletions(-)
 delete mode 100644 include/linux/nfsd/nfsfh.h

(limited to 'include')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 00ec0b9c94d1..d3e40db28930 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -14,6 +14,8 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
 
+#include <uapi/linux/nfs3.h>
+
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
 #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 9a55797a1cd4..3e9f7874b975 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -15,6 +15,8 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
 
+#include <uapi/linux/nfs2.h>
+
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
 #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dc5c75930f0f..7ec6b1074d8c 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -14,12 +14,12 @@
 #include <linux/mutex.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/addr.h>
-#include <linux/nfsd/nfsfh.h>
 #include <linux/nfsd/export.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
 #include <linux/module.h>
 #include <linux/mount.h>
+#include <uapi/linux/nfs2.h>
 
 #define NLMDBG_FACILITY		NLMDBG_SVCSUBS
 
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 964666c68a86..9340e7e10ef6 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -16,6 +16,8 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
 
+#include <uapi/linux/nfs2.h>
+
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
 
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 479eb681c27c..7d5c310678d0 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -15,6 +15,7 @@
 #include <linux/nfs2.h>
 #include <linux/nfs3.h>
 #include <linux/nfs4.h>
+#include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/msg_prot.h>
 
 #include <linux/nfsd/debug.h>
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index ad67964d0bb1..2e89e70ac15c 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -1,9 +1,58 @@
-/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
+/*
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ *
+ * This file describes the layout of the file handles as passed
+ * over the wire.
+ */
+#ifndef _LINUX_NFSD_NFSFH_H
+#define _LINUX_NFSD_NFSFH_H
+
+#include <linux/sunrpc/svc.h>
+#include <uapi/linux/nfsd/nfsfh.h>
+
+static inline __u32 ino_t_to_u32(ino_t ino)
+{
+	return (__u32) ino;
+}
+
+static inline ino_t u32_to_ino_t(__u32 uino)
+{
+	return (ino_t) uino;
+}
 
-#ifndef _LINUX_NFSD_FH_INT_H
-#define _LINUX_NFSD_FH_INT_H
+/*
+ * This is the internal representation of an NFS handle used in knfsd.
+ * pre_mtime/post_version will be used to support wcc_attr's in NFSv3.
+ */
+typedef struct svc_fh {
+	struct knfsd_fh		fh_handle;	/* FH data */
+	struct dentry *		fh_dentry;	/* validated dentry */
+	struct svc_export *	fh_export;	/* export pointer */
+	int			fh_maxsize;	/* max size for fh_handle */
+
+	unsigned char		fh_locked;	/* inode locked by us */
+	unsigned char		fh_want_write;	/* remount protection taken */
+
+#ifdef CONFIG_NFSD_V3
+	unsigned char		fh_post_saved;	/* post-op attrs saved */
+	unsigned char		fh_pre_saved;	/* pre-op attrs saved */
+
+	/* Pre-op attributes saved during fh_lock */
+	__u64			fh_pre_size;	/* size before operation */
+	struct timespec		fh_pre_mtime;	/* mtime before oper */
+	struct timespec		fh_pre_ctime;	/* ctime before oper */
+	/*
+	 * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
+	 *  to find out if it is valid.
+	 */
+	u64			fh_pre_change;
+
+	/* Post-op attributes saved in fh_unlock */
+	struct kstat		fh_post_attr;	/* full attrs after operation */
+	u64			fh_post_change; /* nfsv4 change; see above */
+#endif /* CONFIG_NFSD_V3 */
 
-#include <linux/nfsd/nfsfh.h>
+} svc_fh;
 
 enum nfsd_fsid {
 	FSID_DEV = 0,
@@ -215,4 +264,4 @@ fh_unlock(struct svc_fh *fhp)
 	}
 }
 
-#endif /* _LINUX_NFSD_FH_INT_H */
+#endif /* _LINUX_NFSD_NFSFH_H */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 424d8f5f2317..5b3bbf24097c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -37,7 +37,6 @@
 
 #include <linux/idr.h>
 #include <linux/sunrpc/svc_xprt.h>
-#include <linux/nfsd/nfsfh.h>
 #include "nfsfh.h"
 
 typedef struct {
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index dcaad79f54ed..219d79627c05 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -17,13 +17,13 @@
 #include <linux/fs.h>
 #include <linux/kref.h>
 #include <linux/utsname.h>
-#include <linux/nfsd/nfsfh.h>
 #include <linux/lockd/bind.h>
 #include <linux/lockd/xdr.h>
 #ifdef CONFIG_LOCKD_V4
 #include <linux/lockd/xdr4.h>
 #endif
 #include <linux/lockd/debug.h>
+#include <linux/sunrpc/svc.h>
 
 /*
  * Version string
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 7898c997dfea..b12c4e526ef2 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -9,9 +9,13 @@
 #ifndef NFSD_EXPORT_H
 #define NFSD_EXPORT_H
 
-# include <linux/nfsd/nfsfh.h>
+#include <linux/sunrpc/cache.h>
 #include <uapi/linux/nfsd/export.h>
 
+struct knfsd_fh;
+struct svc_fh;
+struct svc_rqst;
+
 /*
  * FS Locations
  */
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
deleted file mode 100644
index a93593f1fa4e..000000000000
--- a/include/linux/nfsd/nfsfh.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * include/linux/nfsd/nfsfh.h
- *
- * This file describes the layout of the file handles as passed
- * over the wire.
- *
- * Earlier versions of knfsd used to sign file handles using keyed MD5
- * or SHA. I've removed this code, because it doesn't give you more
- * security than blocking external access to port 2049 on your firewall.
- *
- * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef _LINUX_NFSD_FH_H
-#define _LINUX_NFSD_FH_H
-
-# include <linux/sunrpc/svc.h>
-#include <uapi/linux/nfsd/nfsfh.h>
-
-static inline __u32 ino_t_to_u32(ino_t ino)
-{
-	return (__u32) ino;
-}
-
-static inline ino_t u32_to_ino_t(__u32 uino)
-{
-	return (ino_t) uino;
-}
-
-/*
- * This is the internal representation of an NFS handle used in knfsd.
- * pre_mtime/post_version will be used to support wcc_attr's in NFSv3.
- */
-typedef struct svc_fh {
-	struct knfsd_fh		fh_handle;	/* FH data */
-	struct dentry *		fh_dentry;	/* validated dentry */
-	struct svc_export *	fh_export;	/* export pointer */
-	int			fh_maxsize;	/* max size for fh_handle */
-
-	unsigned char		fh_locked;	/* inode locked by us */
-	unsigned char		fh_want_write;	/* remount protection taken */
-
-#ifdef CONFIG_NFSD_V3
-	unsigned char		fh_post_saved;	/* post-op attrs saved */
-	unsigned char		fh_pre_saved;	/* pre-op attrs saved */
-
-	/* Pre-op attributes saved during fh_lock */
-	__u64			fh_pre_size;	/* size before operation */
-	struct timespec		fh_pre_mtime;	/* mtime before oper */
-	struct timespec		fh_pre_ctime;	/* ctime before oper */
-	/*
-	 * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
-	 *  to find out if it is valid.
-	 */
-	u64			fh_pre_change;
-
-	/* Post-op attributes saved in fh_unlock */
-	struct kstat		fh_post_attr;	/* full attrs after operation */
-	u64			fh_post_change; /* nfsv4 change; see above */
-#endif /* CONFIG_NFSD_V3 */
-
-} svc_fh;
-
-#endif /* _LINUX_NFSD_FH_H */
-- 
cgit v1.2.3-71-gd317


From d430e8d530e900c923bf77718d72478b1c280592 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2014 19:37:14 +0200
Subject: nfsd: move <linux/nfsd/export.h> to fs/nfsd

There are no legitimate users outside of fs/nfsd, so move it there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/svcsubs.c          |   1 -
 fs/nfsd/export.h            | 109 ++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfsd.h              |   3 +-
 include/linux/nfsd/export.h | 114 --------------------------------------------
 4 files changed, 111 insertions(+), 116 deletions(-)
 create mode 100644 fs/nfsd/export.h
 delete mode 100644 include/linux/nfsd/export.h

(limited to 'include')

diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 7ec6b1074d8c..b6f3b84b6e99 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -14,7 +14,6 @@
 #include <linux/mutex.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/addr.h>
-#include <linux/nfsd/export.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
 #include <linux/module.h>
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
new file mode 100644
index 000000000000..d7939a62a0ae
--- /dev/null
+++ b/fs/nfsd/export.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ */
+#ifndef NFSD_EXPORT_H
+#define NFSD_EXPORT_H
+
+#include <linux/sunrpc/cache.h>
+#include <uapi/linux/nfsd/export.h>
+
+struct knfsd_fh;
+struct svc_fh;
+struct svc_rqst;
+
+/*
+ * FS Locations
+ */
+
+#define MAX_FS_LOCATIONS	128
+
+struct nfsd4_fs_location {
+	char *hosts; /* colon separated list of hosts */
+	char *path;  /* slash separated list of path components */
+};
+
+struct nfsd4_fs_locations {
+	uint32_t locations_count;
+	struct nfsd4_fs_location *locations;
+/* If we're not actually serving this data ourselves (only providing a
+ * list of replicas that do serve it) then we set "migrated": */
+	int migrated;
+};
+
+/*
+ * We keep an array of pseudoflavors with the export, in order from most
+ * to least preferred.  For the foreseeable future, we don't expect more
+ * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3,
+ * spkm3i, and spkm3p (and using all 8 at once should be rare).
+ */
+#define MAX_SECINFO_LIST	8
+
+struct exp_flavor_info {
+	u32	pseudoflavor;
+	u32	flags;
+};
+
+struct svc_export {
+	struct cache_head	h;
+	struct auth_domain *	ex_client;
+	int			ex_flags;
+	struct path		ex_path;
+	kuid_t			ex_anon_uid;
+	kgid_t			ex_anon_gid;
+	int			ex_fsid;
+	unsigned char *		ex_uuid; /* 16 byte fsid */
+	struct nfsd4_fs_locations ex_fslocs;
+	int			ex_nflavors;
+	struct exp_flavor_info	ex_flavors[MAX_SECINFO_LIST];
+	struct cache_detail	*cd;
+};
+
+/* an "export key" (expkey) maps a filehandlefragement to an
+ * svc_export for a given client.  There can be several per export,
+ * for the different fsid types.
+ */
+struct svc_expkey {
+	struct cache_head	h;
+
+	struct auth_domain *	ek_client;
+	int			ek_fsidtype;
+	u32			ek_fsid[6];
+
+	struct path		ek_path;
+};
+
+#define EX_ISSYNC(exp)		(!((exp)->ex_flags & NFSEXP_ASYNC))
+#define EX_NOHIDE(exp)		((exp)->ex_flags & NFSEXP_NOHIDE)
+#define EX_WGATHER(exp)		((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
+
+int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp);
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
+
+/*
+ * Function declarations
+ */
+int			nfsd_export_init(struct net *);
+void			nfsd_export_shutdown(struct net *);
+void			nfsd_export_flush(struct net *);
+struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
+					     struct path *);
+struct svc_export *	rqst_exp_parent(struct svc_rqst *,
+					struct path *);
+struct svc_export *	rqst_find_fsidzero_export(struct svc_rqst *);
+int			exp_rootfh(struct net *, struct auth_domain *,
+					char *path, struct knfsd_fh *, int maxsize);
+__be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
+__be32			nfserrno(int errno);
+
+static inline void exp_put(struct svc_export *exp)
+{
+	cache_put(&exp->h, exp->cd);
+}
+
+static inline void exp_get(struct svc_export *exp)
+{
+	cache_get(&exp->h);
+}
+struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
+
+#endif /* NFSD_EXPORT_H */
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 7d5c310678d0..72004caad718 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -19,9 +19,10 @@
 #include <linux/sunrpc/msg_prot.h>
 
 #include <linux/nfsd/debug.h>
-#include <linux/nfsd/export.h>
 #include <linux/nfsd/stats.h>
 
+#include "export.h"
+
 /*
  * nfsd version
  */
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
deleted file mode 100644
index b12c4e526ef2..000000000000
--- a/include/linux/nfsd/export.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * include/linux/nfsd/export.h
- * 
- * Public declarations for NFS exports. The definitions for the
- * syscall interface are in nfsctl.h
- *
- * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef NFSD_EXPORT_H
-#define NFSD_EXPORT_H
-
-#include <linux/sunrpc/cache.h>
-#include <uapi/linux/nfsd/export.h>
-
-struct knfsd_fh;
-struct svc_fh;
-struct svc_rqst;
-
-/*
- * FS Locations
- */
-
-#define MAX_FS_LOCATIONS	128
-
-struct nfsd4_fs_location {
-	char *hosts; /* colon separated list of hosts */
-	char *path;  /* slash separated list of path components */
-};
-
-struct nfsd4_fs_locations {
-	uint32_t locations_count;
-	struct nfsd4_fs_location *locations;
-/* If we're not actually serving this data ourselves (only providing a
- * list of replicas that do serve it) then we set "migrated": */
-	int migrated;
-};
-
-/*
- * We keep an array of pseudoflavors with the export, in order from most
- * to least preferred.  For the foreseeable future, we don't expect more
- * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3,
- * spkm3i, and spkm3p (and using all 8 at once should be rare).
- */
-#define MAX_SECINFO_LIST	8
-
-struct exp_flavor_info {
-	u32	pseudoflavor;
-	u32	flags;
-};
-
-struct svc_export {
-	struct cache_head	h;
-	struct auth_domain *	ex_client;
-	int			ex_flags;
-	struct path		ex_path;
-	kuid_t			ex_anon_uid;
-	kgid_t			ex_anon_gid;
-	int			ex_fsid;
-	unsigned char *		ex_uuid; /* 16 byte fsid */
-	struct nfsd4_fs_locations ex_fslocs;
-	int			ex_nflavors;
-	struct exp_flavor_info	ex_flavors[MAX_SECINFO_LIST];
-	struct cache_detail	*cd;
-};
-
-/* an "export key" (expkey) maps a filehandlefragement to an
- * svc_export for a given client.  There can be several per export,
- * for the different fsid types.
- */
-struct svc_expkey {
-	struct cache_head	h;
-
-	struct auth_domain *	ek_client;
-	int			ek_fsidtype;
-	u32			ek_fsid[6];
-
-	struct path		ek_path;
-};
-
-#define EX_ISSYNC(exp)		(!((exp)->ex_flags & NFSEXP_ASYNC))
-#define EX_NOHIDE(exp)		((exp)->ex_flags & NFSEXP_NOHIDE)
-#define EX_WGATHER(exp)		((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
-
-int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp);
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
-
-/*
- * Function declarations
- */
-int			nfsd_export_init(struct net *);
-void			nfsd_export_shutdown(struct net *);
-void			nfsd_export_flush(struct net *);
-struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
-					     struct path *);
-struct svc_export *	rqst_exp_parent(struct svc_rqst *,
-					struct path *);
-struct svc_export *	rqst_find_fsidzero_export(struct svc_rqst *);
-int			exp_rootfh(struct net *, struct auth_domain *,
-					char *path, struct knfsd_fh *, int maxsize);
-__be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
-__be32			nfserrno(int errno);
-
-static inline void exp_put(struct svc_export *exp)
-{
-	cache_put(&exp->h, exp->cd);
-}
-
-static inline void exp_get(struct svc_export *exp)
-{
-	cache_get(&exp->h);
-}
-struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
-
-#endif /* NFSD_EXPORT_H */
-- 
cgit v1.2.3-71-gd317


From 7f94423e8fcc1e0f3416be76d3da0982f586d565 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2014 19:37:15 +0200
Subject: nfsd: move <linux/nfsd/stats.h> to fs/nfsd

There are no legitimate users outside of fs/nfsd, so move it there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfsd.h             |  2 +-
 fs/nfsd/stats.c            |  1 -
 fs/nfsd/stats.h            | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfsd/stats.h | 45 ---------------------------------------------
 4 files changed, 44 insertions(+), 47 deletions(-)
 create mode 100644 fs/nfsd/stats.h
 delete mode 100644 include/linux/nfsd/stats.h

(limited to 'include')

diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 72004caad718..7a07f9c6ee78 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -19,8 +19,8 @@
 #include <linux/sunrpc/msg_prot.h>
 
 #include <linux/nfsd/debug.h>
-#include <linux/nfsd/stats.h>
 
+#include "stats.h"
 #include "export.h"
 
 /*
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 6d4521feb6e3..cd90878a76aa 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -24,7 +24,6 @@
 #include <linux/seq_file.h>
 #include <linux/module.h>
 #include <linux/sunrpc/stats.h>
-#include <linux/nfsd/stats.h>
 #include <net/net_namespace.h>
 
 #include "nfsd.h"
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
new file mode 100644
index 000000000000..a5c944b771c6
--- /dev/null
+++ b/fs/nfsd/stats.h
@@ -0,0 +1,43 @@
+/*
+ * Statistics for NFS server.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+#ifndef _NFSD_STATS_H
+#define _NFSD_STATS_H
+
+#include <uapi/linux/nfsd/stats.h>
+
+
+struct nfsd_stats {
+	unsigned int	rchits;		/* repcache hits */
+	unsigned int	rcmisses;	/* repcache hits */
+	unsigned int	rcnocache;	/* uncached reqs */
+	unsigned int	fh_stale;	/* FH stale error */
+	unsigned int	fh_lookup;	/* dentry cached */
+	unsigned int	fh_anon;	/* anon file dentry returned */
+	unsigned int	fh_nocache_dir;	/* filehandle not found in dcache */
+	unsigned int	fh_nocache_nondir;	/* filehandle not found in dcache */
+	unsigned int	io_read;	/* bytes returned to read requests */
+	unsigned int	io_write;	/* bytes passed in write requests */
+	unsigned int	th_cnt;		/* number of available threads */
+	unsigned int	th_usage[10];	/* number of ticks during which n perdeciles
+					 * of available threads were in use */
+	unsigned int	th_fullcnt;	/* number of times last free thread was used */
+	unsigned int	ra_size;	/* size of ra cache */
+	unsigned int	ra_depth[11];	/* number of times ra entry was found that deep
+					 * in the cache (10percentiles). [10] = not found */
+#ifdef CONFIG_NFSD_V4
+	unsigned int	nfs4_opcount[LAST_NFS4_OP + 1];	/* count of individual nfsv4 operations */
+#endif
+
+};
+
+
+extern struct nfsd_stats	nfsdstats;
+extern struct svc_stat		nfsd_svcstats;
+
+void	nfsd_stat_init(void);
+void	nfsd_stat_shutdown(void);
+
+#endif /* _NFSD_STATS_H */
diff --git a/include/linux/nfsd/stats.h b/include/linux/nfsd/stats.h
deleted file mode 100644
index e75b2544ff12..000000000000
--- a/include/linux/nfsd/stats.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * linux/include/linux/nfsd/stats.h
- *
- * Statistics for NFS server.
- *
- * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef LINUX_NFSD_STATS_H
-#define LINUX_NFSD_STATS_H
-
-#include <uapi/linux/nfsd/stats.h>
-
-
-struct nfsd_stats {
-	unsigned int	rchits;		/* repcache hits */
-	unsigned int	rcmisses;	/* repcache hits */
-	unsigned int	rcnocache;	/* uncached reqs */
-	unsigned int	fh_stale;	/* FH stale error */
-	unsigned int	fh_lookup;	/* dentry cached */
-	unsigned int	fh_anon;	/* anon file dentry returned */
-	unsigned int	fh_nocache_dir;	/* filehandle not found in dcache */
-	unsigned int	fh_nocache_nondir;	/* filehandle not found in dcache */
-	unsigned int	io_read;	/* bytes returned to read requests */
-	unsigned int	io_write;	/* bytes passed in write requests */
-	unsigned int	th_cnt;		/* number of available threads */
-	unsigned int	th_usage[10];	/* number of ticks during which n perdeciles
-					 * of available threads were in use */
-	unsigned int	th_fullcnt;	/* number of times last free thread was used */
-	unsigned int	ra_size;	/* size of ra cache */
-	unsigned int	ra_depth[11];	/* number of times ra entry was found that deep
-					 * in the cache (10percentiles). [10] = not found */
-#ifdef CONFIG_NFSD_V4
-	unsigned int	nfs4_opcount[LAST_NFS4_OP + 1];	/* count of individual nfsv4 operations */
-#endif
-
-};
-
-
-extern struct nfsd_stats	nfsdstats;
-extern struct svc_stat		nfsd_svcstats;
-
-void	nfsd_stat_init(void);
-void	nfsd_stat_shutdown(void);
-
-#endif /* LINUX_NFSD_STATS_H */
-- 
cgit v1.2.3-71-gd317


From 6f226e2ab1b895c8685e868af0a5f797fcaaaf57 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2014 19:37:16 +0200
Subject: nfsd: remove <linux/nfsd/debug.h>

There is almost nothing left it in, just merge it into the only file
that includes it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfsd.h             |  9 ++++++++-
 include/linux/nfsd/debug.h | 19 -------------------
 2 files changed, 8 insertions(+), 20 deletions(-)
 delete mode 100644 include/linux/nfsd/debug.h

(limited to 'include')

diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 7a07f9c6ee78..e9f2fd42d184 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -18,11 +18,18 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/msg_prot.h>
 
-#include <linux/nfsd/debug.h>
+#include <uapi/linux/nfsd/debug.h>
 
 #include "stats.h"
 #include "export.h"
 
+#undef ifdebug
+#ifdef NFSD_DEBUG
+# define ifdebug(flag)		if (nfsd_debug & NFSDDBG_##flag)
+#else
+# define ifdebug(flag)		if (0)
+#endif
+
 /*
  * nfsd version
  */
diff --git a/include/linux/nfsd/debug.h b/include/linux/nfsd/debug.h
deleted file mode 100644
index 19ef8375b577..000000000000
--- a/include/linux/nfsd/debug.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * linux/include/linux/nfsd/debug.h
- *
- * Debugging-related stuff for nfsd
- *
- * Copyright (C) 1995 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef LINUX_NFSD_DEBUG_H
-#define LINUX_NFSD_DEBUG_H
-
-#include <uapi/linux/nfsd/debug.h>
-
-# undef ifdebug
-# ifdef NFSD_DEBUG
-#  define ifdebug(flag)		if (nfsd_debug & NFSDDBG_##flag)
-# else
-#  define ifdebug(flag)		if (0)
-# endif
-#endif /* LINUX_NFSD_DEBUG_H */
-- 
cgit v1.2.3-71-gd317


From 6403eb1f646a49cc92f25c08f8716f8870a4a865 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Sat, 26 Apr 2014 19:59:52 +0800
Subject: f2fs: introduce help macro ADDRS_PER_PAGE()

Introduce help macro ADDRS_PER_PAGE() to get the number of address pointers in
direct node or inode.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/data.c          | 6 ++----
 fs/f2fs/file.c          | 5 +----
 fs/f2fs/recovery.c      | 5 +----
 include/linux/f2fs_fs.h | 3 +++
 4 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0147de7e3973..273fe1631af9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -652,8 +652,7 @@ static int get_data_block(struct inode *inode, sector_t iblock,
 		goto put_out;
 	}
 
-	end_offset = IS_INODE(dn.node_page) ?
-			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
 	bh_result->b_size = (((size_t)1) << blkbits);
 	dn.ofs_in_node++;
 	pgofs++;
@@ -675,8 +674,7 @@ get_next:
 		if (dn.data_blkaddr == NEW_ADDR)
 			goto put_out;
 
-		end_offset = IS_INODE(dn.node_page) ?
-			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
 	}
 
 	if (maxblocks > (bh_result->b_size >> blkbits)) {
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 60e7d5448a1d..bb365c932555 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -288,10 +288,7 @@ int truncate_blocks(struct inode *inode, u64 from)
 		return err;
 	}
 
-	if (IS_INODE(dn.node_page))
-		count = ADDRS_PER_INODE(F2FS_I(inode));
-	else
-		count = ADDRS_PER_BLOCK;
+	count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
 
 	count -= dn.ofs_in_node;
 	f2fs_bug_on(count < 0);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 9eb6487f383d..be1e3e881725 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -301,10 +301,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
 		goto out;
 
 	start = start_bidx_of_node(ofs_of_node(page), fi);
-	if (IS_INODE(page))
-		end = start + ADDRS_PER_INODE(fi);
-	else
-		end = start + ADDRS_PER_BLOCK;
+	end = start + ADDRS_PER_PAGE(page, fi);
 
 	f2fs_lock_op(sbi);
 
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index df53e1753a76..8c03f71307c6 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -146,6 +146,9 @@ struct f2fs_extent {
 #define ADDRS_PER_BLOCK		1018	/* Address Pointers in a Direct Block */
 #define NIDS_PER_BLOCK		1018	/* Node IDs in an Indirect Block */
 
+#define ADDRS_PER_PAGE(page, fi)	\
+	(IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK)
+
 #define	NODE_DIR1_BLOCK		(DEF_ADDRS_PER_INODE + 1)
 #define	NODE_DIR2_BLOCK		(DEF_ADDRS_PER_INODE + 2)
 #define	NODE_IND1_BLOCK		(DEF_ADDRS_PER_INODE + 3)
-- 
cgit v1.2.3-71-gd317


From 62aed044ea4bef36ac3f6885611b013b11c9be2d Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Tue, 6 May 2014 16:46:04 +0800
Subject: f2fs: add a tracepoint for f2fs_write_begin

This patch adds a tracepoint for f2fs_write_begin to trace write op of user.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/data.c              |  2 ++
 include/trace/events/f2fs.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

(limited to 'include')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a7bb98f5831a..cde0d69969b9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -900,6 +900,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 	struct dnode_of_data dn;
 	int err = 0;
 
+	trace_f2fs_write_begin(inode, pos, len, flags);
+
 	f2fs_balance_fs(sbi);
 repeat:
 	err = f2fs_convert_inline_data(inode, pos + len);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 67f38faac589..1e9375aa9340 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -659,6 +659,36 @@ DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio,
 	TP_CONDITION(bio)
 );
 
+TRACE_EVENT(f2fs_write_begin,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+				unsigned int flags),
+
+	TP_ARGS(inode, pos, len, flags),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(ino_t,	ino)
+		__field(loff_t,	pos)
+		__field(unsigned int, len)
+		__field(unsigned int, flags)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->pos	= pos;
+		__entry->len	= len;
+		__entry->flags	= flags;
+	),
+
+	TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, flags = %u",
+		show_dev_ino(__entry),
+		(unsigned long long)__entry->pos,
+		__entry->len,
+		__entry->flags)
+);
+
 DECLARE_EVENT_CLASS(f2fs__page,
 
 	TP_PROTO(struct page *page, int type),
-- 
cgit v1.2.3-71-gd317


From dfb2bf38bf89e15a65f9996566b2945921388a2f Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Tue, 6 May 2014 16:47:23 +0800
Subject: f2fs: add a tracepoint for f2fs_write_end

This patch adds a tracepoint for f2fs_write_end to trace write op of user.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/data.c              |  2 ++
 include/trace/events/f2fs.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

(limited to 'include')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cde0d69969b9..ea58fb698ac6 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -989,6 +989,8 @@ static int f2fs_write_end(struct file *file,
 {
 	struct inode *inode = page->mapping->host;
 
+	trace_f2fs_write_end(inode, pos, len, copied);
+
 	SetPageUptodate(page);
 	set_page_dirty(page);
 
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 1e9375aa9340..483804b55742 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -689,6 +689,36 @@ TRACE_EVENT(f2fs_write_begin,
 		__entry->flags)
 );
 
+TRACE_EVENT(f2fs_write_end,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+				unsigned int copied),
+
+	TP_ARGS(inode, pos, len, copied),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(ino_t,	ino)
+		__field(loff_t,	pos)
+		__field(unsigned int, len)
+		__field(unsigned int, copied)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->pos	= pos;
+		__entry->len	= len;
+		__entry->copied	= copied;
+	),
+
+	TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, copied = %u",
+		show_dev_ino(__entry),
+		(unsigned long long)__entry->pos,
+		__entry->len,
+		__entry->copied)
+);
+
 DECLARE_EVENT_CLASS(f2fs__page,
 
 	TP_PROTO(struct page *page, int type),
-- 
cgit v1.2.3-71-gd317


From ecda0de3430455378f1c02523bf3ad71d91d613a Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Tue, 6 May 2014 16:48:26 +0800
Subject: f2fs: add a tracepoint for f2fs_write_{meta,node,data}_page

This patch adds a tracepoint for f2fs_write_{meta,node,data}_page to trace when
page is writting out.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/checkpoint.c        | 2 ++
 fs/f2fs/data.c              | 2 ++
 fs/f2fs/node.c              | 2 ++
 include/trace/events/f2fs.h | 7 +++++++
 4 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 4206f1664be5..c95d62281d7e 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -157,6 +157,8 @@ static int f2fs_write_meta_page(struct page *page,
 	struct inode *inode = page->mapping->host;
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 
+	trace_f2fs_writepage(page, META);
+
 	if (unlikely(sbi->por_doing))
 		goto redirty_out;
 	if (wbc->for_reclaim)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index ea58fb698ac6..b997d552880e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -788,6 +788,8 @@ static int f2fs_write_data_page(struct page *page,
 		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
 	};
 
+	trace_f2fs_writepage(page, DATA);
+
 	if (page->index < end_index)
 		goto write;
 
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 059aaf5dda2b..49bdddbcadea 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1199,6 +1199,8 @@ static int f2fs_write_node_page(struct page *page,
 		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
 	};
 
+	trace_f2fs_writepage(page, NODE);
+
 	if (unlikely(sbi->por_doing))
 		goto redirty_out;
 
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 483804b55742..d70991e69e58 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -751,6 +751,13 @@ DECLARE_EVENT_CLASS(f2fs__page,
 		__entry->dirty)
 );
 
+DEFINE_EVENT(f2fs__page, f2fs_writepage,
+
+	TP_PROTO(struct page *page, int type),
+
+	TP_ARGS(page, type)
+);
+
 DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty,
 
 	TP_PROTO(struct page *page, int type),
-- 
cgit v1.2.3-71-gd317


From e57484343898094bb8f72a2aa1a50929d27aa027 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Tue, 6 May 2014 16:51:24 +0800
Subject: f2fs: add a tracepoint for f2fs_write_{meta,node,data}_pages

This patch adds a tracepoint for f2fs_write_{meta,node,data}_pages to trace when
pages are fsyncing/flushing.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/checkpoint.c        |  2 ++
 fs/f2fs/data.c              |  2 ++
 fs/f2fs/node.c              |  2 ++
 include/trace/events/f2fs.h | 64 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+)

(limited to 'include')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c95d62281d7e..fe968c7bfc90 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -186,6 +186,8 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
 	long diff, written;
 
+	trace_f2fs_writepages(mapping->host, wbc, META);
+
 	/* collect a number of dirty meta pages and write together */
 	if (wbc->for_kupdate ||
 		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b997d552880e..21bfafaafe83 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -860,6 +860,8 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 	int ret;
 	long diff;
 
+	trace_f2fs_writepages(mapping->host, wbc, DATA);
+
 	/* deal with chardevs and other special file */
 	if (!mapping->a_ops->writepage)
 		return 0;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 49bdddbcadea..3d60d3d34ed2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1242,6 +1242,8 @@ static int f2fs_write_node_pages(struct address_space *mapping,
 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
 	long diff;
 
+	trace_f2fs_writepages(mapping->host, wbc, NODE);
+
 	/* balancing f2fs's metadata in background */
 	f2fs_balance_fs_bg(sbi);
 
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index d70991e69e58..91b1fcc5ec93 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -772,6 +772,70 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite,
 	TP_ARGS(page, type)
 );
 
+TRACE_EVENT(f2fs_writepages,
+
+	TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type),
+
+	TP_ARGS(inode, wbc, type),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(ino_t,	ino)
+		__field(int,	type)
+		__field(int,	dir)
+		__field(long,	nr_to_write)
+		__field(long,	pages_skipped)
+		__field(loff_t,	range_start)
+		__field(loff_t,	range_end)
+		__field(pgoff_t, writeback_index)
+		__field(int,	sync_mode)
+		__field(char,	for_kupdate)
+		__field(char,	for_background)
+		__field(char,	tagged_writepages)
+		__field(char,	for_reclaim)
+		__field(char,	range_cyclic)
+		__field(char,	for_sync)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->type		= type;
+		__entry->dir		= S_ISDIR(inode->i_mode);
+		__entry->nr_to_write	= wbc->nr_to_write;
+		__entry->pages_skipped	= wbc->pages_skipped;
+		__entry->range_start	= wbc->range_start;
+		__entry->range_end	= wbc->range_end;
+		__entry->writeback_index = inode->i_mapping->writeback_index;
+		__entry->sync_mode	= wbc->sync_mode;
+		__entry->for_kupdate	= wbc->for_kupdate;
+		__entry->for_background	= wbc->for_background;
+		__entry->tagged_writepages	= wbc->tagged_writepages;
+		__entry->for_reclaim	= wbc->for_reclaim;
+		__entry->range_cyclic	= wbc->range_cyclic;
+		__entry->for_sync	= wbc->for_sync;
+	),
+
+	TP_printk("dev = (%d,%d), ino = %lu, %s, %s, nr_to_write %ld, "
+		"skipped %ld, start %lld, end %lld, wb_idx %lu, sync_mode %d, "
+		"kupdate %u background %u tagged %u reclaim %u cyclic %u sync %u",
+		show_dev_ino(__entry),
+		show_block_type(__entry->type),
+		show_file_type(__entry->dir),
+		__entry->nr_to_write,
+		__entry->pages_skipped,
+		__entry->range_start,
+		__entry->range_end,
+		(unsigned long)__entry->writeback_index,
+		__entry->sync_mode,
+		__entry->for_kupdate,
+		__entry->for_background,
+		__entry->tagged_writepages,
+		__entry->for_reclaim,
+		__entry->range_cyclic,
+		__entry->for_sync)
+);
+
 TRACE_EVENT(f2fs_submit_page_mbio,
 
 	TP_PROTO(struct page *page, int rw, int type, block_t blk_addr),
-- 
cgit v1.2.3-71-gd317


From c20e89cde669799eff62bf8c00ca9a4819c4e11f Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Tue, 6 May 2014 16:53:08 +0800
Subject: f2fs: add a tracepoint for f2fs_read_data_page

This patch adds a tracepoint for f2fs_read_data_page to trace when page is
readed by user.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/data.c              |  2 ++
 include/trace/events/f2fs.h | 15 +++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 21bfafaafe83..8c250a5d6f26 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -713,6 +713,8 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
 	struct inode *inode = page->mapping->host;
 	int ret;
 
+	trace_f2fs_readpage(page, DATA);
+
 	/* If the file has inline data, try to read it directlly */
 	if (f2fs_has_inline_data(inode))
 		ret = f2fs_read_inline_data(inode, page);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 91b1fcc5ec93..b983990b4a9f 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -732,6 +732,7 @@ DECLARE_EVENT_CLASS(f2fs__page,
 		__field(int, dir)
 		__field(pgoff_t, index)
 		__field(int, dirty)
+		__field(int, uptodate)
 	),
 
 	TP_fast_assign(
@@ -741,14 +742,17 @@ DECLARE_EVENT_CLASS(f2fs__page,
 		__entry->dir	= S_ISDIR(page->mapping->host->i_mode);
 		__entry->index	= page->index;
 		__entry->dirty	= PageDirty(page);
+		__entry->uptodate = PageUptodate(page);
 	),
 
-	TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, dirty = %d",
+	TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, "
+		"dirty = %d, uptodate = %d",
 		show_dev_ino(__entry),
 		show_block_type(__entry->type),
 		show_file_type(__entry->dir),
 		(unsigned long)__entry->index,
-		__entry->dirty)
+		__entry->dirty,
+		__entry->uptodate)
 );
 
 DEFINE_EVENT(f2fs__page, f2fs_writepage,
@@ -758,6 +762,13 @@ DEFINE_EVENT(f2fs__page, f2fs_writepage,
 	TP_ARGS(page, type)
 );
 
+DEFINE_EVENT(f2fs__page, f2fs_readpage,
+
+	TP_PROTO(struct page *page, int type),
+
+	TP_ARGS(page, type)
+);
+
 DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty,
 
 	TP_PROTO(struct page *page, int type),
-- 
cgit v1.2.3-71-gd317


From 257462dbf3ed233de0dc2e489dcc58579535b33f Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 27 Feb 2014 14:53:34 +0900
Subject: pwm-backlight: switch to gpiod interface

Switch to the new gpiod interface, which allows to handle GPIO
properties such as active low transparently and removes a whole bunch of
code.

There are still a couple of users of this driver that rely on passing
the enable GPIO number through platform data, so a fallback mechanism
using a GPIO number is still available to avoid breaking them. It will
be removed once current users have switched to the GPIO lookup tables
provided by the gpiod interface.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/video/backlight/pwm_bl.c | 69 +++++++++++++++++-----------------------
 include/linux/pwm_backlight.h    |  5 +--
 2 files changed, 30 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index fa7f5c35b7fb..cc49347bbcc0 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -10,8 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/gpio/consumer.h>
 #include <linux/gpio.h>
-#include <linux/of_gpio.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -32,8 +32,7 @@ struct pwm_bl_data {
 	unsigned int		*levels;
 	bool			enabled;
 	struct regulator	*power_supply;
-	int			enable_gpio;
-	unsigned long		enable_gpio_flags;
+	struct gpio_desc	*enable_gpio;
 	unsigned int		scale;
 	int			(*notify)(struct device *,
 					  int brightness);
@@ -54,12 +53,8 @@ static void pwm_backlight_power_on(struct pwm_bl_data *pb, int brightness)
 	if (err < 0)
 		dev_err(pb->dev, "failed to enable power supply\n");
 
-	if (gpio_is_valid(pb->enable_gpio)) {
-		if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW)
-			gpio_set_value(pb->enable_gpio, 0);
-		else
-			gpio_set_value(pb->enable_gpio, 1);
-	}
+	if (pb->enable_gpio)
+		gpiod_set_value(pb->enable_gpio, 1);
 
 	pwm_enable(pb->pwm);
 	pb->enabled = true;
@@ -73,12 +68,8 @@ static void pwm_backlight_power_off(struct pwm_bl_data *pb)
 	pwm_config(pb->pwm, 0, pb->period);
 	pwm_disable(pb->pwm);
 
-	if (gpio_is_valid(pb->enable_gpio)) {
-		if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW)
-			gpio_set_value(pb->enable_gpio, 1);
-		else
-			gpio_set_value(pb->enable_gpio, 0);
-	}
+	if (pb->enable_gpio)
+		gpiod_set_value(pb->enable_gpio, 0);
 
 	regulator_disable(pb->power_supply);
 	pb->enabled = false;
@@ -148,7 +139,6 @@ static int pwm_backlight_parse_dt(struct device *dev,
 				  struct platform_pwm_backlight_data *data)
 {
 	struct device_node *node = dev->of_node;
-	enum of_gpio_flags flags;
 	struct property *prop;
 	int length;
 	u32 value;
@@ -189,14 +179,6 @@ static int pwm_backlight_parse_dt(struct device *dev,
 		data->max_brightness--;
 	}
 
-	data->enable_gpio = of_get_named_gpio_flags(node, "enable-gpios", 0,
-						    &flags);
-	if (data->enable_gpio == -EPROBE_DEFER)
-		return -EPROBE_DEFER;
-
-	if (gpio_is_valid(data->enable_gpio) && (flags & OF_GPIO_ACTIVE_LOW))
-		data->enable_gpio_flags |= PWM_BACKLIGHT_GPIO_ACTIVE_LOW;
-
 	return 0;
 }
 
@@ -256,8 +238,6 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 	} else
 		pb->scale = data->max_brightness;
 
-	pb->enable_gpio = data->enable_gpio;
-	pb->enable_gpio_flags = data->enable_gpio_flags;
 	pb->notify = data->notify;
 	pb->notify_after = data->notify_after;
 	pb->check_fb = data->check_fb;
@@ -265,26 +245,38 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 	pb->dev = &pdev->dev;
 	pb->enabled = false;
 
-	if (gpio_is_valid(pb->enable_gpio)) {
-		unsigned long flags;
-
-		if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW)
-			flags = GPIOF_OUT_INIT_HIGH;
+	pb->enable_gpio = devm_gpiod_get(&pdev->dev, "enable");
+	if (IS_ERR(pb->enable_gpio)) {
+		ret = PTR_ERR(pb->enable_gpio);
+		if (ret == -ENOENT)
+			pb->enable_gpio = NULL;
 		else
-			flags = GPIOF_OUT_INIT_LOW;
+			goto err_alloc;
+	}
 
-		ret = gpio_request_one(pb->enable_gpio, flags, "enable");
+	/*
+	 * Compatibility fallback for drivers still using the integer GPIO
+	 * platform data. Must go away soon.
+	 */
+	if (!pb->enable_gpio && gpio_is_valid(data->enable_gpio)) {
+		ret = devm_gpio_request_one(&pdev->dev, data->enable_gpio,
+					    GPIOF_OUT_INIT_HIGH, "enable");
 		if (ret < 0) {
 			dev_err(&pdev->dev, "failed to request GPIO#%d: %d\n",
-				pb->enable_gpio, ret);
+				data->enable_gpio, ret);
 			goto err_alloc;
 		}
+
+		pb->enable_gpio = gpio_to_desc(data->enable_gpio);
 	}
 
+	if (pb->enable_gpio)
+		gpiod_direction_output(pb->enable_gpio, 1);
+
 	pb->power_supply = devm_regulator_get(&pdev->dev, "power");
 	if (IS_ERR(pb->power_supply)) {
 		ret = PTR_ERR(pb->power_supply);
-		goto err_gpio;
+		goto err_alloc;
 	}
 
 	pb->pwm = devm_pwm_get(&pdev->dev, NULL);
@@ -295,7 +287,7 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 		if (IS_ERR(pb->pwm)) {
 			dev_err(&pdev->dev, "unable to request legacy PWM\n");
 			ret = PTR_ERR(pb->pwm);
-			goto err_gpio;
+			goto err_alloc;
 		}
 	}
 
@@ -320,7 +312,7 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 	if (IS_ERR(bl)) {
 		dev_err(&pdev->dev, "failed to register backlight\n");
 		ret = PTR_ERR(bl);
-		goto err_gpio;
+		goto err_alloc;
 	}
 
 	if (data->dft_brightness > data->max_brightness) {
@@ -336,9 +328,6 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, bl);
 	return 0;
 
-err_gpio:
-	if (gpio_is_valid(pb->enable_gpio))
-		gpio_free(pb->enable_gpio);
 err_alloc:
 	if (data->exit)
 		data->exit(&pdev->dev);
diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h
index 2de2e275b2cb..efdd9227a49c 100644
--- a/include/linux/pwm_backlight.h
+++ b/include/linux/pwm_backlight.h
@@ -6,9 +6,6 @@
 
 #include <linux/backlight.h>
 
-/* TODO: convert to gpiod_*() API once it has been merged */
-#define PWM_BACKLIGHT_GPIO_ACTIVE_LOW	(1 << 0)
-
 struct platform_pwm_backlight_data {
 	int pwm_id;
 	unsigned int max_brightness;
@@ -16,8 +13,8 @@ struct platform_pwm_backlight_data {
 	unsigned int lth_brightness;
 	unsigned int pwm_period_ns;
 	unsigned int *levels;
+	/* TODO remove once all users are switched to gpiod_* API */
 	int enable_gpio;
-	unsigned long enable_gpio_flags;
 	int (*init)(struct device *dev);
 	int (*notify)(struct device *dev, int brightness);
 	void (*notify_after)(struct device *dev, int brightness);
-- 
cgit v1.2.3-71-gd317


From 2b53f41fa8604845f4f7c538723694a453088b15 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 7 May 2014 09:21:56 -0400
Subject: cgroup: remove unused CGRP_SANE_BEHAVIOR

This cgroup flag has never been used.  Only CGRP_ROOT_SANE_BEHAVIOR is
used.  Remove it.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2dfabb3b749a..f482f95c2c72 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -140,8 +140,6 @@ enum {
 	 * specified at mount time and thus is implemented here.
 	 */
 	CGRP_CPUSET_CLONE_CHILDREN,
-	/* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
-	CGRP_SANE_BEHAVIOR,
 };
 
 struct cgroup {
-- 
cgit v1.2.3-71-gd317


From 7c65bbc7dcface00b295bbd18bce82fe1db3d633 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 6 May 2014 09:26:30 -0400
Subject: tracing: Add trace_<tracepoint>_enabled() function

There are some code paths in the kernel that need to do some preparations
before it calls a tracepoint. As that code is worthless overhead when
the tracepoint is not enabled, it would be prudent to have that code
only run when the tracepoint is active. To accomplish this, all tracepoints
now get a static inline function called "trace_<tracepoint-name>_enabled()"
which returns true when the tracepoint is enabled and false otherwise.

As an added bonus, that function uses the static_key of the tracepoint
such that no branch is needed.

  if (trace_mytracepoint_enabled()) {
	arg = process_tp_arg();
	trace_mytracepoint(arg);
  }

Will keep the "process_tp_arg()" (which may be expensive to run) from
being executed when the tracepoint isn't enabled.

It's best to encapsulate the tracepoint itself in the if statement
just to keep races. For example, if you had:

  if (trace_mytracepoint_enabled())
	arg = process_tp_arg();
  trace_mytracepoint(arg);

There's a chance that the tracepoint could be enabled just after the
if statement, and arg will be undefined when calling the tracepoint.

Link: http://lkml.kernel.org/r/20140506094407.507b6435@gandalf.local.home

Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 Documentation/trace/tracepoints.txt | 24 ++++++++++++++++++++++++
 include/linux/tracepoint.h          | 10 ++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/Documentation/trace/tracepoints.txt b/Documentation/trace/tracepoints.txt
index 6b018b53177a..a3efac621c5a 100644
--- a/Documentation/trace/tracepoints.txt
+++ b/Documentation/trace/tracepoints.txt
@@ -115,6 +115,30 @@ If the tracepoint has to be used in kernel modules, an
 EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be
 used to export the defined tracepoints.
 
+If you need to do a bit of work for a tracepoint parameter, and
+that work is only used for the tracepoint, that work can be encapsulated
+within an if statement with the following:
+
+	if (trace_foo_bar_enabled()) {
+		int i;
+		int tot = 0;
+
+		for (i = 0; i < count; i++)
+			tot += calculate_nuggets();
+
+		trace_foo_bar(tot);
+	}
+
+All trace_<tracepoint>() calls have a matching trace_<tracepoint>_enabled()
+function defined that returns true if the tracepoint is enabled and
+false otherwise. The trace_<tracepoint>() should always be within the
+block of the if (trace_<tracepoint>_enabled()) to prevent races between
+the tracepoint being enabled and the check being seen.
+
+The advantage of using the trace_<tracepoint>_enabled() is that it uses
+the static_key of the tracepoint to allow the if statement to be implemented
+with jump labels and avoid conditional branches.
+
 Note: The convenience macro TRACE_EVENT provides an alternative way to
       define tracepoints. Check http://lwn.net/Articles/379903,
       http://lwn.net/Articles/381064 and http://lwn.net/Articles/383362
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 9d30ee469c2a..2e2a5f7717e5 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -185,6 +185,11 @@ extern void syscall_unregfunc(void);
 	static inline void						\
 	check_trace_callback_type_##name(void (*cb)(data_proto))	\
 	{								\
+	}								\
+	static inline bool						\
+	trace_##name##_enabled(void)					\
+	{								\
+		return static_key_false(&__tracepoint_##name.key);	\
 	}
 
 /*
@@ -230,6 +235,11 @@ extern void syscall_unregfunc(void);
 	}								\
 	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
 	{								\
+	}								\
+	static inline bool						\
+	trace_##name##_enabled(void)					\
+	{								\
+		return false;						\
 	}
 
 #define DEFINE_TRACE_FN(name, reg, unreg)
-- 
cgit v1.2.3-71-gd317


From 69902c718c0b476e94ed7fccd3cf29ca39fe433a Mon Sep 17 00:00:00 2001
From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
Date: Thu, 1 May 2014 10:56:44 +0530
Subject: kprobes: Ensure blacklist data is aligned

ARC Linux (not supporting native unaligned access) was failing
to boot because __start_kprobe_blacklist was not aligned.

This was because per generated vmlinux.lds it was emitted right
next to .rodata with strings etc hence could be randomly
unaligned.

Fix that by ensuring a word alignment. While 4 would suffice for
32bit arches and problem at hand, it is probably better to put 8.

| Path: (null) CPU: 0 PID: 1 Comm: swapper Not tainted
| 3.15.0-rc3-next-20140430 #2
| task: 8f044000 ti: 8f01e000 task.ti: 8f01e000
|
| [ECR   ]: 0x00230400 => Misaligned r/w from 0x800fb0d3
| [EFA   ]: 0x800fb0d3
| [BLINK ]: do_one_initcall+0x86/0x1bc
| [ERET  ]: init_kprobes+0x52/0x120

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: <torvalds@linux-foundation.org>
Cc: <rusty@rustcorp.com.au>
Cc: <rdunlap@infradead.org>
Cc: <jeremy@goop.org>
Cc: <arnd@arndb.de>
Cc: <dl9pf@gmx.de>
Cc: <sparse@chrisli.org>
Cc: <anil.s.keshavamurthy@intel.com>
Cc: <davem@davemloft.net>
Cc: <ananth@in.ibm.com>
Cc: <masami.hiramatsu.pt@hitachi.com>
Cc: <chrisw@sous-sol.org>
Cc: <akataria@vmware.com>
Cc: anton Kolesov <Anton.Kolesov@synopsys.com>
Link: http://lkml.kernel.org/r/5361DB14.7010406@synopsys.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 40ceb3ceba79..8e0204a68c74 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -110,7 +110,8 @@
 #endif
 
 #ifdef CONFIG_KPROBES
-#define KPROBE_BLACKLIST()	VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \
+#define KPROBE_BLACKLIST()	. = ALIGN(8);				      \
+				VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \
 				*(_kprobe_blacklist)			      \
 				VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .;
 #else
-- 
cgit v1.2.3-71-gd317


From d28071d102f232d92e52af06d242d041074b54b6 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sun, 4 May 2014 20:55:39 -0400
Subject: tunnel: fix RFC number in comment for INET_ECN_decapsulate()

The quoted text and figure are from RFC 6040 ("Tunnelling of Explicit
Congestion Notification").

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_ecn.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 3bd22795c3e2..84b20835b736 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -150,7 +150,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
 }
 
 /*
- * RFC 6080 4.2
+ * RFC 6040 4.2
  *  To decapsulate the inner header at the tunnel egress, a compliant
  *  tunnel egress MUST set the outgoing ECN field to the codepoint at the
  *  intersection of the appropriate arriving inner header (row) and outer
-- 
cgit v1.2.3-71-gd317


From 698365fa1874aa7635d51667a34a2842228e9837 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 5 May 2014 15:55:55 -0700
Subject: net: clean up snmp stats code

commit 8f0ea0fe3a036a47767f9c80e (snmp: reduce percpu needs by 50%)
reduced snmp array size to 1, so technically it doesn't have to be
an array any more. What's more, after the following commit:

	commit 933393f58fef9963eac61db8093689544e29a600
	Date:   Thu Dec 22 11:58:51 2011 -0600

	    percpu: Remove irqsafe_cpu_xxx variants

	    We simply say that regular this_cpu use must be safe regardless of
	    preemption and interrupt state.  That has no material change for x86
	    and s390 implementations of this_cpu operations.  However, arches that
	    do not provide their own implementation for this_cpu operations will
	    now get code generated that disables interrupts instead of preemption.

probably no arch wants to have SNMP_ARRAY_SZ == 2. At least after
almost 3 years, no one complains.

So, just convert the array to a single pointer and remove snmp_mib_init()
and snmp_mib_free() as well.

Cc: Christoph Lameter <cl@linux.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h         | 18 ++--------
 include/net/snmp.h       | 32 ++++++++---------
 net/dccp/proto.c         |  9 ++---
 net/ipv4/af_inet.c       | 93 ++++++++++++++++--------------------------------
 net/ipv4/proc.c          | 24 ++++++-------
 net/ipv6/addrconf.c      | 17 ++++-----
 net/ipv6/addrconf_core.c |  2 +-
 net/ipv6/af_inet6.c      | 42 +++++++++-------------
 net/ipv6/proc.c          |  6 ++--
 net/sctp/protocol.c      |  9 ++---
 net/xfrm/xfrm_policy.c   | 10 +++---
 net/xfrm/xfrm_proc.c     |  3 +-
 12 files changed, 103 insertions(+), 162 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 1988cefdbb70..16146b667ddb 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -196,27 +196,15 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 #define NET_ADD_STATS_BH(net, field, adnd) SNMP_ADD_STATS_BH((net)->mib.net_statistics, field, adnd)
 #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
-unsigned long snmp_fold_field(void __percpu *mib[], int offt);
+unsigned long snmp_fold_field(void __percpu *mib, int offt);
 #if BITS_PER_LONG==32
-u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t sync_off);
+u64 snmp_fold_field64(void __percpu *mib, int offt, size_t sync_off);
 #else
-static inline u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_off)
+static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_off)
 {
 	return snmp_fold_field(mib, offt);
 }
 #endif
-int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align);
-
-static inline void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ])
-{
-	int i;
-
-	BUG_ON(ptr == NULL);
-	for (i = 0; i < SNMP_ARRAY_SZ; i++) {
-		free_percpu(ptr[i]);
-		ptr[i] = NULL;
-	}
-}
 
 void inet_get_local_port_range(struct net *net, int *low, int *high);
 
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 71596261fa99..f1f27fdbb0d5 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -116,51 +116,49 @@ struct linux_xfrm_mib {
 	unsigned long	mibs[LINUX_MIB_XFRMMAX];
 };
 
-#define SNMP_ARRAY_SZ 1
-
 #define DEFINE_SNMP_STAT(type, name)	\
-	__typeof__(type) __percpu *name[SNMP_ARRAY_SZ]
+	__typeof__(type) __percpu *name
 #define DEFINE_SNMP_STAT_ATOMIC(type, name)	\
 	__typeof__(type) *name
 #define DECLARE_SNMP_STAT(type, name)	\
-	extern __typeof__(type) __percpu *name[SNMP_ARRAY_SZ]
+	extern __typeof__(type) __percpu *name
 
 #define SNMP_INC_STATS_BH(mib, field)	\
-			__this_cpu_inc(mib[0]->mibs[field])
+			__this_cpu_inc(mib->mibs[field])
 
 #define SNMP_INC_STATS_USER(mib, field)	\
-			this_cpu_inc(mib[0]->mibs[field])
+			this_cpu_inc(mib->mibs[field])
 
 #define SNMP_INC_STATS_ATOMIC_LONG(mib, field)	\
 			atomic_long_inc(&mib->mibs[field])
 
 #define SNMP_INC_STATS(mib, field)	\
-			this_cpu_inc(mib[0]->mibs[field])
+			this_cpu_inc(mib->mibs[field])
 
 #define SNMP_DEC_STATS(mib, field)	\
-			this_cpu_dec(mib[0]->mibs[field])
+			this_cpu_dec(mib->mibs[field])
 
 #define SNMP_ADD_STATS_BH(mib, field, addend)	\
-			__this_cpu_add(mib[0]->mibs[field], addend)
+			__this_cpu_add(mib->mibs[field], addend)
 
 #define SNMP_ADD_STATS_USER(mib, field, addend)	\
-			this_cpu_add(mib[0]->mibs[field], addend)
+			this_cpu_add(mib->mibs[field], addend)
 
 #define SNMP_ADD_STATS(mib, field, addend)	\
-			this_cpu_add(mib[0]->mibs[field], addend)
+			this_cpu_add(mib->mibs[field], addend)
 /*
- * Use "__typeof__(*mib[0]) *ptr" instead of "__typeof__(mib[0]) ptr"
+ * Use "__typeof__(*mib) *ptr" instead of "__typeof__(mib) ptr"
  * to make @ptr a non-percpu pointer.
  */
 #define SNMP_UPD_PO_STATS(mib, basefield, addend)	\
 	do { \
-		__typeof__(*mib[0]->mibs) *ptr = mib[0]->mibs;	\
+		__typeof__(*mib->mibs) *ptr = mib->mibs;	\
 		this_cpu_inc(ptr[basefield##PKTS]);		\
 		this_cpu_add(ptr[basefield##OCTETS], addend);	\
 	} while (0)
 #define SNMP_UPD_PO_STATS_BH(mib, basefield, addend)	\
 	do { \
-		__typeof__(*mib[0]->mibs) *ptr = mib[0]->mibs;	\
+		__typeof__(*mib->mibs) *ptr = mib->mibs;	\
 		__this_cpu_inc(ptr[basefield##PKTS]);		\
 		__this_cpu_add(ptr[basefield##OCTETS], addend);	\
 	} while (0)
@@ -170,7 +168,7 @@ struct linux_xfrm_mib {
 
 #define SNMP_ADD_STATS64_BH(mib, field, addend) 			\
 	do {								\
-		__typeof__(*mib[0]) *ptr = __this_cpu_ptr((mib)[0]);	\
+		__typeof__(*mib) *ptr = __this_cpu_ptr(mib);		\
 		u64_stats_update_begin(&ptr->syncp);			\
 		ptr->mibs[field] += addend;				\
 		u64_stats_update_end(&ptr->syncp);			\
@@ -191,8 +189,8 @@ struct linux_xfrm_mib {
 #define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1)
 #define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend)			\
 	do {								\
-		__typeof__(*mib[0]) *ptr;				\
-		ptr = __this_cpu_ptr((mib)[0]);				\
+		__typeof__(*mib) *ptr;					\
+		ptr = __this_cpu_ptr(mib);				\
 		u64_stats_update_begin(&ptr->syncp);			\
 		ptr->mibs[basefield##PKTS]++;				\
 		ptr->mibs[basefield##OCTETS] += addend;			\
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index eb892b4f4814..de2c1e719305 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1084,14 +1084,15 @@ EXPORT_SYMBOL_GPL(dccp_shutdown);
 
 static inline int dccp_mib_init(void)
 {
-	return snmp_mib_init((void __percpu **)dccp_statistics,
-			     sizeof(struct dccp_mib),
-			     __alignof__(struct dccp_mib));
+	dccp_statistics = alloc_percpu(struct dccp_mib);
+	if (!dccp_statistics)
+		return -ENOMEM;
+	return 0;
 }
 
 static inline void dccp_mib_exit(void)
 {
-	snmp_mib_free((void __percpu **)dccp_statistics);
+	free_percpu(dccp_statistics);
 }
 
 static int thash_entries;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8c54870db792..6765d91b8e75 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1476,22 +1476,20 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 }
 EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 
-unsigned long snmp_fold_field(void __percpu *mib[], int offt)
+unsigned long snmp_fold_field(void __percpu *mib, int offt)
 {
 	unsigned long res = 0;
-	int i, j;
+	int i;
 
-	for_each_possible_cpu(i) {
-		for (j = 0; j < SNMP_ARRAY_SZ; j++)
-			res += *(((unsigned long *) per_cpu_ptr(mib[j], i)) + offt);
-	}
+	for_each_possible_cpu(i)
+		res += *(((unsigned long *) per_cpu_ptr(mib, i)) + offt);
 	return res;
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
 #if BITS_PER_LONG==32
 
-u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
+u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
 {
 	u64 res = 0;
 	int cpu;
@@ -1502,7 +1500,7 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
 		u64 v;
 		unsigned int start;
 
-		bhptr = per_cpu_ptr(mib[0], cpu);
+		bhptr = per_cpu_ptr(mib, cpu);
 		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
 		do {
 			start = u64_stats_fetch_begin_irq(syncp);
@@ -1516,25 +1514,6 @@ u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
 EXPORT_SYMBOL_GPL(snmp_fold_field64);
 #endif
 
-int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
-{
-	BUG_ON(ptr == NULL);
-	ptr[0] = __alloc_percpu(mibsize, align);
-	if (!ptr[0])
-		return -ENOMEM;
-
-#if SNMP_ARRAY_SZ == 2
-	ptr[1] = __alloc_percpu(mibsize, align);
-	if (!ptr[1]) {
-		free_percpu(ptr[0]);
-		ptr[0] = NULL;
-		return -ENOMEM;
-	}
-#endif
-	return 0;
-}
-EXPORT_SYMBOL_GPL(snmp_mib_init);
-
 #ifdef CONFIG_IP_MULTICAST
 static const struct net_protocol igmp_protocol = {
 	.handler =	igmp_rcv,
@@ -1570,40 +1549,30 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 {
 	int i;
 
-	if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
-			  sizeof(struct tcp_mib),
-			  __alignof__(struct tcp_mib)) < 0)
+	net->mib.tcp_statistics = alloc_percpu(struct tcp_mib);
+	if (!net->mib.tcp_statistics)
 		goto err_tcp_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
-			  sizeof(struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+	net->mib.ip_statistics = alloc_percpu(struct ipstats_mib);
+	if (!net->mib.ip_statistics)
 		goto err_ip_mib;
 
 	for_each_possible_cpu(i) {
 		struct ipstats_mib *af_inet_stats;
-		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i);
+		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics, i);
 		u64_stats_init(&af_inet_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
-		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i);
-		u64_stats_init(&af_inet_stats->syncp);
-#endif
 	}
 
-	if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
-			  sizeof(struct linux_mib),
-			  __alignof__(struct linux_mib)) < 0)
+	net->mib.net_statistics = alloc_percpu(struct linux_mib);
+	if (!net->mib.net_statistics)
 		goto err_net_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	net->mib.udp_statistics = alloc_percpu(struct udp_mib);
+	if (!net->mib.udp_statistics)
 		goto err_udp_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	net->mib.udplite_statistics = alloc_percpu(struct udp_mib);
+	if (!net->mib.udplite_statistics)
 		goto err_udplite_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
-			  sizeof(struct icmp_mib),
-			  __alignof__(struct icmp_mib)) < 0)
+	net->mib.icmp_statistics = alloc_percpu(struct icmp_mib);
+	if (!net->mib.icmp_statistics)
 		goto err_icmp_mib;
 	net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
 					      GFP_KERNEL);
@@ -1614,17 +1583,17 @@ static __net_init int ipv4_mib_init_net(struct net *net)
 	return 0;
 
 err_icmpmsg_mib:
-	snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
+	free_percpu(net->mib.icmp_statistics);
 err_icmp_mib:
-	snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
+	free_percpu(net->mib.udplite_statistics);
 err_udplite_mib:
-	snmp_mib_free((void __percpu **)net->mib.udp_statistics);
+	free_percpu(net->mib.udp_statistics);
 err_udp_mib:
-	snmp_mib_free((void __percpu **)net->mib.net_statistics);
+	free_percpu(net->mib.net_statistics);
 err_net_mib:
-	snmp_mib_free((void __percpu **)net->mib.ip_statistics);
+	free_percpu(net->mib.ip_statistics);
 err_ip_mib:
-	snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
+	free_percpu(net->mib.tcp_statistics);
 err_tcp_mib:
 	return -ENOMEM;
 }
@@ -1632,12 +1601,12 @@ err_tcp_mib:
 static __net_exit void ipv4_mib_exit_net(struct net *net)
 {
 	kfree(net->mib.icmpmsg_statistics);
-	snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
-	snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
-	snmp_mib_free((void __percpu **)net->mib.udp_statistics);
-	snmp_mib_free((void __percpu **)net->mib.net_statistics);
-	snmp_mib_free((void __percpu **)net->mib.ip_statistics);
-	snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
+	free_percpu(net->mib.icmp_statistics);
+	free_percpu(net->mib.udplite_statistics);
+	free_percpu(net->mib.udp_statistics);
+	free_percpu(net->mib.net_statistics);
+	free_percpu(net->mib.ip_statistics);
+	free_percpu(net->mib.tcp_statistics);
 }
 
 static __net_initdata struct pernet_operations ipv4_mib_ops = {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ad737fad6d8b..ae0af9386f7c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -345,15 +345,15 @@ static void icmp_put(struct seq_file *seq)
 	for (i = 0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " Out%s", icmpmibmap[i].name);
 	seq_printf(seq, "\nIcmp: %lu %lu %lu",
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS),
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS),
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS),
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
 	for (i = 0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
 			   atomic_long_read(ptr + icmpmibmap[i].index));
 	seq_printf(seq, " %lu %lu",
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
+		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
 	for (i = 0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
 			   atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
@@ -379,7 +379,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %llu",
-			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+			   snmp_fold_field64(net->mib.ip_statistics,
 					     snmp4_ipstats_list[i].entry,
 					     offsetof(struct ipstats_mib, syncp)));
 
@@ -395,11 +395,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
+				   snmp_fold_field(net->mib.tcp_statistics,
 						   snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   snmp_fold_field((void __percpu **)net->mib.tcp_statistics,
+				   snmp_fold_field(net->mib.tcp_statistics,
 						   snmp4_tcp_list[i].entry));
 	}
 
@@ -410,7 +410,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.udp_statistics,
+			   snmp_fold_field(net->mib.udp_statistics,
 					   snmp4_udp_list[i].entry));
 
 	/* the UDP and UDP-Lite MIBs are the same */
@@ -421,7 +421,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdpLite:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.udplite_statistics,
+			   snmp_fold_field(net->mib.udplite_statistics,
 					   snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
@@ -458,7 +458,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.net_statistics,
+			   snmp_fold_field(net->mib.net_statistics,
 					   snmp4_net_list[i].entry));
 
 	seq_puts(seq, "\nIpExt:");
@@ -468,7 +468,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nIpExt:");
 	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %llu",
-			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+			   snmp_fold_field64(net->mib.ip_statistics,
 					     snmp4_ipextstats_list[i].entry,
 					     offsetof(struct ipstats_mib, syncp)));
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1ac13c0300b7..5667b3003af9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -275,19 +275,14 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)
 {
 	int i;
 
-	if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
-			  sizeof(struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+	idev->stats.ipv6 = alloc_percpu(struct ipstats_mib);
+	if (!idev->stats.ipv6)
 		goto err_ip;
 
 	for_each_possible_cpu(i) {
 		struct ipstats_mib *addrconf_stats;
-		addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i);
+		addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i);
 		u64_stats_init(&addrconf_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
-		addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i);
-		u64_stats_init(&addrconf_stats->syncp);
-#endif
 	}
 
 
@@ -305,7 +300,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)
 err_icmpmsg:
 	kfree(idev->stats.icmpv6dev);
 err_icmp:
-	snmp_mib_free((void __percpu **)idev->stats.ipv6);
+	free_percpu(idev->stats.ipv6);
 err_ip:
 	return -ENOMEM;
 }
@@ -4363,7 +4358,7 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
 	memset(&stats[items], 0, pad);
 }
 
-static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
+static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
 				      int items, int bytes, size_t syncpoff)
 {
 	int i;
@@ -4383,7 +4378,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
 {
 	switch (attrtype) {
 	case IFLA_INET6_STATS:
-		__snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
+		__snmp6_fill_stats64(stats, idev->stats.ipv6,
 				     IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
 		break;
 	case IFLA_INET6_ICMP6STATS:
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 4c11cbcf8308..e6960457f625 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -123,7 +123,7 @@ static void snmp6_free_dev(struct inet6_dev *idev)
 {
 	kfree(idev->stats.icmpv6msgdev);
 	kfree(idev->stats.icmpv6dev);
-	snmp_mib_free((void __percpu **)idev->stats.ipv6);
+	free_percpu(idev->stats.ipv6);
 }
 
 /* Nobody refers to this device, we may destroy it. */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d935889f1008..dc47cc757b80 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -715,33 +715,25 @@ static int __net_init ipv6_init_mibs(struct net *net)
 {
 	int i;
 
-	if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib);
+	if (!net->mib.udp_stats_in6)
 		return -ENOMEM;
-	if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
-			  sizeof(struct udp_mib),
-			  __alignof__(struct udp_mib)) < 0)
+	net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib);
+	if (!net->mib.udplite_stats_in6)
 		goto err_udplite_mib;
-	if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
-			  sizeof(struct ipstats_mib),
-			  __alignof__(struct ipstats_mib)) < 0)
+	net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib);
+	if (!net->mib.ipv6_statistics)
 		goto err_ip_mib;
 
 	for_each_possible_cpu(i) {
 		struct ipstats_mib *af_inet6_stats;
-		af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i);
+		af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i);
 		u64_stats_init(&af_inet6_stats->syncp);
-#if SNMP_ARRAY_SZ == 2
-		af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i);
-		u64_stats_init(&af_inet6_stats->syncp);
-#endif
 	}
 
 
-	if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
-			  sizeof(struct icmpv6_mib),
-			  __alignof__(struct icmpv6_mib)) < 0)
+	net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib);
+	if (!net->mib.icmpv6_statistics)
 		goto err_icmp_mib;
 	net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),
 						GFP_KERNEL);
@@ -750,22 +742,22 @@ static int __net_init ipv6_init_mibs(struct net *net)
 	return 0;
 
 err_icmpmsg_mib:
-	snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+	free_percpu(net->mib.icmpv6_statistics);
 err_icmp_mib:
-	snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
+	free_percpu(net->mib.ipv6_statistics);
 err_ip_mib:
-	snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
+	free_percpu(net->mib.udplite_stats_in6);
 err_udplite_mib:
-	snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
+	free_percpu(net->mib.udp_stats_in6);
 	return -ENOMEM;
 }
 
 static void ipv6_cleanup_mibs(struct net *net)
 {
-	snmp_mib_free((void __percpu **)net->mib.udp_stats_in6);
-	snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6);
-	snmp_mib_free((void __percpu **)net->mib.ipv6_statistics);
-	snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics);
+	free_percpu(net->mib.udp_stats_in6);
+	free_percpu(net->mib.udplite_stats_in6);
+	free_percpu(net->mib.ipv6_statistics);
+	free_percpu(net->mib.icmpv6_statistics);
 	kfree(net->mib.icmpv6msg_statistics);
 }
 
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 091d066a57b3..69cb47625df7 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -201,7 +201,7 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib,
 	}
 }
 
-static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
+static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
 				  const struct snmp_mib *itemlist, size_t syncpoff)
 {
 	int i;
@@ -215,7 +215,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = (struct net *)seq->private;
 
-	snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
+	snmp6_seq_show_item64(seq, net->mib.ipv6_statistics,
 			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
 	snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
 			    NULL, snmp6_icmp6_list);
@@ -245,7 +245,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
 	struct inet6_dev *idev = (struct inet6_dev *)seq->private;
 
 	seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex);
-	snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6,
+	snmp6_seq_show_item64(seq, idev->stats.ipv6,
 			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
 	snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,
 			    snmp6_icmp6_list);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index c09757fbf803..074b60e2faab 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1100,14 +1100,15 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family)
 
 static inline int init_sctp_mibs(struct net *net)
 {
-	return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics,
-			     sizeof(struct sctp_mib),
-			     __alignof__(struct sctp_mib));
+	net->sctp.sctp_statistics = alloc_percpu(struct sctp_mib);
+	if (!net->sctp.sctp_statistics)
+		return -ENOMEM;
+	return 0;
 }
 
 static inline void cleanup_sctp_mibs(struct net *net)
 {
-	snmp_mib_free((void __percpu **)net->sctp.sctp_statistics);
+	free_percpu(net->sctp.sctp_statistics);
 }
 
 static void sctp_v4_pf_init(void)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c08fbd11ceff..e63f242ae03e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2783,21 +2783,19 @@ static struct notifier_block xfrm_dev_notifier = {
 static int __net_init xfrm_statistics_init(struct net *net)
 {
 	int rv;
-
-	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
-			  sizeof(struct linux_xfrm_mib),
-			  __alignof__(struct linux_xfrm_mib)) < 0)
+	net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
+	if (!net->mib.xfrm_statistics)
 		return -ENOMEM;
 	rv = xfrm_proc_init(net);
 	if (rv < 0)
-		snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
+		free_percpu(net->mib.xfrm_statistics);
 	return rv;
 }
 
 static void xfrm_statistics_fini(struct net *net)
 {
 	xfrm_proc_fini(net);
-	snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
+	free_percpu(net->mib.xfrm_statistics);
 }
 #else
 static int __net_init xfrm_statistics_init(struct net *net)
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index fc5abd0b456f..9c4fbd8935f4 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -54,8 +54,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
 	int i;
 	for (i = 0; xfrm_mib_list[i].name; i++)
 		seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
-			   snmp_fold_field((void __percpu **)
-					   net->mib.xfrm_statistics,
+			   snmp_fold_field(net->mib.xfrm_statistics,
 					   xfrm_mib_list[i].entry));
 	return 0;
 }
-- 
cgit v1.2.3-71-gd317


From 552a515707a5caf9fa9f3620d68fc28146c6b1b9 Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Wed, 7 May 2014 09:28:31 +0200
Subject: ath9k: Allow platform override without EEPROM override

Add a new platform data flag "use_eeprom" that indicates that the eeprom
found on the card itself should be used instead of the one present in
the platform data.

This allows to override the MAC address of a PCI card while preserving
the eeprom data from the card itself.

The default behavior is preserved.

Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath9k/init.c | 2 +-
 include/linux/ath9k_platform.h        | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index a6e273a2c44b..bcc7cfb1866d 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -508,7 +508,7 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc,
 	sc->tx99_power = MAX_RATE_POWER + 1;
 	init_waitqueue_head(&sc->tx_wait);
 
-	if (!pdata) {
+	if (!pdata || pdata->use_eeprom) {
 		ah->ah_flags |= AH_USE_EEPROM;
 		sc->sc_ah->led_pin = -1;
 	} else {
diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h
index 8598f8eacb20..a495a959e8a7 100644
--- a/include/linux/ath9k_platform.h
+++ b/include/linux/ath9k_platform.h
@@ -36,6 +36,8 @@ struct ath9k_platform_data {
 
 	int (*get_mac_revision)(void);
 	int (*external_reset)(void);
+
+	bool use_eeprom;
 };
 
 #endif /* _LINUX_ATH9K_PLATFORM_H */
-- 
cgit v1.2.3-71-gd317


From db6d8cc00773d8ef5a8b421b42a5ded235307b10 Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@canonical.com>
Date: Wed, 26 Mar 2014 02:27:02 -0700
Subject: dell-led: add mic mute led interface

This patch provides similar led functional of

  420f973 thinkpad-acpi: Add mute and mic-mute LED functionality

Signed-off-by: Alex Hung <alex.hung@canonical.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/dell-led.c  | 171 +++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/dell-led.h |  10 +++
 2 files changed, 174 insertions(+), 7 deletions(-)
 create mode 100644 include/linux/dell-led.h

(limited to 'include')

diff --git a/drivers/leds/dell-led.c b/drivers/leds/dell-led.c
index e5c57389efd6..c36acaf566a6 100644
--- a/drivers/leds/dell-led.c
+++ b/drivers/leds/dell-led.c
@@ -15,12 +15,15 @@
 #include <linux/leds.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/dell-led.h>
 
 MODULE_AUTHOR("Louis Davis/Jim Dailey");
 MODULE_DESCRIPTION("Dell LED Control Driver");
 MODULE_LICENSE("GPL");
 
 #define DELL_LED_BIOS_GUID "F6E4FE6E-909D-47cb-8BAB-C9F6F2F8D396"
+#define DELL_APP_GUID "A80593CE-A997-11DA-B012-B622A1EF5492"
 MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID);
 
 /* Error Result Codes: */
@@ -39,6 +42,149 @@ MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID);
 #define CMD_LED_OFF	17
 #define CMD_LED_BLINK	18
 
+struct app_wmi_args {
+	u16 class;
+	u16 selector;
+	u32 arg1;
+	u32 arg2;
+	u32 arg3;
+	u32 arg4;
+	u32 res1;
+	u32 res2;
+	u32 res3;
+	u32 res4;
+	char dummy[92];
+};
+
+#define GLOBAL_MIC_MUTE_ENABLE	0x364
+#define GLOBAL_MIC_MUTE_DISABLE	0x365
+
+struct dell_bios_data_token {
+	u16 tokenid;
+	u16 location;
+	u16 value;
+};
+
+struct __attribute__ ((__packed__)) dell_bios_calling_interface {
+	struct	dmi_header header;
+	u16	cmd_io_addr;
+	u8	cmd_io_code;
+	u32	supported_cmds;
+	struct	dell_bios_data_token damap[];
+};
+
+static struct dell_bios_data_token dell_mic_tokens[2];
+
+static int dell_wmi_perform_query(struct app_wmi_args *args)
+{
+	struct app_wmi_args *bios_return;
+	union acpi_object *obj;
+	struct acpi_buffer input;
+	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	acpi_status status;
+	u32 rc = -EINVAL;
+
+	input.length = 128;
+	input.pointer = args;
+
+	status = wmi_evaluate_method(DELL_APP_GUID, 0, 1, &input, &output);
+	if (!ACPI_SUCCESS(status))
+		goto err_out0;
+
+	obj = output.pointer;
+	if (!obj)
+		goto err_out0;
+
+	if (obj->type != ACPI_TYPE_BUFFER)
+		goto err_out1;
+
+	bios_return = (struct app_wmi_args *)obj->buffer.pointer;
+	rc = bios_return->res1;
+	if (rc)
+		goto err_out1;
+
+	memcpy(args, bios_return, sizeof(struct app_wmi_args));
+	rc = 0;
+
+ err_out1:
+	kfree(obj);
+ err_out0:
+	return rc;
+}
+
+static void __init find_micmute_tokens(const struct dmi_header *dm, void *dummy)
+{
+	struct dell_bios_calling_interface *calling_interface;
+	struct dell_bios_data_token *token;
+	int token_size = sizeof(struct dell_bios_data_token);
+	int i = 0;
+
+	if (dm->type == 0xda && dm->length > 17) {
+		calling_interface = container_of(dm,
+				struct dell_bios_calling_interface, header);
+
+		token = &calling_interface->damap[i];
+		while (token->tokenid != 0xffff) {
+			if (token->tokenid == GLOBAL_MIC_MUTE_DISABLE)
+				memcpy(&dell_mic_tokens[0], token, token_size);
+			else if (token->tokenid == GLOBAL_MIC_MUTE_ENABLE)
+				memcpy(&dell_mic_tokens[1], token, token_size);
+
+			i++;
+			token = &calling_interface->damap[i];
+		}
+	}
+}
+
+static int dell_micmute_led_set(int state)
+{
+	struct app_wmi_args args;
+	struct dell_bios_data_token *token;
+
+	if (!wmi_has_guid(DELL_APP_GUID))
+		return -ENODEV;
+
+	if (state == 0 || state == 1)
+		token = &dell_mic_tokens[state];
+	else
+		return -EINVAL;
+
+	memset(&args, 0, sizeof(struct app_wmi_args));
+
+	args.class = 1;
+	args.arg1 = token->location;
+	args.arg2 = token->value;
+
+	dell_wmi_perform_query(&args);
+
+	return state;
+}
+
+int dell_app_wmi_led_set(int whichled, int on)
+{
+	int state = 0;
+
+	switch (whichled) {
+	case DELL_LED_MICMUTE:
+		state = dell_micmute_led_set(on);
+		break;
+	default:
+		pr_warn("led type %x is not supported\n", whichled);
+		break;
+	}
+
+	return state;
+}
+EXPORT_SYMBOL_GPL(dell_app_wmi_led_set);
+
+static int __init dell_micmute_led_init(void)
+{
+	memset(dell_mic_tokens, 0, sizeof(struct dell_bios_data_token) * 2);
+	dmi_walk(find_micmute_tokens, NULL);
+
+	return 0;
+}
+
 struct bios_args {
 	unsigned char length;
 	unsigned char result_code;
@@ -181,21 +327,32 @@ static int __init dell_led_init(void)
 {
 	int error = 0;
 
-	if (!wmi_has_guid(DELL_LED_BIOS_GUID))
+	if (!wmi_has_guid(DELL_LED_BIOS_GUID) && !wmi_has_guid(DELL_APP_GUID))
 		return -ENODEV;
 
-	error = led_off();
-	if (error != 0)
-		return -ENODEV;
+	if (wmi_has_guid(DELL_APP_GUID))
+		error = dell_micmute_led_init();
 
-	return led_classdev_register(NULL, &dell_led);
+	if (wmi_has_guid(DELL_LED_BIOS_GUID)) {
+		error = led_off();
+		if (error != 0)
+			return -ENODEV;
+
+		error = led_classdev_register(NULL, &dell_led);
+	}
+
+	return error;
 }
 
 static void __exit dell_led_exit(void)
 {
-	led_classdev_unregister(&dell_led);
+	int error = 0;
 
-	led_off();
+	if (wmi_has_guid(DELL_LED_BIOS_GUID)) {
+		error = led_off();
+		if (error == 0)
+			led_classdev_unregister(&dell_led);
+	}
 }
 
 module_init(dell_led_init);
diff --git a/include/linux/dell-led.h b/include/linux/dell-led.h
new file mode 100644
index 000000000000..7009b8bec77b
--- /dev/null
+++ b/include/linux/dell-led.h
@@ -0,0 +1,10 @@
+#ifndef __DELL_LED_H__
+#define __DELL_LED_H__
+
+enum {
+	DELL_LED_MICMUTE,
+};
+
+int dell_app_wmi_led_set(int whichled, int on);
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 2ce112f1e788a695630e2c275f47d57a801673d3 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 5 Apr 2014 20:16:34 -0700
Subject: leds: pca9685: Remove leds-pca9685 driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver is replaced by pwm-pca9685 driver and there is no user uses this
driver in current tree. So remove it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Acked-by: Maximilian Güntner <maximilian.guentner@gmail.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/Kconfig                       |  10 --
 drivers/leds/Makefile                      |   1 -
 drivers/leds/leds-pca9685.c                | 213 -----------------------------
 include/linux/platform_data/leds-pca9685.h |  35 -----
 4 files changed, 259 deletions(-)
 delete mode 100644 drivers/leds/leds-pca9685.c
 delete mode 100644 include/linux/platform_data/leds-pca9685.h

(limited to 'include')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 6de9dfbf61c1..6713dbb6bfda 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -300,16 +300,6 @@ config LEDS_PCA963X
 	  LED driver chip accessed via the I2C bus. Supported
 	  devices include PCA9633 and PCA9634
 
-config LEDS_PCA9685
-	tristate "LED support for PCA9685 I2C chip"
-	depends on LEDS_CLASS
-	depends on I2C
-	help
-	  This option enables support for LEDs connected to the PCA9685
-	  LED driver chip accessed via the I2C bus.
-	  The PCA9685 offers 12-bit PWM (4095 levels of brightness) on
-	  16 individual channels.
-
 config LEDS_WM831X_STATUS
 	tristate "LED support for status LEDs on WM831x PMICs"
 	depends on LEDS_CLASS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 3cd76dbd9be2..8979b0b2c85e 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -36,7 +36,6 @@ obj-$(CONFIG_LEDS_OT200)		+= leds-ot200.o
 obj-$(CONFIG_LEDS_FSG)			+= leds-fsg.o
 obj-$(CONFIG_LEDS_PCA955X)		+= leds-pca955x.o
 obj-$(CONFIG_LEDS_PCA963X)		+= leds-pca963x.o
-obj-$(CONFIG_LEDS_PCA9685)		+= leds-pca9685.o
 obj-$(CONFIG_LEDS_DA903X)		+= leds-da903x.o
 obj-$(CONFIG_LEDS_DA9052)		+= leds-da9052.o
 obj-$(CONFIG_LEDS_WM831X_STATUS)	+= leds-wm831x-status.o
diff --git a/drivers/leds/leds-pca9685.c b/drivers/leds/leds-pca9685.c
deleted file mode 100644
index 6e1ef3a9d6ef..000000000000
--- a/drivers/leds/leds-pca9685.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright 2013 Maximilian Güntner <maximilian.guentner@gmail.com>
- *
- * This file is subject to the terms and conditions of version 2 of
- * the GNU General Public License.  See the file COPYING in the main
- * directory of this archive for more details.
- *
- * Based on leds-pca963x.c driver by
- * Peter Meerwald <p.meerwald@bct-electronic.com>
- *
- * Driver for the NXP PCA9685 12-Bit PWM LED driver chip.
- *
- */
-
-#include <linux/ctype.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/i2c.h>
-#include <linux/leds.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/workqueue.h>
-
-#include <linux/platform_data/leds-pca9685.h>
-
-/* Register Addresses */
-#define PCA9685_MODE1 0x00
-#define PCA9685_MODE2 0x01
-#define PCA9685_LED0_ON_L 0x06
-#define PCA9685_ALL_LED_ON_L 0xFA
-
-/* MODE1 Register */
-#define PCA9685_ALLCALL 0x00
-#define PCA9685_SLEEP   0x04
-#define PCA9685_AI      0x05
-
-/* MODE2 Register */
-#define PCA9685_INVRT   0x04
-#define PCA9685_OUTDRV  0x02
-
-static const struct i2c_device_id pca9685_id[] = {
-	{ "pca9685", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, pca9685_id);
-
-struct pca9685_led {
-	struct i2c_client *client;
-	struct work_struct work;
-	u16 brightness;
-	struct led_classdev led_cdev;
-	int led_num; /* 0-15 */
-	char name[32];
-};
-
-static void pca9685_write_msg(struct i2c_client *client, u8 *buf, u8 len)
-{
-	struct i2c_msg msg = {
-		.addr = client->addr,
-		.flags = 0x00,
-		.len = len,
-		.buf = buf
-	};
-	i2c_transfer(client->adapter, &msg, 1);
-}
-
-static void pca9685_all_off(struct i2c_client *client)
-{
-	u8 i2c_buffer[5] = {PCA9685_ALL_LED_ON_L, 0x00, 0x00, 0x00, 0x10};
-	pca9685_write_msg(client, i2c_buffer, 5);
-}
-
-static void pca9685_led_work(struct work_struct *work)
-{
-	struct pca9685_led *pca9685;
-	u8 i2c_buffer[5];
-
-	pca9685 = container_of(work, struct pca9685_led, work);
-	i2c_buffer[0] = PCA9685_LED0_ON_L + 4 * pca9685->led_num;
-	/*
-	 * 4095 is the maximum brightness, so we set the ON time to 0x1000
-	 * which disables the PWM generator for that LED
-	 */
-	if (pca9685->brightness == 4095)
-		*((__le16 *)(i2c_buffer+1)) = cpu_to_le16(0x1000);
-	else
-		*((__le16 *)(i2c_buffer+1)) = 0x0000;
-
-	if (pca9685->brightness == 0)
-		*((__le16 *)(i2c_buffer+3)) = cpu_to_le16(0x1000);
-	else if (pca9685->brightness == 4095)
-		*((__le16 *)(i2c_buffer+3)) = 0x0000;
-	else
-		*((__le16 *)(i2c_buffer+3)) = cpu_to_le16(pca9685->brightness);
-
-	pca9685_write_msg(pca9685->client, i2c_buffer, 5);
-}
-
-static void pca9685_led_set(struct led_classdev *led_cdev,
-		enum led_brightness value)
-{
-	struct pca9685_led *pca9685;
-	pca9685 = container_of(led_cdev, struct pca9685_led, led_cdev);
-	pca9685->brightness = value;
-
-	schedule_work(&pca9685->work);
-}
-
-static int pca9685_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
-{
-	struct pca9685_led *pca9685;
-	struct pca9685_platform_data *pdata;
-	int err;
-	u8 i;
-
-	pdata = dev_get_platdata(&client->dev);
-	if (pdata) {
-		if (pdata->leds.num_leds < 1 || pdata->leds.num_leds > 15) {
-			dev_err(&client->dev, "board info must claim 1-16 LEDs");
-			return -EINVAL;
-		}
-	}
-
-	pca9685 = devm_kzalloc(&client->dev, 16 * sizeof(*pca9685), GFP_KERNEL);
-	if (!pca9685)
-		return -ENOMEM;
-
-	i2c_set_clientdata(client, pca9685);
-	pca9685_all_off(client);
-
-	for (i = 0; i < 16; i++) {
-		pca9685[i].client = client;
-		pca9685[i].led_num = i;
-		pca9685[i].name[0] = '\0';
-		if (pdata && i < pdata->leds.num_leds) {
-			if (pdata->leds.leds[i].name)
-				strncpy(pca9685[i].name,
-					pdata->leds.leds[i].name,
-					sizeof(pca9685[i].name)-1);
-			if (pdata->leds.leds[i].default_trigger)
-				pca9685[i].led_cdev.default_trigger =
-					pdata->leds.leds[i].default_trigger;
-		}
-		if (strlen(pca9685[i].name) == 0) {
-			/*
-			 * Write adapter and address to the name as well.
-			 * Otherwise multiple chips attached to one host would
-			 * not work.
-			 */
-			snprintf(pca9685[i].name, sizeof(pca9685[i].name),
-					"pca9685:%d:x%.2x:%d",
-					client->adapter->nr, client->addr, i);
-		}
-		pca9685[i].led_cdev.name = pca9685[i].name;
-		pca9685[i].led_cdev.max_brightness = 0xfff;
-		pca9685[i].led_cdev.brightness_set = pca9685_led_set;
-
-		INIT_WORK(&pca9685[i].work, pca9685_led_work);
-		err = led_classdev_register(&client->dev, &pca9685[i].led_cdev);
-		if (err < 0)
-			goto exit;
-	}
-
-	if (pdata)
-		i2c_smbus_write_byte_data(client, PCA9685_MODE2,
-			pdata->outdrv << PCA9685_OUTDRV |
-			pdata->inverted << PCA9685_INVRT);
-	else
-		i2c_smbus_write_byte_data(client, PCA9685_MODE2,
-			PCA9685_TOTEM_POLE << PCA9685_OUTDRV);
-	/* Enable Auto-Increment, enable oscillator, ALLCALL/SUBADDR disabled */
-	i2c_smbus_write_byte_data(client, PCA9685_MODE1, BIT(PCA9685_AI));
-
-	return 0;
-
-exit:
-	while (i--) {
-		led_classdev_unregister(&pca9685[i].led_cdev);
-		cancel_work_sync(&pca9685[i].work);
-	}
-	return err;
-}
-
-static int pca9685_remove(struct i2c_client *client)
-{
-	struct pca9685_led *pca9685 = i2c_get_clientdata(client);
-	u8 i;
-
-	for (i = 0; i < 16; i++) {
-		led_classdev_unregister(&pca9685[i].led_cdev);
-		cancel_work_sync(&pca9685[i].work);
-	}
-	pca9685_all_off(client);
-	return 0;
-}
-
-static struct i2c_driver pca9685_driver = {
-	.driver = {
-		.name = "leds-pca9685",
-		.owner = THIS_MODULE,
-	},
-	.probe = pca9685_probe,
-	.remove = pca9685_remove,
-	.id_table = pca9685_id,
-};
-
-module_i2c_driver(pca9685_driver);
-
-MODULE_AUTHOR("Maximilian Güntner <maximilian.guentner@gmail.com>");
-MODULE_DESCRIPTION("PCA9685 LED Driver");
-MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/leds-pca9685.h b/include/linux/platform_data/leds-pca9685.h
deleted file mode 100644
index 778e9e4249cc..000000000000
--- a/include/linux/platform_data/leds-pca9685.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2013 Maximilian Güntner <maximilian.guentner@gmail.com>
- *
- * This file is subject to the terms and conditions of version 2 of
- * the GNU General Public License.  See the file COPYING in the main
- * directory of this archive for more details.
- *
- * Based on leds-pca963x.h by Peter Meerwald <p.meerwald@bct-electronic.com>
- *
- * LED driver for the NXP PCA9685 PWM chip
- *
- */
-
-#ifndef __LINUX_PCA9685_H
-#define __LINUX_PCA9685_H
-
-#include <linux/leds.h>
-
-enum pca9685_outdrv {
-	PCA9685_OPEN_DRAIN,
-	PCA9685_TOTEM_POLE,
-};
-
-enum pca9685_inverted {
-	PCA9685_NOT_INVERTED,
-	PCA9685_INVERTED,
-};
-
-struct pca9685_platform_data {
-	struct led_platform_data leds;
-	enum pca9685_outdrv outdrv;
-	enum pca9685_inverted inverted;
-};
-
-#endif /* __LINUX_PCA9685_H */
-- 
cgit v1.2.3-71-gd317


From c3d620362dfe9218c7637354c7bce344ea771a31 Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Wed, 7 May 2014 19:07:05 +0300
Subject: cfg80211: fix docbook warning

When trying to generate documentation, at least xmldocs, we get the
following warning:

Warning(include/net/cfg80211.h:461): No description found for parameter 'nl80211_iftype'

Fix it by adding the iftype argument name to the
cfg80211_chandef_dfs_required() function declaration.

Reported-and-tested-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0631230b01eb..28f6f1a5b445 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -458,7 +458,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
  */
 int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
 				  const struct cfg80211_chan_def *chandef,
-				  enum nl80211_iftype);
+				  enum nl80211_iftype iftype);
 
 /**
  * ieee80211_chandef_rate_flags - returns rate flags for a channel
-- 
cgit v1.2.3-71-gd317


From 5ae76a94150c86a6e0ee84eb74e7f7e1909b8d39 Mon Sep 17 00:00:00 2001
From: Andrzej Kaczmarek <andrzej.kaczmarek@tieto.com>
Date: Thu, 8 May 2014 15:32:08 +0200
Subject: Bluetooth: Store RSSI for connection

This patch adds support to store RSSI for connection when reply for
HCI_Read_RSSI is received.

Signed-off-by: Andrzej Kaczmarek <andrzej.kaczmarek@tieto.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 10 ++++++++++
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_event.c        | 23 +++++++++++++++++++++++
 3 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 4261a67682c0..ad2ecc92380d 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1064,6 +1064,16 @@ struct hci_rp_read_page_scan_type {
 	#define PAGE_SCAN_TYPE_STANDARD		0x00
 	#define PAGE_SCAN_TYPE_INTERLACED	0x01
 
+#define HCI_OP_READ_RSSI		0x1405
+struct hci_cp_read_rssi {
+	__le16   handle;
+} __packed;
+struct hci_rp_read_rssi {
+	__u8     status;
+	__le16   handle;
+	__s8     rssi;
+} __packed;
+
 #define HCI_OP_READ_LOCAL_AMP_INFO	0x1409
 struct hci_rp_read_local_amp_info {
 	__u8     status;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index d73f41855ada..0318d5263837 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -374,6 +374,7 @@ struct hci_conn {
 	__u16		setting;
 	__u16		le_conn_min_interval;
 	__u16		le_conn_max_interval;
+	__s8		rssi;
 	unsigned long	flags;
 
 	__u8		remote_cap;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 07c37d0cecb2..2bb0053d4c45 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1245,6 +1245,25 @@ static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev,
 	amp_write_rem_assoc_continue(hdev, rp->phy_handle);
 }
 
+static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_rp_read_rssi *rp = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+	if (conn)
+		conn->rssi = rp->rssi;
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
 {
 	BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -2637,6 +2656,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_cc_write_remote_amp_assoc(hdev, skb);
 		break;
 
+	case HCI_OP_READ_RSSI:
+		hci_cc_read_rssi(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
 		break;
-- 
cgit v1.2.3-71-gd317


From 5409e46f1bcf960c651f3fff35f2f25e539655cf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 7 May 2014 13:49:44 +0200
Subject: nfsd: clean up fh_auth usage

Use fh_fsid when reffering to the fsid part of the filehandle.  The
variable length auth field envisioned in nfsfh wasn't ever implemented.
Also clean up some lose ends around this and document the file handle
format better.

Btw, why do we even export nfsfh.h to userspace?  The file handle very
much is kernel private, and nothing in nfs-utils include the header
either.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfsfh.c                 | 20 +++++++++-----------
 include/uapi/linux/nfsd/nfsfh.h | 32 +++++++-------------------------
 2 files changed, 16 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 3c37b160dcad..a337106d6875 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -169,8 +169,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
 		data_left -= len;
 		if (data_left < 0)
 			return error;
-		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
-		fid = (struct fid *)(fh->fh_auth + len);
+		exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
+		fid = (struct fid *)(fh->fh_fsid + len);
 	} else {
 		__u32 tfh[2];
 		dev_t xdev;
@@ -385,7 +385,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
 {
 	if (dentry != exp->ex_path.dentry) {
 		struct fid *fid = (struct fid *)
-			(fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1);
+			(fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1);
 		int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
 		int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK);
 
@@ -513,7 +513,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 	 */
 
 	struct inode * inode = dentry->d_inode;
-	__u32 *datap;
 	dev_t ex_dev = exp_sb(exp)->s_dev;
 
 	dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
@@ -557,17 +556,16 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
 		if (inode)
 			_fh_update_old(dentry, exp, &fhp->fh_handle);
 	} else {
-		int len;
+		fhp->fh_handle.fh_size =
+			key_len(fhp->fh_handle.fh_fsid_type) + 4;
 		fhp->fh_handle.fh_auth_type = 0;
-		datap = fhp->fh_handle.fh_auth+0;
-		mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev,
+
+		mk_fsid(fhp->fh_handle.fh_fsid_type,
+			fhp->fh_handle.fh_fsid,
+			ex_dev,
 			exp->ex_path.dentry->d_inode->i_ino,
 			exp->ex_fsid, exp->ex_uuid);
 
-		len = key_len(fhp->fh_handle.fh_fsid_type);
-		datap += len/4;
-		fhp->fh_handle.fh_size = 4 + len;
-
 		if (inode)
 			_fh_update(fhp, exp, dentry);
 		if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h
index 616e3b396476..20391235d088 100644
--- a/include/uapi/linux/nfsd/nfsfh.h
+++ b/include/uapi/linux/nfsd/nfsfh.h
@@ -1,13 +1,7 @@
 /*
- * include/linux/nfsd/nfsfh.h
- *
  * This file describes the layout of the file handles as passed
  * over the wire.
  *
- * Earlier versions of knfsd used to sign file handles using keyed MD5
- * or SHA. I've removed this code, because it doesn't give you more
- * security than blocking external access to port 2049 on your firewall.
- *
  * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
  */
 
@@ -37,7 +31,7 @@ struct nfs_fhbase_old {
 };
 
 /*
- * This is the new flexible, extensible style NFSv2/v3 file handle.
+ * This is the new flexible, extensible style NFSv2/v3/v4 file handle.
  * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
  *
  * The file handle starts with a sequence of four-byte words.
@@ -47,14 +41,7 @@ struct nfs_fhbase_old {
  *
  * All four-byte values are in host-byte-order.
  *
- * The auth_type field specifies how the filehandle can be authenticated
- * This might allow a file to be confirmed to be in a writable part of a
- * filetree without checking the path from it up to the root.
- * Current values:
- *     0  - No authentication.  fb_auth is 0 bytes long
- * Possible future values:
- *     1  - 4 bytes taken from MD5 hash of the remainer of the file handle
- *          prefixed by a secret and with the important export flags.
+ * The auth_type field is deprecated and must be set to 0.
  *
  * The fsid_type identifies how the filesystem (or export point) is
  *    encoded.
@@ -71,14 +58,9 @@ struct nfs_fhbase_old {
  *     7  - 8 byte inode number and 16 byte uuid
  *
  * The fileid_type identified how the file within the filesystem is encoded.
- * This is (will be) passed to, and set by, the underlying filesystem if it supports
- * filehandle operations.  The filesystem must not use the value '0' or '0xff' and may
- * only use the values 1 and 2 as defined below:
- *  Current values:
- *    0   - The root, or export point, of the filesystem.  fb_fileid is 0 bytes.
- *    1   - 32bit inode number, 32 bit generation number.
- *    2   - 32bit inode number, 32 bit generation number, 32 bit parent directory inode number.
- *
+ *   The values for this field are filesystem specific, exccept that
+ *   filesystems must not use the values '0' or '0xff'. 'See enum fid_type'
+ *   in include/linux/exportfs.h for currently registered values.
  */
 struct nfs_fhbase_new {
 	__u8		fb_version;	/* == 1, even => nfs_fhbase_old */
@@ -114,9 +96,9 @@ struct knfsd_fh {
 #define	fh_fsid_type		fh_base.fh_new.fb_fsid_type
 #define	fh_auth_type		fh_base.fh_new.fb_auth_type
 #define	fh_fileid_type		fh_base.fh_new.fb_fileid_type
-#define	fh_auth			fh_base.fh_new.fb_auth
 #define	fh_fsid			fh_base.fh_new.fb_auth
 
-
+/* Do not use, provided for userspace compatiblity. */
+#define	fh_auth			fh_base.fh_new.fb_auth
 
 #endif /* _UAPI_LINUX_NFSD_FH_H */
-- 
cgit v1.2.3-71-gd317


From f6837ba8c98afcf28ec25f6863a8597274aeefd6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 30 Apr 2014 14:19:04 +0200
Subject: mac80211: handle failed restart/resume better

When the driver fails during HW restart or resume, the whole
stack goes into a very confused state with interfaces being
up while the hardware is down etc.

Address this by shutting down everything; we'll run into a
lot of warnings in the process but that's better than having
the whole stack get messed up.

Reviewed-by: Arik Nemtsov <arik@wizery.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h     |  14 ++++++
 net/mac80211/driver-ops.h  | 108 +++++++++++++++++++++++++++++----------------
 net/mac80211/ieee80211_i.h |   1 +
 net/mac80211/scan.c        |  15 ++++---
 net/mac80211/util.c        |  46 +++++++++++++++++--
 net/wireless/core.c        |  20 ++++++---
 6 files changed, 153 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 28f6f1a5b445..5c7169b0ac57 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4771,6 +4771,20 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
 void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev,
 			 gfp_t gfp);
 
+/**
+ * cfg80211_shutdown_all_interfaces - shut down all interfaces for a wiphy
+ * @wiphy: the wiphy to shut down
+ *
+ * This function shuts down all interfaces belonging to this wiphy by
+ * calling dev_close() (and treating non-netdev interfaces as needed).
+ * It shouldn't really be used unless there are some fatal device errors
+ * that really can't be recovered in any other way.
+ *
+ * Callers must hold the RTNL and be able to deal with callbacks into
+ * the driver while the function is running.
+ */
+void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 5331582a2c81..df1d50291344 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -5,11 +5,11 @@
 #include "ieee80211_i.h"
 #include "trace.h"
 
-static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
+static inline bool check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
 {
-	WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
-	     "%s:  Failed check-sdata-in-driver check, flags: 0x%x\n",
-	     sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
+	return !WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
+		     "%s:  Failed check-sdata-in-driver check, flags: 0x%x\n",
+		     sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
 }
 
 static inline struct ieee80211_sub_if_data *
@@ -168,7 +168,8 @@ static inline int drv_change_interface(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_change_interface(local, sdata, type, p2p);
 	ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
@@ -181,7 +182,8 @@ static inline void drv_remove_interface(struct ieee80211_local *local,
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_remove_interface(local, sdata);
 	local->ops->remove_interface(&local->hw, &sdata->vif);
@@ -219,7 +221,8 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
 			 sdata->vif.type == NL80211_IFTYPE_MONITOR))
 		return;
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_bss_info_changed(local, sdata, info, changed);
 	if (local->ops->bss_info_changed)
@@ -278,7 +281,8 @@ static inline int drv_set_key(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_set_key(local, cmd, sdata, sta, key);
 	ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key);
@@ -298,7 +302,8 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local,
 		ista = &sta->sta;
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_update_tkip_key(local, sdata, conf, ista, iv32);
 	if (local->ops->update_tkip_key)
@@ -315,7 +320,8 @@ static inline int drv_hw_scan(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_hw_scan(local, sdata);
 	ret = local->ops->hw_scan(&local->hw, &sdata->vif, req);
@@ -328,7 +334,8 @@ static inline void drv_cancel_hw_scan(struct ieee80211_local *local,
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_cancel_hw_scan(local, sdata);
 	local->ops->cancel_hw_scan(&local->hw, &sdata->vif);
@@ -345,7 +352,8 @@ drv_sched_scan_start(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sched_scan_start(local, sdata);
 	ret = local->ops->sched_scan_start(&local->hw, &sdata->vif,
@@ -361,7 +369,8 @@ static inline int drv_sched_scan_stop(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sched_scan_stop(local, sdata);
 	ret = local->ops->sched_scan_stop(&local->hw, &sdata->vif);
@@ -462,7 +471,8 @@ static inline void drv_sta_notify(struct ieee80211_local *local,
 				  struct ieee80211_sta *sta)
 {
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_sta_notify(local, sdata, cmd, sta);
 	if (local->ops->sta_notify)
@@ -479,7 +489,8 @@ static inline int drv_sta_add(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sta_add(local, sdata, sta);
 	if (local->ops->sta_add)
@@ -497,7 +508,8 @@ static inline void drv_sta_remove(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_sta_remove(local, sdata, sta);
 	if (local->ops->sta_remove)
@@ -515,7 +527,8 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	if (local->ops->sta_add_debugfs)
 		local->ops->sta_add_debugfs(&local->hw, &sdata->vif,
@@ -545,7 +558,8 @@ static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_sta_pre_rcu_remove(local, sdata, &sta->sta);
 	if (local->ops->sta_pre_rcu_remove)
@@ -566,7 +580,8 @@ int drv_sta_state(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_sta_state(local, sdata, &sta->sta, old_state, new_state);
 	if (local->ops->sta_state) {
@@ -590,7 +605,8 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local,
 				     struct ieee80211_sta *sta, u32 changed)
 {
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED &&
 		(sdata->vif.type != NL80211_IFTYPE_ADHOC &&
@@ -612,7 +628,8 @@ static inline int drv_conf_tx(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_conf_tx(local, sdata, ac, params);
 	if (local->ops->conf_tx)
@@ -629,7 +646,8 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return ret;
 
 	trace_drv_get_tsf(local, sdata);
 	if (local->ops->get_tsf)
@@ -644,7 +662,8 @@ static inline void drv_set_tsf(struct ieee80211_local *local,
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_set_tsf(local, sdata, tsf);
 	if (local->ops->set_tsf)
@@ -657,7 +676,8 @@ static inline void drv_reset_tsf(struct ieee80211_local *local,
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_reset_tsf(local, sdata);
 	if (local->ops->reset_tsf)
@@ -689,7 +709,8 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
 	might_sleep();
 
 	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size);
 
@@ -733,8 +754,8 @@ static inline void drv_flush(struct ieee80211_local *local,
 
 	might_sleep();
 
-	if (sdata)
-		check_sdata_in_driver(sdata);
+	if (sdata && !check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_flush(local, queues, drop);
 	if (local->ops->flush)
@@ -854,7 +875,8 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local,
 
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_set_bitrate_mask(local, sdata, mask);
 	if (local->ops->set_bitrate_mask)
@@ -869,7 +891,8 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local,
 				      struct ieee80211_sub_if_data *sdata,
 				      struct cfg80211_gtk_rekey_data *data)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_set_rekey_data(local, sdata, data);
 	if (local->ops->set_rekey_data)
@@ -937,7 +960,8 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
 {
 	might_sleep();
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 	WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
 
 	trace_drv_mgd_prepare_tx(local, sdata);
@@ -964,6 +988,9 @@ static inline int drv_add_chanctx(struct ieee80211_local *local,
 static inline void drv_remove_chanctx(struct ieee80211_local *local,
 				      struct ieee80211_chanctx *ctx)
 {
+	if (WARN_ON(!ctx->driver_present))
+		return;
+
 	trace_drv_remove_chanctx(local, ctx);
 	if (local->ops->remove_chanctx)
 		local->ops->remove_chanctx(&local->hw, &ctx->conf);
@@ -989,7 +1016,8 @@ static inline int drv_assign_vif_chanctx(struct ieee80211_local *local,
 {
 	int ret = 0;
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_assign_vif_chanctx(local, sdata, ctx);
 	if (local->ops->assign_vif_chanctx) {
@@ -1007,7 +1035,8 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
 					    struct ieee80211_sub_if_data *sdata,
 					    struct ieee80211_chanctx *ctx)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_unassign_vif_chanctx(local, sdata, ctx);
 	if (local->ops->unassign_vif_chanctx) {
@@ -1024,7 +1053,8 @@ static inline int drv_start_ap(struct ieee80211_local *local,
 {
 	int ret = 0;
 
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_start_ap(local, sdata, &sdata->vif.bss_conf);
 	if (local->ops->start_ap)
@@ -1036,7 +1066,8 @@ static inline int drv_start_ap(struct ieee80211_local *local,
 static inline void drv_stop_ap(struct ieee80211_local *local,
 			       struct ieee80211_sub_if_data *sdata)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_stop_ap(local, sdata);
 	if (local->ops->stop_ap)
@@ -1059,7 +1090,8 @@ drv_set_default_unicast_key(struct ieee80211_local *local,
 			    struct ieee80211_sub_if_data *sdata,
 			    int key_idx)
 {
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	WARN_ON_ONCE(key_idx < -1 || key_idx > 3);
 
@@ -1101,7 +1133,8 @@ static inline int drv_join_ibss(struct ieee80211_local *local,
 	int ret = 0;
 
 	might_sleep();
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return -EIO;
 
 	trace_drv_join_ibss(local, sdata, &sdata->vif.bss_conf);
 	if (local->ops->join_ibss)
@@ -1114,7 +1147,8 @@ static inline void drv_leave_ibss(struct ieee80211_local *local,
 				  struct ieee80211_sub_if_data *sdata)
 {
 	might_sleep();
-	check_sdata_in_driver(sdata);
+	if (!check_sdata_in_driver(sdata))
+		return;
 
 	trace_drv_leave_ibss(local, sdata);
 	if (local->ops->leave_ibss)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f4ba0c4a4314..4668ce9a1d3f 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1459,6 +1459,7 @@ __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 				       struct cfg80211_sched_scan_request *req);
 int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata);
+void ieee80211_sched_scan_end(struct ieee80211_local *local);
 void ieee80211_sched_scan_stopped_work(struct work_struct *work);
 
 /* off-channel helpers */
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 28185c8dc19a..f40661eb75b5 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -1076,12 +1076,8 @@ void ieee80211_sched_scan_results(struct ieee80211_hw *hw)
 }
 EXPORT_SYMBOL(ieee80211_sched_scan_results);
 
-void ieee80211_sched_scan_stopped_work(struct work_struct *work)
+void ieee80211_sched_scan_end(struct ieee80211_local *local)
 {
-	struct ieee80211_local *local =
-		container_of(work, struct ieee80211_local,
-			     sched_scan_stopped_work);
-
 	mutex_lock(&local->mtx);
 
 	if (!rcu_access_pointer(local->sched_scan_sdata)) {
@@ -1099,6 +1095,15 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work)
 	cfg80211_sched_scan_stopped(local->hw.wiphy);
 }
 
+void ieee80211_sched_scan_stopped_work(struct work_struct *work)
+{
+	struct ieee80211_local *local =
+		container_of(work, struct ieee80211_local,
+			     sched_scan_stopped_work);
+
+	ieee80211_sched_scan_end(local);
+}
+
 void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ad058759e85e..7e0dd4be8097 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1457,6 +1457,44 @@ void ieee80211_stop_device(struct ieee80211_local *local)
 	drv_stop(local);
 }
 
+static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_chanctx *ctx;
+
+	/*
+	 * We get here if during resume the device can't be restarted properly.
+	 * We might also get here if this happens during HW reset, which is a
+	 * slightly different situation and we need to drop all connections in
+	 * the latter case.
+	 *
+	 * Ask cfg80211 to turn off all interfaces, this will result in more
+	 * warnings but at least we'll then get into a clean stopped state.
+	 */
+
+	local->resuming = false;
+	local->suspended = false;
+	local->started = false;
+
+	/* scheduled scan clearly can't be running any more, but tell
+	 * cfg80211 and clear local state
+	 */
+	ieee80211_sched_scan_end(local);
+
+	list_for_each_entry(sdata, &local->interfaces, list)
+		sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
+
+	/* Mark channel contexts as not being in the driver any more to avoid
+	 * removing them from the driver during the shutdown process...
+	 */
+	mutex_lock(&local->chanctx_mtx);
+	list_for_each_entry(ctx, &local->chanctx_list, list)
+		ctx->driver_present = false;
+	mutex_unlock(&local->chanctx_mtx);
+
+	cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+}
+
 static void ieee80211_assign_chanctx(struct ieee80211_local *local,
 				     struct ieee80211_sub_if_data *sdata)
 {
@@ -1520,9 +1558,11 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	 */
 	res = drv_start(local);
 	if (res) {
-		WARN(local->suspended, "Hardware became unavailable "
-		     "upon resume. This could be a software issue "
-		     "prior to suspend or a hardware issue.\n");
+		if (local->suspended)
+			WARN(1, "Hardware became unavailable upon resume. This could be a software issue prior to suspend or a hardware issue.\n");
+		else
+			WARN(1, "Hardware became unavailable during restart.\n");
+		ieee80211_handle_reconfig_failure(local);
 		return res;
 	}
 
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7e023b74f009..39788711ce6e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -210,15 +210,12 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
 	}
 }
 
-static int cfg80211_rfkill_set_block(void *data, bool blocked)
+void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy)
 {
-	struct cfg80211_registered_device *rdev = data;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct wireless_dev *wdev;
 
-	if (!blocked)
-		return 0;
-
-	rtnl_lock();
+	ASSERT_RTNL();
 
 	list_for_each_entry(wdev, &rdev->wdev_list, list) {
 		if (wdev->netdev) {
@@ -234,7 +231,18 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked)
 			break;
 		}
 	}
+}
+EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces);
 
+static int cfg80211_rfkill_set_block(void *data, bool blocked)
+{
+	struct cfg80211_registered_device *rdev = data;
+
+	if (!blocked)
+		return 0;
+
+	rtnl_lock();
+	cfg80211_shutdown_all_interfaces(&rdev->wiphy);
 	rtnl_unlock();
 
 	return 0;
-- 
cgit v1.2.3-71-gd317


From 2070d50e1cbe3d7f157cbf8e63279c893f375d7f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 9 May 2014 15:11:53 -0400
Subject: percpu-refcount: rename percpu_ref_tryget() to
 percpu_ref_tryget_live()

percpu_ref_tryget() is different from the usual tryget semantics in
that it fails if the refcnt is in its dying stage even if the refcnt
hasn't reached zero yet.  We're about to introduce the more
conventional tryget and the current one has only one user.  Let's
rename it to percpu_ref_tryget_live() so that it explicitly signifies
the peculiarities of its semantics.

This is pure rename.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Kent Overstreet <kmo@daterainc.com>
---
 include/linux/cgroup.h          | 2 +-
 include/linux/percpu-refcount.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c2515851c1aa..549aed8de32b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -101,7 +101,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
 {
 	if (css->flags & CSS_ROOT)
 		return true;
-	return percpu_ref_tryget(&css->refcnt);
+	return percpu_ref_tryget_live(&css->refcnt);
 }
 
 /**
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 95961f0bf62d..e22d15597cc3 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -118,7 +118,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
 }
 
 /**
- * percpu_ref_tryget - try to increment a percpu refcount
+ * percpu_ref_tryget_live - try to increment a live percpu refcount
  * @ref: percpu_ref to try-get
  *
  * Increment a percpu refcount unless it has already been killed.  Returns
@@ -129,7 +129,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
  * used.  After the confirm_kill callback is invoked, it's guaranteed that
  * no new reference will be given out by percpu_ref_tryget().
  */
-static inline bool percpu_ref_tryget(struct percpu_ref *ref)
+static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
 	unsigned __percpu *pcpu_count;
 	int ret = false;
-- 
cgit v1.2.3-71-gd317


From 4fb6e25049cb6fa0accc7f1b7c192b952fad7ac8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 9 May 2014 15:11:53 -0400
Subject: percpu-refcount: implement percpu_ref_tryget()

Implement percpu_ref_tryget() which fails if the refcnt already
reached zero.  Note that this is different from the recently renamed
percpu_ref_tryget_live() which fails if the refcnt has been killed and
is draining the remaining references.  percpu_ref_tryget() succeeds on
a killed refcnt as long as its current refcnt is above zero.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Kent Overstreet <kmo@daterainc.com>
---
 include/linux/percpu-refcount.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index e22d15597cc3..dba35c411e8c 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -117,6 +117,36 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
 	rcu_read_unlock_sched();
 }
 
+/**
+ * percpu_ref_tryget - try to increment a percpu refcount
+ * @ref: percpu_ref to try-get
+ *
+ * Increment a percpu refcount unless its count already reached zero.
+ * Returns %true on success; %false on failure.
+ *
+ * The caller is responsible for ensuring that @ref stays accessible.
+ */
+static inline bool percpu_ref_tryget(struct percpu_ref *ref)
+{
+	unsigned __percpu *pcpu_count;
+	int ret = false;
+
+	rcu_read_lock_sched();
+
+	pcpu_count = ACCESS_ONCE(ref->pcpu_count);
+
+	if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) {
+		__this_cpu_inc(*pcpu_count);
+		ret = true;
+	} else {
+		ret = atomic_inc_not_zero(&ref->count);
+	}
+
+	rcu_read_unlock_sched();
+
+	return ret;
+}
+
 /**
  * percpu_ref_tryget_live - try to increment a live percpu refcount
  * @ref: percpu_ref to try-get
@@ -128,6 +158,8 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
  * will fail.  For such guarantee, percpu_ref_kill_and_confirm() should be
  * used.  After the confirm_kill callback is invoked, it's guaranteed that
  * no new reference will be given out by percpu_ref_tryget().
+ *
+ * The caller is responsible for ensuring that @ref stays accessible.
  */
 static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
-- 
cgit v1.2.3-71-gd317


From 49c50b97b5522a987b80fbbf9d9869deee8d23b0 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 6 May 2014 16:02:19 -0700
Subject: mtd: nand: refactor erase_cmd() to return chip status

The nand_chip::erase_cmd callback previously served a dual purpose; for
one, it allowed a per-flash-chip override, so that AG-AND devices could
use a different erase command than other NAND. These AND devices were
dropped in commit 14c6578683367b1e7af0c3c09e872b45a45183a7 (mtd: nand:
remove AG-AND support). On the other hand, some drivers (denali and
doc-g4) need to use this sort of callback to implement
controller-specific erase operations.

To make the latter operation easier for some drivers (e.g., ST's new BCH
NAND driver), it helps if the command dispatch and wait functions can be
lumped together, rather than called separately.

This patch does two things:
 1. Pull the call to chip->waitfunc() into chip->erase_cmd(), and return
    the status from this callback
 2. Rename erase_cmd() to just erase(), since this callback does a
    little more than just send a command

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Tested-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mtd/nand/denali.c    |  7 +++----
 drivers/mtd/nand/docg4.c     |  6 ++++--
 drivers/mtd/nand/nand_base.c | 14 +++++++-------
 include/linux/mtd/nand.h     |  5 ++---
 4 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index c07cd573ad3a..9f2012a3e764 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1233,7 +1233,7 @@ static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
 	return status;
 }
 
-static void denali_erase(struct mtd_info *mtd, int page)
+static int denali_erase(struct mtd_info *mtd, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
@@ -1250,8 +1250,7 @@ static void denali_erase(struct mtd_info *mtd, int page)
 	irq_status = wait_for_irq(denali, INTR_STATUS__ERASE_COMP |
 					INTR_STATUS__ERASE_FAIL);
 
-	denali->status = (irq_status & INTR_STATUS__ERASE_FAIL) ?
-						NAND_STATUS_FAIL : PASS;
+	return (irq_status & INTR_STATUS__ERASE_FAIL) ? NAND_STATUS_FAIL : PASS;
 }
 
 static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
@@ -1584,7 +1583,7 @@ int denali_init(struct denali_nand_info *denali)
 	denali->nand.ecc.write_page_raw = denali_write_page_raw;
 	denali->nand.ecc.read_oob = denali_read_oob;
 	denali->nand.ecc.write_oob = denali_write_oob;
-	denali->nand.erase_cmd = denali_erase;
+	denali->nand.erase = denali_erase;
 
 	if (nand_scan_tail(&denali->mtd)) {
 		ret = -ENXIO;
diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index 1b0265e85a06..ce24637e14f1 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c
@@ -872,7 +872,7 @@ static int docg4_read_oob(struct mtd_info *mtd, struct nand_chip *nand,
 	return 0;
 }
 
-static void docg4_erase_block(struct mtd_info *mtd, int page)
+static int docg4_erase_block(struct mtd_info *mtd, int page)
 {
 	struct nand_chip *nand = mtd->priv;
 	struct docg4_priv *doc = nand->priv;
@@ -916,6 +916,8 @@ static void docg4_erase_block(struct mtd_info *mtd, int page)
 	write_nop(docptr);
 	poll_status(doc);
 	write_nop(docptr);
+
+	return nand->waitfunc(mtd, nand);
 }
 
 static int write_page(struct mtd_info *mtd, struct nand_chip *nand,
@@ -1236,7 +1238,7 @@ static void __init init_mtd_structs(struct mtd_info *mtd)
 	nand->block_markbad = docg4_block_markbad;
 	nand->read_buf = docg4_read_buf;
 	nand->write_buf = docg4_write_buf16;
-	nand->erase_cmd = docg4_erase_block;
+	nand->erase = docg4_erase_block;
 	nand->ecc.read_page = docg4_read_page;
 	nand->ecc.write_page = docg4_write_page;
 	nand->ecc.read_page_raw = docg4_read_page_raw;
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index f6c5685b79a6..7853b9b0a05e 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2617,18 +2617,20 @@ out:
 }
 
 /**
- * single_erase_cmd - [GENERIC] NAND standard block erase command function
+ * single_erase - [GENERIC] NAND standard block erase command function
  * @mtd: MTD device structure
  * @page: the page address of the block which will be erased
  *
- * Standard erase command for NAND chips.
+ * Standard erase command for NAND chips. Returns NAND status.
  */
-static void single_erase_cmd(struct mtd_info *mtd, int page)
+static int single_erase(struct mtd_info *mtd, int page)
 {
 	struct nand_chip *chip = mtd->priv;
 	/* Send commands to erase a block */
 	chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page);
 	chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1);
+
+	return chip->waitfunc(mtd, chip);
 }
 
 /**
@@ -2709,9 +2711,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		    (page + pages_per_block))
 			chip->pagebuf = -1;
 
-		chip->erase_cmd(mtd, page & chip->pagemask);
-
-		status = chip->waitfunc(mtd, chip);
+		status = chip->erase(mtd, page & chip->pagemask);
 
 		/*
 		 * See if operation failed and additional status checks are
@@ -3684,7 +3684,7 @@ ident_done:
 	}
 
 	chip->badblockbits = 8;
-	chip->erase_cmd = single_erase_cmd;
+	chip->erase = single_erase;
 
 	/* Do not replace user supplied command function! */
 	if (mtd->writesize > 512 && chip->cmdfunc == nand_command)
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 450d61ec7f06..7a922e6c4e4b 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -552,8 +552,7 @@ struct nand_buffers {
  * @ecc:		[BOARDSPECIFIC] ECC control structure
  * @buffers:		buffer structure for read/write
  * @hwcontrol:		platform-specific hardware control structure
- * @erase_cmd:		[INTERN] erase command write function, selectable due
- *			to AND support.
+ * @erase:		[REPLACEABLE] erase function
  * @scan_bbt:		[REPLACEABLE] function to scan bad block table
  * @chip_delay:		[BOARDSPECIFIC] chip dependent delay for transferring
  *			data from array to read regs (tR).
@@ -637,7 +636,7 @@ struct nand_chip {
 	void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column,
 			int page_addr);
 	int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this);
-	void (*erase_cmd)(struct mtd_info *mtd, int page);
+	int (*erase)(struct mtd_info *mtd, int page);
 	int (*scan_bbt)(struct mtd_info *mtd);
 	int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state,
 			int status, int page);
-- 
cgit v1.2.3-71-gd317


From 5a134faeef82b46ff4ad244d11d8c6be41679834 Mon Sep 17 00:00:00 2001
From: Andrzej Kaczmarek <andrzej.kaczmarek@tieto.com>
Date: Fri, 9 May 2014 21:35:28 +0200
Subject: Bluetooth: Store TX power level for connection

This patch adds support to store local TX power level for connection
when reply for HCI_Read_Transmit_Power_Level is received.

Signed-off-by: Andrzej Kaczmarek <andrzej.kaczmarek@tieto.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 11 +++++++++++
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_conn.c         |  1 +
 net/bluetooth/hci_event.c        | 28 ++++++++++++++++++++++++++++
 4 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ad2ecc92380d..16587dcd6a91 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1054,6 +1054,17 @@ struct hci_cp_write_page_scan_activity {
 	__le16   window;
 } __packed;
 
+#define HCI_OP_READ_TX_POWER		0x0c2d
+struct hci_cp_read_tx_power {
+	__le16   handle;
+	__u8     type;
+} __packed;
+struct hci_rp_read_tx_power {
+	__u8     status;
+	__le16   handle;
+	__s8     tx_power;
+} __packed;
+
 #define HCI_OP_READ_PAGE_SCAN_TYPE	0x0c46
 struct hci_rp_read_page_scan_type {
 	__u8     status;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 0318d5263837..211bad6a3366 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -375,6 +375,7 @@ struct hci_conn {
 	__u16		le_conn_min_interval;
 	__u16		le_conn_max_interval;
 	__s8		rssi;
+	__s8		tx_power;
 	unsigned long	flags;
 
 	__u8		remote_cap;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 55a174317925..74b368bfe102 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -407,6 +407,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 	conn->io_capability = hdev->io_capability;
 	conn->remote_auth = 0xff;
 	conn->key_type = 0xff;
+	conn->tx_power = HCI_TX_POWER_INVALID;
 
 	set_bit(HCI_CONN_POWER_SAVE, &conn->flags);
 	conn->disc_timeout = HCI_DISCONN_TIMEOUT;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 2bb0053d4c45..fa614e3430a7 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1264,6 +1264,30 @@ static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cc_read_tx_power(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_cp_read_tx_power *sent;
+	struct hci_rp_read_tx_power *rp = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	sent = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER);
+	if (!sent)
+		return;
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
+	if (conn && sent->type == 0x00)
+		conn->tx_power = rp->tx_power;
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
 {
 	BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -2660,6 +2684,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_cc_read_rssi(hdev, skb);
 		break;
 
+	case HCI_OP_READ_TX_POWER:
+		hci_cc_read_tx_power(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
 		break;
-- 
cgit v1.2.3-71-gd317


From f2727f7eb9132803d06309839a95de3dad82d237 Mon Sep 17 00:00:00 2001
From: Dimitri John Ledkov <dimitri.ledkov@canonical.com>
Date: Wed, 7 May 2014 20:55:30 +0100
Subject: NVMe: Update namespace and controller identify structures to the 1.1a
 spec

Controller: add CNTLID, AVSCC, APSTA, NVSCC, ACWU, SGLS fields.

Namespace: add NMIC, RESCAP, EUI64 fields. EUI64 is specifically
interesting, since it can be used to construct an UEFI NVMe device
path for a boot entry.

As per NVM Express 1.1a spec:
http://www.nvmexpress.org/wp-content/uploads/NVM-Express-1_1a.pdf

Signed-off-by: Dimitri John Ledkov <dimitri.ledkov@canonical.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/uapi/linux/nvme.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index f090336d5bad..e74da782d69c 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -45,7 +45,8 @@ struct nvme_id_ctrl {
 	__u8			ieee[3];
 	__u8			mic;
 	__u8			mdts;
-	__u8			rsvd78[178];
+	__u16			cntlid;
+	__u8			rsvd80[176];
 	__le16			oacs;
 	__u8			acl;
 	__u8			aerl;
@@ -53,7 +54,9 @@ struct nvme_id_ctrl {
 	__u8			lpa;
 	__u8			elpe;
 	__u8			npss;
-	__u8			rsvd264[248];
+	__u8			avscc;
+	__u8			apsta;
+	__u8			rsvd266[246];
 	__u8			sqes;
 	__u8			cqes;
 	__u8			rsvd514[2];
@@ -64,7 +67,12 @@ struct nvme_id_ctrl {
 	__u8			vwc;
 	__le16			awun;
 	__le16			awupf;
-	__u8			rsvd530[1518];
+	__u8			nvscc;
+	__u8			rsvd531;
+	__le16			acwu;
+	__u8			rsvd534[2];
+	__le32			sgls;
+	__u8			rsvd540[1508];
 	struct nvme_id_power_state	psd[32];
 	__u8			vs[1024];
 };
@@ -92,7 +100,10 @@ struct nvme_id_ns {
 	__u8			mc;
 	__u8			dpc;
 	__u8			dps;
-	__u8			rsvd30[98];
+	__u8			nmic;
+	__u8			rescap;
+	__u8			rsvd32[88];
+	__u8			eui64[8];
 	struct nvme_lbaf	lbaf[16];
 	__u8			rsvd192[192];
 	__u8			vs[3712];
-- 
cgit v1.2.3-71-gd317


From 9739eef13c926645fbf88bcb77e66442fa75d688 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 8 May 2014 14:10:51 -0700
Subject: net: filter: make BPF conversion more readable

Introduce BPF helper macros to define instructions
(similar to old BPF_STMT/BPF_JUMP macros)

Use them while converting classic BPF to internal
and in BPF testsuite later.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  51 ++++++++++++++++++
 net/core/filter.c      | 142 +++++++++++++++++--------------------------------
 2 files changed, 101 insertions(+), 92 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index ed1efab10b8f..4457b383961c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -79,6 +79,57 @@ enum {
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
+/* bpf_add|sub|...: a += x, bpf_mov: a = x */
+#define BPF_ALU64_REG(op, a, x) \
+	((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_X, a, x, 0, 0})
+#define BPF_ALU32_REG(op, a, x) \
+	((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_X, a, x, 0, 0})
+
+/* bpf_add|sub|...: a += imm, bpf_mov: a = imm */
+#define BPF_ALU64_IMM(op, a, imm) \
+	((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_K, a, 0, 0, imm})
+#define BPF_ALU32_IMM(op, a, imm) \
+	((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_K, a, 0, 0, imm})
+
+/* R0 = *(uint *) (skb->data + off) */
+#define BPF_LD_ABS(size, off) \
+	((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_ABS, 0, 0, 0, off})
+
+/* R0 = *(uint *) (skb->data + x + off) */
+#define BPF_LD_IND(size, x, off) \
+	((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_IND, 0, x, 0, off})
+
+/* a = *(uint *) (x + off) */
+#define BPF_LDX_MEM(sz, a, x, off) \
+	((struct sock_filter_int) {BPF_LDX|BPF_SIZE(sz)|BPF_MEM, a, x, off, 0})
+
+/* if (a 'op' x) goto pc+off */
+#define BPF_JMP_REG(op, a, x, off) \
+	((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_X, a, x, off, 0})
+
+/* if (a 'op' imm) goto pc+off */
+#define BPF_JMP_IMM(op, a, imm, off) \
+	((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_K, a, 0, off, imm})
+
+#define BPF_EXIT_INSN() \
+	((struct sock_filter_int) {BPF_JMP|BPF_EXIT, 0, 0, 0, 0})
+
+static inline int size_to_bpf(int size)
+{
+	switch (size) {
+	case 1:
+		return BPF_B;
+	case 2:
+		return BPF_H;
+	case 4:
+		return BPF_W;
+	case 8:
+		return BPF_DW;
+	default:
+		return -EINVAL;
+	}
+}
+
 /* Macro to invoke filter function. */
 #define SK_RUN_FILTER(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index eb020a7d6f55..9aaa05ad8fe3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -668,10 +668,9 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_PROTOCOL:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, protocol);
+		/* A = *(u16 *) (ctx + offsetof(protocol)) */
+		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, protocol));
 		insn++;
 
 		/* A = ntohs(A) [emitting a nop or swap16] */
@@ -681,37 +680,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
-		insn->code = BPF_LDX | BPF_MEM | BPF_B;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = pkt_type_offset();
+		*insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
+				    pkt_type_offset());
 		if (insn->off < 0)
 			return false;
 		insn++;
 
-		insn->code = BPF_ALU | BPF_AND | BPF_K;
-		insn->a_reg = BPF_REG_A;
-		insn->imm = PKT_TYPE_MAX;
+		*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
 		break;
 
 	case SKF_AD_OFF + SKF_AD_IFINDEX:
 	case SKF_AD_OFF + SKF_AD_HATYPE:
-		if (FIELD_SIZEOF(struct sk_buff, dev) == 8)
-			insn->code = BPF_LDX | BPF_MEM | BPF_DW;
-		else
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = BPF_REG_TMP;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, dev);
+		*insn = BPF_LDX_MEM(size_to_bpf(FIELD_SIZEOF(struct sk_buff, dev)),
+				    BPF_REG_TMP, BPF_REG_CTX,
+				    offsetof(struct sk_buff, dev));
 		insn++;
 
-		insn->code = BPF_JMP | BPF_JNE | BPF_K;
-		insn->a_reg = BPF_REG_TMP;
-		insn->imm = 0;
-		insn->off = 1;
+		/* if (tmp != 0) goto pc+1 */
+		*insn = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
 		insn++;
 
-		insn->code = BPF_JMP | BPF_EXIT;
+		*insn = BPF_EXIT_INSN();
 		insn++;
 
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
@@ -732,55 +721,45 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_MARK:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, mark);
+		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, mark));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_RXHASH:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, hash);
+		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, hash));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_QUEUE:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, queue_mapping);
+		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, queue_mapping));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, vlan_tci);
+		/* A = *(u16 *) (ctx + offsetof(vlan_tci)) */
+		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, vlan_tci));
 		insn++;
 
 		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
 
 		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
-			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = ~VLAN_TAG_PRESENT;
+			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
+					      ~VLAN_TAG_PRESENT);
 		} else {
-			insn->code = BPF_ALU | BPF_RSH | BPF_K;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = 12;
+			/* A >>= 12 */
+			*insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
 			insn++;
 
-			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = 1;
+			/* A &= 1 */
+			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
 		}
 		break;
 
@@ -790,21 +769,15 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_CPU:
 	case SKF_AD_OFF + SKF_AD_RANDOM:
 		/* arg1 = ctx */
-		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = BPF_REG_ARG1;
-		insn->x_reg = BPF_REG_CTX;
+		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG1, BPF_REG_CTX);
 		insn++;
 
 		/* arg2 = A */
-		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = BPF_REG_ARG2;
-		insn->x_reg = BPF_REG_A;
+		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG2, BPF_REG_A);
 		insn++;
 
 		/* arg3 = X */
-		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = BPF_REG_ARG3;
-		insn->x_reg = BPF_REG_X;
+		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG3, BPF_REG_X);
 		insn++;
 
 		/* Emit call(ctx, arg2=A, arg3=X) */
@@ -829,9 +802,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
-		insn->code = BPF_ALU | BPF_XOR | BPF_X;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_X;
+		/* A ^= X */
+		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
 		break;
 
 	default:
@@ -897,9 +869,7 @@ do_pass:
 	fp = prog;
 
 	if (new_insn) {
-		new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		new_insn->a_reg = BPF_REG_CTX;
-		new_insn->x_reg = BPF_REG_ARG1;
+		*new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1);
 	}
 	new_insn++;
 
@@ -1027,34 +997,28 @@ do_pass:
 
 		/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
 		case BPF_LDX | BPF_MSH | BPF_B:
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = BPF_REG_TMP;
-			insn->x_reg = BPF_REG_A;
+			/* tmp = A */
+			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_TMP, BPF_REG_A);
 			insn++;
 
-			insn->code = BPF_LD | BPF_ABS | BPF_B;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = fp->k;
+			/* A = R0 = *(u8 *) (skb->data + K) */
+			*insn = BPF_LD_ABS(BPF_B, fp->k);
 			insn++;
 
-			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = 0xf;
+			/* A &= 0xf */
+			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
 			insn++;
 
-			insn->code = BPF_ALU | BPF_LSH | BPF_K;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = 2;
+			/* A <<= 2 */
+			*insn = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
 			insn++;
 
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = BPF_REG_X;
-			insn->x_reg = BPF_REG_A;
+			/* X = A */
+			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
 			insn++;
 
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = BPF_REG_A;
-			insn->x_reg = BPF_REG_TMP;
+			/* A = tmp */
+			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_TMP);
 			break;
 
 		/* RET_K, RET_A are remaped into 2 insns. */
@@ -1068,7 +1032,7 @@ do_pass:
 			insn->imm = fp->k;
 			insn++;
 
-			insn->code = BPF_JMP | BPF_EXIT;
+			*insn = BPF_EXIT_INSN();
 			break;
 
 		/* Store to stack. */
@@ -1102,16 +1066,12 @@ do_pass:
 
 		/* X = A */
 		case BPF_MISC | BPF_TAX:
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = BPF_REG_X;
-			insn->x_reg = BPF_REG_A;
+			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
 			break;
 
 		/* A = X */
 		case BPF_MISC | BPF_TXA:
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = BPF_REG_A;
-			insn->x_reg = BPF_REG_X;
+			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_X);
 			break;
 
 		/* A = skb->len or X = skb->len */
@@ -1126,10 +1086,8 @@ do_pass:
 
 		/* access seccomp_data fields */
 		case BPF_LDX | BPF_ABS | BPF_W:
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->a_reg = BPF_REG_A;
-			insn->x_reg = BPF_REG_CTX;
-			insn->off = fp->k;
+			/* A = *(u32 *) (ctx + K) */
+			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
 			break;
 
 		default:
-- 
cgit v1.2.3-71-gd317


From 7d87a7f709650bde4d7d63117f25ee1c095da5dd Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Wed, 9 Apr 2014 18:19:04 +0300
Subject: srm/i915/chv: Add Cherryview PCI IDs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: Update to also fill in the new num_pipes field.

v3: Rebase on top of the pciid extraction.

v4: Switch from info->has*ring to info->ring mask. Also add VEBOX support whiel
at it.

v5: s/CHV_PCI_IDS/CHV_IDS/, and drop the trailing '\'

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.c | 12 +++++++++++-
 include/drm/i915_pciids.h       |  6 ++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 4024e16ae63d..6868bc0eabd3 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -299,6 +299,15 @@ static const struct intel_device_info intel_broadwell_gt3m_info = {
 	GEN_DEFAULT_PIPEOFFSETS,
 };
 
+static const struct intel_device_info intel_cherryview_info = {
+	.is_preliminary = 1,
+	.gen = 8, .num_pipes = 2,
+	.need_gfx_hws = 1, .has_hotplug = 1,
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+	.is_valleyview = 1,
+	.display_mmio_offset = VLV_DISPLAY_BASE,
+};
+
 /*
  * Make sure any device matches here are from most specific to most
  * general.  For example, since the Quanta match is based on the subsystem
@@ -334,7 +343,8 @@ static const struct intel_device_info intel_broadwell_gt3m_info = {
 	INTEL_BDW_GT12M_IDS(&intel_broadwell_m_info),	\
 	INTEL_BDW_GT12D_IDS(&intel_broadwell_d_info),	\
 	INTEL_BDW_GT3M_IDS(&intel_broadwell_gt3m_info),	\
-	INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info)
+	INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info), \
+	INTEL_CHV_IDS(&intel_cherryview_info)
 
 static const struct pci_device_id pciidlist[] = {		/* aka */
 	INTEL_PCI_IDS,
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 24f3cad045db..d18f31a77987 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -245,4 +245,10 @@
 	INTEL_BDW_GT12D_IDS(info), \
 	INTEL_BDW_GT3D_IDS(info)
 
+#define INTEL_CHV_IDS(info) \
+	INTEL_VGA_DEVICE(0x22b0, info), \
+	INTEL_VGA_DEVICE(0x22b1, info), \
+	INTEL_VGA_DEVICE(0x22b2, info), \
+	INTEL_VGA_DEVICE(0x22b3, info)
+
 #endif /* _I915_PCIIDS_H */
-- 
cgit v1.2.3-71-gd317


From 60ff746739bf805a912484643c720b6124826140 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Sun, 4 May 2014 16:39:18 -0700
Subject: net: rename local_df to ignore_df
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As suggested by several people, rename local_df to ignore_df,
since it means "ignore df bit if it is set".

Cc: Maciej Żenczykowski <maze@google.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                  |  4 ++--
 include/net/ip.h                        |  6 +++---
 include/net/ip6_route.h                 |  2 +-
 net/core/skbuff.c                       |  4 ++--
 net/ipv4/ip_forward.c                   |  2 +-
 net/ipv4/ip_output.c                    | 12 ++++++------
 net/ipv4/netfilter/nf_defrag_ipv4.c     |  2 +-
 net/ipv4/xfrm4_output.c                 |  2 +-
 net/ipv6/ip6_output.c                   | 12 ++++++------
 net/ipv6/netfilter/nf_conntrack_reasm.c |  2 +-
 net/ipv6/xfrm6_output.c                 |  6 +++---
 net/l2tp/l2tp_core.c                    |  2 +-
 net/netfilter/ipvs/ip_vs_xmit.c         | 20 ++++++++++----------
 net/openvswitch/vport-gre.c             |  2 +-
 net/openvswitch/vport-vxlan.c           |  2 +-
 net/sctp/ipv6.c                         |  2 +-
 net/sctp/output.c                       |  2 +-
 17 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3ca0dda5a42e..7a9beeb1c458 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -426,7 +426,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
  *	@csum_start: Offset from skb->head where checksumming should start
  *	@csum_offset: Offset from csum_start where checksum should be stored
  *	@priority: Packet queueing priority
- *	@local_df: allow local fragmentation
+ *	@ignore_df: allow local fragmentation
  *	@cloned: Head may be cloned (check refcnt to be sure)
  *	@ip_summed: Driver fed us an IP checksum
  *	@nohdr: Payload reference only, must not modify header
@@ -514,7 +514,7 @@ struct sk_buff {
 	};
 	__u32			priority;
 	kmemcheck_bitfield_begin(flags1);
-	__u8			local_df:1,
+	__u8			ignore_df:1,
 				cloned:1,
 				ip_summed:2,
 				nohdr:1,
diff --git a/include/net/ip.h b/include/net/ip.h
index 16146b667ddb..55752985c144 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -269,7 +269,7 @@ static inline bool ip_sk_use_pmtu(const struct sock *sk)
 	return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
 }
 
-static inline bool ip_sk_local_df(const struct sock *sk)
+static inline bool ip_sk_ignore_df(const struct sock *sk)
 {
 	return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO ||
 	       inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT;
@@ -304,7 +304,7 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s
 {
 	struct iphdr *iph = ip_hdr(skb);
 
-	if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) {
+	if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
 		/* This is only to work around buggy Windows95/2000
 		 * VJ compression implementations.  If the ID field
 		 * does not change, they drop every other packet in
@@ -320,7 +320,7 @@ static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *d
 {
 	struct iphdr *iph = ip_hdr(skb);
 
-	if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) {
+	if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
 		if (sk && inet_sk(sk)->inet_daddr) {
 			iph->id = htons(inet_sk(sk)->inet_id);
 			inet_sk(sk)->inet_id += 1 + more;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 6c4f5eac98e7..38e41e4d0998 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -185,7 +185,7 @@ static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
 	       inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
 }
 
-static inline bool ip6_sk_local_df(const struct sock *sk)
+static inline bool ip6_sk_ignore_df(const struct sock *sk)
 {
 	return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO ||
 	       inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1b62343f5837..3d74530ae82b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -694,7 +694,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #endif
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->csum		= old->csum;
-	new->local_df		= old->local_df;
+	new->ignore_df		= old->ignore_df;
 	new->pkt_type		= old->pkt_type;
 	new->ip_summed		= old->ip_summed;
 	skb_copy_queue_mapping(new, old);
@@ -3913,7 +3913,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb->tstamp.tv64 = 0;
 	skb->pkt_type = PACKET_HOST;
 	skb->skb_iif = 0;
-	skb->local_df = 0;
+	skb->ignore_df = 0;
 	skb_dst_drop(skb);
 	skb->mark = 0;
 	secpath_reset(skb);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 6f111e48e11c..3a83ce5efa80 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,7 +42,7 @@
 static bool ip_may_fragment(const struct sk_buff *skb)
 {
 	return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
-		skb->local_df;
+		skb->ignore_df;
 }
 
 static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a52f50187b54..6aa4380fde1a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -415,7 +415,7 @@ packet_routed:
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
-	if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
+	if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
@@ -501,7 +501,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	iph = ip_hdr(skb);
 
 	mtu = ip_skb_dst_mtu(skb);
-	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
+	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
 		     (IPCB(skb)->frag_max_size &&
 		      IPCB(skb)->frag_max_size > mtu))) {
 		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
@@ -866,7 +866,7 @@ static int __ip_append_data(struct sock *sk,
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-	maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
+	maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
 
 	if (cork->length + length > maxnonfragsize - fragheaderlen) {
 		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1189,7 +1189,7 @@ ssize_t	ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-	maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
+	maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
 
 	if (cork->length + size > maxnonfragsize - fragheaderlen) {
 		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1350,10 +1350,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 	 * to fragment the frame generated here. No matter, what transforms
 	 * how transforms change size of the packet, it will come out.
 	 */
-	skb->local_df = ip_sk_local_df(sk);
+	skb->ignore_df = ip_sk_ignore_df(sk);
 
 	/* DF bit is set when we want to see DF on outgoing frames.
-	 * If local_df is set too, we still allow to fragment this frame
+	 * If ignore_df is set too, we still allow to fragment this frame
 	 * locally. */
 	if (inet->pmtudisc == IP_PMTUDISC_DO ||
 	    inet->pmtudisc == IP_PMTUDISC_PROBE ||
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index f40f321b41fc..b8f6381c7d0b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -34,7 +34,7 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 
 	if (!err) {
 		ip_send_check(ip_hdr(skb));
-		skb->local_df = 1;
+		skb->ignore_df = 1;
 	}
 
 	return err;
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 40e701f2e1e0..8e8c018d9d2d 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -25,7 +25,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
 		goto out;
 
-	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
+	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
 		goto out;
 
 	mtu = dst_mtu(skb_dst(skb));
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 31a38bde69ef..ab0cc57f779c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -219,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	skb->mark = sk->sk_mark;
 
 	mtu = dst_mtu(dst);
-	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
+	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			      IPSTATS_MIB_OUT, skb->len);
 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
@@ -347,11 +347,11 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 	if (skb->len <= mtu)
 		return false;
 
-	/* ipv6 conntrack defrag sets max_frag_size + local_df */
+	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 		return true;
 
-	if (skb->local_df)
+	if (skb->ignore_df)
 		return false;
 
 	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
@@ -559,7 +559,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	/* We must not fragment if the socket is set to force MTU discovery
 	 * or if the skb it not generated by a local socket.
 	 */
-	if (unlikely(!skb->local_df && skb->len > mtu) ||
+	if (unlikely(!skb->ignore_df && skb->len > mtu) ||
 		     (IP6CB(skb)->frag_max_size &&
 		      IP6CB(skb)->frag_max_size > mtu)) {
 		if (skb->sk && dst_allfrag(skb_dst(skb)))
@@ -1234,7 +1234,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 			      sizeof(struct frag_hdr) : 0) +
 			     rt->rt6i_nfheader_len;
 
-		if (ip6_sk_local_df(sk))
+		if (ip6_sk_ignore_df(sk))
 			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
 		else
 			maxnonfragsize = mtu;
@@ -1544,7 +1544,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	}
 
 	/* Allow local fragmentation. */
-	skb->local_df = ip6_sk_local_df(sk);
+	skb->ignore_df = ip6_sk_ignore_df(sk);
 
 	*final_dst = fl6->daddr;
 	__skb_pull(skb, skb_network_header_len(skb));
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 767ab8da8218..0d5279fd852a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -451,7 +451,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 	}
 	sub_frag_mem_limit(&fq->q, head->truesize);
 
-	head->local_df = 1;
+	head->ignore_df = 1;
 	head->next = NULL;
 	head->dev = dev;
 	head->tstamp = fq->q.stamp;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 19ef329bdbf8..f47c8b153dd3 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -78,7 +78,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 
-	if (!skb->local_df && skb->len > mtu) {
+	if (!skb->ignore_df && skb->len > mtu) {
 		skb->dev = dst->dev;
 
 		if (xfrm6_local_dontfrag(skb))
@@ -120,7 +120,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 #endif
 
 	skb->protocol = htons(ETH_P_IPV6);
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	return x->outer_mode->output2(x, skb);
 }
@@ -150,7 +150,7 @@ static int __xfrm6_output(struct sk_buff *skb)
 	if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
 		xfrm6_local_rxpmtu(skb, mtu);
 		return -EMSGSIZE;
-	} else if (!skb->local_df && skb->len > mtu && skb->sk) {
+	} else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
 		xfrm_local_error(skb, mtu);
 		return -EMSGSIZE;
 	}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index aa1a9d44c107..ed0716a075ba 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1073,7 +1073,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 	}
 
 	/* Queue the packet to IP for output */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 #if IS_ENABLED(CONFIG_IPV6)
 	if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
 		error = inet6_csk_xmit(tunnel->sock, skb, NULL);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index c47444e4cf8c..487b55e04337 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -562,7 +562,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_send_check(iph);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
 	rcu_read_unlock();
@@ -590,7 +590,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error;
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
 	rcu_read_unlock();
@@ -684,7 +684,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	   MTU problem. */
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
 	rcu_read_unlock();
@@ -774,7 +774,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	   MTU problem. */
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
 	rcu_read_unlock();
@@ -886,7 +886,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_select_ident(skb, &rt->dst, NULL);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
 	if (ret == NF_ACCEPT)
@@ -974,7 +974,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->hop_limit		=	old_iph->hop_limit;
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
 	if (ret == NF_ACCEPT)
@@ -1023,7 +1023,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_send_check(ip_hdr(skb));
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
 	rcu_read_unlock();
@@ -1060,7 +1060,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
 	rcu_read_unlock();
@@ -1157,7 +1157,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_vs_nat_icmp(skb, pp, cp, 0);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
 	rcu_read_unlock();
@@ -1249,7 +1249,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_vs_nat_icmp_v6(skb, pp, cp, 0);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
 	rcu_read_unlock();
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index ebb6e2442554..0856f014d7a9 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -172,7 +172,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
 		htons(IP_DF) : 0;
 
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
 			     OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 21cceb3bdf78..a93efa3f64c3 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -170,7 +170,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
 		htons(IP_DF) : 0;
 
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	inet_get_local_port_range(net, &port_min, &port_max);
 	src_port = vxlan_src_port(port_min, port_max, skb);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 2b1738ef9394..4dc5d9e08311 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -216,7 +216,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 	IP6_ECN_flow_xmit(sk, fl6->flowlabel);
 
 	if (!(transport->param_flags & SPP_PMTUD_ENABLE))
-		skb->local_df = 1;
+		skb->ignore_df = 1;
 
 	SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0f4d15fc2627..01ab8e0723f0 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -591,7 +591,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 
 	pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len);
 
-	nskb->local_df = packet->ipfragok;
+	nskb->ignore_df = packet->ipfragok;
 	tp->af_specific->sctp_xmit(nskb, tp);
 
 out:
-- 
cgit v1.2.3-71-gd317


From f06c7f9f92295faf701a9628b383156c4efb6119 Mon Sep 17 00:00:00 2001
From: dingtianhong <dingtianhong@huawei.com>
Date: Fri, 9 May 2014 14:58:05 +0800
Subject: vlan: rename __vlan_find_dev_deep() to __vlan_find_dev_deep_rcu()

The __vlan_find_dev_deep should always called in RCU, according
David's suggestion, rename to __vlan_find_dev_deep_rcu looks more
reasonable.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c |  2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |  2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   |  2 +-
 drivers/net/usb/cdc_mbim.c                         |  4 ++--
 drivers/s390/net/qeth_l3_main.c                    | 10 +++++-----
 include/linux/if_vlan.h                            |  4 ++--
 net/8021q/vlan_core.c                              |  6 +++---
 net/bridge/br_netfilter.c                          |  2 +-
 8 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
index c0a9dd55f4e5..b0cbb2b7fd48 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
@@ -185,7 +185,7 @@ static struct net_device *get_iff_from_mac(struct adapter *adapter,
 		if (ether_addr_equal(dev->dev_addr, mac)) {
 			rcu_read_lock();
 			if (vlan && vlan != VLAN_VID_MASK) {
-				dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), vlan);
+				dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), vlan);
 			} else if (netif_is_bond_slave(dev)) {
 				struct net_device *upper_dev;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 8efeed3325b5..0f1e886d89e3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4068,7 +4068,7 @@ static int update_root_dev_clip(struct net_device *dev)
 
 	/* Parse all bond and vlan devices layered on top of the physical dev */
 	for (i = 0; i < VLAN_N_VID; i++) {
-		root_dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), i);
+		root_dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), i);
 		if (!root_dev)
 			continue;
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index d2e18b52caba..8a2aeb85e320 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -4146,7 +4146,7 @@ void qlcnic_restore_indev_addr(struct net_device *netdev, unsigned long event)
 
 	rcu_read_lock();
 	for_each_set_bit(vid, adapter->vlans, VLAN_N_VID) {
-		dev = __vlan_find_dev_deep(netdev, htons(ETH_P_8021Q), vid);
+		dev = __vlan_find_dev_deep_rcu(netdev, htons(ETH_P_8021Q), vid);
 		if (!dev)
 			continue;
 		qlcnic_config_indev_addr(adapter, dev, event);
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 2e025ddcef21..0ab79fca822c 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -223,8 +223,8 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci)
 	/* need to send the NA on the VLAN dev, if any */
 	rcu_read_lock();
 	if (tci) {
-		netdev = __vlan_find_dev_deep(dev->net, htons(ETH_P_8021Q),
-					      tci);
+		netdev = __vlan_find_dev_deep_rcu(dev->net, htons(ETH_P_8021Q),
+						  tci);
 		if (!netdev) {
 			rcu_read_unlock();
 			return;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index c8d91d797ec8..bc2499a24884 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1659,7 +1659,7 @@ static void qeth_l3_add_vlan_mc(struct qeth_card *card)
 	for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) {
 		struct net_device *netdev;
 
-		netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q),
+		netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
 					      vid);
 		if (netdev == NULL ||
 		    !(netdev->flags & IFF_UP))
@@ -1721,7 +1721,7 @@ static void qeth_l3_add_vlan_mc6(struct qeth_card *card)
 	for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) {
 		struct net_device *netdev;
 
-		netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q),
+		netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
 					      vid);
 		if (netdev == NULL ||
 		    !(netdev->flags & IFF_UP))
@@ -1766,7 +1766,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card,
 
 	QETH_CARD_TEXT(card, 4, "frvaddr4");
 
-	netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid);
+	netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid);
 	if (!netdev)
 		return;
 	in_dev = in_dev_get(netdev);
@@ -1796,7 +1796,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card,
 
 	QETH_CARD_TEXT(card, 4, "frvaddr6");
 
-	netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid);
+	netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid);
 	if (!netdev)
 		return;
 	in6_dev = in6_dev_get(netdev);
@@ -2089,7 +2089,7 @@ static int qeth_l3_verify_vlan_dev(struct net_device *dev,
 		struct net_device *netdev;
 
 		rcu_read_lock();
-		netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q),
+		netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
 					      vid);
 		rcu_read_unlock();
 		if (netdev == dev) {
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 13bbbde00e68..8c0fb7f3a9a5 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -106,7 +106,7 @@ struct vlan_pcpu_stats {
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 
-extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
+extern struct net_device *__vlan_find_dev_deep_rcu(struct net_device *real_dev,
 					       __be16 vlan_proto, u16 vlan_id);
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
 extern u16 vlan_dev_vlan_id(const struct net_device *dev);
@@ -199,7 +199,7 @@ extern void vlan_vids_del_by_dev(struct net_device *dev,
 extern bool vlan_uses_dev(const struct net_device *dev);
 #else
 static inline struct net_device *
-__vlan_find_dev_deep(struct net_device *real_dev,
+__vlan_find_dev_deep_rcu(struct net_device *real_dev,
 		     __be16 vlan_proto, u16 vlan_id)
 {
 	return NULL;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 3c32bd257b73..9012b1c922b6 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -63,7 +63,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
 }
 
 /* Must be invoked with rcu_read_lock. */
-struct net_device *__vlan_find_dev_deep(struct net_device *dev,
+struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev,
 					__be16 vlan_proto, u16 vlan_id)
 {
 	struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info);
@@ -81,13 +81,13 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev,
 
 		upper_dev = netdev_master_upper_dev_get_rcu(dev);
 		if (upper_dev)
-			return __vlan_find_dev_deep(upper_dev,
+			return __vlan_find_dev_deep_rcu(upper_dev,
 						    vlan_proto, vlan_id);
 	}
 
 	return NULL;
 }
-EXPORT_SYMBOL(__vlan_find_dev_deep);
+EXPORT_SYMBOL(__vlan_find_dev_deep_rcu);
 
 struct net_device *vlan_dev_real_dev(const struct net_device *dev)
 {
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 2acf7fa1fec6..a615264cf01a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -535,7 +535,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct
 	if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))
 		return br;
 
-	vlan = __vlan_find_dev_deep(br, skb->vlan_proto,
+	vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto,
 				    vlan_tx_tag_get(skb) & VLAN_VID_MASK);
 
 	return vlan ? vlan : br;
-- 
cgit v1.2.3-71-gd317


From 907abd51012b9b0b0b687e97d5ebadbddbc682fe Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Mon, 3 Mar 2014 11:36:43 +0900
Subject: mmc: dw_mmc: remove unused member variable.

Since using the device-tree, didn't use the callback pointer.
So removed the unused callback pointer.
When the set_power callback is used, it should be added in future.

Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/dw_mmc.c  | 33 +++------------------------------
 include/linux/mmc/dw_mmc.h | 14 --------------
 2 files changed, 3 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 55c34cb702d4..81991eca5671 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -850,8 +850,6 @@ static void __dw_mci_start_request(struct dw_mci *host,
 	u32 cmdflags;
 
 	mrq = slot->mrq;
-	if (host->pdata->select_slot)
-		host->pdata->select_slot(slot->id);
 
 	host->cur_slot = slot;
 	host->mrq = mrq;
@@ -985,17 +983,11 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	switch (ios->power_mode) {
 	case MMC_POWER_UP:
 		set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags);
-		/* Power up slot */
-		if (slot->host->pdata->setpower)
-			slot->host->pdata->setpower(slot->id, mmc->ocr_avail);
 		regs = mci_readl(slot->host, PWREN);
 		regs |= (1 << slot->id);
 		mci_writel(slot->host, PWREN, regs);
 		break;
 	case MMC_POWER_OFF:
-		/* Power down slot */
-		if (slot->host->pdata->setpower)
-			slot->host->pdata->setpower(slot->id, 0);
 		regs = mci_readl(slot->host, PWREN);
 		regs &= ~(1 << slot->id);
 		mci_writel(slot->host, PWREN, regs);
@@ -1009,13 +1001,10 @@ static int dw_mci_get_ro(struct mmc_host *mmc)
 {
 	int read_only;
 	struct dw_mci_slot *slot = mmc_priv(mmc);
-	struct dw_mci_board *brd = slot->host->pdata;
 
 	/* Use platform get_ro function, else try on board write protect */
 	if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT)
 		read_only = 0;
-	else if (brd->get_ro)
-		read_only = brd->get_ro(slot->id);
 	else if (gpio_is_valid(slot->wp_gpio))
 		read_only = gpio_get_value(slot->wp_gpio);
 	else
@@ -1039,8 +1028,6 @@ static int dw_mci_get_cd(struct mmc_host *mmc)
 	/* Use platform get_cd function, else try onboard card detect */
 	if (brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION)
 		present = 1;
-	else if (brd->get_cd)
-		present = !brd->get_cd(slot->id);
 	else if (!IS_ERR_VALUE(gpio_cd))
 		present = gpio_cd;
 	else
@@ -2138,17 +2125,7 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id)
 		mmc->f_max = freq[1];
 	}
 
-	if (host->pdata->get_ocr)
-		mmc->ocr_avail = host->pdata->get_ocr(id);
-	else
-		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-
-	/*
-	 * Start with slot power disabled, it will be enabled when a card
-	 * is detected.
-	 */
-	if (host->pdata->setpower)
-		host->pdata->setpower(id, 0);
+	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
 
 	if (host->pdata->caps)
 		mmc->caps = host->pdata->caps;
@@ -2217,10 +2194,6 @@ err_setup_bus:
 
 static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id)
 {
-	/* Shutdown detect IRQ */
-	if (slot->host->pdata->exit)
-		slot->host->pdata->exit(id);
-
 	/* Debugfs stuff is cleaned up by mmc core */
 	mmc_remove_host(slot->mmc);
 	slot->host->slot[id] = NULL;
@@ -2395,9 +2368,9 @@ int dw_mci_probe(struct dw_mci *host)
 		}
 	}
 
-	if (!host->pdata->select_slot && host->pdata->num_slots > 1) {
+	if (host->pdata->num_slots > 1) {
 		dev_err(host->dev,
-			"Platform data must supply select_slot function\n");
+			"Platform data must supply num_slots.\n");
 		return -ENODEV;
 	}
 
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 6ce7d2cd3c7a..babaea93bca6 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -248,20 +248,6 @@ struct dw_mci_board {
 	/* delay in mS before detecting cards after interrupt */
 	u32 detect_delay_ms;
 
-	int (*init)(u32 slot_id, irq_handler_t , void *);
-	int (*get_ro)(u32 slot_id);
-	int (*get_cd)(u32 slot_id);
-	int (*get_ocr)(u32 slot_id);
-	int (*get_bus_wd)(u32 slot_id);
-	/*
-	 * Enable power to selected slot and set voltage to desired level.
-	 * Voltage levels are specified using MMC_VDD_xxx defines defined
-	 * in linux/mmc/host.h file.
-	 */
-	void (*setpower)(u32 slot_id, u32 volt);
-	void (*exit)(u32 slot_id);
-	void (*select_slot)(u32 slot_id);
-
 	struct dw_mci_dma_ops *dma_ops;
 	struct dma_pdata *data;
 	struct block_settings *blk_settings;
-- 
cgit v1.2.3-71-gd317


From cdc991790c51c693d0c347a5286af017826a5d01 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Wed, 23 Apr 2014 17:07:35 +0900
Subject: mmc: drop the speed mode of card's state

Timing mode identifier has same role and can take the place
of speed mode. This change removes all related speed mode.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Tested-by: Jaehoon Chung <jh80.chung@samsung.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/bus.c                  |  8 ++++----
 drivers/mmc/core/core.c                 |  3 +--
 drivers/mmc/core/mmc.c                  | 11 +++--------
 drivers/mmc/core/sd.c                   | 16 +++-------------
 drivers/mmc/core/sd.h                   |  1 -
 drivers/mmc/core/sdio.c                 |  8 ++------
 drivers/net/wireless/rsi/rsi_91x_sdio.c |  4 +---
 include/linux/mmc/card.h                | 23 ++++++-----------------
 include/linux/mmc/host.h                | 23 +++++++++++++++++++++++
 9 files changed, 43 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 824644875d41..f37e9d6af84a 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -341,16 +341,16 @@ int mmc_add_card(struct mmc_card *card)
 	if (mmc_host_is_spi(card->host)) {
 		pr_info("%s: new %s%s%s card on SPI\n",
 			mmc_hostname(card->host),
-			mmc_card_highspeed(card) ? "high speed " : "",
-			mmc_card_ddr_mode(card) ? "DDR " : "",
+			mmc_card_hs(card) ? "high speed " : "",
+			mmc_card_ddr52(card) ? "DDR " : "",
 			type);
 	} else {
 		pr_info("%s: new %s%s%s%s%s card at address %04x\n",
 			mmc_hostname(card->host),
 			mmc_card_uhs(card) ? "ultra high speed " :
-			(mmc_card_highspeed(card) ? "high speed " : ""),
+			(mmc_card_hs(card) ? "high speed " : ""),
 			(mmc_card_hs200(card) ? "HS200 " : ""),
-			mmc_card_ddr_mode(card) ? "DDR " : "",
+			mmc_card_ddr52(card) ? "DDR " : "",
 			uhs_bus_speed_mode, type, card->rca);
 	}
 
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index d97dff5fab62..02baa30653fa 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2192,7 +2192,7 @@ int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen)
 {
 	struct mmc_command cmd = {0};
 
-	if (mmc_card_blockaddr(card) || mmc_card_ddr_mode(card))
+	if (mmc_card_blockaddr(card) || mmc_card_ddr52(card))
 		return 0;
 
 	cmd.opcode = MMC_SET_BLOCKLEN;
@@ -2272,7 +2272,6 @@ static int mmc_do_hw_reset(struct mmc_host *host, int check)
 		}
 	}
 
-	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_DDR);
 	if (mmc_host_is_spi(host)) {
 		host->ios.chip_select = MMC_CS_HIGH;
 		host->ios.bus_mode = MMC_BUSMODE_PUSHPULL;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index fbcf93d81858..31220529e171 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1083,11 +1083,9 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		} else {
 			if (card->ext_csd.hs_max_dtr > 52000000 &&
 			    host->caps2 & MMC_CAP2_HS200) {
-				mmc_card_set_hs200(card);
 				mmc_set_timing(card->host,
 					       MMC_TIMING_MMC_HS200);
 			} else {
-				mmc_card_set_highspeed(card);
 				mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
 			}
 		}
@@ -1098,10 +1096,10 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 */
 	max_dtr = (unsigned int)-1;
 
-	if (mmc_card_highspeed(card) || mmc_card_hs200(card)) {
+	if (mmc_card_hs(card) || mmc_card_hs200(card)) {
 		if (max_dtr > card->ext_csd.hs_max_dtr)
 			max_dtr = card->ext_csd.hs_max_dtr;
-		if (mmc_card_highspeed(card) && (max_dtr > 52000000))
+		if (mmc_card_hs(card) && (max_dtr > 52000000))
 			max_dtr = 52000000;
 	} else if (max_dtr > card->csd.max_dtr) {
 		max_dtr = card->csd.max_dtr;
@@ -1112,7 +1110,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	/*
 	 * Indicate DDR mode (if supported).
 	 */
-	if (mmc_card_highspeed(card)) {
+	if (mmc_card_hs(card)) {
 		if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_8V)
 			&& (host->caps & MMC_CAP_1_8V_DDR))
 				ddr = MMC_1_8V_DDR_MODE;
@@ -1255,7 +1253,6 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 				if (err)
 					goto err;
 			}
-			mmc_card_set_ddr_mode(card);
 			mmc_set_timing(card->host, MMC_TIMING_MMC_DDR52);
 			mmc_set_bus_width(card->host, bus_width);
 		}
@@ -1499,7 +1496,6 @@ static int _mmc_suspend(struct mmc_host *host, bool is_suspend)
 		err = mmc_sleep(host);
 	else if (!mmc_host_is_spi(host))
 		err = mmc_deselect_cards(host);
-	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
 
 	if (!err) {
 		mmc_power_off(host);
@@ -1629,7 +1625,6 @@ static int mmc_power_restore(struct mmc_host *host)
 {
 	int ret;
 
-	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
 	mmc_claim_host(host);
 	ret = mmc_init_card(host, host->card->ocr, host->card);
 	mmc_release_host(host);
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index aef515755e5b..0c44510bf717 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -887,7 +887,7 @@ unsigned mmc_sd_get_max_clock(struct mmc_card *card)
 {
 	unsigned max_dtr = (unsigned int)-1;
 
-	if (mmc_card_highspeed(card)) {
+	if (mmc_card_hs(card)) {
 		if (max_dtr > card->sw_caps.hs_max_dtr)
 			max_dtr = card->sw_caps.hs_max_dtr;
 	} else if (max_dtr > card->csd.max_dtr) {
@@ -897,12 +897,6 @@ unsigned mmc_sd_get_max_clock(struct mmc_card *card)
 	return max_dtr;
 }
 
-void mmc_sd_go_highspeed(struct mmc_card *card)
-{
-	mmc_card_set_highspeed(card);
-	mmc_set_timing(card->host, MMC_TIMING_SD_HS);
-}
-
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -977,16 +971,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 		err = mmc_sd_init_uhs_card(card);
 		if (err)
 			goto free_card;
-
-		/* Card is an ultra-high-speed card */
-		mmc_card_set_uhs(card);
 	} else {
 		/*
 		 * Attempt to change to high-speed (if supported)
 		 */
 		err = mmc_sd_switch_hs(card);
 		if (err > 0)
-			mmc_sd_go_highspeed(card);
+			mmc_set_timing(card->host, MMC_TIMING_SD_HS);
 		else if (err)
 			goto free_card;
 
@@ -1081,7 +1072,7 @@ static int _mmc_sd_suspend(struct mmc_host *host)
 
 	if (!mmc_host_is_spi(host))
 		err = mmc_deselect_cards(host);
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
+
 	if (!err) {
 		mmc_power_off(host);
 		mmc_card_set_suspended(host->card);
@@ -1190,7 +1181,6 @@ static int mmc_sd_power_restore(struct mmc_host *host)
 {
 	int ret;
 
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
 	mmc_claim_host(host);
 	ret = mmc_sd_init_card(host, host->card->ocr, host->card);
 	mmc_release_host(host);
diff --git a/drivers/mmc/core/sd.h b/drivers/mmc/core/sd.h
index 4b34b24f3f76..aab824a9a7f3 100644
--- a/drivers/mmc/core/sd.h
+++ b/drivers/mmc/core/sd.h
@@ -12,6 +12,5 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card,
 	bool reinit);
 unsigned mmc_sd_get_max_clock(struct mmc_card *card);
 int mmc_sd_switch_hs(struct mmc_card *card);
-void mmc_sd_go_highspeed(struct mmc_card *card);
 
 #endif
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 9933e426bc36..e636d9e99e4a 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -363,7 +363,7 @@ static unsigned mmc_sdio_get_max_clock(struct mmc_card *card)
 {
 	unsigned max_dtr;
 
-	if (mmc_card_highspeed(card)) {
+	if (mmc_card_hs(card)) {
 		/*
 		 * The SDIO specification doesn't mention how
 		 * the CIS transfer speed register relates to
@@ -733,7 +733,6 @@ try_again:
 		mmc_set_clock(host, card->cis.max_dtr);
 
 		if (card->cccr.high_speed) {
-			mmc_card_set_highspeed(card);
 			mmc_set_timing(card->host, MMC_TIMING_SD_HS);
 		}
 
@@ -792,16 +791,13 @@ try_again:
 		err = mmc_sdio_init_uhs_card(card);
 		if (err)
 			goto remove;
-
-		/* Card is an ultra-high-speed card */
-		mmc_card_set_uhs(card);
 	} else {
 		/*
 		 * Switch to high-speed (if supported).
 		 */
 		err = sdio_enable_hs(card);
 		if (err > 0)
-			mmc_sd_go_highspeed(card);
+			mmc_set_timing(card->host, MMC_TIMING_SD_HS);
 		else if (err)
 			goto remove;
 
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index 2e39d38d6a9e..46e7af446f01 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -285,7 +285,6 @@ static void rsi_reset_card(struct sdio_func *pfunction)
 		if (err) {
 			rsi_dbg(ERR_ZONE, "%s: CCCR speed reg read failed: %d\n",
 				__func__, err);
-			card->state &= ~MMC_STATE_HIGHSPEED;
 		} else {
 			err = rsi_cmd52writebyte(card,
 						 SDIO_CCCR_SPEED,
@@ -296,14 +295,13 @@ static void rsi_reset_card(struct sdio_func *pfunction)
 					__func__, err);
 				return;
 			}
-			mmc_card_set_highspeed(card);
 			host->ios.timing = MMC_TIMING_SD_HS;
 			host->ops->set_ios(host, &host->ios);
 		}
 	}
 
 	/* Set clock */
-	if (mmc_card_highspeed(card))
+	if (mmc_card_hs(card))
 		clock = 50000000;
 	else
 		clock = card->cis.max_dtr;
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index aa7e57f60fb2..aadeaf155d0e 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -194,6 +194,7 @@ struct sdio_cis {
 };
 
 struct mmc_host;
+struct mmc_ios;
 struct sdio_func;
 struct sdio_func_tuple;
 
@@ -250,15 +251,11 @@ struct mmc_card {
 	unsigned int		state;		/* (our) card state */
 #define MMC_STATE_PRESENT	(1<<0)		/* present in sysfs */
 #define MMC_STATE_READONLY	(1<<1)		/* card is read-only */
-#define MMC_STATE_HIGHSPEED	(1<<2)		/* card is in high speed mode */
-#define MMC_STATE_BLOCKADDR	(1<<3)		/* card uses block-addressing */
-#define MMC_STATE_HIGHSPEED_DDR (1<<4)		/* card is in high speed mode */
-#define MMC_STATE_ULTRAHIGHSPEED (1<<5)		/* card is in ultra high speed mode */
-#define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
-#define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
-#define MMC_STATE_HIGHSPEED_200	(1<<8)		/* card is in HS200 mode */
-#define MMC_STATE_DOING_BKOPS	(1<<10)		/* card is doing BKOPS */
-#define MMC_STATE_SUSPENDED	(1<<11)		/* card is suspended */
+#define MMC_STATE_BLOCKADDR	(1<<2)		/* card uses block-addressing */
+#define MMC_CARD_SDXC		(1<<3)		/* card is SDXC */
+#define MMC_CARD_REMOVED	(1<<4)		/* card has been removed */
+#define MMC_STATE_DOING_BKOPS	(1<<5)		/* card is doing BKOPS */
+#define MMC_STATE_SUSPENDED	(1<<6)		/* card is suspended */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -418,11 +415,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 
 #define mmc_card_present(c)	((c)->state & MMC_STATE_PRESENT)
 #define mmc_card_readonly(c)	((c)->state & MMC_STATE_READONLY)
-#define mmc_card_highspeed(c)	((c)->state & MMC_STATE_HIGHSPEED)
-#define mmc_card_hs200(c)	((c)->state & MMC_STATE_HIGHSPEED_200)
 #define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
-#define mmc_card_ddr_mode(c)	((c)->state & MMC_STATE_HIGHSPEED_DDR)
-#define mmc_card_uhs(c)		((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
 #define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
 #define mmc_card_doing_bkops(c)	((c)->state & MMC_STATE_DOING_BKOPS)
@@ -430,11 +423,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
-#define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
-#define mmc_card_set_hs200(c)	((c)->state |= MMC_STATE_HIGHSPEED_200)
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
-#define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR)
-#define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
 #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
 #define mmc_card_set_doing_bkops(c)	((c)->state |= MMC_STATE_DOING_BKOPS)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0cf705c83998..a43853779799 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -17,6 +17,7 @@
 #include <linux/fault-inject.h>
 
 #include <linux/mmc/core.h>
+#include <linux/mmc/card.h>
 #include <linux/mmc/pm.h>
 
 struct mmc_ios {
@@ -478,4 +479,26 @@ static inline unsigned int mmc_host_clk_rate(struct mmc_host *host)
 	return host->ios.clock;
 }
 #endif
+
+static inline int mmc_card_hs(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_SD_HS ||
+		card->host->ios.timing == MMC_TIMING_MMC_HS;
+}
+
+static inline int mmc_card_uhs(struct mmc_card *card)
+{
+	return card->host->ios.timing >= MMC_TIMING_UHS_SDR12 &&
+		card->host->ios.timing <= MMC_TIMING_UHS_DDR50;
+}
+
+static inline bool mmc_card_hs200(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_MMC_HS200;
+}
+
+static inline bool mmc_card_ddr52(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_MMC_DDR52;
+}
 #endif /* LINUX_MMC_HOST_H */
-- 
cgit v1.2.3-71-gd317


From 2415c0ef618b3cd95581c7f633cbab78b29b7ab0 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Wed, 23 Apr 2014 17:07:58 +0900
Subject: mmc: identify available device type to select

Device types which are supported by both host and device can be
identified when EXT_CSD is read. There is no need to check host's
capability anymore.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Tested-by: Jaehoon Chung <jh80.chung@samsung.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/mmc.c   | 72 ++++++++++++++++++++++++++----------------------
 include/linux/mmc/card.h |  2 +-
 include/linux/mmc/host.h |  6 ----
 include/linux/mmc/mmc.h  | 12 +++++---
 4 files changed, 48 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 31220529e171..b5691fee9629 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -243,28 +243,46 @@ static void mmc_select_card_type(struct mmc_card *card)
 	u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK;
 	u32 caps = host->caps, caps2 = host->caps2;
 	unsigned int hs_max_dtr = 0;
+	unsigned int avail_type = 0;
 
-	if (card_type & EXT_CSD_CARD_TYPE_26)
+	if (caps & MMC_CAP_MMC_HIGHSPEED &&
+	    card_type & EXT_CSD_CARD_TYPE_HS_26) {
 		hs_max_dtr = MMC_HIGH_26_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS_26;
+	}
 
 	if (caps & MMC_CAP_MMC_HIGHSPEED &&
-			card_type & EXT_CSD_CARD_TYPE_52)
+	    card_type & EXT_CSD_CARD_TYPE_HS_52) {
 		hs_max_dtr = MMC_HIGH_52_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS_52;
+	}
 
-	if ((caps & MMC_CAP_1_8V_DDR &&
-			card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) ||
-	    (caps & MMC_CAP_1_2V_DDR &&
-			card_type & EXT_CSD_CARD_TYPE_DDR_1_2V))
+	if (caps & MMC_CAP_1_8V_DDR &&
+	    card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) {
 		hs_max_dtr = MMC_HIGH_DDR_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_DDR_1_8V;
+	}
+
+	if (caps & MMC_CAP_1_2V_DDR &&
+	    card_type & EXT_CSD_CARD_TYPE_DDR_1_2V) {
+		hs_max_dtr = MMC_HIGH_DDR_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_DDR_1_2V;
+	}
 
-	if ((caps2 & MMC_CAP2_HS200_1_8V_SDR &&
-			card_type & EXT_CSD_CARD_TYPE_SDR_1_8V) ||
-	    (caps2 & MMC_CAP2_HS200_1_2V_SDR &&
-			card_type & EXT_CSD_CARD_TYPE_SDR_1_2V))
+	if (caps2 & MMC_CAP2_HS200_1_8V_SDR &&
+	    card_type & EXT_CSD_CARD_TYPE_HS200_1_8V) {
 		hs_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_8V;
+	}
+
+	if (caps2 & MMC_CAP2_HS200_1_2V_SDR &&
+	    card_type & EXT_CSD_CARD_TYPE_HS200_1_2V) {
+		hs_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_2V;
+	}
 
 	card->ext_csd.hs_max_dtr = hs_max_dtr;
-	card->ext_csd.card_type = card_type;
+	card->mmc_avail_type = avail_type;
 }
 
 /*
@@ -800,12 +818,10 @@ static int mmc_select_hs200(struct mmc_card *card)
 
 	host = card->host;
 
-	if (card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_2V &&
-			host->caps2 & MMC_CAP2_HS200_1_2V_SDR)
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V)
 		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
 
-	if (err && card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_8V &&
-			host->caps2 & MMC_CAP2_HS200_1_8V_SDR)
+	if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V)
 		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
 
 	/* If fails try again during next card power cycle */
@@ -1064,10 +1080,9 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 */
 	if (card->ext_csd.hs_max_dtr != 0) {
 		err = 0;
-		if (card->ext_csd.hs_max_dtr > 52000000 &&
-		    host->caps2 & MMC_CAP2_HS200)
+		if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
 			err = mmc_select_hs200(card);
-		else if	(host->caps & MMC_CAP_MMC_HIGHSPEED)
+		else if	(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS)
 			err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 					EXT_CSD_HS_TIMING, 1,
 					card->ext_csd.generic_cmd6_time,
@@ -1081,13 +1096,11 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 			       mmc_hostname(card->host));
 			err = 0;
 		} else {
-			if (card->ext_csd.hs_max_dtr > 52000000 &&
-			    host->caps2 & MMC_CAP2_HS200) {
+			if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
 				mmc_set_timing(card->host,
 					       MMC_TIMING_MMC_HS200);
-			} else {
+			else
 				mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
-			}
 		}
 	}
 
@@ -1110,14 +1123,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	/*
 	 * Indicate DDR mode (if supported).
 	 */
-	if (mmc_card_hs(card)) {
-		if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_8V)
-			&& (host->caps & MMC_CAP_1_8V_DDR))
-				ddr = MMC_1_8V_DDR_MODE;
-		else if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_2V)
-			&& (host->caps & MMC_CAP_1_2V_DDR))
-				ddr = MMC_1_2V_DDR_MODE;
-	}
+	if (mmc_card_hs(card))
+		ddr = card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52;
 
 	/*
 	 * Indicate HS200 SDR mode (if supported).
@@ -1137,8 +1144,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		 * 3. set the clock to > 52Mhz <=200MHz and
 		 * 4. execute tuning for HS200
 		 */
-		if ((host->caps2 & MMC_CAP2_HS200) &&
-		    card->host->ops->execute_tuning) {
+		if (card->host->ops->execute_tuning) {
 			mmc_host_clk_hold(card->host);
 			err = card->host->ops->execute_tuning(card->host,
 				MMC_SEND_TUNING_BLOCK_HS200);
@@ -1247,7 +1253,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 			 *
 			 * WARNING: eMMC rules are NOT the same as SD DDR
 			 */
-			if (ddr == MMC_1_2V_DDR_MODE) {
+			if (ddr & EXT_CSD_CARD_TYPE_DDR_1_2V) {
 				err = __mmc_set_signal_voltage(host,
 					MMC_SIGNAL_VOLTAGE_120);
 				if (err)
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index aadeaf155d0e..fe31f8d89a03 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -68,7 +68,6 @@ struct mmc_ext_csd {
 #define MMC_HIGH_DDR_MAX_DTR	52000000
 #define MMC_HS200_MAX_DTR	200000000
 	unsigned int		sectors;
-	unsigned int		card_type;
 	unsigned int		hc_erase_size;		/* In sectors */
 	unsigned int		hc_erase_timeout;	/* In milliseconds */
 	unsigned int		sec_trim_mult;	/* Secure trim multiplier  */
@@ -298,6 +297,7 @@ struct mmc_card {
 	struct sdio_func_tuple	*tuples;	/* unknown common tuples */
 
 	unsigned int		sd_bus_speed;	/* Bus Speed Mode set for the card */
+	unsigned int		mmc_avail_type;	/* supported device type by both host and card */
 
 	struct dentry		*debugfs_root;
 	struct mmc_part	part[MMC_NUM_PHY_PARTITION]; /* physical partitions */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index a43853779799..6b1e9ee6ca10 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -62,12 +62,6 @@ struct mmc_ios {
 #define MMC_TIMING_MMC_DDR52	8
 #define MMC_TIMING_MMC_HS200	9
 
-#define MMC_SDR_MODE		0
-#define MMC_1_2V_DDR_MODE	1
-#define MMC_1_8V_DDR_MODE	2
-#define MMC_1_2V_SDR_MODE	3
-#define MMC_1_8V_SDR_MODE	4
-
 	unsigned char	signal_voltage;		/* signalling voltage (1.8V or 3.3V) */
 
 #define MMC_SIGNAL_VOLTAGE_330	0
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 50bcde3677ca..f734c0c64575 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -354,18 +354,22 @@ struct _mmc_csd {
 #define EXT_CSD_CMD_SET_SECURE		(1<<1)
 #define EXT_CSD_CMD_SET_CPSECURE	(1<<2)
 
-#define EXT_CSD_CARD_TYPE_26	(1<<0)	/* Card can run at 26MHz */
-#define EXT_CSD_CARD_TYPE_52	(1<<1)	/* Card can run at 52MHz */
 #define EXT_CSD_CARD_TYPE_MASK	0x3F	/* Mask out reserved bits */
+#define EXT_CSD_CARD_TYPE_HS_26	(1<<0)	/* Card can run at 26MHz */
+#define EXT_CSD_CARD_TYPE_HS_52	(1<<1)	/* Card can run at 52MHz */
+#define EXT_CSD_CARD_TYPE_HS	(EXT_CSD_CARD_TYPE_HS_26 | \
+				 EXT_CSD_CARD_TYPE_HS_52)
 #define EXT_CSD_CARD_TYPE_DDR_1_8V  (1<<2)   /* Card can run at 52MHz */
 					     /* DDR mode @1.8V or 3V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_1_2V  (1<<3)   /* Card can run at 52MHz */
 					     /* DDR mode @1.2V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_52       (EXT_CSD_CARD_TYPE_DDR_1_8V  \
 					| EXT_CSD_CARD_TYPE_DDR_1_2V)
-#define EXT_CSD_CARD_TYPE_SDR_1_8V	(1<<4)	/* Card can run at 200MHz */
-#define EXT_CSD_CARD_TYPE_SDR_1_2V	(1<<5)	/* Card can run at 200MHz */
+#define EXT_CSD_CARD_TYPE_HS200_1_8V	(1<<4)	/* Card can run at 200MHz */
+#define EXT_CSD_CARD_TYPE_HS200_1_2V	(1<<5)	/* Card can run at 200MHz */
 						/* SDR mode @1.2V I/O */
+#define EXT_CSD_CARD_TYPE_HS200		(EXT_CSD_CARD_TYPE_HS200_1_8V | \
+					 EXT_CSD_CARD_TYPE_HS200_1_2V)
 
 #define EXT_CSD_BUS_WIDTH_1	0	/* Card is in 1 bit mode */
 #define EXT_CSD_BUS_WIDTH_4	1	/* Card is in 4 bit mode */
-- 
cgit v1.2.3-71-gd317


From 577fb13199b11d8cd75609183649be4b5561243f Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Wed, 23 Apr 2014 17:08:44 +0900
Subject: mmc: rework selection of bus speed mode

Current implementation for bus speed mode selection is too
complicated. This patch is to simplify the codes and remove
some duplicate parts.

The following changes are including:
* Adds functions for each mode selection(HS, HS-DDR, HS200 and etc)
* Rearranged the mode selection sequence with supported device type
* Adds maximum speed for HS200 mode(hs200_max_dtr)
* Adds field definition for HS_TIMING of EXT_CSD

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Tested-by: Jaehoon Chung <jh80.chung@samsung.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/debugfs.c |   2 +-
 drivers/mmc/core/mmc.c     | 431 ++++++++++++++++++++++++---------------------
 include/linux/mmc/card.h   |   1 +
 include/linux/mmc/mmc.h    |   4 +
 4 files changed, 238 insertions(+), 200 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 509229b48b55..1f730dbfaeea 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -139,7 +139,7 @@ static int mmc_ios_show(struct seq_file *s, void *data)
 		str = "mmc DDR52";
 		break;
 	case MMC_TIMING_MMC_HS200:
-		str = "mmc high-speed SDR200";
+		str = "mmc HS200";
 		break;
 	default:
 		str = "invalid";
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 4538541ac5ab..bec6786efd19 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -242,7 +242,7 @@ static void mmc_select_card_type(struct mmc_card *card)
 	struct mmc_host *host = card->host;
 	u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK;
 	u32 caps = host->caps, caps2 = host->caps2;
-	unsigned int hs_max_dtr = 0;
+	unsigned int hs_max_dtr = 0, hs200_max_dtr = 0;
 	unsigned int avail_type = 0;
 
 	if (caps & MMC_CAP_MMC_HIGHSPEED &&
@@ -271,17 +271,18 @@ static void mmc_select_card_type(struct mmc_card *card)
 
 	if (caps2 & MMC_CAP2_HS200_1_8V_SDR &&
 	    card_type & EXT_CSD_CARD_TYPE_HS200_1_8V) {
-		hs_max_dtr = MMC_HS200_MAX_DTR;
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
 		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_8V;
 	}
 
 	if (caps2 & MMC_CAP2_HS200_1_2V_SDR &&
 	    card_type & EXT_CSD_CARD_TYPE_HS200_1_2V) {
-		hs_max_dtr = MMC_HS200_MAX_DTR;
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
 		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_2V;
 	}
 
 	card->ext_csd.hs_max_dtr = hs_max_dtr;
+	card->ext_csd.hs200_max_dtr = hs200_max_dtr;
 	card->mmc_avail_type = avail_type;
 }
 
@@ -825,37 +826,46 @@ static int mmc_select_powerclass(struct mmc_card *card)
 }
 
 /*
- * Selects the desired buswidth and switch to the HS200 mode
- * if bus width set without error
+ * Set the bus speed for the selected speed mode.
  */
-static int mmc_select_hs200(struct mmc_card *card)
+static void mmc_set_bus_speed(struct mmc_card *card)
+{
+	unsigned int max_dtr = (unsigned int)-1;
+
+	if (mmc_card_hs200(card) && max_dtr > card->ext_csd.hs200_max_dtr)
+		max_dtr = card->ext_csd.hs200_max_dtr;
+	else if (mmc_card_hs(card) && max_dtr > card->ext_csd.hs_max_dtr)
+		max_dtr = card->ext_csd.hs_max_dtr;
+	else if (max_dtr > card->csd.max_dtr)
+		max_dtr = card->csd.max_dtr;
+
+	mmc_set_clock(card->host, max_dtr);
+}
+
+/*
+ * Select the bus width amoung 4-bit and 8-bit(SDR).
+ * If the bus width is changed successfully, return the selected width value.
+ * Zero is returned instead of error value if the wide width is not supported.
+ */
+static int mmc_select_bus_width(struct mmc_card *card)
 {
-	int idx, err = -EINVAL;
-	struct mmc_host *host;
 	static unsigned ext_csd_bits[] = {
-		EXT_CSD_BUS_WIDTH_4,
 		EXT_CSD_BUS_WIDTH_8,
+		EXT_CSD_BUS_WIDTH_4,
 	};
 	static unsigned bus_widths[] = {
-		MMC_BUS_WIDTH_4,
 		MMC_BUS_WIDTH_8,
+		MMC_BUS_WIDTH_4,
 	};
+	struct mmc_host *host = card->host;
+	unsigned idx, bus_width = 0;
+	int err = 0;
 
-	BUG_ON(!card);
-
-	host = card->host;
-
-	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V)
-		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
-
-	if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V)
-		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
-
-	/* If fails try again during next card power cycle */
-	if (err)
-		goto err;
+	if ((card->csd.mmca_vsn < CSD_SPEC_VER_4) &&
+	    !(host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA)))
+		return 0;
 
-	idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 1 : 0;
+	idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 0 : 1;
 
 	/*
 	 * Unlike SD, MMC cards dont have a configuration register to notify
@@ -863,8 +873,7 @@ static int mmc_select_hs200(struct mmc_card *card)
 	 * the supported bus width or compare the ext csd values of current
 	 * bus width and ext csd values of 1 bit mode read earlier.
 	 */
-	for (; idx >= 0; idx--) {
-
+	for (; idx < ARRAY_SIZE(bus_widths); idx++) {
 		/*
 		 * Host is capable of 8bit transfer, then switch
 		 * the device to work in 8bit transfer mode. If the
@@ -879,26 +888,201 @@ static int mmc_select_hs200(struct mmc_card *card)
 		if (err)
 			continue;
 
-		mmc_set_bus_width(card->host, bus_widths[idx]);
+		bus_width = bus_widths[idx];
+		mmc_set_bus_width(host, bus_width);
 
+		/*
+		 * If controller can't handle bus width test,
+		 * compare ext_csd previously read in 1 bit mode
+		 * against ext_csd at new bus width
+		 */
 		if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
-			err = mmc_compare_ext_csds(card, bus_widths[idx]);
+			err = mmc_compare_ext_csds(card, bus_width);
 		else
-			err = mmc_bus_test(card, bus_widths[idx]);
-		if (!err)
+			err = mmc_bus_test(card, bus_width);
+
+		if (!err) {
+			err = bus_width;
 			break;
+		} else {
+			pr_warn("%s: switch to bus width %d failed\n",
+				mmc_hostname(host), ext_csd_bits[idx]);
+		}
 	}
 
-	/* switch to HS200 mode if bus width set successfully */
+	return err;
+}
+
+/*
+ * Switch to the high-speed mode
+ */
+static int mmc_select_hs(struct mmc_card *card)
+{
+	int err;
+
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS,
+			   card->ext_csd.generic_cmd6_time,
+			   true, true, true);
 	if (!err)
+		mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+
+	return err;
+}
+
+/*
+ * Activate wide bus and DDR if supported.
+ */
+static int mmc_select_hs_ddr(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	u32 bus_width, ext_csd_bits;
+	int err = 0;
+
+	if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52))
+		return 0;
+
+	bus_width = host->ios.bus_width;
+	if (bus_width == MMC_BUS_WIDTH_1)
+		return 0;
+
+	ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
+		EXT_CSD_DDR_BUS_WIDTH_8 : EXT_CSD_DDR_BUS_WIDTH_4;
+
+	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			EXT_CSD_BUS_WIDTH,
+			ext_csd_bits,
+			card->ext_csd.generic_cmd6_time);
+	if (err) {
+		pr_warn("%s: switch to bus width %d ddr failed\n",
+			mmc_hostname(host), 1 << bus_width);
+		return err;
+	}
+
+	/*
+	 * eMMC cards can support 3.3V to 1.2V i/o (vccq)
+	 * signaling.
+	 *
+	 * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq.
+	 *
+	 * 1.8V vccq at 3.3V core voltage (vcc) is not required
+	 * in the JEDEC spec for DDR.
+	 *
+	 * Do not force change in vccq since we are obviously
+	 * working and no change to vccq is needed.
+	 *
+	 * WARNING: eMMC rules are NOT the same as SD DDR
+	 */
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_1_2V) {
+		err = __mmc_set_signal_voltage(host,
+				MMC_SIGNAL_VOLTAGE_120);
+		if (err)
+			return err;
+	}
+
+	mmc_set_timing(host, MMC_TIMING_MMC_DDR52);
+
+	return err;
+}
+
+/*
+ * For device supporting HS200 mode, the following sequence
+ * should be done before executing the tuning process.
+ * 1. set the desired bus width(4-bit or 8-bit, 1-bit is not supported)
+ * 2. switch to HS200 mode
+ * 3. set the clock to > 52Mhz and <=200MHz
+ */
+static int mmc_select_hs200(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	int err = -EINVAL;
+
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V)
+		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
+
+	if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V)
+		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
+
+	/* If fails try again during next card power cycle */
+	if (err)
+		goto err;
+
+	/*
+	 * Set the bus width(4 or 8) with host's support and
+	 * switch to HS200 mode if bus width is set successfully.
+	 */
+	err = mmc_select_bus_width(card);
+	if (!IS_ERR_VALUE(err)) {
 		err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-				EXT_CSD_HS_TIMING, 2,
-				card->ext_csd.generic_cmd6_time,
-				true, true, true);
+				   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS200,
+				   card->ext_csd.generic_cmd6_time,
+				   true, true, true);
+		if (!err)
+			mmc_set_timing(host, MMC_TIMING_MMC_HS200);
+	}
 err:
 	return err;
 }
 
+/*
+ * Activate High Speed or HS200 mode if supported.
+ */
+static int mmc_select_timing(struct mmc_card *card)
+{
+	int err = 0;
+
+	if ((card->csd.mmca_vsn < CSD_SPEC_VER_4 &&
+	     card->ext_csd.hs_max_dtr == 0))
+		goto bus_speed;
+
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
+		err = mmc_select_hs200(card);
+	else if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS)
+		err = mmc_select_hs(card);
+
+	if (err && err != -EBADMSG)
+		return err;
+
+	if (err) {
+		pr_warn("%s: switch to %s failed\n",
+			mmc_card_hs(card) ? "high-speed" :
+			(mmc_card_hs200(card) ? "hs200" : ""),
+			mmc_hostname(card->host));
+		err = 0;
+	}
+
+bus_speed:
+	/*
+	 * Set the bus speed to the selected bus timing.
+	 * If timing is not selected, backward compatible is the default.
+	 */
+	mmc_set_bus_speed(card);
+	return err;
+}
+
+/*
+ * Execute tuning sequence to seek the proper bus operating
+ * conditions for HS200, which sends CMD21 to the device.
+ */
+static int mmc_hs200_tuning(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	int err = 0;
+
+	if (host->ops->execute_tuning) {
+		mmc_host_clk_hold(host);
+		err = host->ops->execute_tuning(host,
+				MMC_SEND_TUNING_BLOCK_HS200);
+		mmc_host_clk_release(host);
+
+		if (err)
+			pr_warn("%s: tuning execution failed\n",
+				mmc_hostname(host));
+	}
+
+	return err;
+}
+
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -909,9 +1093,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	struct mmc_card *oldcard)
 {
 	struct mmc_card *card;
-	int err, ddr = 0;
+	int err;
 	u32 cid[4];
-	unsigned int max_dtr;
 	u32 rocr;
 	u8 *ext_csd = NULL;
 
@@ -1103,173 +1286,23 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	}
 
 	/*
-	 * Activate high speed (if supported)
-	 */
-	if (card->ext_csd.hs_max_dtr != 0) {
-		err = 0;
-		if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
-			err = mmc_select_hs200(card);
-		else if	(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS)
-			err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-					EXT_CSD_HS_TIMING, 1,
-					card->ext_csd.generic_cmd6_time,
-					true, true, true);
-
-		if (err && err != -EBADMSG)
-			goto free_card;
-
-		if (err) {
-			pr_warning("%s: switch to highspeed failed\n",
-			       mmc_hostname(card->host));
-			err = 0;
-		} else {
-			if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
-				mmc_set_timing(card->host,
-					       MMC_TIMING_MMC_HS200);
-			else
-				mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
-		}
-	}
-
-	/*
-	 * Compute bus speed.
-	 */
-	max_dtr = (unsigned int)-1;
-
-	if (mmc_card_hs(card) || mmc_card_hs200(card)) {
-		if (max_dtr > card->ext_csd.hs_max_dtr)
-			max_dtr = card->ext_csd.hs_max_dtr;
-		if (mmc_card_hs(card) && (max_dtr > 52000000))
-			max_dtr = 52000000;
-	} else if (max_dtr > card->csd.max_dtr) {
-		max_dtr = card->csd.max_dtr;
-	}
-
-	mmc_set_clock(host, max_dtr);
-
-	/*
-	 * Indicate DDR mode (if supported).
+	 * Select timing interface
 	 */
-	if (mmc_card_hs(card))
-		ddr = card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52;
+	err = mmc_select_timing(card);
+	if (err)
+		goto free_card;
 
-	/*
-	 * Indicate HS200 SDR mode (if supported).
-	 */
 	if (mmc_card_hs200(card)) {
-		u32 ext_csd_bits;
-		u32 bus_width = card->host->ios.bus_width;
-
-		/*
-		 * For devices supporting HS200 mode, the bus width has
-		 * to be set before executing the tuning function. If
-		 * set before tuning, then device will respond with CRC
-		 * errors for responses on CMD line. So for HS200 the
-		 * sequence will be
-		 * 1. set bus width 4bit / 8 bit (1 bit not supported)
-		 * 2. switch to HS200 mode
-		 * 3. set the clock to > 52Mhz <=200MHz and
-		 * 4. execute tuning for HS200
-		 */
-		if (card->host->ops->execute_tuning) {
-			mmc_host_clk_hold(card->host);
-			err = card->host->ops->execute_tuning(card->host,
-				MMC_SEND_TUNING_BLOCK_HS200);
-			mmc_host_clk_release(card->host);
-		}
-		if (err) {
-			pr_warning("%s: tuning execution failed\n",
-				   mmc_hostname(card->host));
+		err = mmc_hs200_tuning(card);
+		if (err)
 			goto err;
-		}
-
-		ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
-				EXT_CSD_BUS_WIDTH_8 : EXT_CSD_BUS_WIDTH_4;
-	}
-
-	/*
-	 * Activate wide bus and DDR (if supported).
-	 */
-	if (!mmc_card_hs200(card) &&
-	    (card->csd.mmca_vsn >= CSD_SPEC_VER_4) &&
-	    (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) {
-		static unsigned ext_csd_bits[][2] = {
-			{ EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 },
-			{ EXT_CSD_BUS_WIDTH_4, EXT_CSD_DDR_BUS_WIDTH_4 },
-			{ EXT_CSD_BUS_WIDTH_1, EXT_CSD_BUS_WIDTH_1 },
-		};
-		static unsigned bus_widths[] = {
-			MMC_BUS_WIDTH_8,
-			MMC_BUS_WIDTH_4,
-			MMC_BUS_WIDTH_1
-		};
-		unsigned idx, bus_width = 0;
-
-		if (host->caps & MMC_CAP_8_BIT_DATA)
-			idx = 0;
-		else
-			idx = 1;
-		for (; idx < ARRAY_SIZE(bus_widths); idx++) {
-			bus_width = bus_widths[idx];
-			if (bus_width == MMC_BUS_WIDTH_1)
-				ddr = 0; /* no DDR for 1-bit width */
-
-			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-					 EXT_CSD_BUS_WIDTH,
-					 ext_csd_bits[idx][0],
-					 card->ext_csd.generic_cmd6_time);
-			if (!err) {
-				mmc_set_bus_width(card->host, bus_width);
-
-				/*
-				 * If controller can't handle bus width test,
-				 * compare ext_csd previously read in 1 bit mode
-				 * against ext_csd at new bus width
-				 */
-				if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
-					err = mmc_compare_ext_csds(card,
-						bus_width);
-				else
-					err = mmc_bus_test(card, bus_width);
-				if (!err)
-					break;
-			}
-		}
-
-		if (!err && ddr) {
-			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-					 EXT_CSD_BUS_WIDTH,
-					 ext_csd_bits[idx][1],
-					 card->ext_csd.generic_cmd6_time);
-		}
-		if (err) {
-			pr_warning("%s: switch to bus width %d ddr %d "
-				"failed\n", mmc_hostname(card->host),
-				1 << bus_width, ddr);
-			goto free_card;
-		} else if (ddr) {
-			/*
-			 * eMMC cards can support 3.3V to 1.2V i/o (vccq)
-			 * signaling.
-			 *
-			 * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq.
-			 *
-			 * 1.8V vccq at 3.3V core voltage (vcc) is not required
-			 * in the JEDEC spec for DDR.
-			 *
-			 * Do not force change in vccq since we are obviously
-			 * working and no change to vccq is needed.
-			 *
-			 * WARNING: eMMC rules are NOT the same as SD DDR
-			 */
-			if (ddr & EXT_CSD_CARD_TYPE_DDR_1_2V) {
-				err = __mmc_set_signal_voltage(host,
-					MMC_SIGNAL_VOLTAGE_120);
-				if (err)
-					goto err;
-			}
-			mmc_set_timing(card->host, MMC_TIMING_MMC_DDR52);
-			mmc_set_bus_width(card->host, bus_width);
+	} else if (mmc_card_hs(card)) {
+		/* Select the desired bus width optionally */
+		err = mmc_select_bus_width(card);
+		if (!IS_ERR_VALUE(err)) {
+			err = mmc_select_hs_ddr(card);
+			if (err)
+				goto err;
 		}
 	}
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index fe31f8d89a03..176073692872 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -63,6 +63,7 @@ struct mmc_ext_csd {
 	unsigned int            power_off_longtime;     /* Units: ms */
 	u8			power_off_notification;	/* state */
 	unsigned int		hs_max_dtr;
+	unsigned int		hs200_max_dtr;
 #define MMC_HIGH_26_MAX_DTR	26000000
 #define MMC_HIGH_52_MAX_DTR	52000000
 #define MMC_HIGH_DDR_MAX_DTR	52000000
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index f734c0c64575..f429f13be433 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -377,6 +377,10 @@ struct _mmc_csd {
 #define EXT_CSD_DDR_BUS_WIDTH_4	5	/* Card is in 4 bit DDR mode */
 #define EXT_CSD_DDR_BUS_WIDTH_8	6	/* Card is in 8 bit DDR mode */
 
+#define EXT_CSD_TIMING_BC	0	/* Backwards compatility */
+#define EXT_CSD_TIMING_HS	1	/* High speed */
+#define EXT_CSD_TIMING_HS200	2	/* HS200 */
+
 #define EXT_CSD_SEC_ER_EN	BIT(0)
 #define EXT_CSD_SEC_BD_BLK_EN	BIT(2)
 #define EXT_CSD_SEC_GB_CL_EN	BIT(4)
-- 
cgit v1.2.3-71-gd317


From 0a5b6438ee482696360bb013e67b8488f63d3e9e Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Wed, 23 Apr 2014 17:14:58 +0900
Subject: mmc: add support for HS400 mode of eMMC5.0

This patch adds HS400 mode support for eMMC5.0 device.  HS400 mode is high
speed DDR interface timing from HS200.  Clock frequency is up to 200MHz
and only 8-bit bus width is supported. In addition, tuning process of
HS200 is required to synchronize the command response on the CMD line
because CMD input timing for HS400 mode is the same as HS200 mode.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Reviewed-by: Jackey Shen <jackey.shen@amd.com>
Tested-by: Jaehoon Chung <jh80.chung@samsung.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/bus.c     |  1 +
 drivers/mmc/core/debugfs.c |  3 ++
 drivers/mmc/core/mmc.c     | 98 +++++++++++++++++++++++++++++++++++++++++++---
 include/linux/mmc/card.h   |  1 +
 include/linux/mmc/host.h   | 14 +++++++
 include/linux/mmc/mmc.h    |  7 +++-
 6 files changed, 118 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index f37e9d6af84a..d2dbf02022bd 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -349,6 +349,7 @@ int mmc_add_card(struct mmc_card *card)
 			mmc_hostname(card->host),
 			mmc_card_uhs(card) ? "ultra high speed " :
 			(mmc_card_hs(card) ? "high speed " : ""),
+			mmc_card_hs400(card) ? "HS400 " :
 			(mmc_card_hs200(card) ? "HS200 " : ""),
 			mmc_card_ddr52(card) ? "DDR " : "",
 			uhs_bus_speed_mode, type, card->rca);
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 1f730dbfaeea..91eb16223246 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -141,6 +141,9 @@ static int mmc_ios_show(struct seq_file *s, void *data)
 	case MMC_TIMING_MMC_HS200:
 		str = "mmc HS200";
 		break;
+	case MMC_TIMING_MMC_HS400:
+		str = "mmc HS400";
+		break;
 	default:
 		str = "invalid";
 		break;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index bec6786efd19..793c6f7ddb04 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -240,7 +240,7 @@ static int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd)
 static void mmc_select_card_type(struct mmc_card *card)
 {
 	struct mmc_host *host = card->host;
-	u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK;
+	u8 card_type = card->ext_csd.raw_card_type;
 	u32 caps = host->caps, caps2 = host->caps2;
 	unsigned int hs_max_dtr = 0, hs200_max_dtr = 0;
 	unsigned int avail_type = 0;
@@ -281,6 +281,18 @@ static void mmc_select_card_type(struct mmc_card *card)
 		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_2V;
 	}
 
+	if (caps2 & MMC_CAP2_HS400_1_8V &&
+	    card_type & EXT_CSD_CARD_TYPE_HS400_1_8V) {
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS400_1_8V;
+	}
+
+	if (caps2 & MMC_CAP2_HS400_1_2V &&
+	    card_type & EXT_CSD_CARD_TYPE_HS400_1_2V) {
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS400_1_2V;
+	}
+
 	card->ext_csd.hs_max_dtr = hs_max_dtr;
 	card->ext_csd.hs200_max_dtr = hs200_max_dtr;
 	card->mmc_avail_type = avail_type;
@@ -499,6 +511,8 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 			ext_csd[EXT_CSD_PWR_CL_DDR_52_195];
 		card->ext_csd.raw_pwr_cl_ddr_52_360 =
 			ext_csd[EXT_CSD_PWR_CL_DDR_52_360];
+		card->ext_csd.raw_pwr_cl_ddr_200_360 =
+			ext_csd[EXT_CSD_PWR_CL_DDR_200_360];
 	}
 
 	if (card->ext_csd.rev >= 5) {
@@ -665,7 +679,10 @@ static int mmc_compare_ext_csds(struct mmc_card *card, unsigned bus_width)
 		(card->ext_csd.raw_pwr_cl_ddr_52_195 ==
 			bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_195]) &&
 		(card->ext_csd.raw_pwr_cl_ddr_52_360 ==
-			bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_360]));
+			bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_360]) &&
+		(card->ext_csd.raw_pwr_cl_ddr_200_360 ==
+			bw_ext_csd[EXT_CSD_PWR_CL_DDR_200_360]));
+
 	if (err)
 		err = -EINVAL;
 
@@ -768,7 +785,9 @@ static int __mmc_select_powerclass(struct mmc_card *card,
 				ext_csd->raw_pwr_cl_52_360 :
 				ext_csd->raw_pwr_cl_ddr_52_360;
 		else if (host->ios.clock <= MMC_HS200_MAX_DTR)
-			pwrclass_val = ext_csd->raw_pwr_cl_200_360;
+			pwrclass_val = (bus_width == EXT_CSD_DDR_BUS_WIDTH_8) ?
+				ext_csd->raw_pwr_cl_ddr_200_360 :
+				ext_csd->raw_pwr_cl_200_360;
 		break;
 	default:
 		pr_warning("%s: Voltage range not supported "
@@ -832,7 +851,8 @@ static void mmc_set_bus_speed(struct mmc_card *card)
 {
 	unsigned int max_dtr = (unsigned int)-1;
 
-	if (mmc_card_hs200(card) && max_dtr > card->ext_csd.hs200_max_dtr)
+	if ((mmc_card_hs200(card) || mmc_card_hs400(card)) &&
+	     max_dtr > card->ext_csd.hs200_max_dtr)
 		max_dtr = card->ext_csd.hs200_max_dtr;
 	else if (mmc_card_hs(card) && max_dtr > card->ext_csd.hs_max_dtr)
 		max_dtr = card->ext_csd.hs_max_dtr;
@@ -985,6 +1005,61 @@ static int mmc_select_hs_ddr(struct mmc_card *card)
 	return err;
 }
 
+static int mmc_select_hs400(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	int err = 0;
+
+	/*
+	 * HS400 mode requires 8-bit bus width
+	 */
+	if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+	      host->ios.bus_width == MMC_BUS_WIDTH_8))
+		return 0;
+
+	/*
+	 * Before switching to dual data rate operation for HS400,
+	 * it is required to convert from HS200 mode to HS mode.
+	 */
+	mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+	mmc_set_bus_speed(card);
+
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS,
+			   card->ext_csd.generic_cmd6_time,
+			   true, true, true);
+	if (err) {
+		pr_warn("%s: switch to high-speed from hs200 failed, err:%d\n",
+			mmc_hostname(host), err);
+		return err;
+	}
+
+	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			 EXT_CSD_BUS_WIDTH,
+			 EXT_CSD_DDR_BUS_WIDTH_8,
+			 card->ext_csd.generic_cmd6_time);
+	if (err) {
+		pr_warn("%s: switch to bus width for hs400 failed, err:%d\n",
+			mmc_hostname(host), err);
+		return err;
+	}
+
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS400,
+			   card->ext_csd.generic_cmd6_time,
+			   true, true, true);
+	if (err) {
+		pr_warn("%s: switch to hs400 failed, err:%d\n",
+			 mmc_hostname(host), err);
+		return err;
+	}
+
+	mmc_set_timing(host, MMC_TIMING_MMC_HS400);
+	mmc_set_bus_speed(card);
+
+	return 0;
+}
+
 /*
  * For device supporting HS200 mode, the following sequence
  * should be done before executing the tuning process.
@@ -1062,13 +1137,22 @@ bus_speed:
 
 /*
  * Execute tuning sequence to seek the proper bus operating
- * conditions for HS200, which sends CMD21 to the device.
+ * conditions for HS200 and HS400, which sends CMD21 to the device.
  */
 static int mmc_hs200_tuning(struct mmc_card *card)
 {
 	struct mmc_host *host = card->host;
 	int err = 0;
 
+	/*
+	 * Timing should be adjusted to the HS400 target
+	 * operation frequency for tuning process
+	 */
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+	    host->ios.bus_width == MMC_BUS_WIDTH_8)
+		if (host->ops->prepare_hs400_tuning)
+			host->ops->prepare_hs400_tuning(host, &host->ios);
+
 	if (host->ops->execute_tuning) {
 		mmc_host_clk_hold(host);
 		err = host->ops->execute_tuning(host,
@@ -1296,6 +1380,10 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		err = mmc_hs200_tuning(card);
 		if (err)
 			goto err;
+
+		err = mmc_select_hs400(card);
+		if (err)
+			goto err;
 	} else if (mmc_card_hs(card)) {
 		/* Select the desired bus width optionally */
 		err = mmc_select_bus_width(card);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 176073692872..d424b9de3aff 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -110,6 +110,7 @@ struct mmc_ext_csd {
 	u8			raw_pwr_cl_200_360;	/* 237 */
 	u8			raw_pwr_cl_ddr_52_195;	/* 238 */
 	u8			raw_pwr_cl_ddr_52_360;	/* 239 */
+	u8			raw_pwr_cl_ddr_200_360;	/* 253 */
 	u8			raw_bkops_status;	/* 246 */
 	u8			raw_sectors[4];		/* 212 - 4 bytes */
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 6b1e9ee6ca10..183087374215 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -61,6 +61,7 @@ struct mmc_ios {
 #define MMC_TIMING_UHS_DDR50	7
 #define MMC_TIMING_MMC_DDR52	8
 #define MMC_TIMING_MMC_HS200	9
+#define MMC_TIMING_MMC_HS400	10
 
 	unsigned char	signal_voltage;		/* signalling voltage (1.8V or 3.3V) */
 
@@ -132,6 +133,9 @@ struct mmc_host_ops {
 
 	/* The tuning command opcode value is different for SD and eMMC cards */
 	int	(*execute_tuning)(struct mmc_host *host, u32 opcode);
+
+	/* Prepare HS400 target operating frequency depending host driver */
+	int	(*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios);
 	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
 	void	(*hw_reset)(struct mmc_host *host);
 	void	(*card_event)(struct mmc_host *host);
@@ -274,6 +278,10 @@ struct mmc_host {
 #define MMC_CAP2_PACKED_CMD	(MMC_CAP2_PACKED_RD | \
 				 MMC_CAP2_PACKED_WR)
 #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14)	/* Don't power up before scan */
+#define MMC_CAP2_HS400_1_8V	(1 << 15)	/* Can support HS400 1.8V */
+#define MMC_CAP2_HS400_1_2V	(1 << 16)	/* Can support HS400 1.2V */
+#define MMC_CAP2_HS400		(MMC_CAP2_HS400_1_8V | \
+				 MMC_CAP2_HS400_1_2V)
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
@@ -495,4 +503,10 @@ static inline bool mmc_card_ddr52(struct mmc_card *card)
 {
 	return card->host->ios.timing == MMC_TIMING_MMC_DDR52;
 }
+
+static inline bool mmc_card_hs400(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_MMC_HS400;
+}
+
 #endif /* LINUX_MMC_HOST_H */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index f429f13be433..64ec963ed347 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -325,6 +325,7 @@ struct _mmc_csd {
 #define EXT_CSD_POWER_OFF_LONG_TIME	247	/* RO */
 #define EXT_CSD_GENERIC_CMD6_TIME	248	/* RO */
 #define EXT_CSD_CACHE_SIZE		249	/* RO, 4 bytes */
+#define EXT_CSD_PWR_CL_DDR_200_360	253	/* RO */
 #define EXT_CSD_TAG_UNIT_SIZE		498	/* RO */
 #define EXT_CSD_DATA_TAG_SUPPORT	499	/* RO */
 #define EXT_CSD_MAX_PACKED_WRITES	500	/* RO */
@@ -354,7 +355,6 @@ struct _mmc_csd {
 #define EXT_CSD_CMD_SET_SECURE		(1<<1)
 #define EXT_CSD_CMD_SET_CPSECURE	(1<<2)
 
-#define EXT_CSD_CARD_TYPE_MASK	0x3F	/* Mask out reserved bits */
 #define EXT_CSD_CARD_TYPE_HS_26	(1<<0)	/* Card can run at 26MHz */
 #define EXT_CSD_CARD_TYPE_HS_52	(1<<1)	/* Card can run at 52MHz */
 #define EXT_CSD_CARD_TYPE_HS	(EXT_CSD_CARD_TYPE_HS_26 | \
@@ -370,6 +370,10 @@ struct _mmc_csd {
 						/* SDR mode @1.2V I/O */
 #define EXT_CSD_CARD_TYPE_HS200		(EXT_CSD_CARD_TYPE_HS200_1_8V | \
 					 EXT_CSD_CARD_TYPE_HS200_1_2V)
+#define EXT_CSD_CARD_TYPE_HS400_1_8V	(1<<6)	/* Card can run at 200MHz DDR, 1.8V */
+#define EXT_CSD_CARD_TYPE_HS400_1_2V	(1<<7)	/* Card can run at 200MHz DDR, 1.2V */
+#define EXT_CSD_CARD_TYPE_HS400		(EXT_CSD_CARD_TYPE_HS400_1_8V | \
+					 EXT_CSD_CARD_TYPE_HS400_1_2V)
 
 #define EXT_CSD_BUS_WIDTH_1	0	/* Card is in 1 bit mode */
 #define EXT_CSD_BUS_WIDTH_4	1	/* Card is in 4 bit mode */
@@ -380,6 +384,7 @@ struct _mmc_csd {
 #define EXT_CSD_TIMING_BC	0	/* Backwards compatility */
 #define EXT_CSD_TIMING_HS	1	/* High speed */
 #define EXT_CSD_TIMING_HS200	2	/* HS200 */
+#define EXT_CSD_TIMING_HS400	3	/* HS400 */
 
 #define EXT_CSD_SEC_ER_EN	BIT(0)
 #define EXT_CSD_SEC_BD_BLK_EN	BIT(2)
-- 
cgit v1.2.3-71-gd317


From 4d1f52f9a9f9a63371dba589093b3ae90fc80c3d Mon Sep 17 00:00:00 2001
From: Tim Kryger <tim.kryger@linaro.org>
Date: Tue, 6 May 2014 15:57:01 -0700
Subject: mmc: core: Improve support for deferred regulators

Callers of mmc_regulator_get_supply could benefit from knowing if either
of the regulators are present but not yet available.  Since callers do
not currently examine the return value, modify this function to return
zero or -EPROBE_DEFER if either regulator get returns the same.

Furthermore, since callers check vmmc/vqmmc using IS_ERR and can deal
with absent regulators, switch to devm_regulator_get_optional. This has
the added benefit of allowing this function to behave correctly even in
the !CONFIG_REGULATOR case such that the stub can be removed.

Signed-off-by: Tim Kryger <tim.kryger@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/core.c  | 31 +++++++++++++++++++------------
 include/linux/mmc/host.h |  8 ++------
 2 files changed, 21 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 02baa30653fa..7dc0c85fdb60 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1314,31 +1314,38 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc,
 }
 EXPORT_SYMBOL_GPL(mmc_regulator_set_ocr);
 
+#endif /* CONFIG_REGULATOR */
+
 int mmc_regulator_get_supply(struct mmc_host *mmc)
 {
 	struct device *dev = mmc_dev(mmc);
-	struct regulator *supply;
 	int ret;
 
-	supply = devm_regulator_get(dev, "vmmc");
-	mmc->supply.vmmc = supply;
+	mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc");
 	mmc->supply.vqmmc = devm_regulator_get_optional(dev, "vqmmc");
 
-	if (IS_ERR(supply))
-		return PTR_ERR(supply);
+	if (IS_ERR(mmc->supply.vmmc)) {
+		if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_info(dev, "No vmmc regulator found\n");
+	} else {
+		ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc);
+		if (ret > 0)
+			mmc->ocr_avail = ret;
+		else
+			dev_warn(dev, "Failed getting OCR mask: %d\n", ret);
+	}
 
-	ret = mmc_regulator_get_ocrmask(supply);
-	if (ret > 0)
-		mmc->ocr_avail = ret;
-	else
-		dev_warn(mmc_dev(mmc), "Failed getting OCR mask: %d\n", ret);
+	if (IS_ERR(mmc->supply.vqmmc)) {
+		if (PTR_ERR(mmc->supply.vqmmc) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_info(dev, "No vqmmc regulator found\n");
+	}
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mmc_regulator_get_supply);
 
-#endif /* CONFIG_REGULATOR */
-
 /*
  * Mask off any voltages we don't support and select
  * the lowest voltage
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 183087374215..cd595275e118 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -402,7 +402,6 @@ int mmc_regulator_get_ocrmask(struct regulator *supply);
 int mmc_regulator_set_ocr(struct mmc_host *mmc,
 			struct regulator *supply,
 			unsigned short vdd_bit);
-int mmc_regulator_get_supply(struct mmc_host *mmc);
 #else
 static inline int mmc_regulator_get_ocrmask(struct regulator *supply)
 {
@@ -415,13 +414,10 @@ static inline int mmc_regulator_set_ocr(struct mmc_host *mmc,
 {
 	return 0;
 }
-
-static inline int mmc_regulator_get_supply(struct mmc_host *mmc)
-{
-	return 0;
-}
 #endif
 
+int mmc_regulator_get_supply(struct mmc_host *mmc);
+
 int mmc_pm_notify(struct notifier_block *notify_block, unsigned long, void *);
 
 static inline int mmc_card_is_removable(struct mmc_host *host)
-- 
cgit v1.2.3-71-gd317


From 8c48b50a1a888ac5511fe856d63f72fb688c6bb4 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 5 May 2014 11:48:40 +0200
Subject: cfg80211: allow restricting supported dfs regions

At the moment, the ath9k/ath10k DFS module only supports detecting ETSI
radar patterns.
Add a bitmap in the interface combinations, indicating which DFS regions
are supported by the detector. If unset, support for all regions is
assumed.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h |  3 +++
 net/wireless/nl80211.c       |  6 ++++--
 net/wireless/util.c          | 14 ++++++++++++++
 4 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5c7169b0ac57..e3a48b0a2b3b 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2638,6 +2638,7 @@ struct ieee80211_iface_limit {
  *	between infrastructure and AP types must match. This is required
  *	only in special cases.
  * @radar_detect_widths: bitmap of channel widths supported for radar detection
+ * @radar_detect_regions: bitmap of regions supported for radar detection
  *
  * With this structure the driver can describe which interface
  * combinations it supports concurrently.
@@ -2695,6 +2696,7 @@ struct ieee80211_iface_combination {
 	u8 n_limits;
 	bool beacon_int_infra_match;
 	u8 radar_detect_widths;
+	u8 radar_detect_regions;
 };
 
 struct ieee80211_txrx_stypes {
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 406010d4def0..b65095a85dee 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3688,6 +3688,8 @@ enum nl80211_iface_limit_attrs {
  *	different channels may be used within this group.
  * @NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS: u32 attribute containing the bitmap
  *	of supported channel widths for radar detection.
+ * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap
+ *	of supported regulatory regions for radar detection.
  * @NUM_NL80211_IFACE_COMB: number of attributes
  * @MAX_NL80211_IFACE_COMB: highest attribute number
  *
@@ -3721,6 +3723,7 @@ enum nl80211_if_combination_attrs {
 	NL80211_IFACE_COMB_STA_AP_BI_MATCH,
 	NL80211_IFACE_COMB_NUM_CHANNELS,
 	NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
+	NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
 
 	/* keep last */
 	NUM_NL80211_IFACE_COMB,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0f1b18f209d6..c0833830cfe7 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -970,8 +970,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy,
 				c->max_interfaces))
 			goto nla_put_failure;
 		if (large &&
-		    nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
-				c->radar_detect_widths))
+		    (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
+				c->radar_detect_widths) ||
+		     nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
+				c->radar_detect_regions)))
 			goto nla_put_failure;
 
 		nla_nest_end(msg, nl_combi);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index a756429b3a0a..8c61d5c6fad3 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1274,10 +1274,20 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
 					    void *data),
 			       void *data)
 {
+	const struct ieee80211_regdomain *regdom;
+	enum nl80211_dfs_regions region = 0;
 	int i, j, iftype;
 	int num_interfaces = 0;
 	u32 used_iftypes = 0;
 
+	if (radar_detect) {
+		rcu_read_lock();
+		regdom = rcu_dereference(cfg80211_regdomain);
+		if (regdom)
+			region = regdom->dfs_region;
+		rcu_read_unlock();
+	}
+
 	for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
 		num_interfaces += iftype_num[iftype];
 		if (iftype_num[iftype] > 0 &&
@@ -1318,6 +1328,10 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
 		if (radar_detect != (c->radar_detect_widths & radar_detect))
 			goto cont;
 
+		if (radar_detect && c->radar_detect_regions &&
+		    !(c->radar_detect_regions & BIT(region)))
+			goto cont;
+
 		/* Finally check that all iftypes that we're currently
 		 * using are actually part of this combination. If they
 		 * aren't then we can't use this combination and have
-- 
cgit v1.2.3-71-gd317


From ec903c0c858e4963a9e0724bdcadfa837253341c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 May 2014 12:11:01 -0400
Subject: cgroup: rename css_tryget*() to css_tryget_online*()

Unlike the more usual refcnting, what css_tryget() provides is the
distinction between online and offline csses instead of protection
against upping a refcnt which already reached zero.  cgroup is
planning to provide actual tryget which fails if the refcnt already
reached zero.  Let's rename the existing trygets so that they clearly
indicate that they're onliness.

I thought about keeping the existing names as-are and introducing new
names for the planned actual tryget; however, given that each
controller participates in the synchronization of the online state, it
seems worthwhile to make it explicit that these functions are about
on/offline state.

Rename css_tryget() to css_tryget_online() and css_tryget_from_dir()
to css_tryget_online_from_dir().  This is pure rename.

v2: cgroup_freezer grew new usages of css_tryget().  Update
    accordingly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
---
 block/blk-cgroup.c      |  2 +-
 fs/bio.c                |  2 +-
 include/linux/cgroup.h  | 14 +++++++-------
 kernel/cgroup.c         | 40 ++++++++++++++++++++--------------------
 kernel/cgroup_freezer.c |  4 ++--
 kernel/cpuset.c         |  6 +++---
 kernel/events/core.c    |  3 ++-
 mm/hugetlb_cgroup.c     |  2 +-
 mm/memcontrol.c         | 46 ++++++++++++++++++++++++----------------------
 9 files changed, 61 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 1039fb9ff5f5..9f5bce33e6fe 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -185,7 +185,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 	lockdep_assert_held(q->queue_lock);
 
 	/* blkg holds a reference to blkcg */
-	if (!css_tryget(&blkcg->css)) {
+	if (!css_tryget_online(&blkcg->css)) {
 		ret = -EINVAL;
 		goto err_free_blkg;
 	}
diff --git a/fs/bio.c b/fs/bio.c
index 6f0362b77806..0608ef9422bd 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1970,7 +1970,7 @@ int bio_associate_current(struct bio *bio)
 	/* associate blkcg if exists */
 	rcu_read_lock();
 	css = task_css(current, blkio_cgrp_id);
-	if (css && css_tryget(css))
+	if (css && css_tryget_online(css))
 		bio->bi_css = css;
 	rcu_read_unlock();
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index bde44618d8c2..c5f3684ef557 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -95,16 +95,16 @@ static inline void css_get(struct cgroup_subsys_state *css)
 }
 
 /**
- * css_tryget - try to obtain a reference on the specified css
+ * css_tryget_online - try to obtain a reference on the specified css if online
  * @css: target css
  *
- * Obtain a reference on @css if it's alive.  The caller naturally needs to
- * ensure that @css is accessible but doesn't have to be holding a
+ * Obtain a reference on @css if it's online.  The caller naturally needs
+ * to ensure that @css is accessible but doesn't have to be holding a
  * reference on it - IOW, RCU protected access is good enough for this
  * function.  Returns %true if a reference count was successfully obtained;
  * %false otherwise.
  */
-static inline bool css_tryget(struct cgroup_subsys_state *css)
+static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 {
 	if (css->flags & CSS_ROOT)
 		return true;
@@ -115,7 +115,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
  * css_put - put a css reference
  * @css: target css
  *
- * Put a reference obtained via css_get() and css_tryget().
+ * Put a reference obtained via css_get() and css_tryget_online().
  */
 static inline void css_put(struct cgroup_subsys_state *css)
 {
@@ -905,8 +905,8 @@ void css_task_iter_end(struct css_task_iter *it);
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
-struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
-						struct cgroup_subsys *ss);
+struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
+						       struct cgroup_subsys *ss);
 
 #else /* !CONFIG_CGROUPS */
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7633703e9baf..671d8a6dae37 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3771,7 +3771,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 
 	/*
 	 * We aren't being called from kernfs and there's no guarantee on
-	 * @kn->priv's validity.  For this and css_tryget_from_dir(),
+	 * @kn->priv's validity.  For this and css_tryget_online_from_dir(),
 	 * @kn->priv is RCU safe.  Let's do the RCU dancing.
 	 */
 	rcu_read_lock();
@@ -4060,9 +4060,9 @@ err:
  *    Implemented in kill_css().
  *
  * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
- *    and thus css_tryget() is guaranteed to fail, the css can be offlined
- *    by invoking offline_css().  After offlining, the base ref is put.
- *    Implemented in css_killed_work_fn().
+ *    and thus css_tryget_online() is guaranteed to fail, the css can be
+ *    offlined by invoking offline_css().  After offlining, the base ref is
+ *    put.  Implemented in css_killed_work_fn().
  *
  * 3. When the percpu_ref reaches zero, the only possible remaining
  *    accessors are inside RCU read sections.  css_release() schedules the
@@ -4386,7 +4386,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 
 /*
  * This is called when the refcnt of a css is confirmed to be killed.
- * css_tryget() is now guaranteed to fail.
+ * css_tryget_online() is now guaranteed to fail.
  */
 static void css_killed_work_fn(struct work_struct *work)
 {
@@ -4398,8 +4398,8 @@ static void css_killed_work_fn(struct work_struct *work)
 	mutex_lock(&cgroup_mutex);
 
 	/*
-	 * css_tryget() is guaranteed to fail now.  Tell subsystems to
-	 * initate destruction.
+	 * css_tryget_online() is guaranteed to fail now.  Tell subsystems
+	 * to initate destruction.
 	 */
 	offline_css(css);
 
@@ -4440,8 +4440,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
  *
  * This function initiates destruction of @css by removing cgroup interface
  * files and putting its base reference.  ->css_offline() will be invoked
- * asynchronously once css_tryget() is guaranteed to fail and when the
- * reference count reaches zero, @css will be released.
+ * asynchronously once css_tryget_online() is guaranteed to fail and when
+ * the reference count reaches zero, @css will be released.
  */
 static void kill_css(struct cgroup_subsys_state *css)
 {
@@ -4462,7 +4462,7 @@ static void kill_css(struct cgroup_subsys_state *css)
 	/*
 	 * cgroup core guarantees that, by the time ->css_offline() is
 	 * invoked, no new css reference will be given out via
-	 * css_tryget().  We can't simply call percpu_ref_kill() and
+	 * css_tryget_online().  We can't simply call percpu_ref_kill() and
 	 * proceed to offlining css's because percpu_ref_kill() doesn't
 	 * guarantee that the ref is seen as killed on all CPUs on return.
 	 *
@@ -4478,9 +4478,9 @@ static void kill_css(struct cgroup_subsys_state *css)
  *
  * css's make use of percpu refcnts whose killing latency shouldn't be
  * exposed to userland and are RCU protected.  Also, cgroup core needs to
- * guarantee that css_tryget() won't succeed by the time ->css_offline() is
- * invoked.  To satisfy all the requirements, destruction is implemented in
- * the following two steps.
+ * guarantee that css_tryget_online() won't succeed by the time
+ * ->css_offline() is invoked.  To satisfy all the requirements,
+ * destruction is implemented in the following two steps.
  *
  * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
  *     userland visible parts and start killing the percpu refcnts of
@@ -4574,9 +4574,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	/*
 	 * There are two control paths which try to determine cgroup from
 	 * dentry without going through kernfs - cgroupstats_build() and
-	 * css_tryget_from_dir().  Those are supported by RCU protecting
-	 * clearing of cgrp->kn->priv backpointer, which should happen
-	 * after all files under it have been removed.
+	 * css_tryget_online_from_dir().  Those are supported by RCU
+	 * protecting clearing of cgrp->kn->priv backpointer, which should
+	 * happen after all files under it have been removed.
 	 */
 	kernfs_remove(cgrp->kn);	/* @cgrp has an extra ref on its kn */
 	RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL);
@@ -5173,7 +5173,7 @@ static int __init cgroup_disable(char *str)
 __setup("cgroup_disable=", cgroup_disable);
 
 /**
- * css_tryget_from_dir - get corresponding css from the dentry of a cgroup dir
+ * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
  * @dentry: directory dentry of interest
  * @ss: subsystem of interest
  *
@@ -5181,8 +5181,8 @@ __setup("cgroup_disable=", cgroup_disable);
  * to get the corresponding css and return it.  If such css doesn't exist
  * or can't be pinned, an ERR_PTR value is returned.
  */
-struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
-						struct cgroup_subsys *ss)
+struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
+						       struct cgroup_subsys *ss)
 {
 	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
 	struct cgroup_subsys_state *css = NULL;
@@ -5204,7 +5204,7 @@ struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
 	if (cgrp)
 		css = cgroup_css(cgrp, ss);
 
-	if (!css || !css_tryget(css))
+	if (!css || !css_tryget_online(css))
 		css = ERR_PTR(-ENOENT);
 
 	rcu_read_unlock();
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 345628c78b5b..0398f7e9ac81 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -304,7 +304,7 @@ static int freezer_read(struct seq_file *m, void *v)
 
 	/* update states bottom-up */
 	css_for_each_descendant_post(pos, css) {
-		if (!css_tryget(pos))
+		if (!css_tryget_online(pos))
 			continue;
 		rcu_read_unlock();
 
@@ -404,7 +404,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
 		struct freezer *pos_f = css_freezer(pos);
 		struct freezer *parent = parent_freezer(pos_f);
 
-		if (!css_tryget(pos))
+		if (!css_tryget_online(pos))
 			continue;
 		rcu_read_unlock();
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 7c0e8da59e26..37ca0a5c226d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -872,7 +872,7 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs, bool update_root)
 				continue;
 			}
 		}
-		if (!css_tryget(&cp->css))
+		if (!css_tryget_online(&cp->css))
 			continue;
 		rcu_read_unlock();
 
@@ -1108,7 +1108,7 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs, bool update_root)
 				continue;
 			}
 		}
-		if (!css_tryget(&cp->css))
+		if (!css_tryget_online(&cp->css))
 			continue;
 		rcu_read_unlock();
 
@@ -2153,7 +2153,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
 		rcu_read_lock();
 		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
-			if (cs == &top_cpuset || !css_tryget(&cs->css))
+			if (cs == &top_cpuset || !css_tryget_online(&cs->css))
 				continue;
 			rcu_read_unlock();
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f83a71a3e46d..077968d19b8a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -607,7 +607,8 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
 	if (!f.file)
 		return -EBADF;
 
-	css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
+	css = css_tryget_online_from_dir(f.file->f_dentry,
+					 &perf_event_cgrp_subsys);
 	if (IS_ERR(css)) {
 		ret = PTR_ERR(css);
 		goto out;
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 595d7fd795e1..372f1adca491 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -181,7 +181,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 again:
 	rcu_read_lock();
 	h_cg = hugetlb_cgroup_from_task(current);
-	if (!css_tryget(&h_cg->css)) {
+	if (!css_tryget_online(&h_cg->css)) {
 		rcu_read_unlock();
 		goto again;
 	}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c3f82f69ef58..5cf3246314a2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -567,7 +567,8 @@ void sock_update_memcg(struct sock *sk)
 		memcg = mem_cgroup_from_task(current);
 		cg_proto = sk->sk_prot->proto_cgroup(memcg);
 		if (!mem_cgroup_is_root(memcg) &&
-		    memcg_proto_active(cg_proto) && css_tryget(&memcg->css)) {
+		    memcg_proto_active(cg_proto) &&
+		    css_tryget_online(&memcg->css)) {
 			sk->sk_cgrp = cg_proto;
 		}
 		rcu_read_unlock();
@@ -834,7 +835,7 @@ retry:
 	 */
 	__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
 	if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
-		!css_tryget(&mz->memcg->css))
+	    !css_tryget_online(&mz->memcg->css))
 		goto retry;
 done:
 	return mz;
@@ -1076,7 +1077,7 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 		memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
 		if (unlikely(!memcg))
 			memcg = root_mem_cgroup;
-	} while (!css_tryget(&memcg->css));
+	} while (!css_tryget_online(&memcg->css));
 	rcu_read_unlock();
 	return memcg;
 }
@@ -1113,7 +1114,8 @@ skip_node:
 	 */
 	if (next_css) {
 		if ((next_css == &root->css) ||
-		    ((next_css->flags & CSS_ONLINE) && css_tryget(next_css)))
+		    ((next_css->flags & CSS_ONLINE) &&
+		     css_tryget_online(next_css)))
 			return mem_cgroup_from_css(next_css);
 
 		prev_css = next_css;
@@ -1159,7 +1161,7 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter,
 		 * would be returned all the time.
 		 */
 		if (position && position != root &&
-				!css_tryget(&position->css))
+		    !css_tryget_online(&position->css))
 			position = NULL;
 	}
 	return position;
@@ -2785,9 +2787,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
 
 /*
  * A helper function to get mem_cgroup from ID. must be called under
- * rcu_read_lock().  The caller is responsible for calling css_tryget if
- * the mem_cgroup is used for charging. (dropping refcnt from swap can be
- * called against removed memcg.)
+ * rcu_read_lock().  The caller is responsible for calling
+ * css_tryget_online() if the mem_cgroup is used for charging. (dropping
+ * refcnt from swap can be called against removed memcg.)
  */
 static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 {
@@ -2810,14 +2812,14 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
 		memcg = pc->mem_cgroup;
-		if (memcg && !css_tryget(&memcg->css))
+		if (memcg && !css_tryget_online(&memcg->css))
 			memcg = NULL;
 	} else if (PageSwapCache(page)) {
 		ent.val = page_private(page);
 		id = lookup_swap_cgroup_id(ent);
 		rcu_read_lock();
 		memcg = mem_cgroup_lookup(id);
-		if (memcg && !css_tryget(&memcg->css))
+		if (memcg && !css_tryget_online(&memcg->css))
 			memcg = NULL;
 		rcu_read_unlock();
 	}
@@ -3473,7 +3475,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	}
 
 	/* The corresponding put will be done in the workqueue. */
-	if (!css_tryget(&memcg->css))
+	if (!css_tryget_online(&memcg->css))
 		goto out;
 	rcu_read_unlock();
 
@@ -4246,8 +4248,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
 	memcg = mem_cgroup_lookup(id);
 	if (memcg) {
 		/*
-		 * We uncharge this because swap is freed.
-		 * This memcg can be obsolete one. We avoid calling css_tryget
+		 * We uncharge this because swap is freed.  This memcg can
+		 * be obsolete one. We avoid calling css_tryget_online().
 		 */
 		if (!mem_cgroup_is_root(memcg))
 			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
@@ -5840,10 +5842,10 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg)
 	 * which is then paired with css_put during uncharge resp. here.
 	 *
 	 * Although this might sound strange as this path is called from
-	 * css_offline() when the referencemight have dropped down to 0
-	 * and shouldn't be incremented anymore (css_tryget would fail)
-	 * we do not have other options because of the kmem allocations
-	 * lifetime.
+	 * css_offline() when the referencemight have dropped down to 0 and
+	 * shouldn't be incremented anymore (css_tryget_online() would
+	 * fail) we do not have other options because of the kmem
+	 * allocations lifetime.
 	 */
 	css_get(&memcg->css);
 
@@ -6051,8 +6053,8 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
 	 * automatically removed on cgroup destruction but the removal is
 	 * asynchronous, so take an extra ref on @css.
 	 */
-	cfile_css = css_tryget_from_dir(cfile.file->f_dentry->d_parent,
-					&memory_cgrp_subsys);
+	cfile_css = css_tryget_online_from_dir(cfile.file->f_dentry->d_parent,
+					       &memory_cgrp_subsys);
 	ret = -EINVAL;
 	if (IS_ERR(cfile_css))
 		goto out_put_cfile;
@@ -6496,7 +6498,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	/*
 	 * XXX: css_offline() would be where we should reparent all
 	 * memory to prepare the cgroup for destruction.  However,
-	 * memcg does not do css_tryget() and res_counter charging
+	 * memcg does not do css_tryget_online() and res_counter charging
 	 * under the same RCU lock region, which means that charging
 	 * could race with offlining.  Offlining only happens to
 	 * cgroups with no tasks in them but charges can show up
@@ -6510,9 +6512,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	 *                           lookup_swap_cgroup_id()
 	 *                           rcu_read_lock()
 	 *                           mem_cgroup_lookup()
-	 *                           css_tryget()
+	 *                           css_tryget_online()
 	 *                           rcu_read_unlock()
-	 * disable css_tryget()
+	 * disable css_tryget_online()
 	 * call_rcu()
 	 *   offline_css()
 	 *     reparent_charges()
-- 
cgit v1.2.3-71-gd317


From b41686401e501430ffe93b575ef7959d2ecc6f2e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 May 2014 12:16:21 -0400
Subject: cgroup: implement cftype->write()

During the recent conversion to kernfs, cftype's seq_file operations
are updated so that they are directly mapped to kernfs operations and
thus can fully access the associated kernfs and cgroup contexts;
however, write path hasn't seen similar updates and none of the
existing write operations has access to, for example, the associated
kernfs_open_file.

Let's introduce a new operation cftype->write() which maps directly to
the kernfs write operation and has access to all the arguments and
contexts.  This will replace ->write_string() and ->trigger() and ease
manipulation of kernfs active protection from cgroup file operations.

Two accessors - of_cft() and of_css() - are introduced to enable
accessing the associated cgroup context from cftype->write() which
only takes kernfs_open_file for the context information.  The
accessors for seq_file operations - seq_cft() and seq_css() - are
rewritten to wrap the of_ accessors.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 27 +++++++++++++++++++++++----
 kernel/cgroup.c        | 14 ++++++++------
 2 files changed, 31 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c5f3684ef557..c5a170ca4a48 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -515,6 +515,15 @@ struct cftype {
 	 */
 	int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
 
+	/*
+	 * write() is the generic write callback which maps directly to
+	 * kernfs write operation and overrides all other operations.
+	 * Maximum write size is determined by ->max_write_len.  Use
+	 * of_css/cft() to access the associated css and cft.
+	 */
+	ssize_t (*write)(struct kernfs_open_file *of,
+			 char *buf, size_t nbytes, loff_t off);
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lock_class_key	lockdep_key;
 #endif
@@ -552,14 +561,24 @@ static inline ino_t cgroup_ino(struct cgroup *cgrp)
 		return 0;
 }
 
-static inline struct cftype *seq_cft(struct seq_file *seq)
+/* cft/css accessors for cftype->write() operation */
+static inline struct cftype *of_cft(struct kernfs_open_file *of)
 {
-	struct kernfs_open_file *of = seq->private;
-
 	return of->kn->priv;
 }
 
-struct cgroup_subsys_state *seq_css(struct seq_file *seq);
+struct cgroup_subsys_state *of_css(struct kernfs_open_file *of);
+
+/* cft/css accessors for cftype->seq_*() operations */
+static inline struct cftype *seq_cft(struct seq_file *seq)
+{
+	return of_cft(seq->private);
+}
+
+static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
+{
+	return of_css(seq->private);
+}
 
 /*
  * Name / path handling functions.  All are thin wrappers around the kernfs
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 671d8a6dae37..a16f91d12f4e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -283,11 +283,10 @@ static inline bool cgroup_is_dead(const struct cgroup *cgrp)
 	return test_bit(CGRP_DEAD, &cgrp->flags);
 }
 
-struct cgroup_subsys_state *seq_css(struct seq_file *seq)
+struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
 {
-	struct kernfs_open_file *of = seq->private;
 	struct cgroup *cgrp = of->kn->parent->priv;
-	struct cftype *cft = seq_cft(seq);
+	struct cftype *cft = of_cft(of);
 
 	/*
 	 * This is open and unprotected implementation of cgroup_css().
@@ -302,7 +301,7 @@ struct cgroup_subsys_state *seq_css(struct seq_file *seq)
 	else
 		return &cgrp->dummy_css;
 }
-EXPORT_SYMBOL_GPL(seq_css);
+EXPORT_SYMBOL_GPL(of_css);
 
 /**
  * cgroup_is_descendant - test ancestry
@@ -1035,8 +1034,8 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
 	if (cft->read_u64 || cft->read_s64 || cft->seq_show)
 		mode |= S_IRUGO;
 
-	if (cft->write_u64 || cft->write_s64 || cft->write_string ||
-	    cft->trigger)
+	if (cft->write_u64 || cft->write_s64 || cft->write ||
+	    cft->write_string || cft->trigger)
 		mode |= S_IWUSR;
 
 	return mode;
@@ -2726,6 +2725,9 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 	struct cgroup_subsys_state *css;
 	int ret;
 
+	if (cft->write)
+		return cft->write(of, buf, nbytes, off);
+
 	/*
 	 * kernfs guarantees that a file isn't deleted with operations in
 	 * flight, which means that the matching css is and stays alive and
-- 
cgit v1.2.3-71-gd317


From 451af504df0c62f695a69b83c250486e77c66378 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 May 2014 12:16:21 -0400
Subject: cgroup: replace cftype->write_string() with cftype->write()

Convert all cftype->write_string() users to the new cftype->write()
which maps directly to kernfs write operation and has full access to
kernfs and cgroup contexts.  The conversions are mostly mechanical.

* @css and @cft are accessed using of_css() and of_cft() accessors
  respectively instead of being specified as arguments.

* Should return @nbytes on success instead of 0.

* @buf is not trimmed automatically.  Trim if necessary.  Note that
  blkcg and netprio don't need this as the parsers already handle
  whitespaces.

cftype->write_string() has no user left after the conversions and
removed.

While at it, remove unnecessary local variable @p in
cgroup_subtree_control_write() and stale comment about
CGROUP_LOCAL_BUFFER_SIZE in cgroup_freezer.c.

This patch doesn't introduce any visible behavior changes.

v2: netprio was missing from conversion.  Converted.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Aristeu Rozanski <arozansk@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: "David S. Miller" <davem@davemloft.net>
---
 block/blk-throttle.c      | 32 ++++++++++++++++----------------
 block/cfq-iosched.c       | 28 ++++++++++++++--------------
 include/linux/cgroup.h    | 10 +---------
 kernel/cgroup.c           | 38 +++++++++++++++++++-------------------
 kernel/cgroup_freezer.c   | 20 +++++++++-----------
 kernel/cpuset.c           | 16 +++++++++-------
 mm/hugetlb_cgroup.c       | 17 +++++++++--------
 mm/memcontrol.c           | 46 +++++++++++++++++++++++++---------------------
 net/core/netprio_cgroup.c | 12 ++++++------
 net/ipv4/tcp_memcontrol.c | 16 +++++++++-------
 security/device_cgroup.c  | 14 +++++++-------
 11 files changed, 124 insertions(+), 125 deletions(-)

(limited to 'include')

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 033745cd7fba..5e8fd1bace98 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1346,10 +1346,10 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
-		       const char *buf, bool is_u64)
+static ssize_t tg_set_conf(struct kernfs_open_file *of,
+			   char *buf, size_t nbytes, loff_t off, bool is_u64)
 {
-	struct blkcg *blkcg = css_to_blkcg(css);
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
 	struct blkg_conf_ctx ctx;
 	struct throtl_grp *tg;
 	struct throtl_service_queue *sq;
@@ -1368,9 +1368,9 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
 		ctx.v = -1;
 
 	if (is_u64)
-		*(u64 *)((void *)tg + cft->private) = ctx.v;
+		*(u64 *)((void *)tg + of_cft(of)->private) = ctx.v;
 	else
-		*(unsigned int *)((void *)tg + cft->private) = ctx.v;
+		*(unsigned int *)((void *)tg + of_cft(of)->private) = ctx.v;
 
 	throtl_log(&tg->service_queue,
 		   "limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
@@ -1404,19 +1404,19 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
 	}
 
 	blkg_conf_finish(&ctx);
-	return 0;
+	return nbytes;
 }
 
-static int tg_set_conf_u64(struct cgroup_subsys_state *css, struct cftype *cft,
-			   char *buf)
+static ssize_t tg_set_conf_u64(struct kernfs_open_file *of,
+			       char *buf, size_t nbytes, loff_t off)
 {
-	return tg_set_conf(css, cft, buf, true);
+	return tg_set_conf(of, buf, nbytes, off, true);
 }
 
-static int tg_set_conf_uint(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buf)
+static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
-	return tg_set_conf(css, cft, buf, false);
+	return tg_set_conf(of, buf, nbytes, off, false);
 }
 
 static struct cftype throtl_files[] = {
@@ -1424,25 +1424,25 @@ static struct cftype throtl_files[] = {
 		.name = "throttle.read_bps_device",
 		.private = offsetof(struct throtl_grp, bps[READ]),
 		.seq_show = tg_print_conf_u64,
-		.write_string = tg_set_conf_u64,
+		.write = tg_set_conf_u64,
 	},
 	{
 		.name = "throttle.write_bps_device",
 		.private = offsetof(struct throtl_grp, bps[WRITE]),
 		.seq_show = tg_print_conf_u64,
-		.write_string = tg_set_conf_u64,
+		.write = tg_set_conf_u64,
 	},
 	{
 		.name = "throttle.read_iops_device",
 		.private = offsetof(struct throtl_grp, iops[READ]),
 		.seq_show = tg_print_conf_uint,
-		.write_string = tg_set_conf_uint,
+		.write = tg_set_conf_uint,
 	},
 	{
 		.name = "throttle.write_iops_device",
 		.private = offsetof(struct throtl_grp, iops[WRITE]),
 		.seq_show = tg_print_conf_uint,
-		.write_string = tg_set_conf_uint,
+		.write = tg_set_conf_uint,
 	},
 	{
 		.name = "throttle.io_service_bytes",
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e0985f1955e7..a73020b8c9af 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1670,11 +1670,11 @@ static int cfq_print_leaf_weight(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static int __cfqg_set_weight_device(struct cgroup_subsys_state *css,
-				    struct cftype *cft, const char *buf,
-				    bool is_leaf_weight)
+static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
+					char *buf, size_t nbytes, loff_t off,
+					bool is_leaf_weight)
 {
-	struct blkcg *blkcg = css_to_blkcg(css);
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
 	struct blkg_conf_ctx ctx;
 	struct cfq_group *cfqg;
 	int ret;
@@ -1697,19 +1697,19 @@ static int __cfqg_set_weight_device(struct cgroup_subsys_state *css,
 	}
 
 	blkg_conf_finish(&ctx);
-	return ret;
+	return ret ?: nbytes;
 }
 
-static int cfqg_set_weight_device(struct cgroup_subsys_state *css,
-				  struct cftype *cft, char *buf)
+static ssize_t cfqg_set_weight_device(struct kernfs_open_file *of,
+				      char *buf, size_t nbytes, loff_t off)
 {
-	return __cfqg_set_weight_device(css, cft, buf, false);
+	return __cfqg_set_weight_device(of, buf, nbytes, off, false);
 }
 
-static int cfqg_set_leaf_weight_device(struct cgroup_subsys_state *css,
-				       struct cftype *cft, char *buf)
+static ssize_t cfqg_set_leaf_weight_device(struct kernfs_open_file *of,
+					   char *buf, size_t nbytes, loff_t off)
 {
-	return __cfqg_set_weight_device(css, cft, buf, true);
+	return __cfqg_set_weight_device(of, buf, nbytes, off, true);
 }
 
 static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -1837,7 +1837,7 @@ static struct cftype cfq_blkcg_files[] = {
 		.name = "weight_device",
 		.flags = CFTYPE_ONLY_ON_ROOT,
 		.seq_show = cfqg_print_leaf_weight_device,
-		.write_string = cfqg_set_leaf_weight_device,
+		.write = cfqg_set_leaf_weight_device,
 	},
 	{
 		.name = "weight",
@@ -1851,7 +1851,7 @@ static struct cftype cfq_blkcg_files[] = {
 		.name = "weight_device",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = cfqg_print_weight_device,
-		.write_string = cfqg_set_weight_device,
+		.write = cfqg_set_weight_device,
 	},
 	{
 		.name = "weight",
@@ -1863,7 +1863,7 @@ static struct cftype cfq_blkcg_files[] = {
 	{
 		.name = "leaf_weight_device",
 		.seq_show = cfqg_print_leaf_weight_device,
-		.write_string = cfqg_set_leaf_weight_device,
+		.write = cfqg_set_leaf_weight_device,
 	},
 	{
 		.name = "leaf_weight",
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c5a170ca4a48..aecdc84fe128 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -453,8 +453,7 @@ struct cftype {
 
 	/*
 	 * The maximum length of string, excluding trailing nul, that can
-	 * be passed to write_string.  If < PAGE_SIZE-1, PAGE_SIZE-1 is
-	 * assumed.
+	 * be passed to write.  If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
 	 */
 	size_t max_write_len;
 
@@ -500,13 +499,6 @@ struct cftype {
 	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
 			 s64 val);
 
-	/*
-	 * write_string() is passed a nul-terminated kernelspace
-	 * buffer of maximum length determined by max_write_len.
-	 * Returns 0 or -ve error code.
-	 */
-	int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buffer);
 	/*
 	 * trigger() callback can be used to get some kick from the
 	 * userspace, when the actual string written is not important
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a16f91d12f4e..2a88ce7b24b6 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1035,7 +1035,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
 		mode |= S_IRUGO;
 
 	if (cft->write_u64 || cft->write_s64 || cft->write ||
-	    cft->write_string || cft->trigger)
+	    cft->trigger)
 		mode |= S_IWUSR;
 
 	return mode;
@@ -2352,20 +2352,21 @@ static int cgroup_procs_write(struct cgroup_subsys_state *css,
 	return attach_task_by_pid(css->cgroup, tgid, true);
 }
 
-static int cgroup_release_agent_write(struct cgroup_subsys_state *css,
-				      struct cftype *cft, char *buffer)
+static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
+					  char *buf, size_t nbytes, loff_t off)
 {
-	struct cgroup_root *root = css->cgroup->root;
+	struct cgroup *cgrp = of_css(of)->cgroup;
+	struct cgroup_root *root = cgrp->root;
 
 	BUILD_BUG_ON(sizeof(root->release_agent_path) < PATH_MAX);
-	if (!cgroup_lock_live_group(css->cgroup))
+	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
 	spin_lock(&release_agent_path_lock);
-	strlcpy(root->release_agent_path, buffer,
+	strlcpy(root->release_agent_path, strstrip(buf),
 		sizeof(root->release_agent_path));
 	spin_unlock(&release_agent_path_lock);
 	mutex_unlock(&cgroup_mutex);
-	return 0;
+	return nbytes;
 }
 
 static int cgroup_release_agent_show(struct seq_file *seq, void *v)
@@ -2530,21 +2531,22 @@ out_finish:
 }
 
 /* change the enabled child controllers for a cgroup in the default hierarchy */
-static int cgroup_subtree_control_write(struct cgroup_subsys_state *dummy_css,
-					struct cftype *cft, char *buffer)
+static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
+					    char *buf, size_t nbytes,
+					    loff_t off)
 {
 	unsigned int enable = 0, disable = 0;
-	struct cgroup *cgrp = dummy_css->cgroup, *child;
+	struct cgroup *cgrp = of_css(of)->cgroup, *child;
 	struct cgroup_subsys *ss;
-	char *tok, *p;
+	char *tok;
 	int ssid, ret;
 
 	/*
 	 * Parse input - space separated list of subsystem names prefixed
 	 * with either + or -.
 	 */
-	p = buffer;
-	while ((tok = strsep(&p, " "))) {
+	buf = strstrip(buf);
+	while ((tok = strsep(&buf, " "))) {
 		if (tok[0] == '\0')
 			continue;
 		for_each_subsys(ss, ssid) {
@@ -2692,7 +2694,7 @@ out_unlock_tree:
 out_unbreak:
 	kernfs_unbreak_active_protection(cgrp->control_kn);
 	cgroup_put(cgrp);
-	return ret;
+	return ret ?: nbytes;
 
 err_undo_css:
 	cgrp->child_subsys_mask &= ~enable;
@@ -2738,9 +2740,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 	css = cgroup_css(cgrp, cft->ss);
 	rcu_read_unlock();
 
-	if (cft->write_string) {
-		ret = cft->write_string(css, cft, strstrip(buf));
-	} else if (cft->write_u64) {
+	if (cft->write_u64) {
 		unsigned long long v;
 		ret = kstrtoull(buf, 0, &v);
 		if (!ret)
@@ -3984,7 +3984,7 @@ static struct cftype cgroup_base_files[] = {
 		.name = "cgroup.subtree_control",
 		.flags = CFTYPE_ONLY_ON_DFL,
 		.seq_show = cgroup_subtree_control_show,
-		.write_string = cgroup_subtree_control_write,
+		.write = cgroup_subtree_control_write,
 	},
 	{
 		.name = "cgroup.populated",
@@ -4018,7 +4018,7 @@ static struct cftype cgroup_base_files[] = {
 		.name = "release_agent",
 		.flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
 		.seq_show = cgroup_release_agent_show,
-		.write_string = cgroup_release_agent_write,
+		.write = cgroup_release_agent_write,
 		.max_write_len = PATH_MAX - 1,
 	},
 	{ }	/* terminate */
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 0398f7e9ac81..6b4e60e33a9a 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -73,10 +73,6 @@ bool cgroup_freezing(struct task_struct *task)
 	return ret;
 }
 
-/*
- * cgroups_write_string() limits the size of freezer state strings to
- * CGROUP_LOCAL_BUFFER_SIZE
- */
 static const char *freezer_state_strs(unsigned int state)
 {
 	if (state & CGROUP_FROZEN)
@@ -423,20 +419,22 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
 	mutex_unlock(&freezer_mutex);
 }
 
-static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			 char *buffer)
+static ssize_t freezer_write(struct kernfs_open_file *of,
+			     char *buf, size_t nbytes, loff_t off)
 {
 	bool freeze;
 
-	if (strcmp(buffer, freezer_state_strs(0)) == 0)
+	buf = strstrip(buf);
+
+	if (strcmp(buf, freezer_state_strs(0)) == 0)
 		freeze = false;
-	else if (strcmp(buffer, freezer_state_strs(CGROUP_FROZEN)) == 0)
+	else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
 		freeze = true;
 	else
 		return -EINVAL;
 
-	freezer_change_state(css_freezer(css), freeze);
-	return 0;
+	freezer_change_state(css_freezer(of_css(of)), freeze);
+	return nbytes;
 }
 
 static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
@@ -460,7 +458,7 @@ static struct cftype files[] = {
 		.name = "state",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = freezer_read,
-		.write_string = freezer_write,
+		.write = freezer_write,
 	},
 	{
 		.name = "self_freezing",
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 37ca0a5c226d..2f4b08b8db24 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1603,13 +1603,15 @@ out_unlock:
 /*
  * Common handling for a write to a "cpus" or "mems" file.
  */
-static int cpuset_write_resmask(struct cgroup_subsys_state *css,
-				struct cftype *cft, char *buf)
+static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
 {
-	struct cpuset *cs = css_cs(css);
+	struct cpuset *cs = css_cs(of_css(of));
 	struct cpuset *trialcs;
 	int retval = -ENODEV;
 
+	buf = strstrip(buf);
+
 	/*
 	 * CPU or memory hotunplug may leave @cs w/o any execution
 	 * resources, in which case the hotplug code asynchronously updates
@@ -1633,7 +1635,7 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css,
 		goto out_unlock;
 	}
 
-	switch (cft->private) {
+	switch (of_cft(of)->private) {
 	case FILE_CPULIST:
 		retval = update_cpumask(cs, trialcs, buf);
 		break;
@@ -1648,7 +1650,7 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css,
 	free_trial_cpuset(trialcs);
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
-	return retval;
+	return retval ?: nbytes;
 }
 
 /*
@@ -1750,7 +1752,7 @@ static struct cftype files[] = {
 	{
 		.name = "cpus",
 		.seq_show = cpuset_common_seq_show,
-		.write_string = cpuset_write_resmask,
+		.write = cpuset_write_resmask,
 		.max_write_len = (100U + 6 * NR_CPUS),
 		.private = FILE_CPULIST,
 	},
@@ -1758,7 +1760,7 @@ static struct cftype files[] = {
 	{
 		.name = "mems",
 		.seq_show = cpuset_common_seq_show,
-		.write_string = cpuset_write_resmask,
+		.write = cpuset_write_resmask,
 		.max_write_len = (100U + 6 * MAX_NUMNODES),
 		.private = FILE_MEMLIST,
 	},
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 372f1adca491..191de26b0148 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -253,15 +253,16 @@ static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
 	return res_counter_read_u64(&h_cg->hugepage[idx], name);
 }
 
-static int hugetlb_cgroup_write(struct cgroup_subsys_state *css,
-				struct cftype *cft, char *buffer)
+static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
 {
 	int idx, name, ret;
 	unsigned long long val;
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 
-	idx = MEMFILE_IDX(cft->private);
-	name = MEMFILE_ATTR(cft->private);
+	buf = strstrip(buf);
+	idx = MEMFILE_IDX(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_LIMIT:
@@ -271,7 +272,7 @@ static int hugetlb_cgroup_write(struct cgroup_subsys_state *css,
 			break;
 		}
 		/* This function does all necessary parse...reuse it */
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		ret = res_counter_set_limit(&h_cg->hugepage[idx], val);
@@ -280,7 +281,7 @@ static int hugetlb_cgroup_write(struct cgroup_subsys_state *css,
 		ret = -EINVAL;
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
 static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css,
@@ -331,7 +332,7 @@ static void __init __hugetlb_cgroup_file_init(int idx)
 	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
 	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
 	cft->read_u64 = hugetlb_cgroup_read_u64;
-	cft->write_string = hugetlb_cgroup_write;
+	cft->write = hugetlb_cgroup_write;
 
 	/* Add the usage file */
 	cft = &h->cgroup_files[1];
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5cf3246314a2..7098a43f7447 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5143,17 +5143,18 @@ static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
  * The user of this function is...
  * RES_LIMIT.
  */
-static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buffer)
+static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	enum res_type type;
 	int name;
 	unsigned long long val;
 	int ret;
 
-	type = MEMFILE_TYPE(cft->private);
-	name = MEMFILE_ATTR(cft->private);
+	buf = strstrip(buf);
+	type = MEMFILE_TYPE(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_LIMIT:
@@ -5162,7 +5163,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 			break;
 		}
 		/* This function does all necessary parse...reuse it */
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		if (type == _MEM)
@@ -5175,7 +5176,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 			return -EINVAL;
 		break;
 	case RES_SOFT_LIMIT:
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		/*
@@ -5192,7 +5193,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 		ret = -EINVAL; /* should be BUG() ? */
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
 static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
@@ -5964,9 +5965,10 @@ static void memcg_event_ptable_queue_proc(struct file *file,
  * Input must be in format '<event_fd> <control_fd> <args>'.
  * Interpretation of args is defined by control file implementation.
  */
-static int memcg_write_event_control(struct cgroup_subsys_state *css,
-				     struct cftype *cft, char *buffer)
+static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
+					 char *buf, size_t nbytes, loff_t off)
 {
+	struct cgroup_subsys_state *css = of_css(of);
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup_event *event;
 	struct cgroup_subsys_state *cfile_css;
@@ -5977,15 +5979,17 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
 	char *endp;
 	int ret;
 
-	efd = simple_strtoul(buffer, &endp, 10);
+	buf = strstrip(buf);
+
+	efd = simple_strtoul(buf, &endp, 10);
 	if (*endp != ' ')
 		return -EINVAL;
-	buffer = endp + 1;
+	buf = endp + 1;
 
-	cfd = simple_strtoul(buffer, &endp, 10);
+	cfd = simple_strtoul(buf, &endp, 10);
 	if ((*endp != ' ') && (*endp != '\0'))
 		return -EINVAL;
-	buffer = endp + 1;
+	buf = endp + 1;
 
 	event = kzalloc(sizeof(*event), GFP_KERNEL);
 	if (!event)
@@ -6063,7 +6067,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
 		goto out_put_cfile;
 	}
 
-	ret = event->register_event(memcg, event->eventfd, buffer);
+	ret = event->register_event(memcg, event->eventfd, buf);
 	if (ret)
 		goto out_put_css;
 
@@ -6076,7 +6080,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
 	fdput(cfile);
 	fdput(efile);
 
-	return 0;
+	return nbytes;
 
 out_put_css:
 	css_put(css);
@@ -6107,13 +6111,13 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
 		.name = "soft_limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -6138,7 +6142,7 @@ static struct cftype mem_cgroup_files[] = {
 	},
 	{
 		.name = "cgroup.event_control",		/* XXX: for compat */
-		.write_string = memcg_write_event_control,
+		.write = memcg_write_event_control,
 		.flags = CFTYPE_NO_PREFIX,
 		.mode = S_IWUGO,
 	},
@@ -6171,7 +6175,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "kmem.limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -6217,7 +6221,7 @@ static struct cftype memsw_cgroup_files[] = {
 	{
 		.name = "memsw.limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 3825f669147b..b990cefd906b 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -185,15 +185,15 @@ static int read_priomap(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
-			 char *buffer)
+static ssize_t write_priomap(struct kernfs_open_file *of,
+			     char *buf, size_t nbytes, loff_t off)
 {
 	char devname[IFNAMSIZ + 1];
 	struct net_device *dev;
 	u32 prio;
 	int ret;
 
-	if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
+	if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
 		return -EINVAL;
 
 	dev = dev_get_by_name(&init_net, devname);
@@ -202,11 +202,11 @@ static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
 
 	rtnl_lock();
 
-	ret = netprio_set_prio(css, dev, prio);
+	ret = netprio_set_prio(of_css(of), dev, prio);
 
 	rtnl_unlock();
 	dev_put(dev);
-	return ret;
+	return ret ?: nbytes;
 }
 
 static int update_netprio(const void *v, struct file *file, unsigned n)
@@ -239,7 +239,7 @@ static struct cftype ss_files[] = {
 	{
 		.name = "ifpriomap",
 		.seq_show = read_priomap,
-		.write_string = write_priomap,
+		.write = write_priomap,
 	},
 	{ }	/* terminate */
 };
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index d4f015ad6c84..841fd3fa937a 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -102,17 +102,19 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
 	return 0;
 }
 
-static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buffer)
+static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	unsigned long long val;
 	int ret = 0;
 
-	switch (cft->private) {
+	buf = strstrip(buf);
+
+	switch (of_cft(of)->private) {
 	case RES_LIMIT:
 		/* see memcontrol.c */
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		ret = tcp_update_limit(memcg, val);
@@ -121,7 +123,7 @@ static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 		ret = -EINVAL;
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
 static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
@@ -193,7 +195,7 @@ static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
 static struct cftype tcp_files[] = {
 	{
 		.name = "kmem.tcp.limit_in_bytes",
-		.write_string = tcp_cgroup_write,
+		.write = tcp_cgroup_write,
 		.read_u64 = tcp_cgroup_read,
 		.private = RES_LIMIT,
 	},
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 9134dbf70d3e..7dbac4061b1c 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -767,27 +767,27 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	return rc;
 }
 
-static int devcgroup_access_write(struct cgroup_subsys_state *css,
-				  struct cftype *cft, char *buffer)
+static ssize_t devcgroup_access_write(struct kernfs_open_file *of,
+				      char *buf, size_t nbytes, loff_t off)
 {
 	int retval;
 
 	mutex_lock(&devcgroup_mutex);
-	retval = devcgroup_update_access(css_to_devcgroup(css),
-					 cft->private, buffer);
+	retval = devcgroup_update_access(css_to_devcgroup(of_css(of)),
+					 of_cft(of)->private, strstrip(buf));
 	mutex_unlock(&devcgroup_mutex);
-	return retval;
+	return retval ?: nbytes;
 }
 
 static struct cftype dev_cgroup_files[] = {
 	{
 		.name = "allow",
-		.write_string  = devcgroup_access_write,
+		.write = devcgroup_access_write,
 		.private = DEVCG_ALLOW,
 	},
 	{
 		.name = "deny",
-		.write_string = devcgroup_access_write,
+		.write = devcgroup_access_write,
 		.private = DEVCG_DENY,
 	},
 	{
-- 
cgit v1.2.3-71-gd317


From 6770c64e5c8da4705d1f0973bdeb5c2bf4f3a404 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 May 2014 12:16:21 -0400
Subject: cgroup: replace cftype->trigger() with cftype->write()

cftype->trigger() is pointless.  It's trivial to ignore the input
buffer from a regular ->write() operation.  Convert all ->trigger()
users to ->write() and remove ->trigger().

This patch doesn't introduce any visible behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
---
 include/linux/cgroup.h    |  8 --------
 kernel/cgroup.c           |  5 +----
 mm/hugetlb_cgroup.c       | 16 ++++++++--------
 mm/memcontrol.c           | 34 ++++++++++++++++++----------------
 net/ipv4/tcp_memcontrol.c | 15 ++++++++-------
 5 files changed, 35 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index aecdc84fe128..08eb71ee600b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -499,14 +499,6 @@ struct cftype {
 	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
 			 s64 val);
 
-	/*
-	 * trigger() callback can be used to get some kick from the
-	 * userspace, when the actual string written is not important
-	 * at all. The private field can be used to determine the
-	 * kick type for multiplexing.
-	 */
-	int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
-
 	/*
 	 * write() is the generic write callback which maps directly to
 	 * kernfs write operation and overrides all other operations.
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2a88ce7b24b6..2f16aab03493 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1034,8 +1034,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
 	if (cft->read_u64 || cft->read_s64 || cft->seq_show)
 		mode |= S_IRUGO;
 
-	if (cft->write_u64 || cft->write_s64 || cft->write ||
-	    cft->trigger)
+	if (cft->write_u64 || cft->write_s64 || cft->write)
 		mode |= S_IWUSR;
 
 	return mode;
@@ -2750,8 +2749,6 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 		ret = kstrtoll(buf, 0, &v);
 		if (!ret)
 			ret = cft->write_s64(css, cft, v);
-	} else if (cft->trigger) {
-		ret = cft->trigger(css, (unsigned int)cft->private);
 	} else {
 		ret = -EINVAL;
 	}
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 191de26b0148..a380681ab3cf 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -284,14 +284,14 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
 	return ret ?: nbytes;
 }
 
-static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css,
-				unsigned int event)
+static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
 {
 	int idx, name, ret = 0;
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 
-	idx = MEMFILE_IDX(event);
-	name = MEMFILE_ATTR(event);
+	idx = MEMFILE_IDX(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_MAX_USAGE:
@@ -304,7 +304,7 @@ static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css,
 		ret = -EINVAL;
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
 static char *mem_fmt(char *buf, int size, unsigned long hsize)
@@ -344,14 +344,14 @@ static void __init __hugetlb_cgroup_file_init(int idx)
 	cft = &h->cgroup_files[2];
 	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
 	cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
-	cft->trigger = hugetlb_cgroup_reset;
+	cft->write = hugetlb_cgroup_reset;
 	cft->read_u64 = hugetlb_cgroup_read_u64;
 
 	/* Add the failcntfile */
 	cft = &h->cgroup_files[3];
 	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
 	cft->private  = MEMFILE_PRIVATE(idx, RES_FAILCNT);
-	cft->trigger  = hugetlb_cgroup_reset;
+	cft->write = hugetlb_cgroup_reset;
 	cft->read_u64 = hugetlb_cgroup_read_u64;
 
 	/* NULL terminate the last cft */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7098a43f7447..b638a79209ee 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4887,14 +4887,15 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 	return 0;
 }
 
-static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css,
-					unsigned int event)
+static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of,
+					    char *buf, size_t nbytes,
+					    loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 
 	if (mem_cgroup_is_root(memcg))
 		return -EINVAL;
-	return mem_cgroup_force_empty(memcg);
+	return mem_cgroup_force_empty(memcg) ?: nbytes;
 }
 
 static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css,
@@ -5220,14 +5221,15 @@ out:
 	*memsw_limit = min_memsw_limit;
 }
 
-static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
+static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf,
+				size_t nbytes, loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	int name;
 	enum res_type type;
 
-	type = MEMFILE_TYPE(event);
-	name = MEMFILE_ATTR(event);
+	type = MEMFILE_TYPE(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_MAX_USAGE:
@@ -5252,7 +5254,7 @@ static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
 		break;
 	}
 
-	return 0;
+	return nbytes;
 }
 
 static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css,
@@ -6105,7 +6107,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "max_usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -6123,7 +6125,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "failcnt",
 		.private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -6132,7 +6134,7 @@ static struct cftype mem_cgroup_files[] = {
 	},
 	{
 		.name = "force_empty",
-		.trigger = mem_cgroup_force_empty_write,
+		.write = mem_cgroup_force_empty_write,
 	},
 	{
 		.name = "use_hierarchy",
@@ -6186,13 +6188,13 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "kmem.failcnt",
 		.private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
 		.name = "kmem.max_usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 #ifdef CONFIG_SLABINFO
@@ -6215,7 +6217,7 @@ static struct cftype memsw_cgroup_files[] = {
 	{
 		.name = "memsw.max_usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -6227,7 +6229,7 @@ static struct cftype memsw_cgroup_files[] = {
 	{
 		.name = "memsw.failcnt",
 		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{ },	/* terminate */
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 841fd3fa937a..f7a2ec3ac584 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -170,17 +170,18 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
 	return val;
 }
 
-static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
+static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
 	struct mem_cgroup *memcg;
 	struct cg_proto *cg_proto;
 
-	memcg = mem_cgroup_from_css(css);
+	memcg = mem_cgroup_from_css(of_css(of));
 	cg_proto = tcp_prot.proto_cgroup(memcg);
 	if (!cg_proto)
-		return 0;
+		return nbytes;
 
-	switch (event) {
+	switch (of_cft(of)->private) {
 	case RES_MAX_USAGE:
 		res_counter_reset_max(&cg_proto->memory_allocated);
 		break;
@@ -189,7 +190,7 @@ static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
 		break;
 	}
 
-	return 0;
+	return nbytes;
 }
 
 static struct cftype tcp_files[] = {
@@ -207,13 +208,13 @@ static struct cftype tcp_files[] = {
 	{
 		.name = "kmem.tcp.failcnt",
 		.private = RES_FAILCNT,
-		.trigger = tcp_cgroup_reset,
+		.write = tcp_cgroup_reset,
 		.read_u64 = tcp_cgroup_read,
 	},
 	{
 		.name = "kmem.tcp.max_usage_in_bytes",
 		.private = RES_MAX_USAGE,
-		.trigger = tcp_cgroup_reset,
+		.write = tcp_cgroup_reset,
 		.read_u64 = tcp_cgroup_read,
 	},
 	{ }	/* terminate */
-- 
cgit v1.2.3-71-gd317


From b7fc5ad235936379fae67a9f7b50bb53487a1a3a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 May 2014 12:16:22 -0400
Subject: cgroup: remove cgroup->control_kn

Now that cgroup_subtree_control_write() has access to the associated
kernfs_open_file and thus the kernfs_node, there's no need to cache it
in cgroup->control_kn on creation.  Remove cgroup->control_kn and use
@of->kn directly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 1 -
 kernel/cgroup.c        | 8 +++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 08eb71ee600b..aa7353deaaf3 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -177,7 +177,6 @@ struct cgroup {
 
 	struct cgroup *parent;		/* my parent */
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
-	struct kernfs_node *control_kn;	/* kn for "cgroup.subtree_control" */
 	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
 	/*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9a48c117ebf1..94d259bcd2b9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2580,7 +2580,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 	 * active_ref protection.
 	 */
 	cgroup_get(cgrp);
-	kernfs_break_active_protection(cgrp->control_kn);
+	kernfs_break_active_protection(of->kn);
 
 	mutex_lock(&cgroup_tree_mutex);
 
@@ -2697,7 +2697,7 @@ out_unlock:
 out_unlock_tree:
 	mutex_unlock(&cgroup_tree_mutex);
 out_unbreak:
-	kernfs_unbreak_active_protection(cgrp->control_kn);
+	kernfs_unbreak_active_protection(of->kn);
 	cgroup_put(cgrp);
 	return ret ?: nbytes;
 
@@ -2887,9 +2887,7 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
 		return ret;
 	}
 
-	if (cft->seq_show == cgroup_subtree_control_show)
-		cgrp->control_kn = kn;
-	else if (cft->seq_show == cgroup_populated_show)
+	if (cft->seq_show == cgroup_populated_show)
 		cgrp->populated_kn = kn;
 	return 0;
 }
-- 
cgit v1.2.3-71-gd317


From 7ad24ea4bf620a32631d7b3069c3e30c078b0c3e Mon Sep 17 00:00:00 2001
From: Wilfried Klaebe <w-lkml@lebenslange-mailadresse.de>
Date: Sun, 11 May 2014 00:12:32 +0000
Subject: net: get rid of SET_ETHTOOL_OPS

net: get rid of SET_ETHTOOL_OPS

Dave Miller mentioned he'd like to see SET_ETHTOOL_OPS gone.
This does that.

Mostly done via coccinelle script:
@@
struct ethtool_ops *ops;
struct net_device *dev;
@@
-       SET_ETHTOOL_OPS(dev, ops);
+       dev->ethtool_ops = ops;

Compile tested only, but I'd seriously wonder if this broke anything.

Suggested-by: Dave Miller <davem@davemloft.net>
Signed-off-by: Wilfried Klaebe <w-lkml@lebenslange-mailadresse.de>
Acked-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_ethtool.c            | 2 +-
 drivers/net/ethernet/3com/3c509.c                       | 2 +-
 drivers/net/ethernet/3com/3c589_cs.c                    | 2 +-
 drivers/net/ethernet/3com/typhoon.c                     | 2 +-
 drivers/net/ethernet/adaptec/starfire.c                 | 2 +-
 drivers/net/ethernet/alteon/acenic.c                    | 2 +-
 drivers/net/ethernet/altera/altera_tse_ethtool.c        | 2 +-
 drivers/net/ethernet/amd/amd8111e.c                     | 2 +-
 drivers/net/ethernet/amd/au1000_eth.c                   | 2 +-
 drivers/net/ethernet/amd/nmclan_cs.c                    | 2 +-
 drivers/net/ethernet/atheros/alx/main.c                 | 2 +-
 drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c      | 2 +-
 drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c      | 2 +-
 drivers/net/ethernet/atheros/atlx/atl2.c                | 2 +-
 drivers/net/ethernet/broadcom/b44.c                     | 2 +-
 drivers/net/ethernet/broadcom/bcm63xx_enet.c            | 4 ++--
 drivers/net/ethernet/broadcom/bcmsysport.c              | 2 +-
 drivers/net/ethernet/broadcom/bgmac.c                   | 2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c     | 6 ++----
 drivers/net/ethernet/broadcom/genet/bcmgenet.c          | 2 +-
 drivers/net/ethernet/brocade/bna/bnad_ethtool.c         | 2 +-
 drivers/net/ethernet/calxeda/xgmac.c                    | 2 +-
 drivers/net/ethernet/chelsio/cxgb/cxgb2.c               | 2 +-
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c         | 2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c         | 2 +-
 drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c     | 2 +-
 drivers/net/ethernet/cisco/enic/enic_ethtool.c          | 2 +-
 drivers/net/ethernet/dec/tulip/tulip_core.c             | 2 +-
 drivers/net/ethernet/dlink/dl2k.c                       | 2 +-
 drivers/net/ethernet/dlink/sundance.c                   | 2 +-
 drivers/net/ethernet/emulex/benet/be_main.c             | 2 +-
 drivers/net/ethernet/faraday/ftgmac100.c                | 2 +-
 drivers/net/ethernet/faraday/ftmac100.c                 | 2 +-
 drivers/net/ethernet/freescale/ucc_geth_ethtool.c       | 2 +-
 drivers/net/ethernet/fujitsu/fmvj18x_cs.c               | 2 +-
 drivers/net/ethernet/ibm/ehea/ehea_ethtool.c            | 2 +-
 drivers/net/ethernet/ibm/emac/core.c                    | 2 +-
 drivers/net/ethernet/icplus/ipg.c                       | 2 +-
 drivers/net/ethernet/intel/e100.c                       | 2 +-
 drivers/net/ethernet/intel/e1000/e1000_ethtool.c        | 2 +-
 drivers/net/ethernet/intel/e1000e/ethtool.c             | 2 +-
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c          | 2 +-
 drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c      | 2 +-
 drivers/net/ethernet/intel/igb/igb_ethtool.c            | 2 +-
 drivers/net/ethernet/intel/igbvf/ethtool.c              | 2 +-
 drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c          | 2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c        | 2 +-
 drivers/net/ethernet/intel/ixgbevf/ethtool.c            | 2 +-
 drivers/net/ethernet/marvell/mv643xx_eth.c              | 2 +-
 drivers/net/ethernet/marvell/mvneta.c                   | 2 +-
 drivers/net/ethernet/marvell/pxa168_eth.c               | 2 +-
 drivers/net/ethernet/marvell/sky2.c                     | 2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c          | 2 +-
 drivers/net/ethernet/micrel/ks8695net.c                 | 6 +++---
 drivers/net/ethernet/micrel/ks8851.c                    | 2 +-
 drivers/net/ethernet/micrel/ksz884x.c                   | 2 +-
 drivers/net/ethernet/microchip/enc28j60.c               | 2 +-
 drivers/net/ethernet/myricom/myri10ge/myri10ge.c        | 2 +-
 drivers/net/ethernet/natsemi/natsemi.c                  | 2 +-
 drivers/net/ethernet/natsemi/ns83820.c                  | 2 +-
 drivers/net/ethernet/neterion/s2io.c                    | 2 +-
 drivers/net/ethernet/neterion/vxge/vxge-ethtool.c       | 2 +-
 drivers/net/ethernet/nvidia/forcedeth.c                 | 2 +-
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c | 2 +-
 drivers/net/ethernet/packetengines/hamachi.c            | 6 ++----
 drivers/net/ethernet/packetengines/yellowfin.c          | 2 +-
 drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c    | 2 +-
 drivers/net/ethernet/qlogic/qla3xxx.c                   | 2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c        | 8 +++-----
 drivers/net/ethernet/qlogic/qlge/qlge_main.c            | 2 +-
 drivers/net/ethernet/realtek/r8169.c                    | 2 +-
 drivers/net/ethernet/renesas/sh_eth.c                   | 2 +-
 drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c      | 2 +-
 drivers/net/ethernet/sfc/efx.c                          | 2 +-
 drivers/net/ethernet/sis/sis190.c                       | 2 +-
 drivers/net/ethernet/smsc/smc91c92_cs.c                 | 2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c    | 2 +-
 drivers/net/ethernet/tehuti/tehuti.c                    | 2 +-
 drivers/net/ethernet/ti/cpsw.c                          | 4 ++--
 drivers/net/ethernet/ti/davinci_emac.c                  | 2 +-
 drivers/net/hyperv/netvsc_drv.c                         | 2 +-
 drivers/net/ntb_netdev.c                                | 2 +-
 drivers/net/rionet.c                                    | 2 +-
 drivers/net/usb/catc.c                                  | 2 +-
 drivers/net/usb/hso.c                                   | 2 +-
 drivers/net/usb/ipheth.c                                | 2 +-
 drivers/net/usb/kaweth.c                                | 2 +-
 drivers/net/usb/pegasus.c                               | 2 +-
 drivers/net/usb/r8152.c                                 | 2 +-
 drivers/net/usb/rtl8150.c                               | 2 +-
 drivers/net/virtio_net.c                                | 2 +-
 drivers/net/vmxnet3/vmxnet3_ethtool.c                   | 2 +-
 drivers/net/vxlan.c                                     | 2 +-
 drivers/net/wireless/hostap/hostap_main.c               | 2 +-
 drivers/net/xen-netback/interface.c                     | 2 +-
 drivers/net/xen-netfront.c                              | 2 +-
 drivers/s390/net/qeth_l2_main.c                         | 7 +++----
 drivers/s390/net/qeth_l3_main.c                         | 2 +-
 drivers/staging/et131x/et131x.c                         | 2 +-
 drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c        | 2 +-
 drivers/staging/netlogic/xlr_net.c                      | 2 +-
 drivers/staging/octeon/ethernet.c                       | 2 +-
 drivers/usb/gadget/u_ether.c                            | 4 ++--
 include/linux/netdevice.h                               | 3 ---
 net/batman-adv/soft-interface.c                         | 2 +-
 net/bridge/br_device.c                                  | 2 +-
 net/dsa/slave.c                                         | 2 +-
 net/openvswitch/vport-internal_dev.c                    | 2 +-
 108 files changed, 118 insertions(+), 128 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index c4b3940845e6..078cadd6c797 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -105,5 +105,5 @@ static const struct ethtool_ops ipoib_ethtool_ops = {
 
 void ipoib_set_ethtool_ops(struct net_device *dev)
 {
-	SET_ETHTOOL_OPS(dev, &ipoib_ethtool_ops);
+	dev->ethtool_ops = &ipoib_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index 35df0b9e6848..a968654b631d 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -534,7 +534,7 @@ static int el3_common_init(struct net_device *dev)
 	/* The EL3-specific entries in the device structure. */
 	dev->netdev_ops = &netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 
 	err = register_netdev(dev);
 	if (err) {
diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c
index 063557e037f2..f18647c23559 100644
--- a/drivers/net/ethernet/3com/3c589_cs.c
+++ b/drivers/net/ethernet/3com/3c589_cs.c
@@ -218,7 +218,7 @@ static int tc589_probe(struct pcmcia_device *link)
 	dev->netdev_ops = &el3_netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
-	SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+	dev->ethtool_ops = &netdev_ethtool_ops;
 
 	return tc589_config(link);
 }
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index 465cc7108d8a..e13b04624ded 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c
@@ -2435,7 +2435,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netif_napi_add(dev, &tp->napi, typhoon_poll, 16);
 	dev->watchdog_timeo	= TX_TIMEOUT;
 
-	SET_ETHTOOL_OPS(dev, &typhoon_ethtool_ops);
+	dev->ethtool_ops = &typhoon_ethtool_ops;
 
 	/* We can handle scatter gather, up to 16 entries, and
 	 * we can do IP checksumming (only version 4, doh...)
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index 171d73c1d3c2..40dbbf740331 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -784,7 +784,7 @@ static int starfire_init_one(struct pci_dev *pdev,
 
 	dev->netdev_ops = &netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 
 	netif_napi_add(dev, &np->napi, netdev_poll, max_interrupt_work);
 
diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index 1517e9df5ba1..9a6991be9749 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c
@@ -476,7 +476,7 @@ static int acenic_probe_one(struct pci_dev *pdev,
 	dev->watchdog_timeo = 5*HZ;
 
 	dev->netdev_ops = &ace_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ace_ethtool_ops);
+	dev->ethtool_ops = &ace_ethtool_ops;
 
 	/* we only display this string ONCE */
 	if (!boards_found)
diff --git a/drivers/net/ethernet/altera/altera_tse_ethtool.c b/drivers/net/ethernet/altera/altera_tse_ethtool.c
index 76133caffa78..d817e285b266 100644
--- a/drivers/net/ethernet/altera/altera_tse_ethtool.c
+++ b/drivers/net/ethernet/altera/altera_tse_ethtool.c
@@ -237,5 +237,5 @@ static const struct ethtool_ops tse_ethtool_ops = {
 
 void altera_tse_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &tse_ethtool_ops);
+	netdev->ethtool_ops = &tse_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 26efaaa5e73f..068dc7cad5fa 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -1900,7 +1900,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev,
 
 	/* Initialize driver entry points */
 	dev->netdev_ops = &amd8111e_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 	dev->irq =pdev->irq;
 	dev->watchdog_timeo = AMD8111E_TX_TIMEOUT;
 	netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32);
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index a2bd91e3d302..a78e4c136959 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -1229,7 +1229,7 @@ static int au1000_probe(struct platform_device *pdev)
 	dev->base_addr = base->start;
 	dev->irq = irq;
 	dev->netdev_ops = &au1000_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &au1000_ethtool_ops);
+	dev->ethtool_ops = &au1000_ethtool_ops;
 	dev->watchdog_timeo = ETH_TX_TIMEOUT;
 
 	/*
diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c
index 08569fe2b182..abf3b1581c82 100644
--- a/drivers/net/ethernet/amd/nmclan_cs.c
+++ b/drivers/net/ethernet/amd/nmclan_cs.c
@@ -457,7 +457,7 @@ static int nmclan_probe(struct pcmcia_device *link)
     lp->tx_free_frames=AM2150_MAX_TX_FRAMES;
 
     dev->netdev_ops = &mace_netdev_ops;
-    SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+    dev->ethtool_ops = &netdev_ethtool_ops;
     dev->watchdog_timeo = TX_TIMEOUT;
 
     return nmclan_config(link);
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 17bb9ce96260..49faa97a30c3 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1302,7 +1302,7 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	netdev->netdev_ops = &alx_netdev_ops;
-	SET_ETHTOOL_OPS(netdev, &alx_ethtool_ops);
+	netdev->ethtool_ops = &alx_ethtool_ops;
 	netdev->irq = pdev->irq;
 	netdev->watchdog_timeo = ALX_WATCHDOG_TIME;
 
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
index 859ea844ba0f..ecacaaeb2b92 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
@@ -305,5 +305,5 @@ static const struct ethtool_ops atl1c_ethtool_ops = {
 
 void atl1c_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &atl1c_ethtool_ops);
+	netdev->ethtool_ops = &atl1c_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
index 82b23861bf55..206e9b7be431 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
@@ -388,5 +388,5 @@ static const struct ethtool_ops atl1e_ethtool_ops = {
 
 void atl1e_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &atl1e_ethtool_ops);
+	netdev->ethtool_ops = &atl1e_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index 78befb522a52..2587fed7b02c 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1396,7 +1396,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	atl2_setup_pcicmd(pdev);
 
 	netdev->netdev_ops = &atl2_netdev_ops;
-	SET_ETHTOOL_OPS(netdev, &atl2_ethtool_ops);
+	netdev->ethtool_ops = &atl2_ethtool_ops;
 	netdev->watchdog_timeo = 5 * HZ;
 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
 
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 05ba62589017..ca5a20a48b14 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -2380,7 +2380,7 @@ static int b44_init_one(struct ssb_device *sdev,
 	netif_napi_add(dev, &bp->napi, b44_poll, 64);
 	dev->watchdog_timeo = B44_TX_TIMEOUT;
 	dev->irq = sdev->irq;
-	SET_ETHTOOL_OPS(dev, &b44_ethtool_ops);
+	dev->ethtool_ops = &b44_ethtool_ops;
 
 	err = ssb_bus_powerup(sdev->bus, 0);
 	if (err) {
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 8db34d389675..3e8d1a88ed3d 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1897,7 +1897,7 @@ static int bcm_enet_probe(struct platform_device *pdev)
 	dev->netdev_ops = &bcm_enet_ops;
 	netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16);
 
-	SET_ETHTOOL_OPS(dev, &bcm_enet_ethtool_ops);
+	dev->ethtool_ops = &bcm_enet_ethtool_ops;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	ret = register_netdev(dev);
@@ -2783,7 +2783,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
 	/* register netdevice */
 	dev->netdev_ops = &bcm_enetsw_ops;
 	netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16);
-	SET_ETHTOOL_OPS(dev, &bcm_enetsw_ethtool_ops);
+	dev->ethtool_ops = &bcm_enetsw_ethtool_ops;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	spin_lock_init(&priv->enetsw_mdio_lock);
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 4dc8d1e9829b..56b74a495181 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1540,7 +1540,7 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	dev_set_drvdata(&pdev->dev, dev);
-	SET_ETHTOOL_OPS(dev, &bcm_sysport_ethtool_ops);
+	dev->ethtool_ops = &bcm_sysport_ethtool_ops;
 	dev->netdev_ops = &bcm_sysport_netdev_ops;
 	netif_napi_add(dev, &priv->napi, bcm_sysport_poll, 64);
 
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 0297a79a38e1..05c6af6c418f 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1436,7 +1436,7 @@ static int bgmac_probe(struct bcma_device *core)
 		return -ENOMEM;
 	net_dev->netdev_ops = &bgmac_netdev_ops;
 	net_dev->irq = core->irq;
-	SET_ETHTOOL_OPS(net_dev, &bgmac_ethtool_ops);
+	net_dev->ethtool_ops = &bgmac_ethtool_ops;
 	bgmac = netdev_priv(net_dev);
 	bgmac->net_dev = net_dev;
 	bgmac->core = core;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index b6de05e3149b..03224090ecf9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -3506,8 +3506,6 @@ static const struct ethtool_ops bnx2x_vf_ethtool_ops = {
 
 void bnx2x_set_ethtool_ops(struct bnx2x *bp, struct net_device *netdev)
 {
-	if (IS_PF(bp))
-		SET_ETHTOOL_OPS(netdev, &bnx2x_ethtool_ops);
-	else /* vf */
-		SET_ETHTOOL_OPS(netdev, &bnx2x_vf_ethtool_ops);
+	netdev->ethtool_ops = (IS_PF(bp)) ?
+		&bnx2x_ethtool_ops : &bnx2x_vf_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 0966bd04375f..5ba1cfbd60da 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2481,7 +2481,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	dev_set_drvdata(&pdev->dev, dev);
 	ether_addr_copy(dev->dev_addr, macaddr);
 	dev->watchdog_timeo = 2 * HZ;
-	SET_ETHTOOL_OPS(dev, &bcmgenet_ethtool_ops);
+	dev->ethtool_ops = &bcmgenet_ethtool_ops;
 	dev->netdev_ops = &bcmgenet_netdev_ops;
 	netif_napi_add(dev, &priv->napi, bcmgenet_poll, 64);
 
diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index f9e150825bb5..adca62b72837 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
@@ -1137,5 +1137,5 @@ static const struct ethtool_ops bnad_ethtool_ops = {
 void
 bnad_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &bnad_ethtool_ops);
+	netdev->ethtool_ops = &bnad_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index 521dfea44b83..25d6b2a10e4e 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -1737,7 +1737,7 @@ static int xgmac_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ndev);
 	ether_setup(ndev);
 	ndev->netdev_ops = &xgmac_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &xgmac_ethtool_ops);
+	ndev->ethtool_ops = &xgmac_ethtool_ops;
 	spin_lock_init(&priv->stats_lock);
 	INIT_WORK(&priv->tx_timeout_work, xgmac_tx_timeout_work);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 0fe7ff750d77..c1b2c1dbf015 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -1100,7 +1100,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 		netif_napi_add(netdev, &adapter->napi, t1_poll, 64);
 
-		SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops);
+		netdev->ethtool_ops = &t1_ethtool_ops;
 	}
 
 	if (t1_init_sw_modules(adapter, bi) < 0) {
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 07bbb711b7e5..3ed507947248 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -3291,7 +3291,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			netdev->features |= NETIF_F_HIGHDMA;
 
 		netdev->netdev_ops = &cxgb_netdev_ops;
-		SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
+		netdev->ethtool_ops = &cxgb_ethtool_ops;
 	}
 
 	pci_set_drvdata(pdev, adapter);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 0f1e886d89e3..266a5bc6aedf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -6083,7 +6083,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		netdev->priv_flags |= IFF_UNICAST_FLT;
 
 		netdev->netdev_ops = &cxgb4_netdev_ops;
-		SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
+		netdev->ethtool_ops = &cxgb_ethtool_ops;
 	}
 
 	pci_set_drvdata(pdev, adapter);
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 52859288de7b..ff1cdd1788b5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2664,7 +2664,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
 		netdev->priv_flags |= IFF_UNICAST_FLT;
 
 		netdev->netdev_ops = &cxgb4vf_netdev_ops;
-		SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops);
+		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
 
 		/*
 		 * Initialize the hardware/software state for the port.
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index 47e3562f4866..58a8c67638e3 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -253,5 +253,5 @@ static const struct ethtool_ops enic_ethtool_ops = {
 
 void enic_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &enic_ethtool_ops);
+	netdev->ethtool_ops = &enic_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index 1642de78aac8..861660841ce2 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -1703,7 +1703,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 #ifdef CONFIG_TULIP_NAPI
 	netif_napi_add(dev, &tp->napi, tulip_poll, 16);
 #endif
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 
 	if (register_netdev(dev))
 		goto err_out_free_ring;
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index 4fb756d219f7..2324f2ddfd48 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -227,7 +227,7 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 	dev->netdev_ops = &netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 #if 0
 	dev->features = NETIF_F_IP_CSUM;
 #endif
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index d9e5ca0d48c1..433c1e185442 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -577,7 +577,7 @@ static int sundance_probe1(struct pci_dev *pdev,
 
 	/* The chip-specific entries in the device structure. */
 	dev->netdev_ops = &netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
 	pci_set_drvdata(pdev, dev);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 3f04356afa82..dcc5e5c69743 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4304,7 +4304,7 @@ static void be_netdev_init(struct net_device *netdev)
 
 	netdev->netdev_ops = &be_netdev_ops;
 
-	SET_ETHTOOL_OPS(netdev, &be_ethtool_ops);
+	netdev->ethtool_ops = &be_ethtool_ops;
 }
 
 static void be_unmap_pci_bars(struct be_adapter *adapter)
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 68069eabc4f8..c77fa4a69844 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1210,7 +1210,7 @@ static int ftgmac100_probe(struct platform_device *pdev)
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 
-	SET_ETHTOOL_OPS(netdev, &ftgmac100_ethtool_ops);
+	netdev->ethtool_ops = &ftgmac100_ethtool_ops;
 	netdev->netdev_ops = &ftgmac100_netdev_ops;
 	netdev->features = NETIF_F_IP_CSUM | NETIF_F_GRO;
 
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 8be5b40c0a12..4ff1adc6bfca 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1085,7 +1085,7 @@ static int ftmac100_probe(struct platform_device *pdev)
 	}
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
-	SET_ETHTOOL_OPS(netdev, &ftmac100_ethtool_ops);
+	netdev->ethtool_ops = &ftmac100_ethtool_ops;
 	netdev->netdev_ops = &ftmac100_netdev_ops;
 
 	platform_set_drvdata(pdev, netdev);
diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
index 413329eff2ff..cc83350d56ba 100644
--- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
+++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
@@ -417,5 +417,5 @@ static const struct ethtool_ops uec_ethtool_ops = {
 
 void uec_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &uec_ethtool_ops);
+	netdev->ethtool_ops = &uec_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index 7becab1aa3e4..cfe7a7431730 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -256,7 +256,7 @@ static int fmvj18x_probe(struct pcmcia_device *link)
     dev->netdev_ops = &fjn_netdev_ops;
     dev->watchdog_timeo = TX_TIMEOUT;
 
-    SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+    dev->ethtool_ops = &netdev_ethtool_ops;
 
     return fmvj18x_config(link);
 } /* fmvj18x_attach */
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
index 95837b99a464..6055e3eaf49c 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
@@ -278,5 +278,5 @@ static const struct ethtool_ops ehea_ethtool_ops = {
 
 void ehea_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ehea_ethtool_ops);
+	netdev->ethtool_ops = &ehea_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index ae342fdb42c8..87bd953cc2ee 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -2879,7 +2879,7 @@ static int emac_probe(struct platform_device *ofdev)
 		dev->commac.ops = &emac_commac_sg_ops;
 	} else
 		ndev->netdev_ops = &emac_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &emac_ethtool_ops);
+	ndev->ethtool_ops = &emac_ethtool_ops;
 
 	netif_carrier_off(ndev);
 
diff --git a/drivers/net/ethernet/icplus/ipg.c b/drivers/net/ethernet/icplus/ipg.c
index 25045ae07171..5727779a7df2 100644
--- a/drivers/net/ethernet/icplus/ipg.c
+++ b/drivers/net/ethernet/icplus/ipg.c
@@ -2245,7 +2245,7 @@ static int ipg_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 */
 	dev->netdev_ops = &ipg_netdev_ops;
 	SET_NETDEV_DEV(dev, &pdev->dev);
-	SET_ETHTOOL_OPS(dev, &ipg_ethtool_ops);
+	dev->ethtool_ops = &ipg_ethtool_ops;
 
 	rc = pci_request_regions(pdev, DRV_NAME);
 	if (rc)
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index b56461ce674c..9d979d7debef 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -2854,7 +2854,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netdev->hw_features |= NETIF_F_RXALL;
 
 	netdev->netdev_ops = &e100_netdev_ops;
-	SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
+	netdev->ethtool_ops = &e100_ethtool_ops;
 	netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
 
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index 73a8aeefb92a..341889a4ef7f 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -1905,5 +1905,5 @@ static const struct ethtool_ops e1000_ethtool_ops = {
 
 void e1000_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops);
+	netdev->ethtool_ops = &e1000_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 4e5ad7ebe1f2..e9a48bb5caac 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -2318,5 +2318,5 @@ static const struct ethtool_ops e1000_ethtool_ops = {
 
 void e1000e_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops);
+	netdev->ethtool_ops = &e1000_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 0cf47c958081..f62929419a09 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1700,5 +1700,5 @@ static const struct ethtool_ops i40e_ethtool_ops = {
 
 void i40e_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &i40e_ethtool_ops);
+	netdev->ethtool_ops = &i40e_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index a46be016039e..77e786d2d0e0 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -705,5 +705,5 @@ static struct ethtool_ops i40evf_ethtool_ops = {
  **/
 void i40evf_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &i40evf_ethtool_ops);
+	netdev->ethtool_ops = &i40evf_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 333a2b0bbada..a84297c85fb1 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -3035,5 +3035,5 @@ static const struct ethtool_ops igb_ethtool_ops = {
 
 void igb_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &igb_ethtool_ops);
+	netdev->ethtool_ops = &igb_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c
index 90eef07943f4..f58170bae18b 100644
--- a/drivers/net/ethernet/intel/igbvf/ethtool.c
+++ b/drivers/net/ethernet/intel/igbvf/ethtool.c
@@ -476,5 +476,5 @@ static const struct ethtool_ops igbvf_ethtool_ops = {
 
 void igbvf_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &igbvf_ethtool_ops);
+	netdev->ethtool_ops = &igbvf_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
index dbb7dd2f8e36..1da2d987d370 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
@@ -656,5 +656,5 @@ static const struct ethtool_ops ixgb_ethtool_ops = {
 
 void ixgb_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ixgb_ethtool_ops);
+	netdev->ethtool_ops = &ixgb_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 6c55c14d082a..31d7268401e7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -3099,5 +3099,5 @@ static const struct ethtool_ops ixgbe_ethtool_ops = {
 
 void ixgbe_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ixgbe_ethtool_ops);
+	netdev->ethtool_ops = &ixgbe_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 1baecb60f065..a757f0734719 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -813,5 +813,5 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = {
 
 void ixgbevf_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ixgbevf_ethtool_ops);
+	netdev->ethtool_ops = &ixgbevf_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index b7b8d74c22d9..df1d1b97187f 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2889,7 +2889,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 	if (err)
 		goto out;
 
-	SET_ETHTOOL_OPS(dev, &mv643xx_eth_ethtool_ops);
+	dev->ethtool_ops = &mv643xx_eth_ethtool_ops;
 
 	init_pscr(mp, pd->speed, pd->duplex);
 
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 14786c8bf99e..72bc47f2d2e9 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2813,7 +2813,7 @@ static int mvneta_probe(struct platform_device *pdev)
 	dev->watchdog_timeo = 5 * HZ;
 	dev->netdev_ops = &mvneta_netdev_ops;
 
-	SET_ETHTOOL_OPS(dev, &mvneta_eth_tool_ops);
+	dev->ethtool_ops = &mvneta_eth_tool_ops;
 
 	pp = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index b358c2f6f4bd..8f5aa7c62b18 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1488,7 +1488,7 @@ static int pxa168_eth_probe(struct platform_device *pdev)
 	dev->netdev_ops = &pxa168_eth_netdev_ops;
 	dev->watchdog_timeo = 2 * HZ;
 	dev->base_addr = 0;
-	SET_ETHTOOL_OPS(dev, &pxa168_ethtool_ops);
+	dev->ethtool_ops = &pxa168_ethtool_ops;
 
 	INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task);
 
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index b81106451a0a..69693384b58c 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -4760,7 +4760,7 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port,
 
 	SET_NETDEV_DEV(dev, &hw->pdev->dev);
 	dev->irq = hw->pdev->irq;
-	SET_ETHTOOL_OPS(dev, &sky2_ethtool_ops);
+	dev->ethtool_ops = &sky2_ethtool_ops;
 	dev->watchdog_timeo = TX_WATCHDOG;
 	dev->netdev_ops = &sky2_netdev_ops[port];
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index fba3c8e77626..79c6f467d17e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2539,7 +2539,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
 	netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
 
-	SET_ETHTOOL_OPS(dev, &mlx4_en_ethtool_ops);
+	dev->ethtool_ops = &mlx4_en_ethtool_ops;
 
 	/*
 	 * Set driver features
diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c
index 16435b3cfa9f..6c7c78baedca 100644
--- a/drivers/net/ethernet/micrel/ks8695net.c
+++ b/drivers/net/ethernet/micrel/ks8695net.c
@@ -1504,15 +1504,15 @@ ks8695_probe(struct platform_device *pdev)
 	if (ksp->phyiface_regs && ksp->link_irq == -1) {
 		ks8695_init_switch(ksp);
 		ksp->dtype = KS8695_DTYPE_LAN;
-		SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops);
+		ndev->ethtool_ops = &ks8695_ethtool_ops;
 	} else if (ksp->phyiface_regs && ksp->link_irq != -1) {
 		ks8695_init_wan_phy(ksp);
 		ksp->dtype = KS8695_DTYPE_WAN;
-		SET_ETHTOOL_OPS(ndev, &ks8695_wan_ethtool_ops);
+		ndev->ethtool_ops = &ks8695_wan_ethtool_ops;
 	} else {
 		/* No initialisation since HPNA does not have a PHY */
 		ksp->dtype = KS8695_DTYPE_HPNA;
-		SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops);
+		ndev->ethtool_ops = &ks8695_ethtool_ops;
 	}
 
 	/* And bring up the net_device with the net core */
diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index e0c92e0e5e1d..13767eb36a48 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1471,7 +1471,7 @@ static int ks8851_probe(struct spi_device *spi)
 
 	skb_queue_head_init(&ks->txq);
 
-	SET_ETHTOOL_OPS(ndev, &ks8851_ethtool_ops);
+	ndev->ethtool_ops = &ks8851_ethtool_ops;
 	SET_NETDEV_DEV(ndev, &spi->dev);
 
 	spi_set_drvdata(spi, ks);
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 14ac0e2bc09f..4b9592c1fb40 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -7106,7 +7106,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id)
 		}
 
 		dev->netdev_ops = &netdev_ops;
-		SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+		dev->ethtool_ops = &netdev_ethtool_ops;
 		if (register_netdev(dev))
 			goto pcidev_init_reg_err;
 		port_set_power_saving(port, true);
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index c7b40aa21f22..b1b5f66b8b69 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1593,7 +1593,7 @@ static int enc28j60_probe(struct spi_device *spi)
 	dev->irq = spi->irq;
 	dev->netdev_ops = &enc28j60_netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &enc28j60_ethtool_ops);
+	dev->ethtool_ops = &enc28j60_ethtool_ops;
 
 	enc28j60_lowpower(priv, true);
 
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 130f6b204efa..f3d5d79f1cd1 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -4112,7 +4112,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	setup_timer(&mgp->watchdog_timer, myri10ge_watchdog_timer,
 		    (unsigned long)mgp);
 
-	SET_ETHTOOL_OPS(netdev, &myri10ge_ethtool_ops);
+	netdev->ethtool_ops = &myri10ge_ethtool_ops;
 	INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog);
 	status = register_netdev(netdev);
 	if (status != 0) {
diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index 64ec2a437f46..291fba8b9f07 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -927,7 +927,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->netdev_ops = &natsemi_netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 
 	if (mtu)
 		dev->mtu = mtu;
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index dbccf1de49ec..19bb8244b9e3 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -2030,7 +2030,7 @@ static int ns83820_init_one(struct pci_dev *pci_dev,
 		pci_dev->subsystem_vendor, pci_dev->subsystem_device);
 
 	ndev->netdev_ops = &netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ops);
+	ndev->ethtool_ops = &ops;
 	ndev->watchdog_timeo = 5 * HZ;
 	pci_set_drvdata(pci_dev, ndev);
 
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index e900c1abdef7..e3cf38e6ce3c 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -7910,7 +7910,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 
 	/*  Driver entry points */
 	dev->netdev_ops = &s2io_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+	dev->ethtool_ops = &netdev_ethtool_ops;
 	dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM |
 		NETIF_F_TSO | NETIF_F_TSO6 |
 		NETIF_F_RXCSUM | NETIF_F_LRO;
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c
index f8f073880f84..ddcc81ad1ae1 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c
@@ -1128,5 +1128,5 @@ static const struct ethtool_ops vxge_ethtool_ops = {
 
 void vxge_initialize_ethtool_ops(struct net_device *ndev)
 {
-	SET_ETHTOOL_OPS(ndev, &vxge_ethtool_ops);
+	ndev->ethtool_ops = &vxge_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index fddb464aeab3..e8235c5c5e69 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -5766,7 +5766,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 		dev->netdev_ops = &nv_netdev_ops_optimized;
 
 	netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP);
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 	dev->watchdog_timeo = NV_WATCHDOG_TIMEO;
 
 	pci_set_drvdata(pci_dev, dev);
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
index 826f0ccdc23c..114d2fe52cc2 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
@@ -508,5 +508,5 @@ static const struct ethtool_ops pch_gbe_ethtool_ops = {
 
 void pch_gbe_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &pch_gbe_ethtool_ops);
+	netdev->ethtool_ops = &pch_gbe_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c
index b6bdeb3c1971..9a997e4c3e08 100644
--- a/drivers/net/ethernet/packetengines/hamachi.c
+++ b/drivers/net/ethernet/packetengines/hamachi.c
@@ -724,10 +724,8 @@ static int hamachi_init_one(struct pci_dev *pdev,
 
 	/* The Hamachi-specific entries in the device structure. */
 	dev->netdev_ops = &hamachi_netdev_ops;
-	if (chip_tbl[hmp->chip_id].flags & CanHaveMII)
-		SET_ETHTOOL_OPS(dev, &ethtool_ops);
-	else
-		SET_ETHTOOL_OPS(dev, &ethtool_ops_no_mii);
+	dev->ethtool_ops = (chip_tbl[hmp->chip_id].flags & CanHaveMII) ?
+		&ethtool_ops : &ethtool_ops_no_mii;
 	dev->watchdog_timeo = TX_TIMEOUT;
 	if (mtu)
 		dev->mtu = mtu;
diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c
index 9a6cb482dcd0..69a8dc095072 100644
--- a/drivers/net/ethernet/packetengines/yellowfin.c
+++ b/drivers/net/ethernet/packetengines/yellowfin.c
@@ -472,7 +472,7 @@ static int yellowfin_init_one(struct pci_dev *pdev,
 
 	/* The Yellowfin-specific entries in the device structure. */
 	dev->netdev_ops = &netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
 	if (mtu)
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index f09c35d669b3..5bf05818a12c 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -1373,7 +1373,7 @@ netxen_setup_netdev(struct netxen_adapter *adapter,
 
 	netxen_nic_change_mtu(netdev, netdev->mtu);
 
-	SET_ETHTOOL_OPS(netdev, &netxen_nic_ethtool_ops);
+	netdev->ethtool_ops = &netxen_nic_ethtool_ops;
 
 	netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
 	                      NETIF_F_RXCSUM;
diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 2eabd44f8914..b5d6bc1a8b00 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -3838,7 +3838,7 @@ static int ql3xxx_probe(struct pci_dev *pdev,
 
 	/* Set driver entry points */
 	ndev->netdev_ops = &ql3xxx_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ql3xxx_ethtool_ops);
+	ndev->ethtool_ops = &ql3xxx_ethtool_ops;
 	ndev->watchdog_timeo = 5 * HZ;
 
 	netif_napi_add(ndev, &qdev->napi, ql_poll, 64);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 8a2aeb85e320..f0a285359e66 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -2265,10 +2265,8 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev,
 
 	qlcnic_change_mtu(netdev, netdev->mtu);
 
-	if (qlcnic_sriov_vf_check(adapter))
-		SET_ETHTOOL_OPS(netdev, &qlcnic_sriov_vf_ethtool_ops);
-	else
-		SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_ops);
+	netdev->ethtool_ops = (qlcnic_sriov_vf_check(adapter)) ?
+		&qlcnic_sriov_vf_ethtool_ops : &qlcnic_ethtool_ops;
 
 	netdev->features |= (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 			     NETIF_F_IPV6_CSUM | NETIF_F_GRO |
@@ -2682,7 +2680,7 @@ err_out_disable_pdev:
 err_out_maintenance_mode:
 	set_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state);
 	netdev->netdev_ops = &qlcnic_netdev_failed_ops;
-	SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_failed_ops);
+	netdev->ethtool_ops = &qlcnic_ethtool_failed_ops;
 	ahw->port_type = QLCNIC_XGBE;
 
 	if (qlcnic_83xx_check(adapter))
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 6e36fe14b848..b40050e03a56 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -4770,7 +4770,7 @@ static int qlge_probe(struct pci_dev *pdev,
 	ndev->irq = pdev->irq;
 
 	ndev->netdev_ops = &qlge_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &qlge_ethtool_ops);
+	ndev->ethtool_ops = &qlge_ethtool_ops;
 	ndev->watchdog_timeo = 10 * HZ;
 
 	err = register_netdev(ndev);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index aa1c079f231d..be425ad5e824 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7125,7 +7125,7 @@ rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	for (i = 0; i < ETH_ALEN; i++)
 		dev->dev_addr[i] = RTL_R8(MAC0 + i);
 
-	SET_ETHTOOL_OPS(dev, &rtl8169_ethtool_ops);
+	dev->ethtool_ops = &rtl8169_ethtool_ops;
 	dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
 
 	netif_napi_add(dev, &tp->napi, rtl8169_poll, R8169_NAPI_WEIGHT);
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 6a9509ccd33b..967314cade95 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2843,7 +2843,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 		ndev->netdev_ops = &sh_eth_netdev_ops_tsu;
 	else
 		ndev->netdev_ops = &sh_eth_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &sh_eth_ethtool_ops);
+	ndev->ethtool_ops = &sh_eth_ethtool_ops;
 	ndev->watchdog_timeo = TX_TIMEOUT;
 
 	/* debug message level */
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
index 0415fa50eeb7..c0981ae45874 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
@@ -520,5 +520,5 @@ static const struct ethtool_ops sxgbe_ethtool_ops = {
 
 void sxgbe_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &sxgbe_ethtool_ops);
+	netdev->ethtool_ops = &sxgbe_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 63d595fd3cc5..1e274045970f 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -2248,7 +2248,7 @@ static int efx_register_netdev(struct efx_nic *efx)
 	} else {
 		net_dev->netdev_ops = &efx_farch_netdev_ops;
 	}
-	SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops);
+	net_dev->ethtool_ops = &efx_ethtool_ops;
 	net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
 
 	rtnl_lock();
diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c
index acbbe48a519c..a86339903b9b 100644
--- a/drivers/net/ethernet/sis/sis190.c
+++ b/drivers/net/ethernet/sis/sis190.c
@@ -1877,7 +1877,7 @@ static int sis190_init_one(struct pci_dev *pdev,
 
 	dev->netdev_ops = &sis190_netdev_ops;
 
-	SET_ETHTOOL_OPS(dev, &sis190_ethtool_ops);
+	dev->ethtool_ops = &sis190_ethtool_ops;
 	dev->watchdog_timeo = SIS190_TX_TIMEOUT;
 
 	spin_lock_init(&tp->lock);
diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c
index c7a4868571f9..6b33127ab352 100644
--- a/drivers/net/ethernet/smsc/smc91c92_cs.c
+++ b/drivers/net/ethernet/smsc/smc91c92_cs.c
@@ -318,7 +318,7 @@ static int smc91c92_probe(struct pcmcia_device *link)
 
     /* The SMC91c92-specific entries in the device structure. */
     dev->netdev_ops = &smc_netdev_ops;
-    SET_ETHTOOL_OPS(dev, &ethtool_ops);
+    dev->ethtool_ops = &ethtool_ops;
     dev->watchdog_timeo = TX_TIMEOUT;
 
     smc->mii_if.dev = dev;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index c5f9cb85c8ef..c963394ded6c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -784,5 +784,5 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
 
 void stmmac_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &stmmac_ethtool_ops);
+	netdev->ethtool_ops = &stmmac_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index 2ead87759ab4..38da73a2a886 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -2413,7 +2413,7 @@ static void bdx_set_ethtool_ops(struct net_device *netdev)
 		.get_ethtool_stats = bdx_get_ethtool_stats,
 	};
 
-	SET_ETHTOOL_OPS(netdev, &bdx_ethtool_ops);
+	netdev->ethtool_ops = &bdx_ethtool_ops;
 }
 
 /**
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 91499be03c6f..e3d871055d63 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1998,7 +1998,7 @@ static int cpsw_probe_dual_emac(struct platform_device *pdev,
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
+	ndev->ethtool_ops = &cpsw_ethtool_ops;
 	netif_napi_add(ndev, &priv_sl2->napi, cpsw_poll, CPSW_POLL_WEIGHT);
 
 	/* register the network device */
@@ -2227,7 +2227,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
+	ndev->ethtool_ops = &cpsw_ethtool_ops;
 	netif_napi_add(ndev, &priv->napi, cpsw_poll, CPSW_POLL_WEIGHT);
 
 	/* register the network device */
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 8f0e69ce07ca..e76eae541151 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1980,7 +1980,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
 	}
 
 	ndev->netdev_ops = &emac_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ethtool_ops);
+	ndev->ethtool_ops = &ethtool_ops;
 	netif_napi_add(ndev, &priv->napi, emac_poll, EMAC_POLL_WEIGHT);
 
 	/* register the network device */
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1de3ef5dd5d2..2e967a7bdb33 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -810,7 +810,7 @@ static int netvsc_probe(struct hv_device *dev,
 	net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
 			NETIF_F_IP_CSUM | NETIF_F_TSO;
 
-	SET_ETHTOOL_OPS(net, &ethtool_ops);
+	net->ethtool_ops = &ethtool_ops;
 	SET_NETDEV_DEV(net, &dev->device);
 
 	/* Notify the netvsc driver of the new device */
diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index 63aa9d9e34c5..27536aa89199 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -348,7 +348,7 @@ static int ntb_netdev_probe(struct pci_dev *pdev)
 	memcpy(ndev->dev_addr, ndev->perm_addr, ndev->addr_len);
 
 	ndev->netdev_ops = &ntb_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ntb_ethtool_ops);
+	ndev->ethtool_ops = &ntb_ethtool_ops;
 
 	dev->qp = ntb_transport_create_queue(ndev, pdev, &ntb_netdev_handlers);
 	if (!dev->qp) {
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index a8497183ff8b..dac7a0d9bb46 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -494,7 +494,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
 	ndev->mtu = RIO_MAX_MSG_SIZE - 14;
 	ndev->features = NETIF_F_LLTX;
 	SET_NETDEV_DEV(ndev, &mport->dev);
-	SET_ETHTOOL_OPS(ndev, &rionet_ethtool_ops);
+	ndev->ethtool_ops = &rionet_ethtool_ops;
 
 	spin_lock_init(&rnet->lock);
 	spin_lock_init(&rnet->tx_lock);
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 630caf48f63a..8cfc3bb0c6a6 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -793,7 +793,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 
 	netdev->netdev_ops = &catc_netdev_ops;
 	netdev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 
 	catc->usbdev = usbdev;
 	catc->netdev = netdev;
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 660bd5ea9fc0..a3a05869309d 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2425,7 +2425,7 @@ static void hso_net_init(struct net_device *net)
 	net->type = ARPHRD_NONE;
 	net->mtu = DEFAULT_MTU - 14;
 	net->tx_queue_len = 10;
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 
 	/* and initialize the semaphore */
 	spin_lock_init(&hso_net->net_lock);
diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 421934c83f1c..f72570708edb 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -524,7 +524,7 @@ static int ipheth_probe(struct usb_interface *intf,
 	usb_set_intfdata(intf, dev);
 
 	SET_NETDEV_DEV(netdev, &intf->dev);
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 
 	retval = register_netdev(netdev);
 	if (retval) {
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index a359d3bb7c5b..dcb6d33141e0 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -1171,7 +1171,7 @@ err_fw:
 	netdev->netdev_ops = &kaweth_netdev_ops;
 	netdev->watchdog_timeo = KAWETH_TX_TIMEOUT;
 	netdev->mtu = le16_to_cpu(kaweth->configuration.segment_size);
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 
 	/* kaweth is zeroed as part of alloc_netdev */
 	INIT_DELAYED_WORK(&kaweth->lowmem_work, kaweth_resubmit_tl);
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 03e8a15d7deb..f84080215915 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -1159,7 +1159,7 @@ static int pegasus_probe(struct usb_interface *intf,
 
 	net->watchdog_timeo = PEGASUS_TX_TIMEOUT;
 	net->netdev_ops = &pegasus_netdev_ops;
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 	pegasus->mii.dev = net;
 	pegasus->mii.mdio_read = mdio_read;
 	pegasus->mii.mdio_write = mdio_write;
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 3fbfb0869030..9f91c7aba4b0 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3452,7 +3452,7 @@ static int rtl8152_probe(struct usb_interface *intf,
 			      NETIF_F_TSO | NETIF_F_FRAGLIST |
 			      NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
 
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 	netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);
 
 	tp->mii.dev = netdev;
diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index da2c4583bd2d..6e87e5710048 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -878,7 +878,7 @@ static int rtl8150_probe(struct usb_interface *intf,
 	dev->netdev = netdev;
 	netdev->netdev_ops = &rtl8150_netdev_ops;
 	netdev->watchdog_timeo = RTL8150_TX_TIMEOUT;
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 	dev->intr_interval = 100;	/* 100ms */
 
 	if (!alloc_all_urbs(dev)) {
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b852bb368acc..7d9f84a91f37 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1646,7 +1646,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	dev->netdev_ops = &virtnet_netdev;
 	dev->features = NETIF_F_HIGHDMA;
 
-	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
+	dev->ethtool_ops = &virtnet_ethtool_ops;
 	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	/* Do we support "hardware" checksums? */
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 600ab56c0008..00e120296e92 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -635,5 +635,5 @@ static const struct ethtool_ops vmxnet3_ethtool_ops = {
 
 void vmxnet3_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &vmxnet3_ethtool_ops);
+	netdev->ethtool_ops = &vmxnet3_ethtool_ops;
 }
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 1dfee9a7fbf7..e68c8eb4ea8e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2716,7 +2716,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 		return -EEXIST;
 	}
 
-	SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);
+	dev->ethtool_ops = &vxlan_ethtool_ops;
 
 	/* create an fdb entry for a valid default destination */
 	if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 67db34e56d7e..52919ad42726 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -882,7 +882,7 @@ void hostap_setup_dev(struct net_device *dev, local_info_t *local,
 	dev->mtu = local->mtu;
 
 
-	SET_ETHTOOL_OPS(dev, &prism2_ethtool_ops);
+	dev->ethtool_ops = &prism2_ethtool_ops;
 
 }
 
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index ef05c5c49d41..a7557331699f 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -386,7 +386,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_TSO | NETIF_F_TSO6;
 	dev->features = dev->hw_features | NETIF_F_RXCSUM;
-	SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
+	dev->ethtool_ops = &xenvif_ethtool_ops;
 
 	dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 158b5e639fc7..895355de8ac4 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1332,7 +1332,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
          */
 	netdev->features |= netdev->hw_features;
 
-	SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
+	netdev->ethtool_ops = &xennet_ethtool_ops;
 	SET_NETDEV_DEV(netdev, &dev->dev);
 
 	netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index e232ceca38fe..5ef5b4f45758 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -969,10 +969,9 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 	card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
 	card->dev->mtu = card->info.initial_mtu;
 	card->dev->netdev_ops = &qeth_l2_netdev_ops;
-	if (card->info.type != QETH_CARD_TYPE_OSN)
-		SET_ETHTOOL_OPS(card->dev, &qeth_l2_ethtool_ops);
-	else
-		SET_ETHTOOL_OPS(card->dev, &qeth_l2_osn_ops);
+	card->dev->ethtool_ops =
+		(card->info.type != QETH_CARD_TYPE_OSN) ?
+		&qeth_l2_ethtool_ops : &qeth_l2_osn_ops;
 	card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 	card->info.broadcast_capable = 1;
 	qeth_l2_request_initial_mac(card);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index bc2499a24884..c58f82af3658 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3301,7 +3301,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 	card->dev->ml_priv = card;
 	card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
 	card->dev->mtu = card->info.initial_mtu;
-	SET_ETHTOOL_OPS(card->dev, &qeth_l3_ethtool_ops);
+	card->dev->ethtool_ops = &qeth_l3_ethtool_ops;
 	card->dev->features |=	NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_CTAG_RX |
 				NETIF_F_HW_VLAN_CTAG_FILTER;
diff --git a/drivers/staging/et131x/et131x.c b/drivers/staging/et131x/et131x.c
index d329cf314360..15e0f4da3ce0 100644
--- a/drivers/staging/et131x/et131x.c
+++ b/drivers/staging/et131x/et131x.c
@@ -4604,7 +4604,7 @@ static int et131x_pci_setup(struct pci_dev *pdev,
 	netdev->netdev_ops     = &et131x_netdev_ops;
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
-	SET_ETHTOOL_OPS(netdev, &et131x_ethtool_ops);
+	netdev->ethtool_ops = &et131x_ethtool_ops;
 
 	adapter = et131x_adapter_init(netdev, pdev);
 
diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
index d6421b9b5981..a6158bef58e5 100644
--- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
+++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
@@ -2249,7 +2249,7 @@ struct net_device *init_ft1000_card(struct pcmcia_device *link,
 
 	ft1000InitProc(dev);
 	ft1000_card_present = 1;
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 	printk(KERN_INFO "ft1000: %s: addr 0x%04lx irq %d, MAC addr %pM\n",
 			dev->name, dev->base_addr, dev->irq, dev->dev_addr);
 	return dev;
diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
index c83e3375104b..9d957615e32a 100644
--- a/drivers/staging/netlogic/xlr_net.c
+++ b/drivers/staging/netlogic/xlr_net.c
@@ -1066,7 +1066,7 @@ static int xlr_net_probe(struct platform_device *pdev)
 	xlr_set_rx_mode(ndev);
 
 	priv->num_rx_desc += MAX_NUM_DESC_SPILL;
-	SET_ETHTOOL_OPS(ndev, &xlr_ethtool_ops);
+	ndev->ethtool_ops = &xlr_ethtool_ops;
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 
 	/* Common registers, do one time initialization */
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index ff7214aac9dd..da9dd6bc5660 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -469,7 +469,7 @@ int cvm_oct_common_init(struct net_device *dev)
 
 	/* We do our own locking, Linux doesn't need to */
 	dev->features |= NETIF_F_LLTX;
-	SET_ETHTOOL_OPS(dev, &cvm_oct_ethtool_ops);
+	dev->ethtool_ops = &cvm_oct_ethtool_ops;
 
 	cvm_oct_phy_setup_device(dev);
 	cvm_oct_set_mac_filter(dev);
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index b7d4f82872b7..ce8e28146162 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -793,7 +793,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
 
 	net->netdev_ops = &eth_netdev_ops;
 
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 
 	dev->gadget = g;
 	SET_NETDEV_DEV(net, &g->dev);
@@ -850,7 +850,7 @@ struct net_device *gether_setup_name_default(const char *netname)
 
 	net->netdev_ops = &eth_netdev_ops;
 
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 	SET_NETDEV_DEVTYPE(net, &gadget_type);
 
 	return net;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index adc4658e9873..2dea98cbbdba 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -56,9 +56,6 @@ struct device;
 struct phy_device;
 /* 802.11 specific */
 struct wireless_dev;
-					/* source back-compat hooks */
-#define SET_ETHTOOL_OPS(netdev,ops) \
-	( (netdev)->ethtool_ops = (ops) )
 
 void netdev_set_default_ethtool_ops(struct net_device *dev,
 				    const struct ethtool_ops *ops);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 744a59b85e15..e7ee65dc20bf 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -884,7 +884,7 @@ static void batadv_softif_init_early(struct net_device *dev)
 	/* generate random address */
 	eth_hw_addr_random(dev);
 
-	SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
+	dev->ethtool_ops = &batadv_ethtool_ops;
 
 	memset(priv, 0, sizeof(*priv));
 }
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 3e2da2cb72db..9212015abc8f 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -348,7 +348,7 @@ void br_dev_setup(struct net_device *dev)
 
 	dev->netdev_ops = &br_netdev_ops;
 	dev->destructor = br_dev_free;
-	SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
+	dev->ethtool_ops = &br_ethtool_ops;
 	SET_NETDEV_DEVTYPE(dev, &br_type);
 	dev->tx_queue_len = 0;
 	dev->priv_flags = IFF_EBRIDGE;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 02c0e1716f64..64c5af0a10dd 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -346,7 +346,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 		return slave_dev;
 
 	slave_dev->features = master->vlan_features;
-	SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops);
+	slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
 	eth_hw_addr_inherit(slave_dev, master);
 	slave_dev->tx_queue_len = 0;
 
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 729c68763fe7..789af9280e77 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -130,7 +130,7 @@ static void do_setup(struct net_device *netdev)
 	netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 	netdev->destructor = internal_dev_destructor;
-	SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
+	netdev->ethtool_ops = &internal_dev_ethtool_ops;
 	netdev->tx_queue_len = 0;
 
 	netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
-- 
cgit v1.2.3-71-gd317


From 50a0ffaf75e9d2d97200b523f2c600f40e9756b1 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Sun, 11 May 2014 10:47:15 +0200
Subject: net: cdc_ncm/cdc_mbim: rework probing of NCM/MBIM functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The NCM class match in the cdc_mbim driver is confusing and
cause unexpected behaviour. The USB core guarantees that a
USB interface is in altsetting 0 when probing starts. This
means that devices implementing a NCM 1.0 backwards
compatible MBIM function (a "NCM/MBIM function") always hit
the NCM entry in the cdc_mbim driver match table. Such
functions will never match any of the MBIM entries.

This causes unexpeced behaviour for cases where the NCM and
MBIM entries are differet, which is currently the case for
all except Ericsson devices.

Improve the probing of NCM/MBIM functions by looking up the
device again in the cdc_mbim match table after switching to
the MBIM identity.

The shared altsetting selection is updated to better
accommodate the new probing logic, returning the preferred
altsetting for the control interface instead of the data
interface. The control interface altsetting update is moved
to the cdc_mbim driver. It is never necessary to change the
control interface altsetting for NCM.

Cc: Greg Suarez <gsuarez@smithmicro.com>
Reported by: Yu-an Shih <yshih@nvidia.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_mbim.c  | 43 +++++++++++++++++++++++++++++++++++++++++--
 drivers/net/usb/cdc_ncm.c   | 27 +++++++++++++--------------
 include/linux/usb/cdc_ncm.h |  2 +-
 3 files changed, 55 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 80d27719ba38..bc23273d0455 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -107,15 +107,54 @@ static const struct net_device_ops cdc_mbim_netdev_ops = {
 	.ndo_vlan_rx_kill_vid = cdc_mbim_rx_kill_vid,
 };
 
+/* Change the control interface altsetting and update the .driver_info
+ * pointer if the matching entry after changing class codes points to
+ * a different struct
+ */
+static int cdc_mbim_set_ctrlalt(struct usbnet *dev, struct usb_interface *intf, u8 alt)
+{
+	struct usb_driver *driver = to_usb_driver(intf->dev.driver);
+	const struct usb_device_id *id;
+	struct driver_info *info;
+	int ret;
+
+	ret = usb_set_interface(dev->udev,
+				intf->cur_altsetting->desc.bInterfaceNumber,
+				alt);
+	if (ret)
+		return ret;
+
+	id = usb_match_id(intf, driver->id_table);
+	if (!id)
+		return -ENODEV;
+
+	info = (struct driver_info *)id->driver_info;
+	if (info != dev->driver_info) {
+		dev_dbg(&intf->dev, "driver_info updated to '%s'\n",
+			info->description);
+		dev->driver_info = info;
+	}
+	return 0;
+}
+
 static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf)
 {
 	struct cdc_ncm_ctx *ctx;
 	struct usb_driver *subdriver = ERR_PTR(-ENODEV);
 	int ret = -ENODEV;
-	u8 data_altsetting = cdc_ncm_select_altsetting(dev, intf);
+	u8 data_altsetting = 1;
 	struct cdc_mbim_state *info = (void *)&dev->data;
 
-	/* Probably NCM, defer for cdc_ncm_bind */
+	/* should we change control altsetting on a NCM/MBIM function? */
+	if (cdc_ncm_select_altsetting(intf) == CDC_NCM_COMM_ALTSETTING_MBIM) {
+		data_altsetting = CDC_NCM_DATA_ALTSETTING_MBIM;
+		ret = cdc_mbim_set_ctrlalt(dev, intf, CDC_NCM_COMM_ALTSETTING_MBIM);
+		if (ret)
+			goto err;
+		ret = -ENODEV;
+	}
+
+	/* we will hit this for NCM/MBIM functions if prefer_mbim is false */
 	if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting))
 		goto err;
 
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 9a2bd11943eb..d23bca57a23f 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -541,10 +541,10 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf)
 }
 EXPORT_SYMBOL_GPL(cdc_ncm_unbind);
 
-/* Select the MBIM altsetting iff it is preferred and available,
- * returning the number of the corresponding data interface altsetting
+/* Return the number of the MBIM control interface altsetting iff it
+ * is preferred and available,
  */
-u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf)
+u8 cdc_ncm_select_altsetting(struct usb_interface *intf)
 {
 	struct usb_host_interface *alt;
 
@@ -563,15 +563,15 @@ u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf)
 	 *   the rules given in section 6 (USB Device Model) of this
 	 *   specification."
 	 */
-	if (prefer_mbim && intf->num_altsetting == 2) {
+	if (intf->num_altsetting < 2)
+		return intf->cur_altsetting->desc.bAlternateSetting;
+
+	if (prefer_mbim) {
 		alt = usb_altnum_to_altsetting(intf, CDC_NCM_COMM_ALTSETTING_MBIM);
-		if (alt && cdc_ncm_comm_intf_is_mbim(alt) &&
-		    !usb_set_interface(dev->udev,
-				       intf->cur_altsetting->desc.bInterfaceNumber,
-				       CDC_NCM_COMM_ALTSETTING_MBIM))
-			return CDC_NCM_DATA_ALTSETTING_MBIM;
+		if (alt && cdc_ncm_comm_intf_is_mbim(alt))
+			return CDC_NCM_COMM_ALTSETTING_MBIM;
 	}
-	return CDC_NCM_DATA_ALTSETTING_NCM;
+	return CDC_NCM_COMM_ALTSETTING_NCM;
 }
 EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting);
 
@@ -580,12 +580,11 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
 	int ret;
 
 	/* MBIM backwards compatible function? */
-	cdc_ncm_select_altsetting(dev, intf);
-	if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting))
+	if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM)
 		return -ENODEV;
 
-	/* NCM data altsetting is always 1 */
-	ret = cdc_ncm_bind_common(dev, intf, 1);
+	/* The NCM data altsetting is fixed */
+	ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM);
 
 	/*
 	 * We should get an event when network connection is "connected" or
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 44b38b92236a..55b6feead93b 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -121,7 +121,7 @@ struct cdc_ncm_ctx {
 	u16 connected;
 };
 
-u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf);
+u8 cdc_ncm_select_altsetting(struct usb_interface *intf);
 int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting);
 void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
 struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
-- 
cgit v1.2.3-71-gd317


From 5b7ed0892f2af4e60b9a8d2c71c77774512a6cb9 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Sun, 11 May 2014 20:22:09 -0700
Subject: tcp: move fastopen functions to tcp_fastopen.c

Move common TFO functions that will be used by both v4 and v6
to tcp_fastopen.c. Create a helper tcp_fastopen_queue_check().

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Daniel Lee <longinus00@gmail.com>
Signed-off-by: Jerry Chu <hkchu@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h       |  10 ++-
 net/ipv4/tcp_fastopen.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c     | 185 +---------------------------------------------
 3 files changed, 200 insertions(+), 185 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3c9418456640..012236838583 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1329,8 +1329,14 @@ void tcp_free_fastopen_req(struct tcp_sock *tp);
 
 extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
 int tcp_fastopen_reset_cipher(void *key, unsigned int len);
-void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
-			     struct tcp_fastopen_cookie *foc);
+int tcp_fastopen_create_child(struct sock *sk,
+			      struct sk_buff *skb,
+			      struct sk_buff *skb_synack,
+			      struct request_sock *req);
+bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
+			struct request_sock *req,
+			struct tcp_fastopen_cookie *foc,
+			struct tcp_fastopen_cookie *valid_foc);
 void tcp_fastopen_init_key_once(bool publish);
 #define TCP_FASTOPEN_KEY_LENGTH 16
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f195d9316e55..0606c91d9d0b 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -94,3 +94,193 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
 	}
 	rcu_read_unlock();
 }
+
+int tcp_fastopen_create_child(struct sock *sk,
+			      struct sk_buff *skb,
+			      struct sk_buff *skb_synack,
+			      struct request_sock *req)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+	const struct inet_request_sock *ireq = inet_rsk(req);
+	struct sock *child;
+	int err;
+
+	req->num_retrans = 0;
+	req->num_timeout = 0;
+	req->sk = NULL;
+
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+	if (child == NULL) {
+		NET_INC_STATS_BH(sock_net(sk),
+				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+		kfree_skb(skb_synack);
+		return -1;
+	}
+	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
+				    ireq->ir_rmt_addr, ireq->opt);
+	err = net_xmit_eval(err);
+	if (!err)
+		tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	/* XXX (TFO) - is it ok to ignore error and continue? */
+
+	spin_lock(&queue->fastopenq->lock);
+	queue->fastopenq->qlen++;
+	spin_unlock(&queue->fastopenq->lock);
+
+	/* Initialize the child socket. Have to fix some values to take
+	 * into account the child is a Fast Open socket and is created
+	 * only out of the bits carried in the SYN packet.
+	 */
+	tp = tcp_sk(child);
+
+	tp->fastopen_rsk = req;
+	/* Do a hold on the listner sk so that if the listener is being
+	 * closed, the child that has been accepted can live on and still
+	 * access listen_lock.
+	 */
+	sock_hold(sk);
+	tcp_rsk(req)->listener = sk;
+
+	/* RFC1323: The window in SYN & SYN/ACK segments is never
+	 * scaled. So correct it appropriately.
+	 */
+	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
+
+	/* Activate the retrans timer so that SYNACK can be retransmitted.
+	 * The request socket is not added to the SYN table of the parent
+	 * because it's been added to the accept queue directly.
+	 */
+	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
+				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+
+	/* Add the child socket directly into the accept queue */
+	inet_csk_reqsk_queue_add(sk, req, child);
+
+	/* Now finish processing the fastopen child socket. */
+	inet_csk(child)->icsk_af_ops->rebuild_header(child);
+	tcp_init_congestion_control(child);
+	tcp_mtup_init(child);
+	tcp_init_metrics(child);
+	tcp_init_buffer_space(child);
+
+	/* Queue the data carried in the SYN packet. We need to first
+	 * bump skb's refcnt because the caller will attempt to free it.
+	 *
+	 * XXX (TFO) - we honor a zero-payload TFO request for now.
+	 * (Any reason not to?)
+	 */
+	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
+		/* Don't queue the skb if there is no payload in SYN.
+		 * XXX (TFO) - How about SYN+FIN?
+		 */
+		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	} else {
+		skb = skb_get(skb);
+		skb_dst_drop(skb);
+		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
+		skb_set_owner_r(skb, child);
+		__skb_queue_tail(&child->sk_receive_queue, skb);
+		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		tp->syn_data_acked = 1;
+	}
+	sk->sk_data_ready(sk);
+	bh_unlock_sock(child);
+	sock_put(child);
+	WARN_ON(req->sk == NULL);
+	return 0;
+}
+EXPORT_SYMBOL(tcp_fastopen_create_child);
+
+static bool tcp_fastopen_queue_check(struct sock *sk)
+{
+	struct fastopen_queue *fastopenq;
+
+	/* Make sure the listener has enabled fastopen, and we don't
+	 * exceed the max # of pending TFO requests allowed before trying
+	 * to validating the cookie in order to avoid burning CPU cycles
+	 * unnecessarily.
+	 *
+	 * XXX (TFO) - The implication of checking the max_qlen before
+	 * processing a cookie request is that clients can't differentiate
+	 * between qlen overflow causing Fast Open to be disabled
+	 * temporarily vs a server not supporting Fast Open at all.
+	 */
+	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
+	if (fastopenq == NULL || fastopenq->max_qlen == 0)
+		return false;
+
+	if (fastopenq->qlen >= fastopenq->max_qlen) {
+		struct request_sock *req1;
+		spin_lock(&fastopenq->lock);
+		req1 = fastopenq->rskq_rst_head;
+		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
+			spin_unlock(&fastopenq->lock);
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
+			return false;
+		}
+		fastopenq->rskq_rst_head = req1->dl_next;
+		fastopenq->qlen--;
+		spin_unlock(&fastopenq->lock);
+		reqsk_free(req1);
+	}
+	return true;
+}
+
+bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
+			struct request_sock *req,
+			struct tcp_fastopen_cookie *foc,
+			struct tcp_fastopen_cookie *valid_foc)
+{
+	bool skip_cookie = false;
+
+	if (likely(!fastopen_cookie_present(foc))) {
+		/* See include/net/tcp.h for the meaning of these knobs */
+		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
+		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
+		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
+			skip_cookie = true; /* no cookie to validate */
+		else
+			return false;
+	}
+	/* A FO option is present; bump the counter. */
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
+
+	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
+	    !tcp_fastopen_queue_check(sk))
+		return false;
+
+	if (skip_cookie) {
+		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		return true;
+	}
+
+	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
+		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
+			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
+						ip_hdr(skb)->daddr, valid_foc);
+			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
+			    memcmp(&foc->val[0], &valid_foc->val[0],
+			    TCP_FASTOPEN_COOKIE_SIZE) != 0)
+				return false;
+			valid_foc->len = -1;
+		}
+		/* Acknowledge the data received from the peer. */
+		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		return true;
+	} else if (foc->len == 0) { /* Client requesting a cookie */
+		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
+					ip_hdr(skb)->daddr, valid_foc);
+		NET_INC_STATS_BH(sock_net(sk),
+		    LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+	} else {
+		/* Client sent a cookie with wrong size. Treat it
+		 * the same as invalid and return a valid one.
+		 */
+		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
+					ip_hdr(skb)->daddr, valid_foc);
+	}
+	return false;
+}
+EXPORT_SYMBOL(tcp_fastopen_check);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad166dcc278f..032fcaee164a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1260,187 +1260,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 };
 #endif
 
-static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
-			       struct request_sock *req,
-			       struct tcp_fastopen_cookie *foc,
-			       struct tcp_fastopen_cookie *valid_foc)
-{
-	bool skip_cookie = false;
-	struct fastopen_queue *fastopenq;
-
-	if (likely(!fastopen_cookie_present(foc))) {
-		/* See include/net/tcp.h for the meaning of these knobs */
-		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
-		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
-		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
-			skip_cookie = true; /* no cookie to validate */
-		else
-			return false;
-	}
-	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
-	/* A FO option is present; bump the counter. */
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
-
-	/* Make sure the listener has enabled fastopen, and we don't
-	 * exceed the max # of pending TFO requests allowed before trying
-	 * to validating the cookie in order to avoid burning CPU cycles
-	 * unnecessarily.
-	 *
-	 * XXX (TFO) - The implication of checking the max_qlen before
-	 * processing a cookie request is that clients can't differentiate
-	 * between qlen overflow causing Fast Open to be disabled
-	 * temporarily vs a server not supporting Fast Open at all.
-	 */
-	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
-	    fastopenq == NULL || fastopenq->max_qlen == 0)
-		return false;
-
-	if (fastopenq->qlen >= fastopenq->max_qlen) {
-		struct request_sock *req1;
-		spin_lock(&fastopenq->lock);
-		req1 = fastopenq->rskq_rst_head;
-		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
-			spin_unlock(&fastopenq->lock);
-			NET_INC_STATS_BH(sock_net(sk),
-			    LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
-			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
-			foc->len = -1;
-			return false;
-		}
-		fastopenq->rskq_rst_head = req1->dl_next;
-		fastopenq->qlen--;
-		spin_unlock(&fastopenq->lock);
-		reqsk_free(req1);
-	}
-	if (skip_cookie) {
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	}
-
-	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
-		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
-			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-						ip_hdr(skb)->daddr, valid_foc);
-			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
-			    memcmp(&foc->val[0], &valid_foc->val[0],
-			    TCP_FASTOPEN_COOKIE_SIZE) != 0)
-				return false;
-			valid_foc->len = -1;
-		}
-		/* Acknowledge the data received from the peer. */
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	} else if (foc->len == 0) { /* Client requesting a cookie */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-		NET_INC_STATS_BH(sock_net(sk),
-		    LINUX_MIB_TCPFASTOPENCOOKIEREQD);
-	} else {
-		/* Client sent a cookie with wrong size. Treat it
-		 * the same as invalid and return a valid one.
-		 */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-	}
-	return false;
-}
-
-static int tcp_v4_conn_req_fastopen(struct sock *sk,
-				    struct sk_buff *skb,
-				    struct sk_buff *skb_synack,
-				    struct request_sock *req)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
-	const struct inet_request_sock *ireq = inet_rsk(req);
-	struct sock *child;
-	int err;
-
-	req->num_retrans = 0;
-	req->num_timeout = 0;
-	req->sk = NULL;
-
-	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-	if (child == NULL) {
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-		kfree_skb(skb_synack);
-		return -1;
-	}
-	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-				    ireq->ir_rmt_addr, ireq->opt);
-	err = net_xmit_eval(err);
-	if (!err)
-		tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	/* XXX (TFO) - is it ok to ignore error and continue? */
-
-	spin_lock(&queue->fastopenq->lock);
-	queue->fastopenq->qlen++;
-	spin_unlock(&queue->fastopenq->lock);
-
-	/* Initialize the child socket. Have to fix some values to take
-	 * into account the child is a Fast Open socket and is created
-	 * only out of the bits carried in the SYN packet.
-	 */
-	tp = tcp_sk(child);
-
-	tp->fastopen_rsk = req;
-	/* Do a hold on the listner sk so that if the listener is being
-	 * closed, the child that has been accepted can live on and still
-	 * access listen_lock.
-	 */
-	sock_hold(sk);
-	tcp_rsk(req)->listener = sk;
-
-	/* RFC1323: The window in SYN & SYN/ACK segments is never
-	 * scaled. So correct it appropriately.
-	 */
-	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
-
-	/* Activate the retrans timer so that SYNACK can be retransmitted.
-	 * The request socket is not added to the SYN table of the parent
-	 * because it's been added to the accept queue directly.
-	 */
-	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
-	    TCP_TIMEOUT_INIT, TCP_RTO_MAX);
-
-	/* Add the child socket directly into the accept queue */
-	inet_csk_reqsk_queue_add(sk, req, child);
-
-	/* Now finish processing the fastopen child socket. */
-	inet_csk(child)->icsk_af_ops->rebuild_header(child);
-	tcp_init_congestion_control(child);
-	tcp_mtup_init(child);
-	tcp_init_metrics(child);
-	tcp_init_buffer_space(child);
-
-	/* Queue the data carried in the SYN packet. We need to first
-	 * bump skb's refcnt because the caller will attempt to free it.
-	 *
-	 * XXX (TFO) - we honor a zero-payload TFO request for now.
-	 * (Any reason not to?)
-	 */
-	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
-		/* Don't queue the skb if there is no payload in SYN.
-		 * XXX (TFO) - How about SYN+FIN?
-		 */
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-	} else {
-		skb = skb_get(skb);
-		skb_dst_drop(skb);
-		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
-		skb_set_owner_r(skb, child);
-		__skb_queue_tail(&child->sk_receive_queue, skb);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		tp->syn_data_acked = 1;
-	}
-	sk->sk_data_ready(sk);
-	bh_unlock_sock(child);
-	sock_put(child);
-	WARN_ON(req->sk == NULL);
-	return 0;
-}
-
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tmp_opt;
@@ -1599,8 +1418,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		if (fastopen_cookie_present(&foc) && foc.len != 0)
 			NET_INC_STATS_BH(sock_net(sk),
 			    LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
-		goto drop_and_free;
+	} else if (tcp_fastopen_create_child(sk, skb, skb_synack, req))
+		goto drop_and_release;
 
 	return 0;
 
-- 
cgit v1.2.3-71-gd317


From 89278c9dc922272df921042aafa18311f3398c6c Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Sun, 11 May 2014 20:22:10 -0700
Subject: tcp: simplify fast open cookie processing

Consolidate various cookie checking and generation code to simplify
the fast open processing. The main goal is to reduce code duplication
in tcp_v4_conn_request() for IPv6 support.

Removes two experimental sysctl flags TFO_SERVER_ALWAYS and
TFO_SERVER_COOKIE_NOT_CHKD used primarily for developmental debugging
purposes.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Daniel Lee <longinus00@gmail.com>
Signed-off-by: Jerry Chu <hkchu@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h     |  5 ----
 include/net/tcp.h       |  9 +------
 net/ipv4/tcp_fastopen.c | 71 +++++++++++++++++++------------------------------
 net/ipv4/tcp_ipv4.c     | 10 +++----
 net/ipv4/tcp_output.c   |  2 +-
 5 files changed, 33 insertions(+), 64 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4e37c71ecd74..bc35e4709e8e 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -366,11 +366,6 @@ static inline bool tcp_passive_fastopen(const struct sock *sk)
 		tcp_sk(sk)->fastopen_rsk != NULL);
 }
 
-static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc)
-{
-	return foc->len != -1;
-}
-
 extern void tcp_sock_destruct(struct sock *sk);
 
 static inline int fastopen_init_queue(struct sock *sk, int backlog)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 012236838583..17d7c6a3d037 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -220,8 +220,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define	TFO_SERVER_ENABLE	2
 #define	TFO_CLIENT_NO_COOKIE	4	/* Data in SYN w/o cookie option */
 
-/* Process SYN data but skip cookie validation */
-#define	TFO_SERVER_COOKIE_NOT_CHKED	0x100
 /* Accept SYN data w/o any cookie option */
 #define	TFO_SERVER_COOKIE_NOT_REQD	0x200
 
@@ -230,10 +228,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
  */
 #define	TFO_SERVER_WO_SOCKOPT1	0x400
 #define	TFO_SERVER_WO_SOCKOPT2	0x800
-/* Always create TFO child sockets on a TFO listener even when
- * cookie/data not present. (For testing purpose!)
- */
-#define	TFO_SERVER_ALWAYS	0x1000
 
 extern struct inet_timewait_death_row tcp_death_row;
 
@@ -1335,8 +1329,7 @@ int tcp_fastopen_create_child(struct sock *sk,
 			      struct request_sock *req);
 bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
 			struct request_sock *req,
-			struct tcp_fastopen_cookie *foc,
-			struct tcp_fastopen_cookie *valid_foc);
+			struct tcp_fastopen_cookie *foc);
 void tcp_fastopen_init_key_once(bool publish);
 #define TCP_FASTOPEN_KEY_LENGTH 16
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 0606c91d9d0b..5a98277b9a82 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -228,59 +228,44 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 	return true;
 }
 
+/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
+ * may be updated and return the client in the SYN-ACK later. E.g., Fast Open
+ * cookie request (foc->len == 0).
+ */
 bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
 			struct request_sock *req,
-			struct tcp_fastopen_cookie *foc,
-			struct tcp_fastopen_cookie *valid_foc)
+			struct tcp_fastopen_cookie *foc)
 {
-	bool skip_cookie = false;
-
-	if (likely(!fastopen_cookie_present(foc))) {
-		/* See include/net/tcp.h for the meaning of these knobs */
-		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
-		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
-		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
-			skip_cookie = true; /* no cookie to validate */
-		else
-			return false;
-	}
-	/* A FO option is present; bump the counter. */
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
+	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
 
-	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
-	    !tcp_fastopen_queue_check(sk))
+	if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+	      (syn_data || foc->len >= 0) &&
+	      tcp_fastopen_queue_check(sk))) {
+		foc->len = -1;
 		return false;
-
-	if (skip_cookie) {
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
 	}
 
-	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
-		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
-			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-						ip_hdr(skb)->daddr, valid_foc);
-			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
-			    memcmp(&foc->val[0], &valid_foc->val[0],
-			    TCP_FASTOPEN_COOKIE_SIZE) != 0)
-				return false;
-			valid_foc->len = -1;
-		}
-		/* Acknowledge the data received from the peer. */
+	if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+		goto fastopen;
+
+	tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
+				ip_hdr(skb)->daddr, &valid_foc);
+
+	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
+	    foc->len == valid_foc.len &&
+	    !memcmp(foc->val, valid_foc.val, foc->len)) {
+fastopen:
 		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		foc->len = -1;
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
 		return true;
-	} else if (foc->len == 0) { /* Client requesting a cookie */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-		NET_INC_STATS_BH(sock_net(sk),
-		    LINUX_MIB_TCPFASTOPENCOOKIEREQD);
-	} else {
-		/* Client sent a cookie with wrong size. Treat it
-		 * the same as invalid and return a valid one.
-		 */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
 	}
+
+	NET_INC_STATS_BH(sock_net(sk), foc->len ?
+			 LINUX_MIB_TCPFASTOPENPASSIVEFAIL :
+			 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+	*foc = valid_foc;
 	return false;
 }
 EXPORT_SYMBOL(tcp_fastopen_check);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 032fcaee164a..5ea0949dadfd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1273,7 +1273,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	bool want_cookie = false;
 	struct flowi4 fl4;
 	struct tcp_fastopen_cookie foc = { .len = -1 };
-	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	struct sk_buff *skb_synack;
 	int do_fastopen;
 
@@ -1381,7 +1380,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		if (dst == NULL)
 			goto drop_and_free;
 	}
-	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
+	do_fastopen = !want_cookie &&
+		      tcp_fastopen_check(sk, skb, req, &foc);
 
 	/* We don't call tcp_v4_send_synack() directly because we need
 	 * to make sure a child socket can be created successfully before
@@ -1394,8 +1394,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * latter to remove its dependency on the current implementation
 	 * of tcp_v4_send_synack()->tcp_select_initial_window().
 	 */
-	skb_synack = tcp_make_synack(sk, dst, req,
-	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
+	skb_synack = tcp_make_synack(sk, dst, req, &foc);
 
 	if (skb_synack) {
 		__tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -1415,9 +1414,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		tcp_rsk(req)->listener = NULL;
 		/* Add the request_sock to the SYN table */
 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-		if (fastopen_cookie_present(&foc) && foc.len != 0)
-			NET_INC_STATS_BH(sock_net(sk),
-			    LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
 	} else if (tcp_fastopen_create_child(sk, skb, skb_synack, req))
 		goto drop_and_release;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 694711a140d4..b20fc02920f9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -627,7 +627,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
 		if (unlikely(!ireq->tstamp_ok))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
 	}
-	if (foc != NULL) {
+	if (foc != NULL && foc->len >= 0) {
 		u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
 		need = (need + 3) & ~3U;  /* Align to 32 bits */
 		if (remaining >= need) {
-- 
cgit v1.2.3-71-gd317


From 843f4a55e336e6d0c7bb92e7f9621535bc8d5fcd Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Sun, 11 May 2014 20:22:11 -0700
Subject: tcp: use tcp_v4_send_synack on first SYN-ACK

To avoid large code duplication in IPv6, we need to first simplify
the complicate SYN-ACK sending code in tcp_v4_conn_request().

To use tcp_v4(6)_send_synack() to send all SYN-ACKs, we need to
initialize the mini socket's receive window before trying to
create the child socket and/or building the SYN-ACK packet. So we move
that initialization from tcp_make_synack() to tcp_v4_conn_request()
as a new function tcp_openreq_init_req_rwin().

After this refactoring the SYN-ACK sending code is simpler and easier
to implement Fast Open for IPv6.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Daniel Lee <longinus00@gmail.com>
Signed-off-by: Jerry Chu <hkchu@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h        | 14 +++++-----
 net/ipv4/tcp_fastopen.c  | 67 ++++++++++++++++++++++--------------------------
 net/ipv4/tcp_ipv4.c      | 57 ++++++++++++----------------------------
 net/ipv4/tcp_minisocks.c | 31 ++++++++++++++++++++++
 net/ipv4/tcp_output.c    | 21 ---------------
 5 files changed, 85 insertions(+), 105 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 17d7c6a3d037..f5d6ca4a9d28 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1114,6 +1114,9 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
 }
 
+extern void tcp_openreq_init_rwin(struct request_sock *req,
+				  struct sock *sk, struct dst_entry *dst);
+
 void tcp_enter_memory_pressure(struct sock *sk);
 
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
@@ -1323,13 +1326,10 @@ void tcp_free_fastopen_req(struct tcp_sock *tp);
 
 extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
 int tcp_fastopen_reset_cipher(void *key, unsigned int len);
-int tcp_fastopen_create_child(struct sock *sk,
-			      struct sk_buff *skb,
-			      struct sk_buff *skb_synack,
-			      struct request_sock *req);
-bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
-			struct request_sock *req,
-			struct tcp_fastopen_cookie *foc);
+bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+		      struct request_sock *req,
+		      struct tcp_fastopen_cookie *foc,
+		      struct dst_entry *dst);
 void tcp_fastopen_init_key_once(bool publish);
 #define TCP_FASTOPEN_KEY_LENGTH 16
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 5a98277b9a82..9b947a9aaf6e 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -95,34 +95,22 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
 	rcu_read_unlock();
 }
 
-int tcp_fastopen_create_child(struct sock *sk,
-			      struct sk_buff *skb,
-			      struct sk_buff *skb_synack,
-			      struct request_sock *req)
+static bool tcp_fastopen_create_child(struct sock *sk,
+				      struct sk_buff *skb,
+				      struct dst_entry *dst,
+				      struct request_sock *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
-	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct sock *child;
-	int err;
 
 	req->num_retrans = 0;
 	req->num_timeout = 0;
 	req->sk = NULL;
 
 	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-	if (child == NULL) {
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-		kfree_skb(skb_synack);
-		return -1;
-	}
-	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-				    ireq->ir_rmt_addr, ireq->opt);
-	err = net_xmit_eval(err);
-	if (!err)
-		tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	/* XXX (TFO) - is it ok to ignore error and continue? */
+	if (child == NULL)
+		return false;
 
 	spin_lock(&queue->fastopenq->lock);
 	queue->fastopenq->qlen++;
@@ -167,28 +155,24 @@ int tcp_fastopen_create_child(struct sock *sk,
 	/* Queue the data carried in the SYN packet. We need to first
 	 * bump skb's refcnt because the caller will attempt to free it.
 	 *
-	 * XXX (TFO) - we honor a zero-payload TFO request for now.
-	 * (Any reason not to?)
+	 * XXX (TFO) - we honor a zero-payload TFO request for now,
+	 * (any reason not to?) but no need to queue the skb since
+	 * there is no data. How about SYN+FIN?
 	 */
-	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
-		/* Don't queue the skb if there is no payload in SYN.
-		 * XXX (TFO) - How about SYN+FIN?
-		 */
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-	} else {
+	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) {
 		skb = skb_get(skb);
 		skb_dst_drop(skb);
 		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
 		skb_set_owner_r(skb, child);
 		__skb_queue_tail(&child->sk_receive_queue, skb);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		tp->syn_data_acked = 1;
 	}
+	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 	sk->sk_data_ready(sk);
 	bh_unlock_sock(child);
 	sock_put(child);
 	WARN_ON(req->sk == NULL);
-	return 0;
+	return true;
 }
 EXPORT_SYMBOL(tcp_fastopen_create_child);
 
@@ -232,9 +216,10 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
  * may be updated and return the client in the SYN-ACK later. E.g., Fast Open
  * cookie request (foc->len == 0).
  */
-bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
-			struct request_sock *req,
-			struct tcp_fastopen_cookie *foc)
+bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+		      struct request_sock *req,
+		      struct tcp_fastopen_cookie *foc,
+		      struct dst_entry *dst)
 {
 	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
@@ -255,11 +240,21 @@ bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
 	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
 	    foc->len == valid_foc.len &&
 	    !memcmp(foc->val, valid_foc.val, foc->len)) {
+		/* Cookie is valid. Create a (full) child socket to accept
+		 * the data in SYN before returning a SYN-ACK to ack the
+		 * data. If we fail to create the socket, fall back and
+		 * ack the ISN only but includes the same cookie.
+		 *
+		 * Note: Data-less SYN with valid cookie is allowed to send
+		 * data in SYN_RECV state.
+		 */
 fastopen:
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		foc->len = -1;
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
-		return true;
+		if (tcp_fastopen_create_child(sk, skb, dst, req)) {
+			foc->len = -1;
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPFASTOPENPASSIVE);
+			return true;
+		}
 	}
 
 	NET_INC_STATS_BH(sock_net(sk), foc->len ?
@@ -268,4 +263,4 @@ fastopen:
 	*foc = valid_foc;
 	return false;
 }
-EXPORT_SYMBOL(tcp_fastopen_check);
+EXPORT_SYMBOL(tcp_try_fastopen);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5ea0949dadfd..1665f0f84233 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -822,7 +822,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
-			      u16 queue_mapping)
+			      u16 queue_mapping,
+			      struct tcp_fastopen_cookie *foc)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -833,7 +834,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, NULL);
+	skb = tcp_make_synack(sk, dst, req, foc);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -852,7 +853,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 
 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
 {
-	int res = tcp_v4_send_synack(sk, NULL, req, 0);
+	int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
 
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -1270,11 +1271,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
-	bool want_cookie = false;
+	bool want_cookie = false, fastopen;
 	struct flowi4 fl4;
 	struct tcp_fastopen_cookie foc = { .len = -1 };
-	struct sk_buff *skb_synack;
-	int do_fastopen;
+	int err;
 
 	/* Never answer to SYNs send to broadcast or multicast */
 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1373,49 +1373,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 		isn = tcp_v4_init_sequence(skb);
 	}
-	tcp_rsk(req)->snt_isn = isn;
-
-	if (dst == NULL) {
-		dst = inet_csk_route_req(sk, &fl4, req);
-		if (dst == NULL)
-			goto drop_and_free;
-	}
-	do_fastopen = !want_cookie &&
-		      tcp_fastopen_check(sk, skb, req, &foc);
-
-	/* We don't call tcp_v4_send_synack() directly because we need
-	 * to make sure a child socket can be created successfully before
-	 * sending back synack!
-	 *
-	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
-	 * (or better yet, call tcp_send_synack() in the child context
-	 * directly, but will have to fix bunch of other code first)
-	 * after syn_recv_sock() except one will need to first fix the
-	 * latter to remove its dependency on the current implementation
-	 * of tcp_v4_send_synack()->tcp_select_initial_window().
-	 */
-	skb_synack = tcp_make_synack(sk, dst, req, &foc);
-
-	if (skb_synack) {
-		__tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
-		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
-	} else
+	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		goto drop_and_free;
 
-	if (likely(!do_fastopen)) {
-		int err;
-		err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-		     ireq->ir_rmt_addr, ireq->opt);
-		err = net_xmit_eval(err);
+	tcp_rsk(req)->snt_isn = isn;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = tcp_v4_send_synack(sk, dst, req,
+				 skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
 		if (err || want_cookie)
 			goto drop_and_free;
 
 		tcp_rsk(req)->snt_synack = tcp_time_stamp;
 		tcp_rsk(req)->listener = NULL;
-		/* Add the request_sock to the SYN table */
 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-	} else if (tcp_fastopen_create_child(sk, skb, skb_synack, req))
-		goto drop_and_release;
+	}
 
 	return 0;
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 05c1b155251d..e68e0d4af6c9 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -362,6 +362,37 @@ void tcp_twsk_destructor(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
 
+void tcp_openreq_init_rwin(struct request_sock *req,
+			   struct sock *sk, struct dst_entry *dst)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+	struct tcp_sock *tp = tcp_sk(sk);
+	__u8 rcv_wscale;
+	int mss = dst_metric_advmss(dst);
+
+	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
+		mss = tp->rx_opt.user_mss;
+
+	/* Set this up on the first call only */
+	req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+
+	/* limit the window selection if the user enforce a smaller rx buffer */
+	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+	    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
+		req->window_clamp = tcp_full_space(sk);
+
+	/* tcp_full_space because it is guaranteed to be the first packet */
+	tcp_select_initial_window(tcp_full_space(sk),
+		mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+		&req->rcv_wnd,
+		&req->window_clamp,
+		ireq->wscale_ok,
+		&rcv_wscale,
+		dst_metric(dst, RTAX_INITRWND));
+	ireq->rcv_wscale = rcv_wscale;
+}
+EXPORT_SYMBOL(tcp_openreq_init_rwin);
+
 static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
 					 struct request_sock *req)
 {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b20fc02920f9..3d61c52bdf79 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2803,27 +2803,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
 		mss = tp->rx_opt.user_mss;
 
-	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
-		__u8 rcv_wscale;
-		/* Set this up on the first call only */
-		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
-
-		/* limit the window selection if the user enforce a smaller rx buffer */
-		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
-		    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
-			req->window_clamp = tcp_full_space(sk);
-
-		/* tcp_full_space because it is guaranteed to be the first packet */
-		tcp_select_initial_window(tcp_full_space(sk),
-			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
-			&req->rcv_wnd,
-			&req->window_clamp,
-			ireq->wscale_ok,
-			&rcv_wscale,
-			dst_metric(dst, RTAX_INITRWND));
-		ireq->rcv_wscale = rcv_wscale;
-	}
-
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
-- 
cgit v1.2.3-71-gd317


From e110861f86094cd78cc85593b873970092deb43a Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Tue, 13 May 2014 10:17:33 -0700
Subject: net: add a sysctl to reflect the fwmark on replies

Kernel-originated IP packets that have no user socket associated
with them (e.g., ICMP errors and echo replies, TCP RSTs, etc.)
are emitted with a mark of zero. Add a sysctl to make them have
the same mark as the packet they are replying to.

This allows an administrator that wishes to do so to use
mark-based routing, firewalling, etc. for these replies by
marking the original packets inbound.

Tested using user-mode linux:
 - ICMP/ICMPv6 echo replies and errors.
 - TCP RST packets (IPv4 and IPv6).

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h           |  3 +++
 include/net/ipv6.h         |  3 +++
 include/net/netns/ipv4.h   |  2 ++
 include/net/netns/ipv6.h   |  1 +
 net/ipv4/icmp.c            | 11 +++++++++--
 net/ipv4/ip_output.c       |  3 ++-
 net/ipv4/sysctl_net_ipv4.c |  7 +++++++
 net/ipv6/icmp.c            |  6 ++++++
 net/ipv6/sysctl_net_ipv6.c |  7 +++++++
 net/ipv6/tcp_ipv6.c        |  1 +
 10 files changed, 41 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 55752985c144..14c50a1650ef 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -231,6 +231,9 @@ void ipfrag_init(void);
 
 void ip_static_sysctl_init(void);
 
+#define IP4_REPLY_MARK(net, mark) \
+	((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+
 static inline bool ip_is_fragment(const struct iphdr *iph)
 {
 	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 5b40ad297b8c..ba810d0546bc 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -113,6 +113,9 @@ struct frag_hdr {
 #define	IP6_MF		0x0001
 #define	IP6_OFFSET	0xFFF8
 
+#define IP6_REPLY_MARK(net, mark) \
+	((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0)
+
 #include <net/sock.h>
 
 /* sysctls */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index b2704fd0ec80..a32fc4d705da 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -77,6 +77,8 @@ struct netns_ipv4 {
 	int sysctl_ip_no_pmtu_disc;
 	int sysctl_ip_fwd_use_pmtu;
 
+	int sysctl_fwmark_reflect;
+
 	struct ping_group_range ping_group_range;
 
 	atomic_t dev_addr_genid;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 21edaf1f7916..19d3446e59d2 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 {
 	int flowlabel_consistency;
 	int icmpv6_time;
 	int anycast_src_echo_reply;
+	int fwmark_reflect;
 };
 
 struct netns_ipv6 {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fe52666dc43c..79c3d947a481 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	struct sock *sk;
 	struct inet_sock *inet;
 	__be32 daddr, saddr;
+	u32 mark = IP4_REPLY_MARK(net, skb->mark);
 
 	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
 		return;
@@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	icmp_param->data.icmph.checksum = 0;
 
 	inet->tos = ip_hdr(skb)->tos;
+	sk->sk_mark = mark;
 	daddr = ipc.addr = ip_hdr(skb)->saddr;
 	saddr = fib_compute_spec_dst(skb);
 	ipc.opt = NULL;
@@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	memset(&fl4, 0, sizeof(fl4));
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
+	fl4.flowi4_mark = mark;
 	fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
 	fl4.flowi4_proto = IPPROTO_ICMP;
 	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
@@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 					struct flowi4 *fl4,
 					struct sk_buff *skb_in,
 					const struct iphdr *iph,
-					__be32 saddr, u8 tos,
+					__be32 saddr, u8 tos, u32 mark,
 					int type, int code,
 					struct icmp_bxm *param)
 {
@@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
 	fl4->daddr = (param->replyopts.opt.opt.srr ?
 		      param->replyopts.opt.opt.faddr : iph->saddr);
 	fl4->saddr = saddr;
+	fl4->flowi4_mark = mark;
 	fl4->flowi4_tos = RT_TOS(tos);
 	fl4->flowi4_proto = IPPROTO_ICMP;
 	fl4->fl4_icmp_type = type;
@@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	struct flowi4 fl4;
 	__be32 saddr;
 	u8  tos;
+	u32 mark;
 	struct net *net;
 	struct sock *sk;
 
@@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
 					   IPTOS_PREC_INTERNETCONTROL) :
 					  iph->tos;
+	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
 	if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
 		goto out_unlock;
@@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	icmp_param->skb	  = skb_in;
 	icmp_param->offset = skb_network_offset(skb_in);
 	inet_sk(sk)->tos = tos;
+	sk->sk_mark = mark;
 	ipc.addr = iph->saddr;
 	ipc.opt = &icmp_param->replyopts.opt;
 	ipc.tx_flags = 0;
 	ipc.ttl = 0;
 	ipc.tos = -1;
 
-	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos,
+	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
 			       type, code, icmp_param);
 	if (IS_ERR(rt))
 		goto out_unlock;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6aa4380fde1a..6e231ab58d65 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1546,7 +1546,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
 			daddr = replyopts.opt.opt.faddr;
 	}
 
-	flowi4_init_output(&fl4, arg->bound_dev_if, 0,
+	flowi4_init_output(&fl4, arg->bound_dev_if,
+			   IP4_REPLY_MARK(net, skb->mark),
 			   RT_TOS(arg->tos),
 			   RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
 			   ip_reply_arg_flowi_flags(arg),
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5cde8f263d40..f50d51850285 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -838,6 +838,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "fwmark_reflect",
+		.data		= &init_net.ipv4.sysctl_fwmark_reflect,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8d3952796d39..f6c84a6eb238 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	int len;
 	int hlimit;
 	int err = 0;
+	u32 mark = IP6_REPLY_MARK(net, skb->mark);
 
 	if ((u8 *)hdr < skb->head ||
 	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
@@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	fl6.daddr = hdr->saddr;
 	if (saddr)
 		fl6.saddr = *saddr;
+	fl6.flowi6_mark = mark;
 	fl6.flowi6_oif = iif;
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
@@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	sk = icmpv6_xmit_lock(net);
 	if (sk == NULL)
 		return;
+	sk->sk_mark = mark;
 	np = inet6_sk(sk);
 
 	if (!icmpv6_xrlim_allow(sk, type, &fl6))
@@ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	int err = 0;
 	int hlimit;
 	u8 tclass;
+	u32 mark = IP6_REPLY_MARK(net, skb->mark);
 
 	saddr = &ipv6_hdr(skb)->daddr;
 
@@ -569,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 		fl6.saddr = *saddr;
 	fl6.flowi6_oif = skb->dev->ifindex;
 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+	fl6.flowi6_mark = mark;
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
 	sk = icmpv6_xmit_lock(net);
 	if (sk == NULL)
 		return;
+	sk->sk_mark = mark;
 	np = inet6_sk(sk);
 
 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7f405a168822..058f3eca2e53 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "fwmark_reflect",
+		.data		= &init_net.ipv6.sysctl.fwmark_reflect,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{ }
 };
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3a267bf14f2f..c54976a44425 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -812,6 +812,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 		fl6.flowi6_oif = inet6_iif(skb);
 	else
 		fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
 	fl6.fl6_dport = t1->dest;
 	fl6.fl6_sport = t1->source;
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
-- 
cgit v1.2.3-71-gd317


From 84f39b08d7868ce10eeaf640627cb89777f0ae93 Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Tue, 13 May 2014 10:17:35 -0700
Subject: net: support marking accepting TCP sockets

When using mark-based routing, sockets returned from accept()
may need to be marked differently depending on the incoming
connection request.

This is the case, for example, if different socket marks identify
different networks: a listening socket may want to accept
connections from all networks, but each connection should be
marked with the network that the request came in on, so that
subsequent packets are sent on the correct network.

This patch adds a sysctl to mark TCP sockets based on the fwmark
of the incoming SYN packet. If enabled, and an unmarked socket
receives a SYN, then the SYN packet's fwmark is written to the
connection's inet_request_sock, and later written back to the
accepted socket when the connection is established.  If the
socket already has a nonzero mark, then the behaviour is the same
as it is today, i.e., the listening socket's fwmark is used.

Black-box tested using user-mode linux:

- IPv4/IPv6 SYN+ACK, FIN, etc. packets are routed based on the
  mark of the incoming SYN packet.
- The socket returned by accept() is marked with the mark of the
  incoming SYN packet.
- Tested with syncookies=1 and syncookies=2.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h          | 10 ++++++++++
 include/net/netns/ipv4.h         |  1 +
 net/ipv4/inet_connection_sock.c  |  6 ++++--
 net/ipv4/syncookies.c            |  3 ++-
 net/ipv4/sysctl_net_ipv4.c       |  7 +++++++
 net/ipv4/tcp_ipv4.c              |  1 +
 net/ipv6/inet6_connection_sock.c |  2 +-
 net/ipv6/syncookies.c            |  4 +++-
 net/ipv6/tcp_ipv6.c              |  1 +
 9 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1833c3f389ee..b1edf17bec01 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -90,6 +90,7 @@ struct inet_request_sock {
 	kmemcheck_bitfield_end(flags);
 	struct ip_options_rcu	*opt;
 	struct sk_buff		*pktopts;
+	u32                     ir_mark;
 };
 
 static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -97,6 +98,15 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
 	return (struct inet_request_sock *)sk;
 }
 
+static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
+{
+	if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) {
+		return skb->mark;
+	} else {
+		return sk->sk_mark;
+	}
+}
+
 struct inet_cork {
 	unsigned int		flags;
 	__be32			addr;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index a32fc4d705da..2f0cfad66666 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -78,6 +78,7 @@ struct netns_ipv4 {
 	int sysctl_ip_fwd_use_pmtu;
 
 	int sysctl_fwmark_reflect;
+	int sysctl_tcp_fwmark_accept;
 
 	struct ping_group_range ping_group_range;
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a56b8e6e866a..12e502cbfdc7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -408,7 +408,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 	struct net *net = sock_net(sk);
 	int flags = inet_sk_flowi_flags(sk);
 
-	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+	flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol,
 			   flags,
@@ -445,7 +445,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
 
 	rcu_read_lock();
 	opt = rcu_dereference(newinet->inet_opt);
-	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+	flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
 			   sk->sk_protocol, inet_sk_flowi_flags(sk),
 			   (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
@@ -680,6 +680,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 		inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
 		newsk->sk_write_space = sk_stream_write_space;
 
+		newsk->sk_mark = inet_rsk(req)->ir_mark;
+
 		newicsk->icsk_retransmits = 0;
 		newicsk->icsk_backoff	  = 0;
 		newicsk->icsk_probes_out  = 0;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f2ed13c2125f..c86624b36a62 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -303,6 +303,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->ir_rmt_port	= th->source;
 	ireq->ir_loc_addr	= ip_hdr(skb)->daddr;
 	ireq->ir_rmt_addr	= ip_hdr(skb)->saddr;
+	ireq->ir_mark		= inet_request_mark(sk, skb);
 	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
@@ -339,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	 * hasn't changed since we received the original syn, but I see
 	 * no easy way to do this.
 	 */
-	flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
+	flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
 			   inet_sk_flowi_flags(sk),
 			   (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index f50d51850285..a33b9fbc1d80 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -845,6 +845,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "tcp_fwmark_accept",
+		.data		= &init_net.ipv4.sysctl_tcp_fwmark_accept,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a2780e5334c9..77cccda1ad0c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1318,6 +1318,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq->ir_rmt_addr = saddr;
 	ireq->no_srccheck = inet_sk(sk)->transparent;
 	ireq->opt = tcp_v4_save_options(skb);
+	ireq->ir_mark = inet_request_mark(sk, skb);
 
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index d4ade34ab375..a245e5ddffbd 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 	final_p = fl6_update_dst(fl6, np->opt, &final);
 	fl6->saddr = ireq->ir_v6_loc_addr;
 	fl6->flowi6_oif = ireq->ir_iif;
-	fl6->flowi6_mark = sk->sk_mark;
+	fl6->flowi6_mark = ireq->ir_mark;
 	fl6->fl6_dport = ireq->ir_rmt_port;
 	fl6->fl6_sport = htons(ireq->ir_num);
 	security_req_classify_flow(req, flowi6_to_flowi(fl6));
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb53a5e73c1a..a822b880689b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
 		ireq->ir_iif = inet6_iif(skb);
 
+	ireq->ir_mark = inet_request_mark(sk, skb);
+
 	req->expires = 0UL;
 	req->num_retrans = 0;
 	ireq->ecn_ok		= ecn_ok;
@@ -242,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		final_p = fl6_update_dst(&fl6, np->opt, &final);
 		fl6.saddr = ireq->ir_v6_loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
-		fl6.flowi6_mark = sk->sk_mark;
+		fl6.flowi6_mark = ireq->ir_mark;
 		fl6.fl6_dport = ireq->ir_rmt_port;
 		fl6.fl6_sport = inet_sk(sk)->inet_sport;
 		security_req_classify_flow(req, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c54976a44425..f07b2abba359 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1034,6 +1034,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		TCP_ECN_create_request(req, skb, sock_net(sk));
 
 	ireq->ir_iif = sk->sk_bound_dev_if;
+	ireq->ir_mark = inet_request_mark(sk, skb);
 
 	/* So that link locals have meaning */
 	if (!sk->sk_bound_dev_if &&
-- 
cgit v1.2.3-71-gd317


From cea092c9488cbb22c8b70336ab1413e0daf350f0 Mon Sep 17 00:00:00 2001
From: Brian W Hart <hartb@linux.vnet.ibm.com>
Date: Wed, 14 May 2014 10:33:45 +0930
Subject: cpumask.h: silence warning with -Wsign-compare
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Silence the warning when building with -Wsign-compare when cpumask.h
is included:

include/linux/cpumask.h: In function ‘cpumask_parse’:
include/linux/cpumask.h:603:26: warning: signed and unsigned type in conditional expression [-Wsign-compare]
  int len = nl ? nl - buf : strlen(buf);
                          ^

V2: Rusty pointed out that unsigned should be used instead.

Signed-off-by: Brian W Hart <hartb@linux.vnet.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/cpumask.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d08e4d2a9b92..3557ea7b2049 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -600,7 +600,7 @@ static inline int cpulist_scnprintf(char *buf, int len,
 static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
 {
 	char *nl = strchr(buf, '\n');
-	int len = nl ? nl - buf : strlen(buf);
+	unsigned int len = nl ? (unsigned int)(nl - buf) : strlen(buf);
 
 	return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }
-- 
cgit v1.2.3-71-gd317


From 08e53fbdb85c0f6f45c0f7c1ea3defc1752a95ce Mon Sep 17 00:00:00 2001
From: Amos Kong <akong@redhat.com>
Date: Wed, 14 May 2014 10:33:46 +0930
Subject: virtio-rng: support multiple virtio-rng devices

Current hwrng core supports to register multiple hwrng devices,
and there is only one device really works in the same time.
QEMU alsu supports to have multiple virtio-rng backends.

This patch changes virtio-rng driver to support multiple
virtio-rng devices.

]# cat /sys/class/misc/hw_random/rng_available
virtio_rng.0 virtio_rng.1
]# cat /sys/class/misc/hw_random/rng_current
virtio_rng.0
]# echo -n virtio_rng.1 > /sys/class/misc/hw_random/rng_current
]# dd if=/dev/hwrng of=/dev/null

Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/hw_random/virtio-rng.c | 102 ++++++++++++++++++++++--------------
 include/linux/hw_random.h           |   2 +-
 2 files changed, 64 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 2ce0e225e58c..12e242bbb0f5 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -25,88 +25,108 @@
 #include <linux/virtio_rng.h>
 #include <linux/module.h>
 
-static struct virtqueue *vq;
-static unsigned int data_avail;
-static DECLARE_COMPLETION(have_data);
-static bool busy;
+
+struct virtrng_info {
+	struct virtio_device *vdev;
+	struct hwrng hwrng;
+	struct virtqueue *vq;
+	unsigned int data_avail;
+	struct completion have_data;
+	bool busy;
+};
 
 static void random_recv_done(struct virtqueue *vq)
 {
+	struct virtrng_info *vi = vq->vdev->priv;
+
 	/* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */
-	if (!virtqueue_get_buf(vq, &data_avail))
+	if (!virtqueue_get_buf(vi->vq, &vi->data_avail))
 		return;
 
-	complete(&have_data);
+	complete(&vi->have_data);
 }
 
 /* The host will fill any buffer we give it with sweet, sweet randomness. */
-static void register_buffer(u8 *buf, size_t size)
+static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size)
 {
 	struct scatterlist sg;
 
 	sg_init_one(&sg, buf, size);
 
 	/* There should always be room for one buffer. */
-	virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL);
+	virtqueue_add_inbuf(vi->vq, &sg, 1, buf, GFP_KERNEL);
 
-	virtqueue_kick(vq);
+	virtqueue_kick(vi->vq);
 }
 
 static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
 {
 	int ret;
+	struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
 
-	if (!busy) {
-		busy = true;
-		init_completion(&have_data);
-		register_buffer(buf, size);
+	if (!vi->busy) {
+		vi->busy = true;
+		init_completion(&vi->have_data);
+		register_buffer(vi, buf, size);
 	}
 
 	if (!wait)
 		return 0;
 
-	ret = wait_for_completion_killable(&have_data);
+	ret = wait_for_completion_killable(&vi->have_data);
 	if (ret < 0)
 		return ret;
 
-	busy = false;
+	vi->busy = false;
 
-	return data_avail;
+	return vi->data_avail;
 }
 
 static void virtio_cleanup(struct hwrng *rng)
 {
-	if (busy)
-		wait_for_completion(&have_data);
-}
-
+	struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
 
-static struct hwrng virtio_hwrng = {
-	.name		= "virtio",
-	.cleanup	= virtio_cleanup,
-	.read		= virtio_read,
-};
+	if (vi->busy)
+		wait_for_completion(&vi->have_data);
+}
 
 static int probe_common(struct virtio_device *vdev)
 {
-	int err;
+	int err, i;
+	struct virtrng_info *vi = NULL;
+
+	vi = kmalloc(sizeof(struct virtrng_info), GFP_KERNEL);
+	vi->hwrng.name = kmalloc(40, GFP_KERNEL);
+	init_completion(&vi->have_data);
+
+	vi->hwrng.read = virtio_read;
+	vi->hwrng.cleanup = virtio_cleanup;
+	vi->hwrng.priv = (unsigned long)vi;
+	vdev->priv = vi;
 
-	if (vq) {
-		/* We only support one device for now */
-		return -EBUSY;
-	}
 	/* We expect a single virtqueue. */
-	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
-	if (IS_ERR(vq)) {
-		err = PTR_ERR(vq);
-		vq = NULL;
+	vi->vq = virtio_find_single_vq(vdev, random_recv_done, "input");
+	if (IS_ERR(vi->vq)) {
+		err = PTR_ERR(vi->vq);
+		kfree(vi->hwrng.name);
+		vi->vq = NULL;
+		kfree(vi);
+		vi = NULL;
 		return err;
 	}
 
-	err = hwrng_register(&virtio_hwrng);
+	i = 0;
+	do {
+		sprintf(vi->hwrng.name, "virtio_rng.%d", i++);
+		err = hwrng_register(&vi->hwrng);
+	} while (err == -EEXIST);
+
 	if (err) {
 		vdev->config->del_vqs(vdev);
-		vq = NULL;
+		kfree(vi->hwrng.name);
+		vi->vq = NULL;
+		kfree(vi);
+		vi = NULL;
 		return err;
 	}
 
@@ -115,11 +135,15 @@ static int probe_common(struct virtio_device *vdev)
 
 static void remove_common(struct virtio_device *vdev)
 {
+	struct virtrng_info *vi = vdev->priv;
 	vdev->config->reset(vdev);
-	busy = false;
-	hwrng_unregister(&virtio_hwrng);
+	vi->busy = false;
+	hwrng_unregister(&vi->hwrng);
 	vdev->config->del_vqs(vdev);
-	vq = NULL;
+	kfree(vi->hwrng.name);
+	vi->vq = NULL;
+	kfree(vi);
+	vi = NULL;
 }
 
 static int virtrng_probe(struct virtio_device *vdev)
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index b4b0eef5fddf..02d9c87be54c 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -31,7 +31,7 @@
  * @priv:		Private data, for use by the RNG driver.
  */
 struct hwrng {
-	const char *name;
+	char *name;
 	int (*init)(struct hwrng *rng);
 	void (*cleanup)(struct hwrng *rng);
 	int (*data_present)(struct hwrng *rng, int wait);
-- 
cgit v1.2.3-71-gd317


From b02ef20a9fba08948e643d3eec0efadf1da01a44 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 12 May 2014 18:24:45 +0200
Subject: uprobes/x86: Fix the wrong ->si_addr when xol triggers a trap

If the probed insn triggers a trap, ->si_addr = regs->ip is technically
correct, but this is not what the signal handler wants; we need to pass
the address of the probed insn, not the address of xol slot.

Add the new arch-agnostic helper, uprobe_get_trap_addr(), and change
fill_trap_info() and math_error() to use it. !CONFIG_UPROBES case in
uprobes.h uses a macro to avoid include hell and ensure that it can be
compiled even if an architecture doesn't define instruction_pointer().

Test-case:

	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	extern void probe_div(void);

	void sigh(int sig, siginfo_t *info, void *c)
	{
		int passed = (info->si_addr == probe_div);
		printf(passed ? "PASS\n" : "FAIL\n");
		_exit(!passed);
	}

	int main(void)
	{
		struct sigaction sa = {
			.sa_sigaction	= sigh,
			.sa_flags	= SA_SIGINFO,
		};

		sigaction(SIGFPE, &sa, NULL);

		asm (
			"xor %ecx,%ecx\n"
			".globl probe_div; probe_div:\n"
			"idiv %ecx\n"
		);

		return 0;
	}

it fails if probe_div() is probed.

Note: show_unhandled_signals users should probably use this helper too,
but we need to cleanup them first.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
---
 arch/x86/kernel/traps.c |  7 ++++---
 include/linux/uprobes.h |  4 ++++
 kernel/events/uprobes.c | 10 ++++++++++
 3 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 73b3ea32245a..3fdb20548c4b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
+#include <linux/uprobes.h>
 #include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
@@ -148,11 +149,11 @@ static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
 
 	case X86_TRAP_DE:
 		sicode = FPE_INTDIV;
-		siaddr = regs->ip;
+		siaddr = uprobe_get_trap_addr(regs);
 		break;
 	case X86_TRAP_UD:
 		sicode = ILL_ILLOPN;
-		siaddr = regs->ip;
+		siaddr = uprobe_get_trap_addr(regs);
 		break;
 	case X86_TRAP_AC:
 		sicode = BUS_ADRALN;
@@ -531,7 +532,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	task->thread.error_code = error_code;
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
-	info.si_addr = (void __user *)regs->ip;
+	info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
 	if (trapnr == X86_TRAP_MF) {
 		unsigned short cwd, swd;
 		/*
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index edff2b97b864..88c3b7e8b384 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -102,6 +102,7 @@ extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, u
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
+extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
 extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
@@ -130,6 +131,9 @@ extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *r
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
+
+#define uprobe_get_trap_addr(regs)	instruction_pointer(regs)
+
 static inline int
 uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a13251e8bfa4..3b02c72938a8 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1351,6 +1351,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
 	return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
 }
 
+unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	if (unlikely(utask && utask->active_uprobe))
+		return utask->vaddr;
+
+	return instruction_pointer(regs);
+}
+
 /*
  * Called with no locks held.
  * Called in context of a exiting or a exec-ing thread.
-- 
cgit v1.2.3-71-gd317


From 9d800df12d31734a6853915e9d2deb5d6747985f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2014 09:15:00 -0400
Subject: cgroup: rename cgroup->dummy_css to ->self and move it to the top

cgroup->dummy_css is used as the placeholder css when performing css
oriended operations on the cgroup.  We're gonna shift more cgroup
management to this css.  Let's rename it to ->self and move it to the
top.

This is pure rename and field relocation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  6 +++---
 kernel/cgroup.c        | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index aa7353deaaf3..164851e388e7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -143,6 +143,9 @@ enum {
 };
 
 struct cgroup {
+	/* self css with NULL ->ss, points back to this cgroup */
+	struct cgroup_subsys_state self;
+
 	unsigned long flags;		/* "unsigned long" so bitops work */
 
 	/*
@@ -224,9 +227,6 @@ struct cgroup {
 	struct list_head pidlists;
 	struct mutex pidlist_mutex;
 
-	/* dummy css with NULL ->ss, points back to this cgroup */
-	struct cgroup_subsys_state dummy_css;
-
 	/* For css percpu_ref killing and RCU-protected deletion */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f36fd9c15b3a..b57a949ae4bc 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -220,7 +220,7 @@ static void cgroup_idr_remove(struct idr *idr, int id)
 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
  * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest (%NULL returns the dummy_css)
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
  *
  * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
  * function must be called either under cgroup_mutex or rcu_read_lock() and
@@ -235,13 +235,13 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
 		return rcu_dereference_check(cgrp->subsys[ss->id],
 					lockdep_is_held(&cgroup_mutex));
 	else
-		return &cgrp->dummy_css;
+		return &cgrp->self;
 }
 
 /**
  * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
  * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest (%NULL returns the dummy_css)
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
  *
  * Similar to cgroup_css() but returns the effctive css, which is defined
  * as the matching css of the nearest ancestor including self which has @ss
@@ -254,7 +254,7 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 	lockdep_assert_held(&cgroup_mutex);
 
 	if (!ss)
-		return &cgrp->dummy_css;
+		return &cgrp->self;
 
 	if (!(cgrp->root->subsys_mask & (1 << ss->id)))
 		return NULL;
@@ -288,7 +288,7 @@ struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
 	if (cft->ss)
 		return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
 	else
-		return &cgrp->dummy_css;
+		return &cgrp->self;
 }
 EXPORT_SYMBOL_GPL(of_css);
 
@@ -1551,7 +1551,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
-	cgrp->dummy_css.cgroup = cgrp;
+	cgrp->self.cgroup = cgrp;
 
 	for_each_subsys(ss, ssid)
 		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
@@ -3454,7 +3454,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 	 * ->can_attach() fails.
 	 */
 	do {
-		css_task_iter_start(&from->dummy_css, &it);
+		css_task_iter_start(&from->self, &it);
 		task = css_task_iter_next(&it);
 		if (task)
 			get_task_struct(task);
@@ -3719,7 +3719,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
 	if (!array)
 		return -ENOMEM;
 	/* now, populate the array */
-	css_task_iter_start(&cgrp->dummy_css, &it);
+	css_task_iter_start(&cgrp->self, &it);
 	while ((tsk = css_task_iter_next(&it))) {
 		if (unlikely(n == length))
 			break;
@@ -3793,7 +3793,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 	}
 	rcu_read_unlock();
 
-	css_task_iter_start(&cgrp->dummy_css, &it);
+	css_task_iter_start(&cgrp->self, &it);
 	while ((tsk = css_task_iter_next(&it))) {
 		switch (tsk->state) {
 		case TASK_RUNNING:
@@ -4274,7 +4274,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	init_cgroup_housekeeping(cgrp);
 
 	cgrp->parent = parent;
-	cgrp->dummy_css.parent = &parent->dummy_css;
+	cgrp->self.parent = &parent->self;
 	cgrp->root = root;
 
 	if (notify_on_release(parent))
-- 
cgit v1.2.3-71-gd317


From 249f3468a282dcbad53484c821bebb447f14ee03 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2014 09:15:01 -0400
Subject: cgroup: remove cgroup_destory_css_killed()

cgroup_destroy_css_killed() is cgroup destruction stage which happens
after all csses are offlined.  After the recent updates, it no longer
does anything other than putting the base reference.  This patch
removes the function and makes cgroup_destroy_locked() put the base
ref at the end isntead.

This also makes cgroup->nr_css unnecessary.  Removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  3 ---
 kernel/cgroup.c        | 62 +++++---------------------------------------------
 2 files changed, 6 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 164851e388e7..160fcc69149e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -158,9 +158,6 @@ struct cgroup {
 	 */
 	int id;
 
-	/* the number of attached css's */
-	int nr_css;
-
 	/*
 	 * If this cgroup contains any tasks, it contributes one to
 	 * populated_cnt.  All children with non-zero popuplated_cnt of
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e9aa2a51ca68..4a94b0be598d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -178,7 +178,6 @@ static struct cftype cgroup_base_files[];
 static void cgroup_put(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned int ss_mask);
-static void cgroup_destroy_css_killed(struct cgroup *cgrp);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
 static void kill_css(struct cgroup_subsys_state *css);
@@ -4169,7 +4168,6 @@ static int online_css(struct cgroup_subsys_state *css)
 		ret = ss->css_online(css);
 	if (!ret) {
 		css->flags |= CSS_ONLINE;
-		css->cgroup->nr_css++;
 		rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
 	}
 	return ret;
@@ -4189,7 +4187,6 @@ static void offline_css(struct cgroup_subsys_state *css)
 		ss->css_offline(css);
 
 	css->flags &= ~CSS_ONLINE;
-	css->cgroup->nr_css--;
 	RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
 
 	wake_up_all(&css->cgroup->offline_waitq);
@@ -4374,39 +4371,18 @@ out_destroy:
 
 /*
  * This is called when the refcnt of a css is confirmed to be killed.
- * css_tryget_online() is now guaranteed to fail.
+ * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
+ * initate destruction and put the css ref from kill_css().
  */
 static void css_killed_work_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
 		container_of(work, struct cgroup_subsys_state, destroy_work);
-	struct cgroup *cgrp = css->cgroup;
 
 	mutex_lock(&cgroup_mutex);
-
-	/*
-	 * css_tryget_online() is guaranteed to fail now.  Tell subsystems
-	 * to initate destruction.
-	 */
 	offline_css(css);
-
-	/*
-	 * If @cgrp is marked dead, it's waiting for refs of all css's to
-	 * be disabled before proceeding to the second phase of cgroup
-	 * destruction.  If we are the last one, kick it off.
-	 */
-	if (!cgrp->nr_css && cgroup_is_dead(cgrp))
-		cgroup_destroy_css_killed(cgrp);
-
 	mutex_unlock(&cgroup_mutex);
 
-	/*
-	 * Put the css refs from kill_css().  Each css holds an extra
-	 * reference to the cgroup's dentry and cgroup removal proceeds
-	 * regardless of css refs.  On the last put of each css, whenever
-	 * that may be, the extra dentry ref is put so that dentry
-	 * destruction happens only after all css's are released.
-	 */
 	css_put(css);
 }
 
@@ -4518,11 +4494,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 */
 	set_bit(CGRP_DEAD, &cgrp->flags);
 
-	/*
-	 * Initiate massacre of all css's.  cgroup_destroy_css_killed()
-	 * will be invoked to perform the rest of destruction once the
-	 * percpu refs of all css's are confirmed to be killed.
-	 */
+	/* initiate massacre of all css's */
 	for_each_css(css, ssid, cgrp)
 		kill_css(css);
 
@@ -4532,15 +4504,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 		list_del_init(&cgrp->release_list);
 	raw_spin_unlock(&release_list_lock);
 
-	/*
-	 * If @cgrp has css's attached, the second stage of cgroup
-	 * destruction is kicked off from css_killed_work_fn() after the
-	 * refs of all attached css's are killed.  If @cgrp doesn't have
-	 * any css, we kick it off here.
-	 */
-	if (!cgrp->nr_css)
-		cgroup_destroy_css_killed(cgrp);
-
 	/*
 	 * Remove @cgrp directory along with the base files.  @cgrp has an
 	 * extra ref on its kn.
@@ -4550,25 +4513,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	set_bit(CGRP_RELEASABLE, &cgrp->parent->flags);
 	check_for_release(cgrp->parent);
 
+	/* put the base reference */
+	cgroup_put(cgrp);
+
 	return 0;
 };
 
-/**
- * cgroup_destroy_css_killed - the second step of cgroup destruction
- * @cgrp: the cgroup whose csses have just finished offlining
- *
- * This function is invoked from a work item for a cgroup which is being
- * destroyed after all css's are offlined and performs the rest of
- * destruction.  This is the second step of destruction described in the
- * comment above cgroup_destroy_locked().
- */
-static void cgroup_destroy_css_killed(struct cgroup *cgrp)
-{
-	lockdep_assert_held(&cgroup_mutex);
-
-	cgroup_put(cgrp);
-}
-
 static int cgroup_rmdir(struct kernfs_node *kn)
 {
 	struct cgroup *cgrp;
-- 
cgit v1.2.3-71-gd317


From 9395a4500404e05173eda9a2d198b6fa500e90c5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2014 09:15:02 -0400
Subject: cgroup: enable refcnting for root csses

Currently, css_get(), css_tryget() and css_tryget_online() are noops
for root csses as an optimization; however, we're planning to use css
refcnts to track of cgroup lifetime too and root cgroups also need to
be reference counted.  Since css has been converted to percpu_refcnt,
the overhead of refcnting is miniscule and this optimization isn't too
meaningful anymore.  Furthermore, controllers which optimize the root
cgroup often never even invoke these functions in their hot paths.

This patch enables refcnting for root csses too.  This makes CSS_ROOT
flag unused and removes it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 10 ++--------
 kernel/cgroup.c        |  6 +++---
 2 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 160fcc69149e..286e39e4e9bf 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -77,7 +77,6 @@ struct cgroup_subsys_state {
 
 /* bits in struct cgroup_subsys_state flags field */
 enum {
-	CSS_ROOT	= (1 << 0), /* this CSS is the root of the subsystem */
 	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
 };
 
@@ -89,9 +88,7 @@ enum {
  */
 static inline void css_get(struct cgroup_subsys_state *css)
 {
-	/* We don't need to reference count the root state */
-	if (!(css->flags & CSS_ROOT))
-		percpu_ref_get(&css->refcnt);
+	percpu_ref_get(&css->refcnt);
 }
 
 /**
@@ -106,8 +103,6 @@ static inline void css_get(struct cgroup_subsys_state *css)
  */
 static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 {
-	if (css->flags & CSS_ROOT)
-		return true;
 	return percpu_ref_tryget_live(&css->refcnt);
 }
 
@@ -119,8 +114,7 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css)
  */
 static inline void css_put(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_ROOT))
-		percpu_ref_put(&css->refcnt);
+	percpu_ref_put(&css->refcnt);
 }
 
 /* bits in struct cgroup flags field */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e694f4153edb..cb5864e36f99 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4158,8 +4158,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	if (cgrp->parent) {
 		css->parent = cgroup_css(cgrp->parent, ss);
 		css_get(css->parent);
-	} else {
-		css->flags |= CSS_ROOT;
 	}
 
 	BUG_ON(cgroup_css(cgrp, ss));
@@ -4582,9 +4580,10 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
 	BUG_ON(IS_ERR(css));
 	init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
 	if (early) {
-		/* idr_alloc() can't be called safely during early init */
+		/* allocation can't be done safely during early init */
 		css->id = 1;
 	} else {
+		BUG_ON(percpu_ref_init(&css->refcnt, css_release));
 		css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
 		BUG_ON(css->id < 0);
 	}
@@ -4671,6 +4670,7 @@ int __init cgroup_init(void)
 			struct cgroup_subsys_state *css =
 				init_css_set.subsys[ss->id];
 
+			BUG_ON(percpu_ref_init(&css->refcnt, css_release));
 			css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
 						   GFP_KERNEL);
 			BUG_ON(css->id < 0);
-- 
cgit v1.2.3-71-gd317


From 9d755d33f0db8c9b49438f71b38a56e375b34360 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2014 09:15:02 -0400
Subject: cgroup: use cgroup->self.refcnt for cgroup refcnting

Currently cgroup implements refcnting separately using atomic_t
cgroup->refcnt.  The destruction paths of cgroup and css are rather
complex and bear a lot of similiarities including the use of RCU and
bouncing to a work item.

This patch makes cgroup use the refcnt of self css for refcnting
instead of using its own.  This makes cgroup refcnting use css's
percpu refcnt and share the destruction mechanism.

* css_release_work_fn() and css_free_work_fn() are updated to handle
  both csses and cgroups.  This is a bit messy but should do until we
  can make cgroup->self a full css, which currently can't be done
  thanks to multiple hierarchies.

* cgroup_destroy_locked() now performs
  percpu_ref_kill(&cgrp->self.refcnt) instead of cgroup_put(cgrp).

* Negative refcnt sanity check in cgroup_get() is no longer necessary
  as percpu_ref already handles it.

* Similarly, as a cgroup which hasn't been killed will never be
  released regardless of its refcnt value and percpu_ref has sanity
  check on kill, cgroup_is_dead() sanity check in cgroup_put() is no
  longer necessary.

* As whether a refcnt reached zero or not can only be decided after
  the reference count is killed, cgroup_root->cgrp's refcnting can no
  longer be used to decide whether to kill the root or not.  Let's
  make cgroup_kill_sb() explicitly initiate destruction if the root
  doesn't have any children.  This makes sense anyway as unmounted
  cgroup hierarchy without any children should be destroyed.

While this is a bit messy, this will allow pushing more bookkeeping
towards cgroup->self and thus handling cgroups and csses in more
uniform way.  In the very long term, it should be possible to
introduce a base subsystem and convert the self css to a proper one
making things whole lot simpler and unified.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |   6 --
 kernel/cgroup.c        | 146 +++++++++++++++++++++++++++----------------------
 2 files changed, 80 insertions(+), 72 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 286e39e4e9bf..76dadd77a120 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -160,8 +160,6 @@ struct cgroup {
 	 */
 	int populated_cnt;
 
-	atomic_t refcnt;
-
 	/*
 	 * We link our 'sibling' struct into our parent's 'children'.
 	 * Our children link their 'sibling' into our 'children'.
@@ -218,10 +216,6 @@ struct cgroup {
 	struct list_head pidlists;
 	struct mutex pidlist_mutex;
 
-	/* For css percpu_ref killing and RCU-protected deletion */
-	struct rcu_head rcu_head;
-	struct work_struct destroy_work;
-
 	/* used to wait for offlining of csses */
 	wait_queue_head_t offline_waitq;
 };
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cb5864e36f99..c01e8e8dfad0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -176,10 +176,12 @@ static int need_forkexit_callback __read_mostly;
 static struct cftype cgroup_base_files[];
 
 static void cgroup_put(struct cgroup *cgrp);
+static bool cgroup_has_live_children(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned int ss_mask);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
+static void css_release(struct percpu_ref *ref);
 static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			      bool is_add);
@@ -1008,62 +1010,15 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
 	return mode;
 }
 
-static void cgroup_free_fn(struct work_struct *work)
-{
-	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
-
-	atomic_dec(&cgrp->root->nr_cgrps);
-	cgroup_pidlist_destroy_all(cgrp);
-
-	if (cgrp->parent) {
-		/*
-		 * We get a ref to the parent, and put the ref when this
-		 * cgroup is being freed, so it's guaranteed that the
-		 * parent won't be destroyed before its children.
-		 */
-		cgroup_put(cgrp->parent);
-		kernfs_put(cgrp->kn);
-		kfree(cgrp);
-	} else {
-		/*
-		 * This is root cgroup's refcnt reaching zero, which
-		 * indicates that the root should be released.
-		 */
-		cgroup_destroy_root(cgrp->root);
-	}
-}
-
-static void cgroup_free_rcu(struct rcu_head *head)
-{
-	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
-
-	INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
-	queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
-}
-
 static void cgroup_get(struct cgroup *cgrp)
 {
 	WARN_ON_ONCE(cgroup_is_dead(cgrp));
-	WARN_ON_ONCE(atomic_read(&cgrp->refcnt) <= 0);
-	atomic_inc(&cgrp->refcnt);
+	css_get(&cgrp->self);
 }
 
 static void cgroup_put(struct cgroup *cgrp)
 {
-	if (!atomic_dec_and_test(&cgrp->refcnt))
-		return;
-	if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp)))
-		return;
-
-	/* delete this cgroup from parent->children */
-	mutex_lock(&cgroup_mutex);
-	list_del_rcu(&cgrp->sibling);
-	mutex_unlock(&cgroup_mutex);
-
-	cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
-	cgrp->id = -1;
-
-	call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
+	css_put(&cgrp->self);
 }
 
 /**
@@ -1548,7 +1503,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	struct cgroup_subsys *ss;
 	int ssid;
 
-	atomic_set(&cgrp->refcnt, 1);
 	INIT_LIST_HEAD(&cgrp->sibling);
 	INIT_LIST_HEAD(&cgrp->children);
 	INIT_LIST_HEAD(&cgrp->cset_links);
@@ -1597,6 +1551,10 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 		goto out;
 	root_cgrp->id = ret;
 
+	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release);
+	if (ret)
+		goto out;
+
 	/*
 	 * We're accessing css_set_count without locking css_set_rwsem here,
 	 * but that's OK - it can only be increased by someone holding
@@ -1605,11 +1563,11 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 	 */
 	ret = allocate_cgrp_cset_links(css_set_count, &tmp_links);
 	if (ret)
-		goto out;
+		goto cancel_ref;
 
 	ret = cgroup_init_root_id(root);
 	if (ret)
-		goto out;
+		goto cancel_ref;
 
 	root->kf_root = kernfs_create_root(&cgroup_kf_syscall_ops,
 					   KERNFS_ROOT_CREATE_DEACTIVATED,
@@ -1657,6 +1615,8 @@ destroy_root:
 	root->kf_root = NULL;
 exit_root_id:
 	cgroup_exit_root_id(root);
+cancel_ref:
+	percpu_ref_cancel_init(&root_cgrp->self.refcnt);
 out:
 	free_cgrp_cset_links(&tmp_links);
 	return ret;
@@ -1735,13 +1695,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		}
 
 		/*
-		 * A root's lifetime is governed by its root cgroup.  Zero
-		 * ref indicate that the root is being destroyed.  Wait for
-		 * destruction to complete so that the subsystems are free.
-		 * We can use wait_queue for the wait but this path is
-		 * super cold.  Let's just sleep for a bit and retry.
+		 * A root's lifetime is governed by its root cgroup.
+		 * tryget_live failure indicate that the root is being
+		 * destroyed.  Wait for destruction to complete so that the
+		 * subsystems are free.  We can use wait_queue for the wait
+		 * but this path is super cold.  Let's just sleep for a bit
+		 * and retry.
 		 */
-		if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
+		if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
 			mutex_unlock(&cgroup_mutex);
 			msleep(10);
 			ret = restart_syscall();
@@ -1794,7 +1755,16 @@ static void cgroup_kill_sb(struct super_block *sb)
 	struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
 	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
 
-	cgroup_put(&root->cgrp);
+	/*
+	 * If @root doesn't have any mounts or children, start killing it.
+	 * This prevents new mounts by disabling percpu_ref_tryget_live().
+	 * cgroup_mount() may wait for @root's release.
+	 */
+	if (cgroup_has_live_children(&root->cgrp))
+		cgroup_put(&root->cgrp);
+	else
+		percpu_ref_kill(&root->cgrp.self.refcnt);
+
 	kernfs_kill_sb(sb);
 }
 
@@ -4110,11 +4080,37 @@ static void css_free_work_fn(struct work_struct *work)
 		container_of(work, struct cgroup_subsys_state, destroy_work);
 	struct cgroup *cgrp = css->cgroup;
 
-	if (css->parent)
-		css_put(css->parent);
+	if (css->ss) {
+		/* css free path */
+		if (css->parent)
+			css_put(css->parent);
 
-	css->ss->css_free(css);
-	cgroup_put(cgrp);
+		css->ss->css_free(css);
+		cgroup_put(cgrp);
+	} else {
+		/* cgroup free path */
+		atomic_dec(&cgrp->root->nr_cgrps);
+		cgroup_pidlist_destroy_all(cgrp);
+
+		if (cgrp->parent) {
+			/*
+			 * We get a ref to the parent, and put the ref when
+			 * this cgroup is being freed, so it's guaranteed
+			 * that the parent won't be destroyed before its
+			 * children.
+			 */
+			cgroup_put(cgrp->parent);
+			kernfs_put(cgrp->kn);
+			kfree(cgrp);
+		} else {
+			/*
+			 * This is root cgroup's refcnt reaching zero,
+			 * which indicates that the root should be
+			 * released.
+			 */
+			cgroup_destroy_root(cgrp->root);
+		}
+	}
 }
 
 static void css_free_rcu_fn(struct rcu_head *rcu_head)
@@ -4131,8 +4127,20 @@ static void css_release_work_fn(struct work_struct *work)
 	struct cgroup_subsys_state *css =
 		container_of(work, struct cgroup_subsys_state, destroy_work);
 	struct cgroup_subsys *ss = css->ss;
+	struct cgroup *cgrp = css->cgroup;
 
-	cgroup_idr_remove(&ss->css_idr, css->id);
+	if (ss) {
+		/* css release path */
+		cgroup_idr_remove(&ss->css_idr, css->id);
+	} else {
+		/* cgroup release path */
+		mutex_lock(&cgroup_mutex);
+		list_del_rcu(&cgrp->sibling);
+		mutex_unlock(&cgroup_mutex);
+
+		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
+		cgrp->id = -1;
+	}
 
 	call_rcu(&css->rcu_head, css_free_rcu_fn);
 }
@@ -4285,6 +4293,10 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 		goto out_unlock;
 	}
 
+	ret = percpu_ref_init(&cgrp->self.refcnt, css_release);
+	if (ret)
+		goto out_free_cgrp;
+
 	/*
 	 * Temporarily set the pointer to NULL, so idr_find() won't return
 	 * a half-baked cgroup.
@@ -4292,7 +4304,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_NOWAIT);
 	if (cgrp->id < 0) {
 		ret = -ENOMEM;
-		goto out_free_cgrp;
+		goto out_cancel_ref;
 	}
 
 	init_cgroup_housekeeping(cgrp);
@@ -4365,6 +4377,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 
 out_free_id:
 	cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
+out_cancel_ref:
+	percpu_ref_cancel_init(&cgrp->self.refcnt);
 out_free_cgrp:
 	kfree(cgrp);
 out_unlock:
@@ -4521,7 +4535,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	check_for_release(cgrp->parent);
 
 	/* put the base reference */
-	cgroup_put(cgrp);
+	percpu_ref_kill(&cgrp->self.refcnt);
 
 	return 0;
 };
-- 
cgit v1.2.3-71-gd317


From 7413af1fb70e7efa6dbc7f27663e7a5126b3aa33 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 6 May 2014 21:34:14 -0400
Subject: ftrace: Make get_ftrace_addr() and get_ftrace_addr_old() global

Move and rename get_ftrace_addr() and get_ftrace_addr_old() to
ftrace_get_addr_new() and ftrace_get_addr_curr() respectively.

This moves these two helper functions in the generic code out from
the arch specific code, and renames them to have a better generic
name. This will allow other archs to use them as well as makes it
a bit easier to work on getting separate trampolines for different
functions.

ftrace_get_addr_new() returns the trampoline address that the mcount
call address will be converted to.

ftrace_get_addr_curr() returns the trampoline address of what the
mcount call address currently jumps to.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/ftrace.c | 36 +++++-------------------------------
 include/linux/ftrace.h   |  2 ++
 kernel/trace/ftrace.c    | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 4b3c195d4133..5ef43ce8492f 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -349,38 +349,12 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
 	return add_break(rec->ip, old);
 }
 
-/*
- * If the record has the FTRACE_FL_REGS set, that means that it
- * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
- * is not not set, then it wants to convert to the normal callback.
- */
-static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
-{
-	if (rec->flags & FTRACE_FL_REGS)
-		return (unsigned long)FTRACE_REGS_ADDR;
-	else
-		return (unsigned long)FTRACE_ADDR;
-}
-
-/*
- * The FTRACE_FL_REGS_EN is set when the record already points to
- * a function that saves all the regs. Basically the '_EN' version
- * represents the current state of the function.
- */
-static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
-{
-	if (rec->flags & FTRACE_FL_REGS_EN)
-		return (unsigned long)FTRACE_REGS_ADDR;
-	else
-		return (unsigned long)FTRACE_ADDR;
-}
-
 static int add_breakpoints(struct dyn_ftrace *rec, int enable)
 {
 	unsigned long ftrace_addr;
 	int ret;
 
-	ftrace_addr = get_ftrace_old_addr(rec);
+	ftrace_addr = ftrace_get_addr_curr(rec);
 
 	ret = ftrace_test_record(rec, enable);
 
@@ -438,14 +412,14 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
 		 * If not, don't touch the breakpoint, we make just create
 		 * a disaster.
 		 */
-		ftrace_addr = get_ftrace_addr(rec);
+		ftrace_addr = ftrace_get_addr_new(rec);
 		nop = ftrace_call_replace(ip, ftrace_addr);
 
 		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
 			goto update;
 
 		/* Check both ftrace_addr and ftrace_old_addr */
-		ftrace_addr = get_ftrace_old_addr(rec);
+		ftrace_addr = ftrace_get_addr_curr(rec);
 		nop = ftrace_call_replace(ip, ftrace_addr);
 
 		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
@@ -489,7 +463,7 @@ static int add_update(struct dyn_ftrace *rec, int enable)
 
 	ret = ftrace_test_record(rec, enable);
 
-	ftrace_addr  = get_ftrace_addr(rec);
+	ftrace_addr  = ftrace_get_addr_new(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
@@ -536,7 +510,7 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
 
 	ret = ftrace_update_record(rec, enable);
 
-	ftrace_addr = get_ftrace_addr(rec);
+	ftrace_addr = ftrace_get_addr_new(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f0ff2c2453e7..2f8cbffecd3d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -400,6 +400,8 @@ int ftrace_update_record(struct dyn_ftrace *rec, int enable);
 int ftrace_test_record(struct dyn_ftrace *rec, int enable);
 void ftrace_run_stop_machine(int command);
 unsigned long ftrace_location(unsigned long ip);
+unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec);
+unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec);
 
 extern ftrace_func_t ftrace_trace_function;
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 98fa931b6864..e825fded435d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1755,6 +1755,42 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable)
 	return ftrace_check_record(rec, enable, 0);
 }
 
+/**
+ * ftrace_get_addr_new - Get the call address to set to
+ * @rec:  The ftrace record descriptor
+ *
+ * If the record has the FTRACE_FL_REGS set, that means that it
+ * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
+ * is not not set, then it wants to convert to the normal callback.
+ *
+ * Returns the address of the trampoline to set to
+ */
+unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec)
+{
+	if (rec->flags & FTRACE_FL_REGS)
+		return (unsigned long)FTRACE_REGS_ADDR;
+	else
+		return (unsigned long)FTRACE_ADDR;
+}
+
+/**
+ * ftrace_get_addr_curr - Get the call address that is already there
+ * @rec:  The ftrace record descriptor
+ *
+ * The FTRACE_FL_REGS_EN is set when the record already points to
+ * a function that saves all the regs. Basically the '_EN' version
+ * represents the current state of the function.
+ *
+ * Returns the address of the trampoline that is currently being called
+ */
+unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
+{
+	if (rec->flags & FTRACE_FL_REGS_EN)
+		return (unsigned long)FTRACE_REGS_ADDR;
+	else
+		return (unsigned long)FTRACE_ADDR;
+}
+
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 {
-- 
cgit v1.2.3-71-gd317


From f1b2f2bd5821c6ab7feed2e133343dd54b212ed9 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 7 May 2014 16:09:49 -0400
Subject: ftrace: Remove FTRACE_UPDATE_MODIFY_CALL_REGS flag

As the decision to what needs to be done (converting a call to the
ftrace_caller to ftrace_caller_regs or to convert from ftrace_caller_regs
to ftrace_caller) can easily be determined from the rec->flags of
FTRACE_FL_REGS and FTRACE_FL_REGS_EN, there's no need to have the
ftrace_check_record() return either a UPDATE_MODIFY_CALL_REGS or a
UPDATE_MODIFY_CALL. Just he latter is enough. This added flag causes
more complexity than is required. Remove it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/ftrace.c |  3 ---
 include/linux/ftrace.h   |  2 --
 kernel/trace/ftrace.c    | 13 ++++---------
 3 files changed, 4 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 5ef43ce8492f..89de3eaf8772 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -366,7 +366,6 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
 		/* converting nop to call */
 		return add_brk_on_nop(rec);
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_NOP:
 		/* converting a call to a nop */
@@ -469,7 +468,6 @@ static int add_update(struct dyn_ftrace *rec, int enable)
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_CALL:
 		/* converting nop to call */
@@ -516,7 +514,6 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_CALL:
 		/* converting nop to call */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 2f8cbffecd3d..3e6dfb31f8e6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -362,14 +362,12 @@ enum {
  *  IGNORE           - The function is already what we want it to be
  *  MAKE_CALL        - Start tracing the function
  *  MODIFY_CALL      - Stop saving regs for the function
- *  MODIFY_CALL_REGS - Start saving regs for the function
  *  MAKE_NOP         - Stop tracing the function
  */
 enum {
 	FTRACE_UPDATE_IGNORE,
 	FTRACE_UPDATE_MAKE_CALL,
 	FTRACE_UPDATE_MODIFY_CALL,
-	FTRACE_UPDATE_MODIFY_CALL_REGS,
 	FTRACE_UPDATE_MAKE_NOP,
 };
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 52c2b53b7953..cc07b7fc4372 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1701,19 +1701,15 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
 		/*
 		 * If this record is being updated from a nop, then
 		 *   return UPDATE_MAKE_CALL.
-		 * Otherwise, if the EN flag is set, then return
-		 *   UPDATE_MODIFY_CALL_REGS to tell the caller to convert
-		 *   from the non-save regs, to a save regs function.
 		 * Otherwise,
 		 *   return UPDATE_MODIFY_CALL to tell the caller to convert
-		 *   from the save regs, to a non-save regs function.
+		 *   from the save regs, to a non-save regs function or
+		 *   vice versa.
 		 */
 		if (flag & FTRACE_FL_ENABLED)
 			return FTRACE_UPDATE_MAKE_CALL;
-		else if (rec->flags & FTRACE_FL_REGS_EN)
-			return FTRACE_UPDATE_MODIFY_CALL_REGS;
-		else
-			return FTRACE_UPDATE_MODIFY_CALL;
+
+		return FTRACE_UPDATE_MODIFY_CALL;
 	}
 
 	if (update) {
@@ -1815,7 +1811,6 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 	case FTRACE_UPDATE_MAKE_NOP:
 		return ftrace_make_nop(NULL, rec, ftrace_addr);
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
 		return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
 	}
-- 
cgit v1.2.3-71-gd317


From f9f36917903b57c571b1ddcfc6bc794ca4dd8232 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <kefeng.wang@linaro.org>
Date: Wed, 14 May 2014 14:13:41 +0800
Subject: libahci_platform: add host_flags parameter in
 ahci_platform_init_host()

Add a dynamic host_flags argument to make ahci_platform_init_host more flexible,
then remove the AHCI_HFLAGS(...) argument from some driver's ata_port_info,
and pass that in as the new argument.

Cc: Hans de Geode <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Kefeng Wang <kefeng.wang@linaro.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci_da850.c       | 3 ++-
 drivers/ata/ahci_imx.c         | 3 ++-
 drivers/ata/ahci_mvebu.c       | 3 ++-
 drivers/ata/ahci_platform.c    | 2 +-
 drivers/ata/ahci_st.c          | 2 +-
 drivers/ata/ahci_sunxi.c       | 9 ++++++---
 drivers/ata/ahci_xgene.c       | 7 +++++--
 drivers/ata/libahci_platform.c | 5 ++++-
 include/linux/ahci_platform.h  | 1 +
 9 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/ahci_da850.c b/drivers/ata/ahci_da850.c
index 2c83613ce2db..2b77d53bccf8 100644
--- a/drivers/ata/ahci_da850.c
+++ b/drivers/ata/ahci_da850.c
@@ -85,7 +85,8 @@ static int ahci_da850_probe(struct platform_device *pdev)
 
 	da850_sata_init(dev, pwrdn_reg, hpriv->mmio);
 
-	rc = ahci_platform_init_host(pdev, hpriv, &ahci_da850_port_info, 0, 0);
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_da850_port_info,
+				     0, 0, 0);
 	if (rc)
 		goto disable_resources;
 
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
index 497c7abe1c7d..e7e44a73e4fe 100644
--- a/drivers/ata/ahci_imx.c
+++ b/drivers/ata/ahci_imx.c
@@ -267,7 +267,8 @@ static int imx_ahci_probe(struct platform_device *pdev)
 	reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000;
 	writel(reg_val, hpriv->mmio + HOST_TIMER1MS);
 
-	ret = ahci_platform_init_host(pdev, hpriv, &ahci_imx_port_info, 0, 0);
+	ret = ahci_platform_init_host(pdev, hpriv, &ahci_imx_port_info,
+				      0, 0, 0);
 	if (ret)
 		imx_sata_disable(hpriv);
 
diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index 1df8630c6b65..fd3dfd733b84 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -88,7 +88,8 @@ static int ahci_mvebu_probe(struct platform_device *pdev)
 	ahci_mvebu_mbus_config(hpriv, dram);
 	ahci_mvebu_regret_option(hpriv);
 
-	rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info, 0, 0);
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info,
+				     0, 0, 0);
 	if (rc)
 		goto disable_resources;
 
diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index ef67e79944f9..a476a1fd3f8f 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -55,7 +55,7 @@ static int ahci_probe(struct platform_device *pdev)
 			goto disable_resources;
 	}
 
-	rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info, 0, 0);
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info, 0, 0, 0);
 	if (rc)
 		goto pdata_exit;
 
diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c
index 633222226c19..2595598df9ce 100644
--- a/drivers/ata/ahci_st.c
+++ b/drivers/ata/ahci_st.c
@@ -166,7 +166,7 @@ static int st_ahci_probe(struct platform_device *pdev)
 	if (err)
 		return err;
 
-	err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info, 0, 0);
+	err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info, 0, 0, 0);
 	if (err) {
 		ahci_platform_disable_resources(hpriv);
 		return err;
diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c
index 42d3f64e74b3..02002f125bd4 100644
--- a/drivers/ata/ahci_sunxi.c
+++ b/drivers/ata/ahci_sunxi.c
@@ -157,8 +157,6 @@ static void ahci_sunxi_start_engine(struct ata_port *ap)
 }
 
 static const struct ata_port_info ahci_sunxi_port_info = {
-	AHCI_HFLAGS(AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI |
-			  AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ),
 	.flags		= AHCI_FLAG_COMMON | ATA_FLAG_NCQ,
 	.pio_mask	= ATA_PIO4,
 	.udma_mask	= ATA_UDMA6,
@@ -169,6 +167,7 @@ static int ahci_sunxi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct ahci_host_priv *hpriv;
+	unsigned long hflags;
 	int rc;
 
 	hpriv = ahci_platform_get_resources(pdev);
@@ -185,7 +184,11 @@ static int ahci_sunxi_probe(struct platform_device *pdev)
 	if (rc)
 		goto disable_resources;
 
-	rc = ahci_platform_init_host(pdev, hpriv, &ahci_sunxi_port_info, 0, 0);
+	hflags = AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI |
+		 AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ;
+
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_sunxi_port_info,
+				     hflags, 0, 0);
 	if (rc)
 		goto disable_resources;
 
diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index 77c89bf171f1..042a9bb45c86 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -303,7 +303,6 @@ static struct ata_port_operations xgene_ahci_ops = {
 };
 
 static const struct ata_port_info xgene_ahci_port_info = {
-	AHCI_HFLAGS(AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ),
 	.flags = AHCI_FLAG_COMMON | ATA_FLAG_NCQ,
 	.pio_mask = ATA_PIO4,
 	.udma_mask = ATA_UDMA6,
@@ -382,6 +381,7 @@ static int xgene_ahci_probe(struct platform_device *pdev)
 	struct ahci_host_priv *hpriv;
 	struct xgene_ahci_context *ctx;
 	struct resource *res;
+	unsigned long hflags;
 	int rc;
 
 	hpriv = ahci_platform_get_resources(pdev);
@@ -450,7 +450,10 @@ static int xgene_ahci_probe(struct platform_device *pdev)
 		goto disable_resources;
 	}
 
-	rc = ahci_platform_init_host(pdev, hpriv, &xgene_ahci_port_info, 0, 0);
+	hflags = AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ;
+
+	rc = ahci_platform_init_host(pdev, hpriv, &xgene_ahci_port_info,
+				     hflags, 0, 0);
 	if (rc)
 		goto disable_resources;
 
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 7cb3a85719c0..3a5b4ed25a4f 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -283,6 +283,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_get_resources);
  * @pdev: platform device pointer for the host
  * @hpriv: ahci-host private data for the host
  * @pi_template: template for the ata_port_info to use
+ * @host_flags: ahci host flags used in ahci_host_priv
  * @force_port_map: param passed to ahci_save_initial_config
  * @mask_port_map: param passed to ahci_save_initial_config
  *
@@ -296,6 +297,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_get_resources);
 int ahci_platform_init_host(struct platform_device *pdev,
 			    struct ahci_host_priv *hpriv,
 			    const struct ata_port_info *pi_template,
+			    unsigned long host_flags,
 			    unsigned int force_port_map,
 			    unsigned int mask_port_map)
 {
@@ -312,7 +314,8 @@ int ahci_platform_init_host(struct platform_device *pdev,
 	}
 
 	/* prepare host */
-	hpriv->flags |= (unsigned long)pi.private_data;
+	pi.private_data = (void *)host_flags;
+	hpriv->flags |= host_flags;
 
 	ahci_save_initial_config(dev, hpriv, force_port_map, mask_port_map);
 
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 1f16d502600c..6dfd51a04d77 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -44,6 +44,7 @@ struct ahci_host_priv *ahci_platform_get_resources(
 int ahci_platform_init_host(struct platform_device *pdev,
 			    struct ahci_host_priv *hpriv,
 			    const struct ata_port_info *pi_template,
+			    unsigned long host_flags,
 			    unsigned int force_port_map,
 			    unsigned int mask_port_map);
 
-- 
cgit v1.2.3-71-gd317


From 1a56f2aa4752293e5a9c0c3a2331620aa1fdb808 Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Wed, 14 May 2014 13:43:37 +0900
Subject: workqueue: Remove deprecated flush[_delayed]_work_sync()

flush[_delayed]_work_sync() were deprecated by 4382973 ("workqueue:
deprecate flush[_delayed]_work_sync()") and have been deprecated
for a long time. In addition, these are not used anymore. So,
let's remove these functions.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 1b22c42e9c2d..aa92d0295e28 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -587,18 +587,6 @@ static inline bool keventd_up(void)
 	return system_wq != NULL;
 }
 
-/* used to be different but now identical to flush_work(), deprecated */
-static inline bool __deprecated flush_work_sync(struct work_struct *work)
-{
-	return flush_work(work);
-}
-
-/* used to be different but now identical to flush_delayed_work(), deprecated */
-static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwork)
-{
-	return flush_delayed_work(dwork);
-}
-
 #ifndef CONFIG_SMP
 static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
 {
-- 
cgit v1.2.3-71-gd317


From cf416171e7e1d966111f53bdae82f51af05e7bf8 Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Wed, 14 May 2014 13:58:06 +0900
Subject: workqueue: Remove deprecated system_nrt[_freezable]_wq

system_nrt[_freezable]_wq were deprecated by 3b07e9c ("workqueue:
deprecate system_nrt[_freezable]_wq") and have been deprecated
for a long time. In addition, these are not used anymore. So,
let's remove these functions.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index aa92d0295e28..d93d28b2ec73 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -364,20 +364,6 @@ extern struct workqueue_struct *system_freezable_wq;
 extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
-static inline struct workqueue_struct * __deprecated __system_nrt_wq(void)
-{
-	return system_wq;
-}
-
-static inline struct workqueue_struct * __deprecated __system_nrt_freezable_wq(void)
-{
-	return system_freezable_wq;
-}
-
-/* equivlalent to system_wq and system_freezable_wq, deprecated */
-#define system_nrt_wq			__system_nrt_wq()
-#define system_nrt_freezable_wq		__system_nrt_freezable_wq()
-
 extern struct workqueue_struct *
 __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
-- 
cgit v1.2.3-71-gd317


From 122ff243f5f104194750ecbc76d5946dd1eec934 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 12 May 2014 16:04:53 -0700
Subject: ipv4: make ip_local_reserved_ports per netns

ip_local_port_range is already per netns, so should ip_local_reserved_ports
be. And since it is none by default we don't actually need it when we don't
enable CONFIG_SYSCTL.

By the way, rename inet_is_reserved_local_port() to inet_is_local_reserved_port()

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h                | 14 +++++++++++---
 include/net/netns/ipv4.h        |  4 ++++
 kernel/sysctl.c                 |  4 ++--
 net/ipv4/af_inet.c              |  8 +-------
 net/ipv4/inet_connection_sock.c |  5 +----
 net/ipv4/inet_hashtables.c      |  2 +-
 net/ipv4/sysctl_net_ipv4.c      | 31 ++++++++++++++-----------------
 net/ipv4/udp.c                  |  2 +-
 net/sctp/socket.c               |  5 +++--
 9 files changed, 38 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 14c50a1650ef..512bcd5dabac 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -208,11 +208,19 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
 
 void inet_get_local_port_range(struct net *net, int *low, int *high);
 
-extern unsigned long *sysctl_local_reserved_ports;
-static inline int inet_is_reserved_local_port(int port)
+#if CONFIG_SYSCTL
+static inline int inet_is_local_reserved_port(struct net *net, int port)
 {
-	return test_bit(port, sysctl_local_reserved_ports);
+	if (!net->ipv4.sysctl_local_reserved_ports)
+		return 0;
+	return test_bit(port, net->ipv4.sysctl_local_reserved_ports);
 }
+#else
+static inline int inet_is_local_reserved_port(struct net *net, int port)
+{
+	return 0;
+}
+#endif
 
 extern int sysctl_ip_nonlocal_bind;
 
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 2f0cfad66666..aec5e12f9f19 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -84,6 +84,10 @@ struct netns_ipv4 {
 
 	atomic_t dev_addr_genid;
 
+#ifdef CONFIG_SYSCTL
+	unsigned long *sysctl_local_reserved_ports;
+#endif
+
 #ifdef CONFIG_IP_MROUTE
 #ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES
 	struct mr_table		*mrt;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 74f5b580fe34..e36ae4b15726 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2501,11 +2501,11 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
 	bool first = 1;
 	size_t left = *lenp;
 	unsigned long bitmap_len = table->maxlen;
-	unsigned long *bitmap = (unsigned long *) table->data;
+	unsigned long *bitmap = *(unsigned long **) table->data;
 	unsigned long *tmp_bitmap = NULL;
 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
 
-	if (!bitmap_len || !left || (*ppos && !write)) {
+	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
 		*lenp = 0;
 		return 0;
 	}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 211c0cc6c3d3..279132bcadd9 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1705,13 +1705,9 @@ static int __init inet_init(void)
 
 	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));
 
-	sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
-	if (!sysctl_local_reserved_ports)
-		goto out;
-
 	rc = proto_register(&tcp_prot, 1);
 	if (rc)
-		goto out_free_reserved_ports;
+		goto out;
 
 	rc = proto_register(&udp_prot, 1);
 	if (rc)
@@ -1821,8 +1817,6 @@ out_unregister_udp_proto:
 	proto_unregister(&udp_prot);
 out_unregister_tcp_proto:
 	proto_unregister(&tcp_prot);
-out_free_reserved_ports:
-	kfree(sysctl_local_reserved_ports);
 	goto out;
 }
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 12e502cbfdc7..14d02ea905b6 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -29,9 +29,6 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
 EXPORT_SYMBOL(inet_csk_timer_bug_msg);
 #endif
 
-unsigned long *sysctl_local_reserved_ports;
-EXPORT_SYMBOL(sysctl_local_reserved_ports);
-
 void inet_get_local_port_range(struct net *net, int *low, int *high)
 {
 	unsigned int seq;
@@ -113,7 +110,7 @@ again:
 
 		smallest_size = -1;
 		do {
-			if (inet_is_reserved_local_port(rover))
+			if (inet_is_local_reserved_port(net, rover))
 				goto next_nolock;
 			head = &hashinfo->bhash[inet_bhashfn(net, rover,
 					hashinfo->bhash_size)];
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 8b9cf279450d..83331f1b86ac 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -500,7 +500,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		local_bh_disable();
 		for (i = 1; i <= remaining; i++) {
 			port = low + (i + offset) % remaining;
-			if (inet_is_reserved_local_port(port))
+			if (inet_is_local_reserved_port(net, port))
 				continue;
 			head = &hinfo->bhash[inet_bhashfn(net, port,
 					hinfo->bhash_size)];
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a33b9fbc1d80..79a007c52558 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -436,13 +436,6 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
-	{
-		.procname	= "ip_local_reserved_ports",
-		.data		= NULL, /* initialized in sysctl_ipv4_init */
-		.maxlen		= 65536,
-		.mode		= 0644,
-		.proc_handler	= proc_do_large_bitmap,
-	},
 	{
 		.procname	= "igmp_max_memberships",
 		.data		= &sysctl_igmp_max_memberships,
@@ -824,6 +817,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= ipv4_local_port_range,
 	},
+	{
+		.procname	= "ip_local_reserved_ports",
+		.data		= &init_net.ipv4.sysctl_local_reserved_ports,
+		.maxlen		= 65536,
+		.mode		= 0644,
+		.proc_handler	= proc_do_large_bitmap,
+	},
 	{
 		.procname	= "ip_no_pmtu_disc",
 		.data		= &init_net.ipv4.sysctl_ip_no_pmtu_disc,
@@ -876,8 +876,14 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
 	if (net->ipv4.ipv4_hdr == NULL)
 		goto err_reg;
 
+	net->ipv4.sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
+	if (!net->ipv4.sysctl_local_reserved_ports)
+		goto err_ports;
+
 	return 0;
 
+err_ports:
+	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
 err_reg:
 	if (!net_eq(net, &init_net))
 		kfree(table);
@@ -889,6 +895,7 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net)
 {
 	struct ctl_table *table;
 
+	kfree(net->ipv4.sysctl_local_reserved_ports);
 	table = net->ipv4.ipv4_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
 	kfree(table);
@@ -902,16 +909,6 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
 static __init int sysctl_ipv4_init(void)
 {
 	struct ctl_table_header *hdr;
-	struct ctl_table *i;
-
-	for (i = ipv4_table; i->procname; i++) {
-		if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
-			i->data = sysctl_local_reserved_ports;
-			break;
-		}
-	}
-	if (!i->procname)
-		return -EINVAL;
 
 	hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
 	if (hdr == NULL)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 54ea0a3a48f1..6729ea97a59d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -246,7 +246,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 			do {
 				if (low <= snum && snum <= high &&
 				    !test_bit(snum >> udptable->log, bitmap) &&
-				    !inet_is_reserved_local_port(snum))
+				    !inet_is_local_reserved_port(net, snum))
 					goto found;
 				snum += rand;
 			} while (snum != first);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e37b2cbbf177..2af76eaba8f7 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5946,8 +5946,9 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 		/* Search for an available port. */
 		int low, high, remaining, index;
 		unsigned int rover;
+		struct net *net = sock_net(sk);
 
-		inet_get_local_port_range(sock_net(sk), &low, &high);
+		inet_get_local_port_range(net, &low, &high);
 		remaining = (high - low) + 1;
 		rover = prandom_u32() % remaining + low;
 
@@ -5955,7 +5956,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 			rover++;
 			if ((rover < low) || (rover > high))
 				rover = low;
-			if (inet_is_reserved_local_port(rover))
+			if (inet_is_local_reserved_port(net, rover))
 				continue;
 			index = sctp_phashfn(sock_net(sk), rover);
 			head = &sctp_port_hashtable[index];
-- 
cgit v1.2.3-71-gd317


From 2eacc23c422b4553030168f315cb49522fa1b1f6 Mon Sep 17 00:00:00 2001
From: Yuval Atias <yuvala@mellanox.com>
Date: Wed, 14 May 2014 12:15:10 +0300
Subject: net/mlx4_core: Enforce irq affinity changes immediatly

During heavy traffic, napi is constatntly polling the complition queue
and no interrupt is fired. Because of that, changes to irq affinity are
ignored until traffic is stopped and resumed.

By registering to the irq notifier mechanism, and forcing interrupt when
affinity is changed, irq affinity changes will be immediatly enforced.

Signed-off-by: Yuval Atias <yuvala@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cq.c    |  3 ++
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 11 +++++-
 drivers/net/ethernet/mellanox/mlx4/en_tx.c |  6 +++
 drivers/net/ethernet/mellanox/mlx4/eq.c    | 62 ++++++++++++++++++++++++++++++
 include/linux/mlx4/device.h                |  3 ++
 5 files changed, 83 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 0487121e4a0f..8542030b89cf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -293,6 +293,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
 	atomic_set(&cq->refcount, 1);
 	init_completion(&cq->free);
 
+	cq->irq = priv->eq_table.eq[cq->vector].irq;
+	cq->irq_affinity_change = false;
+
 	return 0;
 
 err_radix:
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index a1512450816d..e8c0d2b832b7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -895,10 +895,17 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 	mlx4_en_cq_unlock_napi(cq);
 
 	/* If we used up all the quota - we're probably not done yet... */
-	if (done == budget)
+	if (done == budget) {
 		INC_PERF_COUNTER(priv->pstats.napi_quota);
-	else {
+		if (unlikely(cq->mcq.irq_affinity_change)) {
+			cq->mcq.irq_affinity_change = false;
+			napi_complete(napi);
+			mlx4_en_arm_cq(priv, cq);
+			return 0;
+		}
+	} else {
 		/* Done for now */
+		cq->mcq.irq_affinity_change = false;
 		napi_complete(napi);
 		mlx4_en_arm_cq(priv, cq);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 89585c6311c3..cb964056d710 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -474,9 +474,15 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
 	/* If we used up all the quota - we're probably not done yet... */
 	if (done < budget) {
 		/* Done for now */
+		cq->mcq.irq_affinity_change = false;
 		napi_complete(napi);
 		mlx4_en_arm_cq(priv, cq);
 		return done;
+	} else if (unlikely(cq->mcq.irq_affinity_change)) {
+		cq->mcq.irq_affinity_change = false;
+		napi_complete(napi);
+		mlx4_en_arm_cq(priv, cq);
+		return 0;
 	}
 	return budget;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 6c088bc1845b..d954ec1eac17 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -53,6 +53,11 @@ enum {
 	MLX4_EQ_ENTRY_SIZE	= 0x20
 };
 
+struct mlx4_irq_notify {
+	void *arg;
+	struct irq_affinity_notify notify;
+};
+
 #define MLX4_EQ_STATUS_OK	   ( 0 << 28)
 #define MLX4_EQ_STATUS_WRITE_FAIL  (10 << 28)
 #define MLX4_EQ_OWNER_SW	   ( 0 << 24)
@@ -1083,6 +1088,57 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
 	iounmap(priv->clr_base);
 }
 
+static void mlx4_irq_notifier_notify(struct irq_affinity_notify *notify,
+				     const cpumask_t *mask)
+{
+	struct mlx4_irq_notify *n = container_of(notify,
+						 struct mlx4_irq_notify,
+						 notify);
+	struct mlx4_priv *priv = (struct mlx4_priv *)n->arg;
+	struct radix_tree_iter iter;
+	void **slot;
+
+	radix_tree_for_each_slot(slot, &priv->cq_table.tree, &iter, 0) {
+		struct mlx4_cq *cq = (struct mlx4_cq *)(*slot);
+
+		if (cq->irq == notify->irq)
+			cq->irq_affinity_change = true;
+	}
+}
+
+static void mlx4_release_irq_notifier(struct kref *ref)
+{
+	struct mlx4_irq_notify *n = container_of(ref, struct mlx4_irq_notify,
+						 notify.kref);
+	kfree(n);
+}
+
+static void mlx4_assign_irq_notifier(struct mlx4_priv *priv,
+				     struct mlx4_dev *dev, int irq)
+{
+	struct mlx4_irq_notify *irq_notifier = NULL;
+	int err = 0;
+
+	irq_notifier = kzalloc(sizeof(*irq_notifier), GFP_KERNEL);
+	if (!irq_notifier) {
+		mlx4_warn(dev, "Failed to allocate irq notifier. irq %d\n",
+			  irq);
+		return;
+	}
+
+	irq_notifier->notify.irq = irq;
+	irq_notifier->notify.notify = mlx4_irq_notifier_notify;
+	irq_notifier->notify.release = mlx4_release_irq_notifier;
+	irq_notifier->arg = priv;
+	err = irq_set_affinity_notifier(irq, &irq_notifier->notify);
+	if (err) {
+		kfree(irq_notifier);
+		irq_notifier = NULL;
+		mlx4_warn(dev, "Failed to set irq notifier. irq %d\n", irq);
+	}
+}
+
+
 int mlx4_alloc_eq_table(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1353,6 +1409,9 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
 				continue;
 				/*we dont want to break here*/
 			}
+			mlx4_assign_irq_notifier(priv, dev,
+						 priv->eq_table.eq[vec].irq);
+
 			eq_set_ci(&priv->eq_table.eq[vec], 1);
 		}
 	}
@@ -1379,6 +1438,9 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec)
 		  Belonging to a legacy EQ*/
 		mutex_lock(&priv->msix_ctl.pool_lock);
 		if (priv->msix_ctl.pool_bm & 1ULL << i) {
+			irq_set_affinity_notifier(
+				priv->eq_table.eq[vec].irq,
+				NULL);
 			free_irq(priv->eq_table.eq[vec].irq,
 				 &priv->eq_table.eq[vec]);
 			priv->msix_ctl.pool_bm &= ~(1ULL << i);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ba87bd21295a..c0468e6f0442 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -577,6 +577,9 @@ struct mlx4_cq {
 
 	u32			cons_index;
 
+	u16                     irq;
+	bool                    irq_affinity_change;
+
 	__be32		       *set_ci_db;
 	__be32		       *arm_db;
 	int			arm_sn;
-- 
cgit v1.2.3-71-gd317


From c7228317441f4dee5e5916e30300dd8c61f75af7 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 13 May 2014 20:30:07 -0700
Subject: net: Use a more standard macro for INET_ADDR_COOKIE

Missing a colon on definition use is a bit odd so
change the macro for the 32 bit case to declare an
__attribute__((unused)) and __deprecated variable.

The __deprecated attribute will cause gcc to emit
an error if the variable is actually used.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h | 8 +++++---
 net/ipv4/inet_hashtables.c    | 4 ++--
 net/ipv4/udp.c                | 2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 1bdb47715def..dd1950a7e273 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -292,12 +292,12 @@ static inline struct sock *inet_lookup_listener(struct net *net,
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
 	const __addrpair __name = (__force __addrpair) ( \
 				   (((__force __u64)(__be32)(__saddr)) << 32) | \
-				   ((__force __u64)(__be32)(__daddr)));
+				   ((__force __u64)(__be32)(__daddr)))
 #else /* __LITTLE_ENDIAN */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
 	const __addrpair __name = (__force __addrpair) ( \
 				   (((__force __u64)(__be32)(__daddr)) << 32) | \
-				   ((__force __u64)(__be32)(__saddr)));
+				   ((__force __u64)(__be32)(__saddr)))
 #endif /* __BIG_ENDIAN */
 #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)	\
 	(((__sk)->sk_portpair == (__ports))			&&	\
@@ -306,7 +306,9 @@ static inline struct sock *inet_lookup_listener(struct net *net,
 	   ((__sk)->sk_bound_dev_if == (__dif))) 		&& 	\
 	 net_eq(sock_net(__sk), (__net)))
 #else /* 32-bit arch */
-#define INET_ADDR_COOKIE(__name, __saddr, __daddr)
+#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
+	const int __name __deprecated __attribute__((unused))
+
 #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) \
 	(((__sk)->sk_portpair == (__ports))		&&		\
 	 ((__sk)->sk_daddr	== (__saddr))		&&		\
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 83331f1b86ac..43116e8c8e13 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -274,7 +274,7 @@ struct sock *__inet_lookup_established(struct net *net,
 				  const __be32 daddr, const u16 hnum,
 				  const int dif)
 {
-	INET_ADDR_COOKIE(acookie, saddr, daddr)
+	INET_ADDR_COOKIE(acookie, saddr, daddr);
 	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
 	struct sock *sk;
 	const struct hlist_nulls_node *node;
@@ -327,7 +327,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	__be32 daddr = inet->inet_rcv_saddr;
 	__be32 saddr = inet->inet_daddr;
 	int dif = sk->sk_bound_dev_if;
-	INET_ADDR_COOKIE(acookie, saddr, daddr)
+	INET_ADDR_COOKIE(acookie, saddr, daddr);
 	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
 	struct net *net = sock_net(sk);
 	unsigned int hash = inet_ehashfn(net, daddr, lport,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6729ea97a59d..590532a7bd2d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1875,7 +1875,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
 	unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
 	unsigned int slot2 = hash2 & udp_table.mask;
 	struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
-	INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr)
+	INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
 	const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
 
 	rcu_read_lock();
-- 
cgit v1.2.3-71-gd317


From 542ad4f8886b376dac9a4334bdb38f9c22a4d8da Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Apr 2014 15:08:29 -0700
Subject: Input: gpio_keys - convert struct descriptions to kernel-doc

This patch converts descriptions of the structures defined in
linux/gpio_keys.h to follow kernel-doc format.

There is no functional change.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/gpio_keys.h | 48 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index a7e977ff4abf..8b622468952c 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -3,29 +3,53 @@
 
 struct device;
 
+/**
+ * struct gpio_keys_button - configuration parameters
+ * @code:		input event code (KEY_*, SW_*)
+ * @gpio:		%-1 if this key does not support gpio
+ * @active_low:		%true indicates that button is considered
+ *			depressed when gpio is low
+ * @desc:		label that will be attached to button's gpio
+ * @type:		input event type (%EV_KEY, %EV_SW, %EV_ABS)
+ * @wakeup:		configure the button as a wake-up source
+ * @debounce_interval:	debounce ticks interval in msecs
+ * @can_disable:	%true indicates that userspace is allowed to
+ *			disable button via sysfs
+ * @value:		axis value for %EV_ABS
+ * @irq:		Irq number in case of interrupt keys
+ */
 struct gpio_keys_button {
-	/* Configuration parameters */
-	unsigned int code;	/* input event code (KEY_*, SW_*) */
-	int gpio;		/* -1 if this key does not support gpio */
+	unsigned int code;
+	int gpio;
 	int active_low;
 	const char *desc;
-	unsigned int type;	/* input event type (EV_KEY, EV_SW, EV_ABS) */
-	int wakeup;		/* configure the button as a wake-up source */
-	int debounce_interval;	/* debounce ticks interval in msecs */
+	unsigned int type;
+	int wakeup;
+	int debounce_interval;
 	bool can_disable;
-	int value;		/* axis value for EV_ABS */
-	unsigned int irq;	/* Irq number in case of interrupt keys */
+	int value;
+	unsigned int irq;
 };
 
+/**
+ * struct gpio_keys_platform_data - platform data for gpio_keys driver
+ * @buttons:		pointer to array of &gpio_keys_button structures
+ *			describing buttons attached to the device
+ * @nbuttons:		number of elements in @buttons array
+ * @poll_interval:	polling interval in msecs - for polling driver only
+ * @rep:		enable input subsystem auto repeat
+ * @enable:		platform hook for enabling the device
+ * @disable:		platform hook for disabling the device
+ * @name:		input device name
+ */
 struct gpio_keys_platform_data {
 	struct gpio_keys_button *buttons;
 	int nbuttons;
-	unsigned int poll_interval;	/* polling interval in msecs -
-					   for polling driver only */
-	unsigned int rep:1;		/* enable input subsystem auto repeat */
+	unsigned int poll_interval;
+	unsigned int rep:1;
 	int (*enable)(struct device *dev);
 	void (*disable)(struct device *dev);
-	const char *name;		/* input device name */
+	const char *name;
 };
 
 #endif
-- 
cgit v1.2.3-71-gd317


From bf1de9761c21f56d5b0c6a0acd3b792d801c61e6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 28 Apr 2014 10:49:51 -0700
Subject: Input: implement managed polled input devices

Managed resources are becoming more and more popular in drivers. Let's
implement managed polled input devices, to complement managed regular input
devices.

Similarly to managed regular input devices only one new call
devm_input_allocate_polled_device() is added and the rest of APIs is
modified to work with both managed and non-managed devices.

Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Tested-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/input-polldev.c | 118 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/input-polldev.h |   3 ++
 2 files changed, 119 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c
index 4b191908d5de..3664f81655ca 100644
--- a/drivers/input/input-polldev.c
+++ b/drivers/input/input-polldev.c
@@ -176,6 +176,91 @@ struct input_polled_dev *input_allocate_polled_device(void)
 }
 EXPORT_SYMBOL(input_allocate_polled_device);
 
+struct input_polled_devres {
+	struct input_polled_dev *polldev;
+};
+
+static int devm_input_polldev_match(struct device *dev, void *res, void *data)
+{
+	struct input_polled_devres *devres = res;
+
+	return devres->polldev == data;
+}
+
+static void devm_input_polldev_release(struct device *dev, void *res)
+{
+	struct input_polled_devres *devres = res;
+	struct input_polled_dev *polldev = devres->polldev;
+
+	dev_dbg(dev, "%s: dropping reference/freeing %s\n",
+		__func__, dev_name(&polldev->input->dev));
+
+	input_put_device(polldev->input);
+	kfree(polldev);
+}
+
+static void devm_input_polldev_unregister(struct device *dev, void *res)
+{
+	struct input_polled_devres *devres = res;
+	struct input_polled_dev *polldev = devres->polldev;
+
+	dev_dbg(dev, "%s: unregistering device %s\n",
+		__func__, dev_name(&polldev->input->dev));
+	input_unregister_device(polldev->input);
+
+	/*
+	 * Note that we are still holding extra reference to the input
+	 * device so it will stick around until devm_input_polldev_release()
+	 * is called.
+	 */
+}
+
+/**
+ * devm_input_allocate_polled_device - allocate managed polled device
+ * @dev: device owning the polled device being created
+ *
+ * Returns prepared &struct input_polled_dev or %NULL.
+ *
+ * Managed polled input devices do not need to be explicitly unregistered
+ * or freed as it will be done automatically when owner device unbinds
+ * from * its driver (or binding fails). Once such managed polled device
+ * is allocated, it is ready to be set up and registered in the same
+ * fashion as regular polled input devices (using
+ * input_register_polled_device() function).
+ *
+ * If you want to manually unregister and free such managed polled devices,
+ * it can be still done by calling input_unregister_polled_device() and
+ * input_free_polled_device(), although it is rarely needed.
+ *
+ * NOTE: the owner device is set up as parent of input device and users
+ * should not override it.
+ */
+struct input_polled_dev *devm_input_allocate_polled_device(struct device *dev)
+{
+	struct input_polled_dev *polldev;
+	struct input_polled_devres *devres;
+
+	devres = devres_alloc(devm_input_polldev_release, sizeof(*devres),
+			      GFP_KERNEL);
+	if (!devres)
+		return NULL;
+
+	polldev = input_allocate_polled_device();
+	if (!polldev) {
+		devres_free(devres);
+		return NULL;
+	}
+
+	polldev->input->dev.parent = dev;
+	polldev->devres_managed = true;
+
+	devres->polldev = polldev;
+	devres_add(dev, devres);
+
+	return polldev;
+}
+EXPORT_SYMBOL(devm_input_allocate_polled_device);
+
 /**
  * input_free_polled_device - free memory allocated for polled device
  * @dev: device to free
@@ -186,7 +271,12 @@ EXPORT_SYMBOL(input_allocate_polled_device);
 void input_free_polled_device(struct input_polled_dev *dev)
 {
 	if (dev) {
-		input_free_device(dev->input);
+		if (dev->devres_managed)
+			WARN_ON(devres_destroy(dev->input->dev.parent,
+						devm_input_polldev_release,
+						devm_input_polldev_match,
+						dev));
+		input_put_device(dev->input);
 		kfree(dev);
 	}
 }
@@ -204,9 +294,19 @@ EXPORT_SYMBOL(input_free_polled_device);
  */
 int input_register_polled_device(struct input_polled_dev *dev)
 {
+	struct input_polled_devres *devres = NULL;
 	struct input_dev *input = dev->input;
 	int error;
 
+	if (dev->devres_managed) {
+		devres = devres_alloc(devm_input_polldev_unregister,
+				      sizeof(*devres), GFP_KERNEL);
+		if (!devres)
+			return -ENOMEM;
+
+		devres->polldev = dev;
+	}
+
 	input_set_drvdata(input, dev);
 	INIT_DELAYED_WORK(&dev->work, input_polled_device_work);
 
@@ -221,8 +321,10 @@ int input_register_polled_device(struct input_polled_dev *dev)
 	input->dev.groups = input_polldev_attribute_groups;
 
 	error = input_register_device(input);
-	if (error)
+	if (error) {
+		devres_free(devres);
 		return error;
+	}
 
 	/*
 	 * Take extra reference to the underlying input device so
@@ -233,6 +335,12 @@ int input_register_polled_device(struct input_polled_dev *dev)
 	 */
 	input_get_device(input);
 
+	if (dev->devres_managed) {
+		dev_dbg(input->dev.parent, "%s: registering %s with devres.\n",
+			__func__, dev_name(&input->dev));
+		devres_add(input->dev.parent, devres);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(input_register_polled_device);
@@ -247,6 +355,12 @@ EXPORT_SYMBOL(input_register_polled_device);
  */
 void input_unregister_polled_device(struct input_polled_dev *dev)
 {
+	if (dev->devres_managed)
+		WARN_ON(devres_destroy(dev->input->dev.parent,
+					devm_input_polldev_unregister,
+					devm_input_polldev_match,
+					dev));
+
 	input_unregister_device(dev->input);
 }
 EXPORT_SYMBOL(input_unregister_polled_device);
diff --git a/include/linux/input-polldev.h b/include/linux/input-polldev.h
index ce0b72464eb8..2465182670db 100644
--- a/include/linux/input-polldev.h
+++ b/include/linux/input-polldev.h
@@ -48,9 +48,12 @@ struct input_polled_dev {
 
 /* private: */
 	struct delayed_work work;
+
+	bool devres_managed;
 };
 
 struct input_polled_dev *input_allocate_polled_device(void);
+struct input_polled_dev *devm_input_allocate_polled_device(struct device *dev);
 void input_free_polled_device(struct input_polled_dev *dev);
 int input_register_polled_device(struct input_polled_dev *dev);
 void input_unregister_polled_device(struct input_polled_dev *dev);
-- 
cgit v1.2.3-71-gd317


From 34d22ce22b0b249804816990a3b62b08b1a62546 Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Fri, 9 May 2014 14:11:44 +0300
Subject: cfg80211: Add API to update CSA counters in mgmt frames

Add NL80211_ATTR_CSA_C_OFFSETS_TX which holds an array
of offsets to the CSA counters which should be updated
when sending a management frames with NL80211_CMD_FRAME.

This API should be used by the drivers that wish to keep the
CSA counter updated in probe responses, but do not implement
probe response offloading and so, do not use
ieee80211_proberesp_get function.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  4 ++++
 include/uapi/linux/nl80211.h |  8 ++++++++
 net/wireless/nl80211.c       | 24 ++++++++++++++++++++++--
 3 files changed, 34 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e3a48b0a2b3b..f46e1e15746d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1986,6 +1986,8 @@ struct cfg80211_update_ft_ies_params {
  * @len: buffer length
  * @no_cck: don't use cck rates for this frame
  * @dont_wait_for_ack: tells the low level not to wait for an ack
+ * @n_csa_offsets: length of csa_offsets array
+ * @csa_offsets: array of all the csa offsets in the frame
  */
 struct cfg80211_mgmt_tx_params {
 	struct ieee80211_channel *chan;
@@ -1995,6 +1997,8 @@ struct cfg80211_mgmt_tx_params {
 	size_t len;
 	bool no_cck;
 	bool dont_wait_for_ack;
+	int n_csa_offsets;
+	const u16 *csa_offsets;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index b65095a85dee..ec90fc9d2358 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -503,6 +503,9 @@
  *	TX status event pertaining to the TX request.
  *	%NL80211_ATTR_TX_NO_CCK_RATE is used to decide whether to send the
  *	management frames at CCK rate or not in 2GHz band.
+ *	%NL80211_ATTR_CSA_C_OFFSETS_TX is an array of offsets to CSA
+ *	counters which will be updated to the current value. This attribute
+ *	is used during CSA period.
  * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this
  *	command may be used with the corresponding cookie to cancel the wait
  *	time if it is known that it is no longer necessary.
@@ -1576,6 +1579,9 @@ enum nl80211_commands {
  *	advertise values that cannot always be met. In such cases, an attempt
  *	to add a new station entry with @NL80211_CMD_NEW_STATION may fail.
  *
+ * @NL80211_ATTR_CSA_C_OFFSETS_TX: An array of csa counter offsets (u16) which
+ *	should be updated when the frame is transmitted.
+ *
  * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32.
  *	As specified in the &enum nl80211_tdls_peer_capability.
  *
@@ -1920,6 +1926,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_IFACE_SOCKET_OWNER,
 
+	NL80211_ATTR_CSA_C_OFFSETS_TX,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 74e7299e4add..4c0ca40ef90e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -386,6 +386,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 },
 	[NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG },
+	[NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY },
 };
 
 /* policy for the key attributes */
@@ -7786,6 +7787,27 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	if (!chandef.chan && params.offchan)
 		return -EINVAL;
 
+	params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
+	params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
+
+	if (info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]) {
+		int len = nla_len(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
+		int i;
+
+		if (len % sizeof(u16))
+			return -EINVAL;
+
+		params.n_csa_offsets = len / sizeof(u16);
+		params.csa_offsets =
+			nla_data(info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX]);
+
+		/* check that all the offsets fit the frame */
+		for (i = 0; i < params.n_csa_offsets; i++) {
+			if (params.csa_offsets[i] >= params.len)
+				return -EINVAL;
+		}
+	}
+
 	if (!params.dont_wait_for_ack) {
 		msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 		if (!msg)
@@ -7799,8 +7821,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
-	params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
-	params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);
 	params.chan = chandef.chan;
 	err = cfg80211_mlme_mgmt_tx(rdev, wdev, &params, &cookie);
 	if (err)
-- 
cgit v1.2.3-71-gd317


From 9a774c78e2114c7e8605e3a168ccd552cbe3d922 Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Fri, 9 May 2014 14:11:46 +0300
Subject: cfg80211: Support multiple CSA counters

Change the type of NL80211_ATTR_CSA_C_OFF_BEACON and
NL80211_ATTR_CSA_C_OFF_PRESP to be NLA_BINARY which allows
userspace to use beacons and probe responses with
multiple CSA counters.
This isn't breaking the API since userspace can
continue to use nla_put_u16 for this attributes, which
is equivalent to a single element u16 array.
In addition advertise max number of supported CSA counters.
This is needed when using CSA and eCSA IEs together.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 18 ++++++++++--
 include/uapi/linux/nl80211.h | 11 +++++---
 net/mac80211/cfg.c           | 10 +++++--
 net/wireless/core.c          |  2 ++
 net/wireless/nl80211.c       | 65 ++++++++++++++++++++++++++++++++++----------
 net/wireless/trace.h         | 22 +++++++++------
 6 files changed, 96 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f46e1e15746d..447cb58f0d77 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -694,8 +694,10 @@ struct cfg80211_ap_settings {
  *
  * @chandef: defines the channel to use after the switch
  * @beacon_csa: beacon data while performing the switch
- * @counter_offset_beacon: offset for the counter within the beacon (tail)
- * @counter_offset_presp: offset for the counter within the probe response
+ * @counter_offsets_beacon: offsets of the counters within the beacon (tail)
+ * @counter_offsets_presp: offsets of the counters within the probe response
+ * @n_counter_offsets_beacon: number of csa counters the beacon (tail)
+ * @n_counter_offsets_presp: number of csa counters in the probe response
  * @beacon_after: beacon data to be used on the new channel
  * @radar_required: whether radar detection is required on the new channel
  * @block_tx: whether transmissions should be blocked while changing
@@ -704,7 +706,10 @@ struct cfg80211_ap_settings {
 struct cfg80211_csa_settings {
 	struct cfg80211_chan_def chandef;
 	struct cfg80211_beacon_data beacon_csa;
-	u16 counter_offset_beacon, counter_offset_presp;
+	const u16 *counter_offsets_beacon;
+	const u16 *counter_offsets_presp;
+	unsigned int n_counter_offsets_beacon;
+	unsigned int n_counter_offsets_presp;
 	struct cfg80211_beacon_data beacon_after;
 	bool radar_required;
 	bool block_tx;
@@ -3048,6 +3053,13 @@ struct wiphy {
 
 	u16 max_ap_assoc_sta;
 
+	/*
+	 * Number of supported csa_counters in beacons and probe responses.
+	 * This value should be set if the driver wishes to limit the number of
+	 * csa counters. Default (0) means infinite.
+	 */
+	u8 max_num_csa_counters;
+
 	char priv[0] __aligned(NETDEV_ALIGN);
 };
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ec90fc9d2358..0cfa827123ff 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1528,10 +1528,10 @@ enum nl80211_commands {
  *	operation).
  * @NL80211_ATTR_CSA_IES: Nested set of attributes containing the IE information
  *	for the time while performing a channel switch.
- * @NL80211_ATTR_CSA_C_OFF_BEACON: Offset of the channel switch counter
- *	field in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
- * @NL80211_ATTR_CSA_C_OFF_PRESP: Offset of the channel switch counter
- *	field in the probe response (%NL80211_ATTR_PROBE_RESP).
+ * @NL80211_ATTR_CSA_C_OFF_BEACON: An array of offsets (u16) to the channel
+ *	switch counters in the beacons tail (%NL80211_ATTR_BEACON_TAIL).
+ * @NL80211_ATTR_CSA_C_OFF_PRESP: An array of offsets (u16) to the channel
+ *	switch counters in the probe response (%NL80211_ATTR_PROBE_RESP).
  *
  * @NL80211_ATTR_RXMGMT_FLAGS: flags for nl80211_send_mgmt(), u32.
  *	As specified in the &enum nl80211_rxmgmt_flags.
@@ -1581,6 +1581,8 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_CSA_C_OFFSETS_TX: An array of csa counter offsets (u16) which
  *	should be updated when the frame is transmitted.
+ * @NL80211_ATTR_MAX_CSA_COUNTERS: U8 attribute used to advertise the maximum
+ *	supported number of csa counters.
  *
  * @NL80211_ATTR_TDLS_PEER_CAPABILITY: flags for TDLS peer capabilities, u32.
  *	As specified in the &enum nl80211_tdls_peer_capability.
@@ -1927,6 +1929,7 @@ enum nl80211_attrs {
 	NL80211_ATTR_IFACE_SOCKET_OWNER,
 
 	NL80211_ATTR_CSA_C_OFFSETS_TX,
+	NL80211_ATTR_MAX_CSA_COUNTERS,
 
 	/* add attributes here, update the policy in nl80211.c */
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 8e4754ebc2d8..7a6f8aba5c46 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3192,8 +3192,14 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
 			break;
 
 		sdata->csa_counter_offset_beacon =
-			params->counter_offset_beacon;
-		sdata->csa_counter_offset_presp = params->counter_offset_presp;
+			params->counter_offsets_beacon[0];
+
+		if (params->n_counter_offsets_presp)
+			sdata->csa_counter_offset_presp =
+				params->counter_offsets_presp[0];
+		else
+			sdata->csa_counter_offset_presp = 0;
+
 		err = ieee80211_assign_beacon(sdata, &params->beacon_csa);
 		if (err < 0) {
 			kfree(sdata->u.ap.next_beacon);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 39788711ce6e..d03d8bdb29ca 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -402,6 +402,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv)
 	rdev->wiphy.rts_threshold = (u32) -1;
 	rdev->wiphy.coverage_class = 0;
 
+	rdev->wiphy.max_num_csa_counters = 1;
+
 	return &rdev->wiphy;
 }
 EXPORT_SYMBOL(wiphy_new);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4c0ca40ef90e..ca19b1520389 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -371,8 +371,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
 	[NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
 	[NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED },
-	[NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_U16 },
-	[NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_U16 },
+	[NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_BINARY },
+	[NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_BINARY },
 	[NL80211_ATTR_STA_SUPPORTED_CHANNELS] = { .type = NLA_BINARY },
 	[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = { .type = NLA_BINARY },
 	[NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG },
@@ -1670,6 +1670,13 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
 			}
 			nla_nest_end(msg, nested);
 		}
+		state->split_start++;
+		break;
+	case 12:
+		if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH &&
+		    nla_put_u8(msg, NL80211_ATTR_MAX_CSA_COUNTERS,
+			       rdev->wiphy.max_num_csa_counters))
+			goto nla_put_failure;
 
 		/* done */
 		state->split_start = 0;
@@ -5864,6 +5871,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 	u8 radar_detect_width = 0;
 	int err;
 	bool need_new_beacon = false;
+	int len, i;
 
 	if (!rdev->ops->channel_switch ||
 	    !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH))
@@ -5922,26 +5930,55 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 	if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON])
 		return -EINVAL;
 
-	params.counter_offset_beacon =
-		nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
-	if (params.counter_offset_beacon >= params.beacon_csa.tail_len)
+	len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+	if (!len || (len % sizeof(u16)))
 		return -EINVAL;
 
-	/* sanity check - counters should be the same */
-	if (params.beacon_csa.tail[params.counter_offset_beacon] !=
-	    params.count)
+	params.n_counter_offsets_beacon = len / sizeof(u16);
+	if (rdev->wiphy.max_num_csa_counters &&
+	    (params.n_counter_offsets_beacon >
+	     rdev->wiphy.max_num_csa_counters))
 		return -EINVAL;
 
+	params.counter_offsets_beacon =
+		nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+
+	/* sanity checks - counters should fit and be the same */
+	for (i = 0; i < params.n_counter_offsets_beacon; i++) {
+		u16 offset = params.counter_offsets_beacon[i];
+
+		if (offset >= params.beacon_csa.tail_len)
+			return -EINVAL;
+
+		if (params.beacon_csa.tail[offset] != params.count)
+			return -EINVAL;
+	}
+
 	if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) {
-		params.counter_offset_presp =
-			nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
-		if (params.counter_offset_presp >=
-		    params.beacon_csa.probe_resp_len)
+		len = nla_len(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+		if (!len || (len % sizeof(u16)))
 			return -EINVAL;
 
-		if (params.beacon_csa.probe_resp[params.counter_offset_presp] !=
-		    params.count)
+		params.n_counter_offsets_presp = len / sizeof(u16);
+		if (rdev->wiphy.max_num_csa_counters &&
+		    (params.n_counter_offsets_beacon >
+		     rdev->wiphy.max_num_csa_counters))
 			return -EINVAL;
+
+		params.counter_offsets_presp =
+			nla_data(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+
+		/* sanity checks - counters should fit and be the same */
+		for (i = 0; i < params.n_counter_offsets_presp; i++) {
+			u16 offset = params.counter_offsets_presp[i];
+
+			if (offset >= params.beacon_csa.probe_resp_len)
+				return -EINVAL;
+
+			if (params.beacon_csa.probe_resp[offset] !=
+			    params.count)
+				return -EINVAL;
+		}
 	}
 
 skip_beacons:
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index cdfbb00e1b37..560ed77084e9 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1876,29 +1876,33 @@ TRACE_EVENT(rdev_channel_switch,
 		WIPHY_ENTRY
 		NETDEV_ENTRY
 		CHAN_DEF_ENTRY
-		__field(u16, counter_offset_beacon)
-		__field(u16, counter_offset_presp)
 		__field(bool, radar_required)
 		__field(bool, block_tx)
 		__field(u8, count)
+		__dynamic_array(u16, bcn_ofs, params->n_counter_offsets_beacon)
+		__dynamic_array(u16, pres_ofs, params->n_counter_offsets_presp)
 	),
 	TP_fast_assign(
 		WIPHY_ASSIGN;
 		NETDEV_ASSIGN;
 		CHAN_DEF_ASSIGN(&params->chandef);
-		__entry->counter_offset_beacon = params->counter_offset_beacon;
-		__entry->counter_offset_presp = params->counter_offset_presp;
 		__entry->radar_required = params->radar_required;
 		__entry->block_tx = params->block_tx;
 		__entry->count = params->count;
+		memcpy(__get_dynamic_array(bcn_ofs),
+		       params->counter_offsets_beacon,
+		       params->n_counter_offsets_beacon * sizeof(u16));
+
+		/* probe response offsets are optional */
+		if (params->n_counter_offsets_presp)
+			memcpy(__get_dynamic_array(pres_ofs),
+			       params->counter_offsets_presp,
+			       params->n_counter_offsets_presp * sizeof(u16));
 	),
 	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
-		  ", block_tx: %d, count: %u, radar_required: %d"
-		  ", counter offsets (beacon/presp): %u/%u",
+		  ", block_tx: %d, count: %u, radar_required: %d",
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
-		  __entry->block_tx, __entry->count, __entry->radar_required,
-		  __entry->counter_offset_beacon,
-		  __entry->counter_offset_presp)
+		  __entry->block_tx, __entry->count, __entry->radar_required)
 );
 
 TRACE_EVENT(rdev_set_qos_map,
-- 
cgit v1.2.3-71-gd317


From 6ec8c332a0f93959e615158cc212b3abfd52abe7 Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Fri, 9 May 2014 14:11:49 +0300
Subject: mac80211: Provide ieee80211_beacon_get_template API

Add a new API ieee80211_beacon_get_template, which doesn't
affect DTIM counter and should be used if the device generates beacon
frames, and new beacon template is needed. In addition set the offsets
to TIM IE for MESH interface.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 43 ++++++++++++++++++++++-----
 net/mac80211/tx.c      | 80 ++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 94 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3541c48a97cd..e6521261a585 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3411,6 +3411,39 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
  */
 void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets);
 
+/**
+ * struct ieee80211_mutable_offsets - mutable beacon offsets
+ * @tim_offset: position of TIM element
+ * @tim_length: size of TIM element
+ */
+struct ieee80211_mutable_offsets {
+	u16 tim_offset;
+	u16 tim_length;
+};
+
+/**
+ * ieee80211_beacon_get_template - beacon template generation function
+ * @hw: pointer obtained from ieee80211_alloc_hw().
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @offs: &struct ieee80211_mutable_offsets pointer to struct that will
+ *	receive the offsets that may be updated by the driver.
+ *
+ * If the driver implements beaconing modes, it must use this function to
+ * obtain the beacon template.
+ *
+ * This function should be used if the beacon frames are generated by the
+ * device, and then the driver must use the returned beacon as the template
+ * The driver is responsible to update the DTIM count.
+ *
+ * The driver is responsible for freeing the returned skb.
+ *
+ * Return: The beacon template. %NULL on error.
+ */
+struct sk_buff *
+ieee80211_beacon_get_template(struct ieee80211_hw *hw,
+			      struct ieee80211_vif *vif,
+			      struct ieee80211_mutable_offsets *offs);
+
 /**
  * ieee80211_beacon_get_tim - beacon generation function
  * @hw: pointer obtained from ieee80211_alloc_hw().
@@ -3422,16 +3455,12 @@ void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets);
  *	Set to 0 if invalid (in non-AP modes).
  *
  * If the driver implements beaconing modes, it must use this function to
- * obtain the beacon frame/template.
+ * obtain the beacon frame.
  *
  * If the beacon frames are generated by the host system (i.e., not in
  * hardware/firmware), the driver uses this function to get each beacon
- * frame from mac80211 -- it is responsible for calling this function
- * before the beacon is needed (e.g. based on hardware interrupt).
- *
- * If the beacon frames are generated by the device, then the driver
- * must use the returned beacon as the template and change the TIM IE
- * according to the current DTIM parameters/TIM bitmap.
+ * frame from mac80211 -- it is responsible for calling this function exactly
+ * once before the beacon is needed (e.g. based on hardware interrupt).
  *
  * The driver is responsible for freeing the returned skb.
  *
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0d1a42d4b6de..509456e5722d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2328,7 +2328,8 @@ void ieee80211_tx_pending(unsigned long data)
 /* functions for drivers to get certain frames */
 
 static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
-				       struct ps_data *ps, struct sk_buff *skb)
+				       struct ps_data *ps, struct sk_buff *skb,
+				       bool is_template)
 {
 	u8 *pos, *tim;
 	int aid0 = 0;
@@ -2341,11 +2342,12 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
 		 * checking byte-for-byte */
 		have_bits = !bitmap_empty((unsigned long *)ps->tim,
 					  IEEE80211_MAX_AID+1);
-
-	if (ps->dtim_count == 0)
-		ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1;
-	else
-		ps->dtim_count--;
+	if (!is_template) {
+		if (ps->dtim_count == 0)
+			ps->dtim_count = sdata->vif.bss_conf.dtim_period - 1;
+		else
+			ps->dtim_count--;
+	}
 
 	tim = pos = (u8 *) skb_put(skb, 6);
 	*pos++ = WLAN_EID_TIM;
@@ -2391,7 +2393,8 @@ static void __ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
 }
 
 static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
-				    struct ps_data *ps, struct sk_buff *skb)
+				    struct ps_data *ps, struct sk_buff *skb,
+				    bool is_template)
 {
 	struct ieee80211_local *local = sdata->local;
 
@@ -2403,10 +2406,10 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
 	 * of the tim bitmap in mac80211 and the driver.
 	 */
 	if (local->tim_in_locked_section) {
-		__ieee80211_beacon_add_tim(sdata, ps, skb);
+		__ieee80211_beacon_add_tim(sdata, ps, skb, is_template);
 	} else {
 		spin_lock_bh(&local->tim_lock);
-		__ieee80211_beacon_add_tim(sdata, ps, skb);
+		__ieee80211_beacon_add_tim(sdata, ps, skb, is_template);
 		spin_unlock_bh(&local->tim_lock);
 	}
 
@@ -2536,9 +2539,11 @@ bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
 }
 EXPORT_SYMBOL(ieee80211_csa_is_complete);
 
-struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
-					 struct ieee80211_vif *vif,
-					 u16 *tim_offset, u16 *tim_length)
+static struct sk_buff *
+__ieee80211_beacon_get(struct ieee80211_hw *hw,
+		       struct ieee80211_vif *vif,
+		       struct ieee80211_mutable_offsets *offs,
+		       bool is_template)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct sk_buff *skb = NULL;
@@ -2556,10 +2561,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 	if (!ieee80211_sdata_running(sdata) || !chanctx_conf)
 		goto out;
 
-	if (tim_offset)
-		*tim_offset = 0;
-	if (tim_length)
-		*tim_length = 0;
+	if (offs)
+		memset(offs, 0, sizeof(*offs));
 
 	if (sdata->vif.type == NL80211_IFTYPE_AP) {
 		struct ieee80211_if_ap *ap = &sdata->u.ap;
@@ -2584,12 +2587,13 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 			memcpy(skb_put(skb, beacon->head_len), beacon->head,
 			       beacon->head_len);
 
-			ieee80211_beacon_add_tim(sdata, &ap->ps, skb);
+			ieee80211_beacon_add_tim(sdata, &ap->ps, skb,
+						 is_template);
 
-			if (tim_offset)
-				*tim_offset = beacon->head_len;
-			if (tim_length)
-				*tim_length = skb->len - beacon->head_len;
+			if (offs) {
+				offs->tim_offset = beacon->head_len;
+				offs->tim_length = skb->len - beacon->head_len;
+			}
 
 			if (beacon->tail)
 				memcpy(skb_put(skb, beacon->tail_len),
@@ -2641,7 +2645,13 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 			goto out;
 		skb_reserve(skb, local->tx_headroom);
 		memcpy(skb_put(skb, bcn->head_len), bcn->head, bcn->head_len);
-		ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb);
+		ieee80211_beacon_add_tim(sdata, &ifmsh->ps, skb, is_template);
+
+		if (offs) {
+			offs->tim_offset = bcn->head_len;
+			offs->tim_length = skb->len - bcn->head_len;
+		}
+
 		memcpy(skb_put(skb, bcn->tail_len), bcn->tail, bcn->tail_len);
 	} else {
 		WARN_ON(1);
@@ -2678,6 +2688,32 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
  out:
 	rcu_read_unlock();
 	return skb;
+
+}
+
+struct sk_buff *
+ieee80211_beacon_get_template(struct ieee80211_hw *hw,
+			      struct ieee80211_vif *vif,
+			      struct ieee80211_mutable_offsets *offs)
+{
+	return __ieee80211_beacon_get(hw, vif, offs, true);
+}
+EXPORT_SYMBOL(ieee80211_beacon_get_template);
+
+struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
+					 struct ieee80211_vif *vif,
+					 u16 *tim_offset, u16 *tim_length)
+{
+	struct ieee80211_mutable_offsets offs = {};
+	struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false);
+
+	if (tim_offset)
+		*tim_offset = offs.tim_offset;
+
+	if (tim_length)
+		*tim_length = offs.tim_length;
+
+	return bcn;
 }
 EXPORT_SYMBOL(ieee80211_beacon_get_tim);
 
-- 
cgit v1.2.3-71-gd317


From 1af586c9116cdf6863823a830593c48cd9bcecde Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Fri, 9 May 2014 14:11:50 +0300
Subject: mac80211: Handle the CSA counters correctly

Make the beacon CSA counters part of ieee80211_mutable_offsets and don't
decrement CSA counters when generating a beacon template. This permits the
driver to offload the CSA counters handling. Since mac80211 updates the probe
responses with the correct counter, the driver should sync the counter's value
with mac80211 using ieee80211_csa_update_counter function.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     | 23 +++++++++++++-
 net/mac80211/cfg.c         |  4 +--
 net/mac80211/ieee80211_i.h |  2 --
 net/mac80211/tx.c          | 76 ++++++++++++++++++++++++++++++++++------------
 4 files changed, 80 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e6521261a585..982d2cd80166 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3411,14 +3411,20 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
  */
 void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets);
 
+#define IEEE80211_MAX_CSA_COUNTERS_NUM 2
+
 /**
  * struct ieee80211_mutable_offsets - mutable beacon offsets
  * @tim_offset: position of TIM element
  * @tim_length: size of TIM element
+ * @csa_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets to CSA counters.
+ *	This array can contain zero values which should be ignored.
  */
 struct ieee80211_mutable_offsets {
 	u16 tim_offset;
 	u16 tim_length;
+
+	u16 csa_counter_offs[IEEE80211_MAX_CSA_COUNTERS_NUM];
 };
 
 /**
@@ -3433,7 +3439,8 @@ struct ieee80211_mutable_offsets {
  *
  * This function should be used if the beacon frames are generated by the
  * device, and then the driver must use the returned beacon as the template
- * The driver is responsible to update the DTIM count.
+ * The driver or the device are responsible to update the DTIM and, when
+ * applicable, the CSA count.
  *
  * The driver is responsible for freeing the returned skb.
  *
@@ -3485,6 +3492,20 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
 	return ieee80211_beacon_get_tim(hw, vif, NULL, NULL);
 }
 
+/**
+ * ieee80211_csa_update_counter - request mac80211 to decrement the csa counter
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * The csa counter should be updated after each beacon transmission.
+ * This function is called implicitly when
+ * ieee80211_beacon_get/ieee80211_beacon_get_tim are called, however if the
+ * beacon frames are generated by the device, the driver should call this
+ * function after each beacon transmission to sync mac80211's csa counters.
+ *
+ * Return: new csa counter value
+ */
+u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif);
+
 /**
  * ieee80211_csa_finish - notify mac80211 about channel switch
  * @vif: &struct ieee80211_vif pointer from the add_interface callback.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index d44dca56b8ff..bfd2534e5a4d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3502,10 +3502,10 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 	     sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
 	    params->n_csa_offsets) {
 		int i;
+		u8 c = sdata->csa_current_counter;
 
 		for (i = 0; i < params->n_csa_offsets; i++)
-			data[params->csa_offsets[i]] =
-					sdata->csa_current_counter;
+			data[params->csa_offsets[i]] = c;
 	}
 
 	IEEE80211_SKB_CB(skb)->flags = flags;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 05ed592d8bbe..57e0b2682cef 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -70,8 +70,6 @@ struct ieee80211_local;
 
 #define IEEE80211_DEAUTH_FRAME_LEN	(24 /* hdr */ + 2 /* reason */)
 
-#define IEEE80211_MAX_CSA_COUNTERS_NUM 2
-
 struct ieee80211_fragment_entry {
 	unsigned long first_frag_time;
 	unsigned int seq;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 509456e5722d..5214686d9fd1 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2416,13 +2416,14 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
 	return 0;
 }
 
-static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
-				 struct beacon_data *beacon)
+static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata,
+			      struct beacon_data *beacon)
 {
 	struct probe_resp *resp;
 	u8 *beacon_data;
 	size_t beacon_data_len;
 	int i;
+	u8 count = sdata->csa_current_counter;
 
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP:
@@ -2450,16 +2451,7 @@ static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
 			if (WARN_ON(counter_offset_beacon >= beacon_data_len))
 				return;
 
-			/* Warn if the driver did not check for/react to csa
-			 * completeness.  A beacon with CSA counter set to 0
-			 * should never occur, because a counter of 1 means
-			 * switch just before the next beacon.
-			 */
-			if (WARN_ON(beacon_data[counter_offset_beacon] == 1))
-				return;
-
-			beacon_data[counter_offset_beacon] =
-				sdata->csa_current_counter - 1;
+			beacon_data[counter_offset_beacon] = count;
 		}
 
 		if (sdata->vif.type == NL80211_IFTYPE_AP &&
@@ -2474,14 +2466,24 @@ static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
 				rcu_read_unlock();
 				return;
 			}
-			resp->data[counter_offset_presp] =
-				sdata->csa_current_counter - 1;
+			resp->data[counter_offset_presp] = count;
 			rcu_read_unlock();
 		}
 	}
+}
+
+u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 
 	sdata->csa_current_counter--;
+
+	/* the counter should never reach 0 */
+	WARN_ON(!sdata->csa_current_counter);
+
+	return sdata->csa_current_counter;
 }
+EXPORT_SYMBOL(ieee80211_csa_update_counter);
 
 bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
 {
@@ -2552,6 +2554,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 	enum ieee80211_band band;
 	struct ieee80211_tx_rate_control txrc;
 	struct ieee80211_chanctx_conf *chanctx_conf;
+	int csa_off_base = 0;
 
 	rcu_read_lock();
 
@@ -2569,8 +2572,12 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 		struct beacon_data *beacon = rcu_dereference(ap->beacon);
 
 		if (beacon) {
-			if (sdata->vif.csa_active)
-				ieee80211_update_csa(sdata, beacon);
+			if (sdata->vif.csa_active) {
+				if (!is_template)
+					ieee80211_csa_update_counter(vif);
+
+				ieee80211_set_csa(sdata, beacon);
+			}
 
 			/*
 			 * headroom, head length,
@@ -2593,6 +2600,9 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 			if (offs) {
 				offs->tim_offset = beacon->head_len;
 				offs->tim_length = skb->len - beacon->head_len;
+
+				/* for AP the csa offsets are from tail */
+				csa_off_base = skb->len;
 			}
 
 			if (beacon->tail)
@@ -2608,9 +2618,12 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 		if (!presp)
 			goto out;
 
-		if (sdata->vif.csa_active)
-			ieee80211_update_csa(sdata, presp);
+		if (sdata->vif.csa_active) {
+			if (!is_template)
+				ieee80211_csa_update_counter(vif);
 
+			ieee80211_set_csa(sdata, presp);
+		}
 
 		skb = dev_alloc_skb(local->tx_headroom + presp->head_len +
 				    local->hw.extra_beacon_tailroom);
@@ -2630,8 +2643,17 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 		if (!bcn)
 			goto out;
 
-		if (sdata->vif.csa_active)
-			ieee80211_update_csa(sdata, bcn);
+		if (sdata->vif.csa_active) {
+			if (!is_template)
+				/* TODO: For mesh csa_counter is in TU, so
+				 * decrementing it by one isn't correct, but
+				 * for now we leave it consistent with overall
+				 * mac80211's behavior.
+				 */
+				ieee80211_csa_update_counter(vif);
+
+			ieee80211_set_csa(sdata, bcn);
+		}
 
 		if (ifmsh->sync_ops)
 			ifmsh->sync_ops->adjust_tbtt(sdata, bcn);
@@ -2658,6 +2680,20 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 		goto out;
 	}
 
+	/* CSA offsets */
+	if (offs) {
+		int i;
+
+		for (i = 0; i < IEEE80211_MAX_CSA_COUNTERS_NUM; i++) {
+			u16 csa_off = sdata->csa_counter_offset_beacon[i];
+
+			if (!csa_off)
+				continue;
+
+			offs->csa_counter_offs[i] = csa_off_base + csa_off;
+		}
+	}
+
 	band = chanctx_conf->def.chan->band;
 
 	info = IEEE80211_SKB_CB(skb);
-- 
cgit v1.2.3-71-gd317


From 4449bf927b61bdb4389393c6fea6837214d1ace7 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 6 May 2014 13:10:24 -0400
Subject: tracing: Add __bitmask() macro to trace events to cpumasks and other
 bitmasks

Being able to show a cpumask of events can be useful as some events
may affect only some CPUs. There is no standard way to record the
cpumask and converting it to a string is rather expensive during
the trace as traces happen in hotpaths. It would be better to record
the raw event mask and be able to parse it at print time.

The following macros were added for use with the TRACE_EVENT() macro:

  __bitmask()
  __assign_bitmask()
  __get_bitmask()

To test this, I added this to the sched_migrate_task event, which
looked like this:

TRACE_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu, const struct cpumask *cpus),

	TP_ARGS(p, dest_cpu, cpus),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	orig_cpu		)
		__field(	int,	dest_cpu		)
		__bitmask(	cpumask, num_possible_cpus()	)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio;
		__entry->orig_cpu	= task_cpu(p);
		__entry->dest_cpu	= dest_cpu;
		__assign_bitmask(cpumask, cpumask_bits(cpus), num_possible_cpus());
	),

	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d cpumask=%s",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->orig_cpu, __entry->dest_cpu,
		  __get_bitmask(cpumask))
);

With the output of:

        ksmtuned-3613  [003] d..2   485.220508: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=3 dest_cpu=2 cpumask=00000000,0000000f
     migration/1-13    [001] d..5   485.221202: sched_migrate_task: comm=ksmtuned pid=3614 prio=120 orig_cpu=1 dest_cpu=0 cpumask=00000000,0000000f
             awk-3615  [002] d.H5   485.221747: sched_migrate_task: comm=rcu_preempt pid=7 prio=120 orig_cpu=0 dest_cpu=1 cpumask=00000000,000000ff
     migration/2-18    [002] d..5   485.222062: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=2 dest_cpu=3 cpumask=00000000,0000000f

Link: http://lkml.kernel.org/r/1399377998-14870-6-git-send-email-javi.merino@arm.com
Link: http://lkml.kernel.org/r/20140506132238.22e136d1@gandalf.local.home

Suggested-by: Javi Merino <javi.merino@arm.com>
Tested-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  3 +++
 include/linux/trace_seq.h    | 10 ++++++++
 include/trace/ftrace.h       | 57 +++++++++++++++++++++++++++++++++++++++++++-
 kernel/trace/trace_output.c  | 41 +++++++++++++++++++++++++++++++
 4 files changed, 110 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index d16da3e53bc7..cff3106ffe2c 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -38,6 +38,9 @@ const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
 								 *symbol_array);
 #endif
 
+const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+				     unsigned int bitmask_size);
+
 const char *ftrace_print_hex_seq(struct trace_seq *p,
 				 const unsigned char *buf, int len);
 
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index a32d86ec8bf2..136116924d8d 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -46,6 +46,9 @@ extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
 extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
 extern int trace_seq_path(struct trace_seq *s, const struct path *path);
 
+extern int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+			     int nmaskbits);
+
 #else /* CONFIG_TRACING */
 static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 {
@@ -57,6 +60,13 @@ trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 	return 0;
 }
 
+static inline int
+trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+		  int nmaskbits)
+{
+	return 0;
+}
+
 static inline int trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
 	return 0;
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0a1a4f7caf09..9b7a989dcbcc 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -53,6 +53,9 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
+
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
@@ -128,6 +131,9 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 	struct ftrace_data_offsets_##call {				\
@@ -200,6 +206,15 @@
 #undef __get_str
 #define __get_str(field) (char *)__get_dynamic_array(field)
 
+#undef __get_bitmask
+#define __get_bitmask(field)						\
+	({								\
+		void *__bitmask = __get_dynamic_array(field);		\
+		unsigned int __bitmask_size;				\
+		__bitmask_size = (__entry->__data_loc_##field >> 16) & 0xffff; \
+		ftrace_print_bitmask_seq(p, __bitmask, __bitmask_size);	\
+	})
+
 #undef __print_flags
 #define __print_flags(flag, delim, flag_array...)			\
 	({								\
@@ -322,6 +337,9 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = {	\
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
 static int notrace __init						\
@@ -372,6 +390,29 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 #define __string(item, src) __dynamic_array(char, item,			\
 		    strlen((src) ? (const char *)(src) : "(null)") + 1)
 
+/*
+ * __bitmask_size_in_bytes_raw is the number of bytes needed to hold
+ * num_possible_cpus().
+ */
+#define __bitmask_size_in_bytes_raw(nr_bits)	\
+	(((nr_bits) + 7) / 8)
+
+#define __bitmask_size_in_longs(nr_bits)			\
+	((__bitmask_size_in_bytes_raw(nr_bits) +		\
+	  ((BITS_PER_LONG / 8) - 1)) / (BITS_PER_LONG / 8))
+
+/*
+ * __bitmask_size_in_bytes is the number of bytes needed to hold
+ * num_possible_cpus() padded out to the nearest long. This is what
+ * is saved in the buffer, just to be consistent.
+ */
+#define __bitmask_size_in_bytes(nr_bits)				\
+	(__bitmask_size_in_longs(nr_bits) * (BITS_PER_LONG / 8))
+
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item,	\
+					 __bitmask_size_in_longs(nr_bits))
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static inline notrace int ftrace_get_offsets_##call(			\
@@ -513,12 +554,22 @@ static inline notrace int ftrace_get_offsets_##call(			\
 	__entry->__data_loc_##item = __data_offsets.item;
 
 #undef __string
-#define __string(item, src) __dynamic_array(char, item, -1)       	\
+#define __string(item, src) __dynamic_array(char, item, -1)
 
 #undef __assign_str
 #define __assign_str(dst, src)						\
 	strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)");
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
+#undef __get_bitmask
+#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
+#undef __assign_bitmask
+#define __assign_bitmask(dst, src, nr_bits)					\
+	memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
+
 #undef TP_fast_assign
 #define TP_fast_assign(args...) args
 
@@ -586,6 +637,7 @@ static inline void ftrace_test_probe_##call(void)			\
 #undef __print_hex
 #undef __get_dynamic_array
 #undef __get_str
+#undef __get_bitmask
 
 #undef TP_printk
 #define TP_printk(fmt, args...) "\"" fmt "\", "  __stringify(args)
@@ -651,6 +703,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
 #undef __get_str
 #define __get_str(field) (char *)__get_dynamic_array(field)
 
+#undef __get_bitmask
+#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
 #undef __perf_addr
 #define __perf_addr(a)	(__addr = (a))
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index a436de18aa99..f3dad80c20b2 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -125,6 +125,34 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 }
 EXPORT_SYMBOL_GPL(trace_seq_printf);
 
+/**
+ * trace_seq_bitmask - put a list of longs as a bitmask print output
+ * @s:		trace sequence descriptor
+ * @maskp:	points to an array of unsigned longs that represent a bitmask
+ * @nmaskbits:	The number of bits that are valid in @maskp
+ *
+ * It returns 0 if the trace oversizes the buffer's free
+ * space, 1 otherwise.
+ *
+ * Writes a ASCII representation of a bitmask string into @s.
+ */
+int
+trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+		  int nmaskbits)
+{
+	int len = (PAGE_SIZE - 1) - s->len;
+	int ret;
+
+	if (s->full || !len)
+		return 0;
+
+	ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits);
+	s->len += ret;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(trace_seq_bitmask);
+
 /**
  * trace_seq_vprintf - sequence printing of trace information
  * @s: trace sequence descriptor
@@ -398,6 +426,19 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
 EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);
 #endif
 
+const char *
+ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+			 unsigned int bitmask_size)
+{
+	const char *ret = p->buffer + p->len;
+
+	trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq);
+
 const char *
 ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
 {
-- 
cgit v1.2.3-71-gd317


From fcd77db07dd2b8d35e0db0d1209f2ce1bb05531e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 15 May 2014 13:43:14 -0400
Subject: net: Fix CONFIG_SYSCTL ifdef test.

> include/net/ip.h:211:5: warning: "CONFIG_SYSCTL" is not defined [-Wundef]
>  #if CONFIG_SYSCTL
>      ^

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 512bcd5dabac..2e4947895d75 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -208,7 +208,7 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
 
 void inet_get_local_port_range(struct net *net, int *low, int *high);
 
-#if CONFIG_SYSCTL
+#ifdef CONFIG_SYSCTL
 static inline int inet_is_local_reserved_port(struct net *net, int port)
 {
 	if (!net->ipv4.sysctl_local_reserved_ports)
-- 
cgit v1.2.3-71-gd317


From c3a6114f31600b94ee10ebf62e4d493b401ade87 Mon Sep 17 00:00:00 2001
From: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Date: Wed, 14 May 2014 17:43:06 +0200
Subject: ieee802154: add definitions for link-layer security and header
 functions

When dealing with 802.15.4, one often has to know the maximum payload
size for a given packet. This depends on many factors, one of which is
whether or not a security header is present in the frame. These
definitions and functions provide an easy way for any upper layer to
calculate the maximum payload size for a packet. The first obvious user
for this is 6lowpan, which duplicates this calculation and gets it
partially wrong because it ignores security headers.

Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ieee802154.h        |  9 +++++++
 include/net/ieee802154_netdev.h | 29 +++++++++++++++++++++++
 net/ieee802154/header_ops.c     | 52 +++++++++++++++++++++++++++++++++++------
 3 files changed, 83 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/ieee802154.h b/include/net/ieee802154.h
index c7ae0ac528dc..0aa7122e8f15 100644
--- a/include/net/ieee802154.h
+++ b/include/net/ieee802154.h
@@ -79,6 +79,15 @@
 #define IEEE802154_SCF_KEY_SHORT_INDEX		2
 #define IEEE802154_SCF_KEY_HW_INDEX		3
 
+#define IEEE802154_SCF_SECLEVEL_NONE		0
+#define IEEE802154_SCF_SECLEVEL_MIC32		1
+#define IEEE802154_SCF_SECLEVEL_MIC64		2
+#define IEEE802154_SCF_SECLEVEL_MIC128		3
+#define IEEE802154_SCF_SECLEVEL_ENC		4
+#define IEEE802154_SCF_SECLEVEL_ENC_MIC32	5
+#define IEEE802154_SCF_SECLEVEL_ENC_MIC64	6
+#define IEEE802154_SCF_SECLEVEL_ENC_MIC128	7
+
 /* MAC footer size */
 #define IEEE802154_MFR_SIZE	2 /* 2 octets */
 
diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index 5a719ca892f4..6e4d3e1071b5 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -27,6 +27,7 @@
 #ifndef IEEE802154_NETDEVICE_H
 #define IEEE802154_NETDEVICE_H
 
+#include <net/ieee802154.h>
 #include <net/af_ieee802154.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
@@ -114,6 +115,34 @@ int ieee802154_hdr_pull(struct sk_buff *skb, struct ieee802154_hdr *hdr);
 int ieee802154_hdr_peek_addrs(const struct sk_buff *skb,
 			      struct ieee802154_hdr *hdr);
 
+/* parses the full 802.15.4 header a given skb and stores them into hdr,
+ * performing pan id decompression and length checks to be suitable for use in
+ * header_ops.parse
+ */
+int ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr);
+
+int ieee802154_max_payload(const struct ieee802154_hdr *hdr);
+
+static inline int
+ieee802154_sechdr_authtag_len(const struct ieee802154_sechdr *sec)
+{
+	switch (sec->level) {
+	case IEEE802154_SCF_SECLEVEL_MIC32:
+	case IEEE802154_SCF_SECLEVEL_ENC_MIC32:
+		return 4;
+	case IEEE802154_SCF_SECLEVEL_MIC64:
+	case IEEE802154_SCF_SECLEVEL_ENC_MIC64:
+		return 8;
+	case IEEE802154_SCF_SECLEVEL_MIC128:
+	case IEEE802154_SCF_SECLEVEL_ENC_MIC128:
+		return 16;
+	case IEEE802154_SCF_SECLEVEL_NONE:
+	case IEEE802154_SCF_SECLEVEL_ENC:
+	default:
+		return 0;
+	}
+}
+
 static inline int ieee802154_hdr_length(struct sk_buff *skb)
 {
 	struct ieee802154_hdr hdr;
diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c
index bed42a48408c..c09294e39ca6 100644
--- a/net/ieee802154/header_ops.c
+++ b/net/ieee802154/header_ops.c
@@ -195,15 +195,16 @@ ieee802154_hdr_get_sechdr(const u8 *buf, struct ieee802154_sechdr *hdr)
 	return pos;
 }
 
+static int ieee802154_sechdr_lengths[4] = {
+	[IEEE802154_SCF_KEY_IMPLICIT] = 5,
+	[IEEE802154_SCF_KEY_INDEX] = 6,
+	[IEEE802154_SCF_KEY_SHORT_INDEX] = 10,
+	[IEEE802154_SCF_KEY_HW_INDEX] = 14,
+};
+
 static int ieee802154_hdr_sechdr_len(u8 sc)
 {
-	switch (IEEE802154_SCF_KEY_ID_MODE(sc)) {
-	case IEEE802154_SCF_KEY_IMPLICIT: return 5;
-	case IEEE802154_SCF_KEY_INDEX: return 6;
-	case IEEE802154_SCF_KEY_SHORT_INDEX: return 10;
-	case IEEE802154_SCF_KEY_HW_INDEX: return 14;
-	default: return -EINVAL;
-	}
+	return ieee802154_sechdr_lengths[IEEE802154_SCF_KEY_ID_MODE(sc)];
 }
 
 static int ieee802154_hdr_minlen(const struct ieee802154_hdr *hdr)
@@ -285,3 +286,40 @@ ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr)
 	return pos;
 }
 EXPORT_SYMBOL_GPL(ieee802154_hdr_peek_addrs);
+
+int
+ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr)
+{
+	const u8 *buf = skb_mac_header(skb);
+	int pos;
+
+	pos = ieee802154_hdr_peek_addrs(skb, hdr);
+	if (pos < 0)
+		return -EINVAL;
+
+	if (hdr->fc.security_enabled) {
+		u8 key_id_mode = IEEE802154_SCF_KEY_ID_MODE(*(buf + pos));
+		int want = pos + ieee802154_sechdr_lengths[key_id_mode];
+
+		if (buf + want > skb_tail_pointer(skb))
+			return -EINVAL;
+
+		pos += ieee802154_hdr_get_sechdr(buf + pos, &hdr->sec);
+	}
+
+	return pos;
+}
+EXPORT_SYMBOL_GPL(ieee802154_hdr_peek);
+
+int ieee802154_max_payload(const struct ieee802154_hdr *hdr)
+{
+	int hlen = ieee802154_hdr_minlen(hdr);
+
+	if (hdr->fc.security_enabled) {
+		hlen += ieee802154_sechdr_lengths[hdr->sec.key_id_mode] - 1;
+		hlen += ieee802154_sechdr_authtag_len(&hdr->sec);
+	}
+
+	return IEEE802154_MTU - hlen - IEEE802154_MFR_SIZE;
+}
+EXPORT_SYMBOL_GPL(ieee802154_max_payload);
-- 
cgit v1.2.3-71-gd317


From 32edc40ae65cf84e1ab69f6f8316ce81559e115d Mon Sep 17 00:00:00 2001
From: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Date: Wed, 14 May 2014 17:43:08 +0200
Subject: ieee802154: change _cb handling slightly

The current mac_cb handling of ieee802154 is rather awkward and limited.
Decompose the single flags field into multiple fields with the meanings
of each subfield of the flags field to make future extensions (for
example, link-layer security) easier. Also don't set the frame sequence
number in upper layers, since that's a thing the MAC is supposed to set
on frame transmit - we set it on header creation, but assuming that
upper layers do not blindly duplicate our headers, this is fine.

Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ieee802154_netdev.h | 25 +++++++------------------
 net/ieee802154/6lowpan_rtnl.c   |  9 ++++-----
 net/ieee802154/dgram.c          | 10 ++++------
 net/mac802154/rx.c              |  2 --
 net/mac802154/wpan.c            | 26 +++++++++++++-------------
 5 files changed, 28 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index 6e4d3e1071b5..bc9a7475e57e 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -222,8 +222,9 @@ static inline void ieee802154_addr_to_sa(struct ieee802154_addr_sa *sa,
  */
 struct ieee802154_mac_cb {
 	u8 lqi;
-	u8 flags;
-	u8 seq;
+	u8 type;
+	bool ackreq;
+	bool secen;
 	struct ieee802154_addr source;
 	struct ieee802154_addr dest;
 };
@@ -233,24 +234,12 @@ static inline struct ieee802154_mac_cb *mac_cb(struct sk_buff *skb)
 	return (struct ieee802154_mac_cb *)skb->cb;
 }
 
-#define MAC_CB_FLAG_TYPEMASK		((1 << 3) - 1)
-
-#define MAC_CB_FLAG_ACKREQ		(1 << 3)
-#define MAC_CB_FLAG_SECEN		(1 << 4)
-
-static inline bool mac_cb_is_ackreq(struct sk_buff *skb)
-{
-	return mac_cb(skb)->flags & MAC_CB_FLAG_ACKREQ;
-}
-
-static inline bool mac_cb_is_secen(struct sk_buff *skb)
+static inline struct ieee802154_mac_cb *mac_cb_init(struct sk_buff *skb)
 {
-	return mac_cb(skb)->flags & MAC_CB_FLAG_SECEN;
-}
+	BUILD_BUG_ON(sizeof(struct ieee802154_mac_cb) > sizeof(skb->cb));
 
-static inline int mac_cb_type(struct sk_buff *skb)
-{
-	return mac_cb(skb)->flags & MAC_CB_FLAG_TYPEMASK;
+	memset(skb->cb, 0, sizeof(struct ieee802154_mac_cb));
+	return mac_cb(skb);
 }
 
 #define IEEE802154_MAC_SCAN_ED		0
diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c
index 0f5a69ed746d..d0191c53cbd8 100644
--- a/net/ieee802154/6lowpan_rtnl.c
+++ b/net/ieee802154/6lowpan_rtnl.c
@@ -92,6 +92,7 @@ static int lowpan_header_create(struct sk_buff *skb,
 	const u8 *saddr = _saddr;
 	const u8 *daddr = _daddr;
 	struct ieee802154_addr sa, da;
+	struct ieee802154_mac_cb *cb = mac_cb_init(skb);
 
 	/* TODO:
 	 * if this package isn't ipv6 one, where should it be routed?
@@ -115,8 +116,7 @@ static int lowpan_header_create(struct sk_buff *skb,
 	 * from MAC subif of the 'dev' and 'real_dev' network devices, but
 	 * this isn't implemented in mainline yet, so currently we assign 0xff
 	 */
-	mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
-	mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
+	cb->type = IEEE802154_FC_TYPE_DATA;
 
 	/* prepare wpan address data */
 	sa.mode = IEEE802154_ADDR_LONG;
@@ -135,11 +135,10 @@ static int lowpan_header_create(struct sk_buff *skb,
 	} else {
 		da.mode = IEEE802154_ADDR_LONG;
 		da.extended_addr = ieee802154_devaddr_from_raw(daddr);
-
-		/* request acknowledgment */
-		mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
 	}
 
+	cb->ackreq = !lowpan_is_addr_broadcast(daddr);
+
 	return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
 			type, (void *)&da, (void *)&sa, 0);
 }
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index 786437bc0c08..d95e2e1bdf54 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -209,6 +209,7 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
 	struct net_device *dev;
 	unsigned int mtu;
 	struct sk_buff *skb;
+	struct ieee802154_mac_cb *cb;
 	struct dgram_sock *ro = dgram_sk(sk);
 	int hlen, tlen;
 	int err;
@@ -249,18 +250,15 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
 
 	skb_reset_network_header(skb);
 
-	mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
-	if (ro->want_ack)
-		mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
+	cb = mac_cb_init(skb);
+	cb->type = IEEE802154_FC_TYPE_DATA;
+	cb->ackreq = ro->want_ack;
 
-	mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
 	err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &ro->dst_addr,
 			ro->bound ? &ro->src_addr : NULL, size);
 	if (err < 0)
 		goto out_skb;
 
-	skb_reset_mac_header(skb);
-
 	err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
 	if (err < 0)
 		goto out_skb;
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 03855b0677cc..0597b96dc9ba 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -59,8 +59,6 @@ mac802154_subif_rx(struct ieee802154_dev *hw, struct sk_buff *skb, u8 lqi)
 	skb->protocol = htons(ETH_P_IEEE802154);
 	skb_reset_mac_header(skb);
 
-	BUILD_BUG_ON(sizeof(struct ieee802154_mac_cb) > sizeof(skb->cb));
-
 	if (!(priv->hw.flags & IEEE802154_HW_OMIT_CKSUM)) {
 		u16 crc;
 
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index cb34064d05f6..bb49e88c460f 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -192,15 +192,17 @@ static int mac802154_header_create(struct sk_buff *skb,
 {
 	struct ieee802154_hdr hdr;
 	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	struct ieee802154_mac_cb *cb = mac_cb(skb);
 	int hlen;
 
 	if (!daddr)
 		return -EINVAL;
 
 	memset(&hdr.fc, 0, sizeof(hdr.fc));
-	hdr.fc.type = mac_cb_type(skb);
-	hdr.fc.security_enabled = mac_cb_is_secen(skb);
-	hdr.fc.ack_request = mac_cb_is_ackreq(skb);
+	hdr.fc.type = cb->type;
+	hdr.fc.security_enabled = cb->secen;
+	hdr.fc.ack_request = cb->ackreq;
+	hdr.seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
 
 	if (!saddr) {
 		spin_lock_bh(&priv->mib_lock);
@@ -391,12 +393,12 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb)
 	sdata->dev->stats.rx_packets++;
 	sdata->dev->stats.rx_bytes += skb->len;
 
-	switch (mac_cb_type(skb)) {
+	switch (mac_cb(skb)->type) {
 	case IEEE802154_FC_TYPE_DATA:
 		return mac802154_process_data(sdata->dev, skb);
 	default:
 		pr_warn("ieee802154: bad frame received (type = %d)\n",
-			mac_cb_type(skb));
+			mac_cb(skb)->type);
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
@@ -423,6 +425,7 @@ static int mac802154_parse_frame_start(struct sk_buff *skb)
 {
 	int hlen;
 	struct ieee802154_hdr hdr;
+	struct ieee802154_mac_cb *cb = mac_cb_init(skb);
 
 	hlen = ieee802154_hdr_pull(skb, &hdr);
 	if (hlen < 0)
@@ -433,18 +436,15 @@ static int mac802154_parse_frame_start(struct sk_buff *skb)
 	pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr.fc),
 		 hdr.seq);
 
-	mac_cb(skb)->flags = hdr.fc.type;
-
-	if (hdr.fc.ack_request)
-		mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
-	if (hdr.fc.security_enabled)
-		mac_cb(skb)->flags |= MAC_CB_FLAG_SECEN;
+	cb->type = hdr.fc.type;
+	cb->ackreq = hdr.fc.ack_request;
+	cb->secen = hdr.fc.security_enabled;
 
 	mac802154_print_addr("destination", &hdr.dest);
 	mac802154_print_addr("source", &hdr.source);
 
-	mac_cb(skb)->source = hdr.source;
-	mac_cb(skb)->dest = hdr.dest;
+	cb->source = hdr.source;
+	cb->dest = hdr.dest;
 
 	if (hdr.fc.security_enabled) {
 		u64 key;
-- 
cgit v1.2.3-71-gd317


From 622582786c9e041d0bd52bde201787adeab249f8 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 13 May 2014 19:50:46 -0700
Subject: net: filter: x86: internal BPF JIT

Maps all internal BPF instructions into x86_64 instructions.
This patch replaces original BPF x64 JIT with internal BPF x64 JIT.
sysctl net.core.bpf_jit_enable is reused as on/off switch.

Performance:

1. old BPF JIT and internal BPF JIT generate equivalent x86_64 code.
  No performance difference is observed for filters that were JIT-able before

Example assembler code for BPF filter "tcpdump port 22"

original BPF -> old JIT:            original BPF -> internal BPF -> new JIT:
   0:   push   %rbp                      0:     push   %rbp
   1:   mov    %rsp,%rbp                 1:     mov    %rsp,%rbp
   4:   sub    $0x60,%rsp                4:     sub    $0x228,%rsp
   8:   mov    %rbx,-0x8(%rbp)           b:     mov    %rbx,-0x228(%rbp) // prologue
                                        12:     mov    %r13,-0x220(%rbp)
                                        19:     mov    %r14,-0x218(%rbp)
                                        20:     mov    %r15,-0x210(%rbp)
                                        27:     xor    %eax,%eax         // clear A
   c:   xor    %ebx,%ebx                29:     xor    %r13,%r13         // clear X
   e:   mov    0x68(%rdi),%r9d          2c:     mov    0x68(%rdi),%r9d
  12:   sub    0x6c(%rdi),%r9d          30:     sub    0x6c(%rdi),%r9d
  16:   mov    0xd8(%rdi),%r8           34:     mov    0xd8(%rdi),%r10
                                        3b:     mov    %rdi,%rbx
  1d:   mov    $0xc,%esi                3e:     mov    $0xc,%esi
  22:   callq  0xffffffffe1021e15       43:     callq  0xffffffffe102bd75
  27:   cmp    $0x86dd,%eax             48:     cmp    $0x86dd,%rax
  2c:   jne    0x0000000000000069       4f:     jne    0x000000000000009a
  2e:   mov    $0x14,%esi               51:     mov    $0x14,%esi
  33:   callq  0xffffffffe1021e31       56:     callq  0xffffffffe102bd91
  38:   cmp    $0x84,%eax               5b:     cmp    $0x84,%rax
  3d:   je     0x0000000000000049       62:     je     0x0000000000000074
  3f:   cmp    $0x6,%eax                64:     cmp    $0x6,%rax
  42:   je     0x0000000000000049       68:     je     0x0000000000000074
  44:   cmp    $0x11,%eax               6a:     cmp    $0x11,%rax
  47:   jne    0x00000000000000c6       6e:     jne    0x0000000000000117
  49:   mov    $0x36,%esi               74:     mov    $0x36,%esi
  4e:   callq  0xffffffffe1021e15       79:     callq  0xffffffffe102bd75
  53:   cmp    $0x16,%eax               7e:     cmp    $0x16,%rax
  56:   je     0x00000000000000bf       82:     je     0x0000000000000110
  58:   mov    $0x38,%esi               88:     mov    $0x38,%esi
  5d:   callq  0xffffffffe1021e15       8d:     callq  0xffffffffe102bd75
  62:   cmp    $0x16,%eax               92:     cmp    $0x16,%rax
  65:   je     0x00000000000000bf       96:     je     0x0000000000000110
  67:   jmp    0x00000000000000c6       98:     jmp    0x0000000000000117
  69:   cmp    $0x800,%eax              9a:     cmp    $0x800,%rax
  6e:   jne    0x00000000000000c6       a1:     jne    0x0000000000000117
  70:   mov    $0x17,%esi               a3:     mov    $0x17,%esi
  75:   callq  0xffffffffe1021e31       a8:     callq  0xffffffffe102bd91
  7a:   cmp    $0x84,%eax               ad:     cmp    $0x84,%rax
  7f:   je     0x000000000000008b       b4:     je     0x00000000000000c2
  81:   cmp    $0x6,%eax                b6:     cmp    $0x6,%rax
  84:   je     0x000000000000008b       ba:     je     0x00000000000000c2
  86:   cmp    $0x11,%eax               bc:     cmp    $0x11,%rax
  89:   jne    0x00000000000000c6       c0:     jne    0x0000000000000117
  8b:   mov    $0x14,%esi               c2:     mov    $0x14,%esi
  90:   callq  0xffffffffe1021e15       c7:     callq  0xffffffffe102bd75
  95:   test   $0x1fff,%ax              cc:     test   $0x1fff,%rax
  99:   jne    0x00000000000000c6       d3:     jne    0x0000000000000117
                                        d5:     mov    %rax,%r14
  9b:   mov    $0xe,%esi                d8:     mov    $0xe,%esi
  a0:   callq  0xffffffffe1021e44       dd:     callq  0xffffffffe102bd91 // MSH
                                        e2:     and    $0xf,%eax
                                        e5:     shl    $0x2,%eax
                                        e8:     mov    %rax,%r13
                                        eb:     mov    %r14,%rax
                                        ee:     mov    %r13,%rsi
  a5:   lea    0xe(%rbx),%esi           f1:     add    $0xe,%esi
  a8:   callq  0xffffffffe1021e0d       f4:     callq  0xffffffffe102bd6d
  ad:   cmp    $0x16,%eax               f9:     cmp    $0x16,%rax
  b0:   je     0x00000000000000bf       fd:     je     0x0000000000000110
                                        ff:     mov    %r13,%rsi
  b2:   lea    0x10(%rbx),%esi         102:     add    $0x10,%esi
  b5:   callq  0xffffffffe1021e0d      105:     callq  0xffffffffe102bd6d
  ba:   cmp    $0x16,%eax              10a:     cmp    $0x16,%rax
  bd:   jne    0x00000000000000c6      10e:     jne    0x0000000000000117
  bf:   mov    $0xffff,%eax            110:     mov    $0xffff,%eax
  c4:   jmp    0x00000000000000c8      115:     jmp    0x000000000000011c
  c6:   xor    %eax,%eax               117:     mov    $0x0,%eax
  c8:   mov    -0x8(%rbp),%rbx         11c:     mov    -0x228(%rbp),%rbx // epilogue
  cc:   leaveq                         123:     mov    -0x220(%rbp),%r13
  cd:   retq                           12a:     mov    -0x218(%rbp),%r14
                                       131:     mov    -0x210(%rbp),%r15
                                       138:     leaveq
                                       139:     retq

On fully cached SKBs both JITed functions take 12 nsec to execute.
BPF interpreter executes the program in 30 nsec.

The difference in generated assembler is due to the following:

Old BPF imlements LDX_MSH instruction via sk_load_byte_msh() helper function
inside bpf_jit.S.
New JIT removes the helper and does it explicitly, so ldx_msh cost
is the same for both JITs, but generated code looks longer.

New JIT has 4 registers to save, so prologue/epilogue are larger,
but the cost is within noise on x64.

Old JIT checks whether first insn clears A and if not emits 'xor %eax,%eax'.
New JIT clears %rax unconditionally.

2. old BPF JIT doesn't support ANC_NLATTR, ANC_PAY_OFFSET, ANC_RANDOM
  extensions. New JIT supports all BPF extensions.
  Performance of such filters improves 2-4 times depending on a filter.
  The longer the filter the higher performance gain.
  Synthetic benchmarks with many ancillary loads see 20x speedup
  which seems to be the maximum gain from JIT

Notes:

. net.core.bpf_jit_enable=2 + tools/net/bpf_jit_disasm is still functional
  and can be used to see generated assembler

. there are two jit_compile() functions and code flow for classic filters is:
  sk_attach_filter() - load classic BPF
  bpf_jit_compile() - try to JIT from classic BPF
  sk_convert_filter() - convert classic to internal
  bpf_int_jit_compile() - JIT from internal BPF

  seccomp and tracing filters will just call bpf_int_jit_compile()

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/net/bpf_jit.S      |   77 +--
 arch/x86/net/bpf_jit_comp.c | 1314 +++++++++++++++++++++++--------------------
 include/linux/filter.h      |    3 +
 net/core/filter.c           |    9 +-
 4 files changed, 748 insertions(+), 655 deletions(-)

(limited to 'include')

diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 01495755701b..6440221ced0d 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -12,13 +12,16 @@
 
 /*
  * Calling convention :
- * rdi : skb pointer
+ * rbx : skb pointer (callee saved)
  * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r8  : copy of skb->data
+ * r10 : copy of skb->data
  * r9d : hlen = skb->len - skb->data_len
  */
-#define SKBDATA	%r8
+#define SKBDATA	%r10
 #define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
+#define MAX_BPF_STACK (512 /* from filter.h */ + \
+	32 /* space for rbx,r13,r14,r15 */ + \
+	8 /* space for skb_copy_bits */)
 
 sk_load_word:
 	.globl	sk_load_word
@@ -68,53 +71,31 @@ sk_load_byte_positive_offset:
 	movzbl	(SKBDATA,%rsi),%eax
 	ret
 
-/**
- * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
- *
- * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
- * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value
- */
-sk_load_byte_msh:
-	.globl	sk_load_byte_msh
-	test	%esi,%esi
-	js	bpf_slow_path_byte_msh_neg
-
-sk_load_byte_msh_positive_offset:
-	.globl	sk_load_byte_msh_positive_offset
-	cmp	%esi,%r9d      /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
-	jle	bpf_slow_path_byte_msh
-	movzbl	(SKBDATA,%rsi),%ebx
-	and	$15,%bl
-	shl	$2,%bl
-	ret
-
 /* rsi contains offset and can be scratched */
 #define bpf_slow_path_common(LEN)		\
-	push	%rdi;    /* save skb */		\
+	mov	%rbx, %rdi; /* arg1 == skb */	\
 	push	%r9;				\
 	push	SKBDATA;			\
 /* rsi already has offset */			\
 	mov	$LEN,%ecx;	/* len */	\
-	lea	-12(%rbp),%rdx;			\
+	lea	- MAX_BPF_STACK + 32(%rbp),%rdx;			\
 	call	skb_copy_bits;			\
 	test    %eax,%eax;			\
 	pop	SKBDATA;			\
-	pop	%r9;				\
-	pop	%rdi
+	pop	%r9;
 
 
 bpf_slow_path_word:
 	bpf_slow_path_common(4)
 	js	bpf_error
-	mov	-12(%rbp),%eax
+	mov	- MAX_BPF_STACK + 32(%rbp),%eax
 	bswap	%eax
 	ret
 
 bpf_slow_path_half:
 	bpf_slow_path_common(2)
 	js	bpf_error
-	mov	-12(%rbp),%ax
+	mov	- MAX_BPF_STACK + 32(%rbp),%ax
 	rol	$8,%ax
 	movzwl	%ax,%eax
 	ret
@@ -122,21 +103,11 @@ bpf_slow_path_half:
 bpf_slow_path_byte:
 	bpf_slow_path_common(1)
 	js	bpf_error
-	movzbl	-12(%rbp),%eax
-	ret
-
-bpf_slow_path_byte_msh:
-	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
-	bpf_slow_path_common(1)
-	js	bpf_error
-	movzbl	-12(%rbp),%eax
-	and	$15,%al
-	shl	$2,%al
-	xchg	%eax,%ebx
+	movzbl	- MAX_BPF_STACK + 32(%rbp),%eax
 	ret
 
 #define sk_negative_common(SIZE)				\
-	push	%rdi;	/* save skb */				\
+	mov	%rbx, %rdi; /* arg1 == skb */			\
 	push	%r9;						\
 	push	SKBDATA;					\
 /* rsi already has offset */					\
@@ -145,10 +116,8 @@ bpf_slow_path_byte_msh:
 	test	%rax,%rax;					\
 	pop	SKBDATA;					\
 	pop	%r9;						\
-	pop	%rdi;						\
 	jz	bpf_error
 
-
 bpf_slow_path_word_neg:
 	cmp	SKF_MAX_NEG_OFF, %esi	/* test range */
 	jl	bpf_error	/* offset lower -> error  */
@@ -179,22 +148,12 @@ sk_load_byte_negative_offset:
 	movzbl	(%rax), %eax
 	ret
 
-bpf_slow_path_byte_msh_neg:
-	cmp	SKF_MAX_NEG_OFF, %esi
-	jl	bpf_error
-sk_load_byte_msh_negative_offset:
-	.globl	sk_load_byte_msh_negative_offset
-	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
-	sk_negative_common(1)
-	movzbl	(%rax),%eax
-	and	$15,%al
-	shl	$2,%al
-	xchg	%eax,%ebx
-	ret
-
 bpf_error:
 # force a return 0 from jit handler
-	xor		%eax,%eax
-	mov		-8(%rbp),%rbx
+	xor	%eax,%eax
+	mov	- MAX_BPF_STACK(%rbp),%rbx
+	mov	- MAX_BPF_STACK + 8(%rbp),%r13
+	mov	- MAX_BPF_STACK + 16(%rbp),%r14
+	mov	- MAX_BPF_STACK + 24(%rbp),%r15
 	leaveq
 	ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index c5fa7c9cb665..92aef8fdac2f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1,6 +1,7 @@
 /* bpf_jit_comp.c : BPF JIT compiler
  *
  * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
+ * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -14,28 +15,16 @@
 #include <linux/if_vlan.h>
 #include <linux/random.h>
 
-/*
- * Conventions :
- *  EAX : BPF A accumulator
- *  EBX : BPF X accumulator
- *  RDI : pointer to skb   (first argument given to JIT function)
- *  RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
- *  ECX,EDX,ESI : scratch registers
- *  r9d : skb->len - skb->data_len (headlen)
- *  r8  : skb->data
- * -8(RBP) : saved RBX value
- * -16(RBP)..-80(RBP) : BPF_MEMWORDS values
- */
 int bpf_jit_enable __read_mostly;
 
 /*
  * assembly code in arch/x86/net/bpf_jit.S
  */
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
 extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
-extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_byte_positive_offset[];
 extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
-extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
+extern u8 sk_load_byte_negative_offset[];
 
 static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -56,30 +45,44 @@ static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 #define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
 #define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
 #define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
-#define EMIT1_off32(b1, off)	do { EMIT1(b1); EMIT(off, 4);} while (0)
-
-#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */
-#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */
+#define EMIT1_off32(b1, off) \
+	do {EMIT1(b1); EMIT(off, 4); } while (0)
+#define EMIT2_off32(b1, b2, off) \
+	do {EMIT2(b1, b2); EMIT(off, 4); } while (0)
+#define EMIT3_off32(b1, b2, b3, off) \
+	do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
+#define EMIT4_off32(b1, b2, b3, b4, off) \
+	do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
 
 static inline bool is_imm8(int value)
 {
 	return value <= 127 && value >= -128;
 }
 
-static inline bool is_near(int offset)
+static inline bool is_simm32(s64 value)
 {
-	return offset <= 127 && offset >= -128;
+	return value == (s64) (s32) value;
 }
 
-#define EMIT_JMP(offset)						\
-do {									\
-	if (offset) {							\
-		if (is_near(offset))					\
-			EMIT2(0xeb, offset); /* jmp .+off8 */		\
-		else							\
-			EMIT1_off32(0xe9, offset); /* jmp .+off32 */	\
-	}								\
-} while (0)
+/* mov A, X */
+#define EMIT_mov(A, X) \
+	do {if (A != X) \
+		EMIT3(add_2mod(0x48, A, X), 0x89, add_2reg(0xC0, A, X)); \
+	} while (0)
+
+static int bpf_size_to_x86_bytes(int bpf_size)
+{
+	if (bpf_size == BPF_W)
+		return 4;
+	else if (bpf_size == BPF_H)
+		return 2;
+	else if (bpf_size == BPF_B)
+		return 1;
+	else if (bpf_size == BPF_DW)
+		return 4; /* imm32 */
+	else
+		return 0;
+}
 
 /* list of x86 cond jumps opcodes (. + s8)
  * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
@@ -90,27 +93,8 @@ do {									\
 #define X86_JNE 0x75
 #define X86_JBE 0x76
 #define X86_JA  0x77
-
-#define EMIT_COND_JMP(op, offset)				\
-do {								\
-	if (is_near(offset))					\
-		EMIT2(op, offset); /* jxx .+off8 */		\
-	else {							\
-		EMIT2(0x0f, op + 0x10);				\
-		EMIT(offset, 4); /* jxx .+off32 */		\
-	}							\
-} while (0)
-
-#define COND_SEL(CODE, TOP, FOP)	\
-	case CODE:			\
-		t_op = TOP;		\
-		f_op = FOP;		\
-		goto cond_branch
-
-
-#define SEEN_DATAREF 1 /* might call external helpers */
-#define SEEN_XREG    2 /* ebx is used */
-#define SEEN_MEM     4 /* use mem[] for temporary storage */
+#define X86_JGE 0x7D
+#define X86_JG  0x7F
 
 static inline void bpf_flush_icache(void *start, void *end)
 {
@@ -125,26 +109,6 @@ static inline void bpf_flush_icache(void *start, void *end)
 #define CHOOSE_LOAD_FUNC(K, func) \
 	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
-/* Helper to find the offset of pkt_type in sk_buff
- * We want to make sure its still a 3bit field starting at a byte boundary.
- */
-#define PKT_TYPE_MAX 7
-static int pkt_type_offset(void)
-{
-	struct sk_buff skb_probe = {
-		.pkt_type = ~0,
-	};
-	char *ct = (char *)&skb_probe;
-	unsigned int off;
-
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (ct[off] == PKT_TYPE_MAX)
-			return off;
-	}
-	pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
-	return -1;
-}
-
 struct bpf_binary_header {
 	unsigned int	pages;
 	/* Note : for security reasons, bpf code will follow a randomly
@@ -178,546 +142,715 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen,
 	return header;
 }
 
+/* pick a register outside of BPF range for JIT internal work */
+#define AUX_REG (MAX_BPF_REG + 1)
+
+/* the following table maps BPF registers to x64 registers.
+ * x64 register r12 is unused, since if used as base address register
+ * in load/store instructions, it always needs an extra byte of encoding
+ */
+static const int reg2hex[] = {
+	[BPF_REG_0] = 0,  /* rax */
+	[BPF_REG_1] = 7,  /* rdi */
+	[BPF_REG_2] = 6,  /* rsi */
+	[BPF_REG_3] = 2,  /* rdx */
+	[BPF_REG_4] = 1,  /* rcx */
+	[BPF_REG_5] = 0,  /* r8 */
+	[BPF_REG_6] = 3,  /* rbx callee saved */
+	[BPF_REG_7] = 5,  /* r13 callee saved */
+	[BPF_REG_8] = 6,  /* r14 callee saved */
+	[BPF_REG_9] = 7,  /* r15 callee saved */
+	[BPF_REG_FP] = 5, /* rbp readonly */
+	[AUX_REG] = 3,    /* r11 temp register */
+};
+
+/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15
+ * which need extra byte of encoding.
+ * rax,rcx,...,rbp have simpler encoding
+ */
+static inline bool is_ereg(u32 reg)
+{
+	if (reg == BPF_REG_5 || reg == AUX_REG ||
+	    (reg >= BPF_REG_7 && reg <= BPF_REG_9))
+		return true;
+	else
+		return false;
+}
+
+/* add modifiers if 'reg' maps to x64 registers r8..r15 */
+static inline u8 add_1mod(u8 byte, u32 reg)
+{
+	if (is_ereg(reg))
+		byte |= 1;
+	return byte;
+}
+
+static inline u8 add_2mod(u8 byte, u32 r1, u32 r2)
+{
+	if (is_ereg(r1))
+		byte |= 1;
+	if (is_ereg(r2))
+		byte |= 4;
+	return byte;
+}
+
+/* encode dest register 'a_reg' into x64 opcode 'byte' */
+static inline u8 add_1reg(u8 byte, u32 a_reg)
+{
+	return byte + reg2hex[a_reg];
+}
+
+/* encode dest 'a_reg' and src 'x_reg' registers into x64 opcode 'byte' */
+static inline u8 add_2reg(u8 byte, u32 a_reg, u32 x_reg)
+{
+	return byte + reg2hex[a_reg] + (reg2hex[x_reg] << 3);
+}
+
 struct jit_context {
 	unsigned int cleanup_addr; /* epilogue code offset */
-	int pc_ret0; /* bpf index of first RET #0 instruction (if any) */
-	u8 seen;
+	bool seen_ld_abs;
 };
 
 static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 		  int oldproglen, struct jit_context *ctx)
 {
-	const struct sock_filter *filter = bpf_prog->insns;
-	int flen = bpf_prog->len;
+	struct sock_filter_int *insn = bpf_prog->insnsi;
+	int insn_cnt = bpf_prog->len;
 	u8 temp[64];
-	u8 *prog;
-	int ilen, i, proglen;
-	int t_offset, f_offset;
-	u8 t_op, f_op, seen = 0;
-	u8 *func;
-	unsigned int cleanup_addr = ctx->cleanup_addr;
-	u8 seen_or_pass0 = ctx->seen;
-
-		/* no prologue/epilogue for trivial filters (RET something) */
-		proglen = 0;
-		prog = temp;
+	int i;
+	int proglen = 0;
+	u8 *prog = temp;
+	int stacksize = MAX_BPF_STACK +
+		32 /* space for rbx, r13, r14, r15 */ +
+		8 /* space for skb_copy_bits() buffer */;
 
-		if (seen_or_pass0) {
-			EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
-			EMIT4(0x48, 0x83, 0xec, 96);	/* subq  $96,%rsp	*/
-			/* note : must save %rbx in case bpf_error is hit */
-			if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF))
-				EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
-			if (seen_or_pass0 & SEEN_XREG)
-				CLEAR_X(); /* make sure we dont leek kernel memory */
-
-			/*
-			 * If this filter needs to access skb data,
-			 * loads r9 and r8 with :
-			 *  r9 = skb->len - skb->data_len
-			 *  r8 = skb->data
-			 */
-			if (seen_or_pass0 & SEEN_DATAREF) {
-				if (offsetof(struct sk_buff, len) <= 127)
-					/* mov    off8(%rdi),%r9d */
-					EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
-				else {
-					/* mov    off32(%rdi),%r9d */
-					EMIT3(0x44, 0x8b, 0x8f);
-					EMIT(offsetof(struct sk_buff, len), 4);
-				}
-				if (is_imm8(offsetof(struct sk_buff, data_len)))
-					/* sub    off8(%rdi),%r9d */
-					EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len));
-				else {
-					EMIT3(0x44, 0x2b, 0x8f);
-					EMIT(offsetof(struct sk_buff, data_len), 4);
-				}
+	EMIT1(0x55); /* push rbp */
+	EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
 
-				if (is_imm8(offsetof(struct sk_buff, data)))
-					/* mov off8(%rdi),%r8 */
-					EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data));
-				else {
-					/* mov off32(%rdi),%r8 */
-					EMIT3(0x4c, 0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, data), 4);
-				}
+	/* sub rsp, stacksize */
+	EMIT3_off32(0x48, 0x81, 0xEC, stacksize);
+
+	/* all classic BPF filters use R6(rbx) save it */
+
+	/* mov qword ptr [rbp-X],rbx */
+	EMIT3_off32(0x48, 0x89, 0x9D, -stacksize);
+
+	/* sk_convert_filter() maps classic BPF register X to R7 and uses R8
+	 * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
+	 * R8(r14). R9(r15) spill could be made conditional, but there is only
+	 * one 'bpf_error' return path out of helper functions inside bpf_jit.S
+	 * The overhead of extra spill is negligible for any filter other
+	 * than synthetic ones. Therefore not worth adding complexity.
+	 */
+
+	/* mov qword ptr [rbp-X],r13 */
+	EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8);
+	/* mov qword ptr [rbp-X],r14 */
+	EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16);
+	/* mov qword ptr [rbp-X],r15 */
+	EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24);
+
+	/* clear A and X registers */
+	EMIT2(0x31, 0xc0); /* xor eax, eax */
+	EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */
+
+	if (ctx->seen_ld_abs) {
+		/* r9d : skb->len - skb->data_len (headlen)
+		 * r10 : skb->data
+		 */
+		if (is_imm8(offsetof(struct sk_buff, len)))
+			/* mov %r9d, off8(%rdi) */
+			EMIT4(0x44, 0x8b, 0x4f,
+			      offsetof(struct sk_buff, len));
+		else
+			/* mov %r9d, off32(%rdi) */
+			EMIT3_off32(0x44, 0x8b, 0x8f,
+				    offsetof(struct sk_buff, len));
+
+		if (is_imm8(offsetof(struct sk_buff, data_len)))
+			/* sub %r9d, off8(%rdi) */
+			EMIT4(0x44, 0x2b, 0x4f,
+			      offsetof(struct sk_buff, data_len));
+		else
+			EMIT3_off32(0x44, 0x2b, 0x8f,
+				    offsetof(struct sk_buff, data_len));
+
+		if (is_imm8(offsetof(struct sk_buff, data)))
+			/* mov %r10, off8(%rdi) */
+			EMIT4(0x4c, 0x8b, 0x57,
+			      offsetof(struct sk_buff, data));
+		else
+			/* mov %r10, off32(%rdi) */
+			EMIT3_off32(0x4c, 0x8b, 0x97,
+				    offsetof(struct sk_buff, data));
+	}
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		const s32 K = insn->imm;
+		u32 a_reg = insn->a_reg;
+		u32 x_reg = insn->x_reg;
+		u8 b1 = 0, b2 = 0, b3 = 0;
+		s64 jmp_offset;
+		u8 jmp_cond;
+		int ilen;
+		u8 *func;
+
+		switch (insn->code) {
+			/* ALU */
+		case BPF_ALU | BPF_ADD | BPF_X:
+		case BPF_ALU | BPF_SUB | BPF_X:
+		case BPF_ALU | BPF_AND | BPF_X:
+		case BPF_ALU | BPF_OR | BPF_X:
+		case BPF_ALU | BPF_XOR | BPF_X:
+		case BPF_ALU64 | BPF_ADD | BPF_X:
+		case BPF_ALU64 | BPF_SUB | BPF_X:
+		case BPF_ALU64 | BPF_AND | BPF_X:
+		case BPF_ALU64 | BPF_OR | BPF_X:
+		case BPF_ALU64 | BPF_XOR | BPF_X:
+			switch (BPF_OP(insn->code)) {
+			case BPF_ADD: b2 = 0x01; break;
+			case BPF_SUB: b2 = 0x29; break;
+			case BPF_AND: b2 = 0x21; break;
+			case BPF_OR: b2 = 0x09; break;
+			case BPF_XOR: b2 = 0x31; break;
 			}
-		}
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_2mod(0x48, a_reg, x_reg));
+			else if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT1(add_2mod(0x40, a_reg, x_reg));
+			EMIT2(b2, add_2reg(0xC0, a_reg, x_reg));
+			break;
 
-		switch (filter[0].code) {
-		case BPF_S_RET_K:
-		case BPF_S_LD_W_LEN:
-		case BPF_S_ANC_PROTOCOL:
-		case BPF_S_ANC_IFINDEX:
-		case BPF_S_ANC_MARK:
-		case BPF_S_ANC_RXHASH:
-		case BPF_S_ANC_CPU:
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
-		case BPF_S_ANC_QUEUE:
-		case BPF_S_ANC_PKTTYPE:
-		case BPF_S_LD_W_ABS:
-		case BPF_S_LD_H_ABS:
-		case BPF_S_LD_B_ABS:
-			/* first instruction sets A register (or is RET 'constant') */
+			/* mov A, X */
+		case BPF_ALU64 | BPF_MOV | BPF_X:
+			EMIT_mov(a_reg, x_reg);
 			break;
-		default:
-			/* make sure we dont leak kernel information to user */
-			CLEAR_A(); /* A = 0 */
-		}
 
-		for (i = 0; i < flen; i++) {
-			unsigned int K = filter[i].k;
+			/* mov32 A, X */
+		case BPF_ALU | BPF_MOV | BPF_X:
+			if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT1(add_2mod(0x40, a_reg, x_reg));
+			EMIT2(0x89, add_2reg(0xC0, a_reg, x_reg));
+			break;
 
-			switch (filter[i].code) {
-			case BPF_S_ALU_ADD_X: /* A += X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x01, 0xd8);		/* add %ebx,%eax */
-				break;
-			case BPF_S_ALU_ADD_K: /* A += K; */
-				if (!K)
-					break;
-				if (is_imm8(K))
-					EMIT3(0x83, 0xc0, K);	/* add imm8,%eax */
-				else
-					EMIT1_off32(0x05, K);	/* add imm32,%eax */
-				break;
-			case BPF_S_ALU_SUB_X: /* A -= X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x29, 0xd8);		/* sub    %ebx,%eax */
-				break;
-			case BPF_S_ALU_SUB_K: /* A -= K */
-				if (!K)
-					break;
-				if (is_imm8(K))
-					EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */
-				else
-					EMIT1_off32(0x2d, K); /* sub imm32,%eax */
-				break;
-			case BPF_S_ALU_MUL_X: /* A *= X; */
-				seen |= SEEN_XREG;
-				EMIT3(0x0f, 0xaf, 0xc3);	/* imul %ebx,%eax */
-				break;
-			case BPF_S_ALU_MUL_K: /* A *= K */
-				if (is_imm8(K))
-					EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */
-				else {
-					EMIT2(0x69, 0xc0);		/* imul imm32,%eax */
-					EMIT(K, 4);
-				}
-				break;
-			case BPF_S_ALU_DIV_X: /* A /= X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x85, 0xdb);	/* test %ebx,%ebx */
-				if (ctx->pc_ret0 > 0) {
-					/* addrs[pc_ret0 - 1] is start address of target
-					 * (addrs[i] - 4) is the address following this jmp
-					 * ("xor %edx,%edx; div %ebx" being 4 bytes long)
-					 */
-					EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] -
-								(addrs[i] - 4));
-				} else {
-					EMIT_COND_JMP(X86_JNE, 2 + 5);
-					CLEAR_A();
-					EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
-				}
-				EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
-				break;
-			case BPF_S_ALU_MOD_X: /* A %= X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x85, 0xdb);	/* test %ebx,%ebx */
-				if (ctx->pc_ret0 > 0) {
-					/* addrs[pc_ret0 - 1] is start address of target
-					 * (addrs[i] - 6) is the address following this jmp
-					 * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long)
-					 */
-					EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] -
-								(addrs[i] - 6));
-				} else {
-					EMIT_COND_JMP(X86_JNE, 2 + 5);
-					CLEAR_A();
-					EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */
-				}
-				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
-				EMIT2(0xf7, 0xf3);	/* div %ebx */
-				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
-				break;
-			case BPF_S_ALU_MOD_K: /* A %= K; */
-				if (K == 1) {
-					CLEAR_A();
-					break;
-				}
-				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
-				EMIT1(0xb9);EMIT(K, 4);	/* mov imm32,%ecx */
-				EMIT2(0xf7, 0xf1);	/* div %ecx */
-				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
-				break;
-			case BPF_S_ALU_DIV_K: /* A /= K */
-				if (K == 1)
-					break;
-				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
-				EMIT1(0xb9);EMIT(K, 4);	/* mov imm32,%ecx */
-				EMIT2(0xf7, 0xf1);	/* div %ecx */
-				break;
-			case BPF_S_ALU_AND_X:
-				seen |= SEEN_XREG;
-				EMIT2(0x21, 0xd8);		/* and %ebx,%eax */
-				break;
-			case BPF_S_ALU_AND_K:
-				if (K >= 0xFFFFFF00) {
-					EMIT2(0x24, K & 0xFF); /* and imm8,%al */
-				} else if (K >= 0xFFFF0000) {
-					EMIT2(0x66, 0x25);	/* and imm16,%ax */
-					EMIT(K, 2);
-				} else {
-					EMIT1_off32(0x25, K);	/* and imm32,%eax */
-				}
-				break;
-			case BPF_S_ALU_OR_X:
-				seen |= SEEN_XREG;
-				EMIT2(0x09, 0xd8);		/* or %ebx,%eax */
-				break;
-			case BPF_S_ALU_OR_K:
-				if (is_imm8(K))
-					EMIT3(0x83, 0xc8, K); /* or imm8,%eax */
-				else
-					EMIT1_off32(0x0d, K);	/* or imm32,%eax */
-				break;
-			case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
-			case BPF_S_ALU_XOR_X:
-				seen |= SEEN_XREG;
-				EMIT2(0x31, 0xd8);		/* xor %ebx,%eax */
-				break;
-			case BPF_S_ALU_XOR_K: /* A ^= K; */
-				if (K == 0)
-					break;
-				if (is_imm8(K))
-					EMIT3(0x83, 0xf0, K);	/* xor imm8,%eax */
-				else
-					EMIT1_off32(0x35, K);	/* xor imm32,%eax */
-				break;
-			case BPF_S_ALU_LSH_X: /* A <<= X; */
-				seen |= SEEN_XREG;
-				EMIT4(0x89, 0xd9, 0xd3, 0xe0);	/* mov %ebx,%ecx; shl %cl,%eax */
-				break;
-			case BPF_S_ALU_LSH_K:
-				if (K == 0)
-					break;
-				else if (K == 1)
-					EMIT2(0xd1, 0xe0); /* shl %eax */
-				else
-					EMIT3(0xc1, 0xe0, K);
-				break;
-			case BPF_S_ALU_RSH_X: /* A >>= X; */
-				seen |= SEEN_XREG;
-				EMIT4(0x89, 0xd9, 0xd3, 0xe8);	/* mov %ebx,%ecx; shr %cl,%eax */
-				break;
-			case BPF_S_ALU_RSH_K: /* A >>= K; */
-				if (K == 0)
-					break;
-				else if (K == 1)
-					EMIT2(0xd1, 0xe8); /* shr %eax */
-				else
-					EMIT3(0xc1, 0xe8, K);
-				break;
-			case BPF_S_ALU_NEG:
-				EMIT2(0xf7, 0xd8);		/* neg %eax */
-				break;
-			case BPF_S_RET_K:
-				if (!K) {
-					if (ctx->pc_ret0 == -1)
-						ctx->pc_ret0 = i;
-					CLEAR_A();
-				} else {
-					EMIT1_off32(0xb8, K);	/* mov $imm32,%eax */
-				}
-				/* fallinto */
-			case BPF_S_RET_A:
-				if (seen_or_pass0) {
-					if (i != flen - 1) {
-						EMIT_JMP(cleanup_addr - addrs[i]);
-						break;
-					}
-					if (seen_or_pass0 & SEEN_XREG)
-						EMIT4(0x48, 0x8b, 0x5d, 0xf8);  /* mov  -8(%rbp),%rbx */
-					EMIT1(0xc9);		/* leaveq */
-				}
-				EMIT1(0xc3);		/* ret */
-				break;
-			case BPF_S_MISC_TAX: /* X = A */
-				seen |= SEEN_XREG;
-				EMIT2(0x89, 0xc3);	/* mov    %eax,%ebx */
-				break;
-			case BPF_S_MISC_TXA: /* A = X */
-				seen |= SEEN_XREG;
-				EMIT2(0x89, 0xd8);	/* mov    %ebx,%eax */
-				break;
-			case BPF_S_LD_IMM: /* A = K */
-				if (!K)
-					CLEAR_A();
-				else
-					EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
+			/* neg A */
+		case BPF_ALU | BPF_NEG:
+		case BPF_ALU64 | BPF_NEG:
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, a_reg));
+			else if (is_ereg(a_reg))
+				EMIT1(add_1mod(0x40, a_reg));
+			EMIT2(0xF7, add_1reg(0xD8, a_reg));
+			break;
+
+		case BPF_ALU | BPF_ADD | BPF_K:
+		case BPF_ALU | BPF_SUB | BPF_K:
+		case BPF_ALU | BPF_AND | BPF_K:
+		case BPF_ALU | BPF_OR | BPF_K:
+		case BPF_ALU | BPF_XOR | BPF_K:
+		case BPF_ALU64 | BPF_ADD | BPF_K:
+		case BPF_ALU64 | BPF_SUB | BPF_K:
+		case BPF_ALU64 | BPF_AND | BPF_K:
+		case BPF_ALU64 | BPF_OR | BPF_K:
+		case BPF_ALU64 | BPF_XOR | BPF_K:
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, a_reg));
+			else if (is_ereg(a_reg))
+				EMIT1(add_1mod(0x40, a_reg));
+
+			switch (BPF_OP(insn->code)) {
+			case BPF_ADD: b3 = 0xC0; break;
+			case BPF_SUB: b3 = 0xE8; break;
+			case BPF_AND: b3 = 0xE0; break;
+			case BPF_OR: b3 = 0xC8; break;
+			case BPF_XOR: b3 = 0xF0; break;
+			}
+
+			if (is_imm8(K))
+				EMIT3(0x83, add_1reg(b3, a_reg), K);
+			else
+				EMIT2_off32(0x81, add_1reg(b3, a_reg), K);
+			break;
+
+		case BPF_ALU64 | BPF_MOV | BPF_K:
+			/* optimization: if imm32 is positive,
+			 * use 'mov eax, imm32' (which zero-extends imm32)
+			 * to save 2 bytes
+			 */
+			if (K < 0) {
+				/* 'mov rax, imm32' sign extends imm32 */
+				b1 = add_1mod(0x48, a_reg);
+				b2 = 0xC7;
+				b3 = 0xC0;
+				EMIT3_off32(b1, b2, add_1reg(b3, a_reg), K);
 				break;
-			case BPF_S_LDX_IMM: /* X = K */
-				seen |= SEEN_XREG;
-				if (!K)
-					CLEAR_X();
+			}
+
+		case BPF_ALU | BPF_MOV | BPF_K:
+			/* mov %eax, imm32 */
+			if (is_ereg(a_reg))
+				EMIT1(add_1mod(0x40, a_reg));
+			EMIT1_off32(add_1reg(0xB8, a_reg), K);
+			break;
+
+			/* A %= X, A /= X, A %= K, A /= K */
+		case BPF_ALU | BPF_MOD | BPF_X:
+		case BPF_ALU | BPF_DIV | BPF_X:
+		case BPF_ALU | BPF_MOD | BPF_K:
+		case BPF_ALU | BPF_DIV | BPF_K:
+		case BPF_ALU64 | BPF_MOD | BPF_X:
+		case BPF_ALU64 | BPF_DIV | BPF_X:
+		case BPF_ALU64 | BPF_MOD | BPF_K:
+		case BPF_ALU64 | BPF_DIV | BPF_K:
+			EMIT1(0x50); /* push rax */
+			EMIT1(0x52); /* push rdx */
+
+			if (BPF_SRC(insn->code) == BPF_X)
+				/* mov r11, X */
+				EMIT_mov(AUX_REG, x_reg);
+			else
+				/* mov r11, K */
+				EMIT3_off32(0x49, 0xC7, 0xC3, K);
+
+			/* mov rax, A */
+			EMIT_mov(BPF_REG_0, a_reg);
+
+			/* xor edx, edx
+			 * equivalent to 'xor rdx, rdx', but one byte less
+			 */
+			EMIT2(0x31, 0xd2);
+
+			if (BPF_SRC(insn->code) == BPF_X) {
+				/* if (X == 0) return 0 */
+
+				/* cmp r11, 0 */
+				EMIT4(0x49, 0x83, 0xFB, 0x00);
+
+				/* jne .+9 (skip over pop, pop, xor and jmp) */
+				EMIT2(X86_JNE, 1 + 1 + 2 + 5);
+				EMIT1(0x5A); /* pop rdx */
+				EMIT1(0x58); /* pop rax */
+				EMIT2(0x31, 0xc0); /* xor eax, eax */
+
+				/* jmp cleanup_addr
+				 * addrs[i] - 11, because there are 11 bytes
+				 * after this insn: div, mov, pop, pop, mov
+				 */
+				jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
+				EMIT1_off32(0xE9, jmp_offset);
+			}
+
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				/* div r11 */
+				EMIT3(0x49, 0xF7, 0xF3);
+			else
+				/* div r11d */
+				EMIT3(0x41, 0xF7, 0xF3);
+
+			if (BPF_OP(insn->code) == BPF_MOD)
+				/* mov r11, rdx */
+				EMIT3(0x49, 0x89, 0xD3);
+			else
+				/* mov r11, rax */
+				EMIT3(0x49, 0x89, 0xC3);
+
+			EMIT1(0x5A); /* pop rdx */
+			EMIT1(0x58); /* pop rax */
+
+			/* mov A, r11 */
+			EMIT_mov(a_reg, AUX_REG);
+			break;
+
+		case BPF_ALU | BPF_MUL | BPF_K:
+		case BPF_ALU | BPF_MUL | BPF_X:
+		case BPF_ALU64 | BPF_MUL | BPF_K:
+		case BPF_ALU64 | BPF_MUL | BPF_X:
+			EMIT1(0x50); /* push rax */
+			EMIT1(0x52); /* push rdx */
+
+			/* mov r11, A */
+			EMIT_mov(AUX_REG, a_reg);
+
+			if (BPF_SRC(insn->code) == BPF_X)
+				/* mov rax, X */
+				EMIT_mov(BPF_REG_0, x_reg);
+			else
+				/* mov rax, K */
+				EMIT3_off32(0x48, 0xC7, 0xC0, K);
+
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, AUX_REG));
+			else if (is_ereg(AUX_REG))
+				EMIT1(add_1mod(0x40, AUX_REG));
+			/* mul(q) r11 */
+			EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
+
+			/* mov r11, rax */
+			EMIT_mov(AUX_REG, BPF_REG_0);
+
+			EMIT1(0x5A); /* pop rdx */
+			EMIT1(0x58); /* pop rax */
+
+			/* mov A, r11 */
+			EMIT_mov(a_reg, AUX_REG);
+			break;
+
+			/* shifts */
+		case BPF_ALU | BPF_LSH | BPF_K:
+		case BPF_ALU | BPF_RSH | BPF_K:
+		case BPF_ALU | BPF_ARSH | BPF_K:
+		case BPF_ALU64 | BPF_LSH | BPF_K:
+		case BPF_ALU64 | BPF_RSH | BPF_K:
+		case BPF_ALU64 | BPF_ARSH | BPF_K:
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, a_reg));
+			else if (is_ereg(a_reg))
+				EMIT1(add_1mod(0x40, a_reg));
+
+			switch (BPF_OP(insn->code)) {
+			case BPF_LSH: b3 = 0xE0; break;
+			case BPF_RSH: b3 = 0xE8; break;
+			case BPF_ARSH: b3 = 0xF8; break;
+			}
+			EMIT3(0xC1, add_1reg(b3, a_reg), K);
+			break;
+
+		case BPF_ALU | BPF_END | BPF_FROM_BE:
+			switch (K) {
+			case 16:
+				/* emit 'ror %ax, 8' to swap lower 2 bytes */
+				EMIT1(0x66);
+				if (is_ereg(a_reg))
+					EMIT1(0x41);
+				EMIT3(0xC1, add_1reg(0xC8, a_reg), 8);
+				break;
+			case 32:
+				/* emit 'bswap eax' to swap lower 4 bytes */
+				if (is_ereg(a_reg))
+					EMIT2(0x41, 0x0F);
 				else
-					EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */
-				break;
-			case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */
-				seen |= SEEN_MEM;
-				EMIT3(0x8b, 0x45, 0xf0 - K*4);
+					EMIT1(0x0F);
+				EMIT1(add_1reg(0xC8, a_reg));
 				break;
-			case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */
-				seen |= SEEN_XREG | SEEN_MEM;
-				EMIT3(0x8b, 0x5d, 0xf0 - K*4);
-				break;
-			case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */
-				seen |= SEEN_MEM;
-				EMIT3(0x89, 0x45, 0xf0 - K*4);
-				break;
-			case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
-				seen |= SEEN_XREG | SEEN_MEM;
-				EMIT3(0x89, 0x5d, 0xf0 - K*4);
-				break;
-			case BPF_S_LD_W_LEN: /*	A = skb->len; */
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
-				if (is_imm8(offsetof(struct sk_buff, len)))
-					/* mov    off8(%rdi),%eax */
-					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len));
-				else {
-					EMIT2(0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, len), 4);
-				}
-				break;
-			case BPF_S_LDX_W_LEN: /* X = skb->len; */
-				seen |= SEEN_XREG;
-				if (is_imm8(offsetof(struct sk_buff, len)))
-					/* mov off8(%rdi),%ebx */
-					EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len));
-				else {
-					EMIT2(0x8b, 0x9f);
-					EMIT(offsetof(struct sk_buff, len), 4);
-				}
-				break;
-			case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
-				if (is_imm8(offsetof(struct sk_buff, protocol))) {
-					/* movzwl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol));
-				} else {
-					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
-					EMIT(offsetof(struct sk_buff, protocol), 4);
-				}
-				EMIT2(0x86, 0xc4); /* ntohs() : xchg   %al,%ah */
-				break;
-			case BPF_S_ANC_IFINDEX:
-				if (is_imm8(offsetof(struct sk_buff, dev))) {
-					/* movq off8(%rdi),%rax */
-					EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev));
-				} else {
-					EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */
-					EMIT(offsetof(struct sk_buff, dev), 4);
-				}
-				EMIT3(0x48, 0x85, 0xc0);	/* test %rax,%rax */
-				EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6));
-				BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-				EMIT2(0x8b, 0x80);	/* mov off32(%rax),%eax */
-				EMIT(offsetof(struct net_device, ifindex), 4);
-				break;
-			case BPF_S_ANC_MARK:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-				if (is_imm8(offsetof(struct sk_buff, mark))) {
-					/* mov off8(%rdi),%eax */
-					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark));
-				} else {
-					EMIT2(0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, mark), 4);
-				}
-				break;
-			case BPF_S_ANC_RXHASH:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
-				if (is_imm8(offsetof(struct sk_buff, hash))) {
-					/* mov off8(%rdi),%eax */
-					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash));
-				} else {
-					EMIT2(0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, hash), 4);
-				}
-				break;
-			case BPF_S_ANC_QUEUE:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
-				if (is_imm8(offsetof(struct sk_buff, queue_mapping))) {
-					/* movzwl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping));
-				} else {
-					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
-					EMIT(offsetof(struct sk_buff, queue_mapping), 4);
-				}
-				break;
-			case BPF_S_ANC_CPU:
-#ifdef CONFIG_SMP
-				EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */
-				EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */
-#else
-				CLEAR_A();
-#endif
-				break;
-			case BPF_S_ANC_VLAN_TAG:
-			case BPF_S_ANC_VLAN_TAG_PRESENT:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-				if (is_imm8(offsetof(struct sk_buff, vlan_tci))) {
-					/* movzwl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci));
-				} else {
-					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
-					EMIT(offsetof(struct sk_buff, vlan_tci), 4);
-				}
-				BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
-				if (filter[i].code == BPF_S_ANC_VLAN_TAG) {
-					EMIT3(0x80, 0xe4, 0xef); /* and    $0xef,%ah */
-				} else {
-					EMIT3(0xc1, 0xe8, 0x0c); /* shr    $0xc,%eax */
-					EMIT3(0x83, 0xe0, 0x01); /* and    $0x1,%eax */
-				}
-				break;
-			case BPF_S_ANC_PKTTYPE:
-			{
-				int off = pkt_type_offset();
-
-				if (off < 0)
-					return -EINVAL;
-				if (is_imm8(off)) {
-					/* movzbl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb6, 0x47, off);
-				} else {
-					/* movbl off32(%rdi),%eax */
-					EMIT3(0x0f, 0xb6, 0x87);
-					EMIT(off, 4);
-				}
-				EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and    $0x7,%eax */
+			case 64:
+				/* emit 'bswap rax' to swap 8 bytes */
+				EMIT3(add_1mod(0x48, a_reg), 0x0F,
+				      add_1reg(0xC8, a_reg));
 				break;
 			}
-			case BPF_S_LD_W_ABS:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_word);
-common_load:			seen |= SEEN_DATAREF;
-				t_offset = func - (image + addrs[i]);
-				EMIT1_off32(0xbe, K); /* mov imm32,%esi */
-				EMIT1_off32(0xe8, t_offset); /* call */
-				break;
-			case BPF_S_LD_H_ABS:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_half);
-				goto common_load;
-			case BPF_S_LD_B_ABS:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
-				goto common_load;
-			case BPF_S_LDX_B_MSH:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
-				seen |= SEEN_DATAREF | SEEN_XREG;
-				t_offset = func - (image + addrs[i]);
-				EMIT1_off32(0xbe, K);	/* mov imm32,%esi */
-				EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
-				break;
-			case BPF_S_LD_W_IND:
-				func = sk_load_word;
-common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
-				t_offset = func - (image + addrs[i]);
-				if (K) {
-					if (is_imm8(K)) {
-						EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
-					} else {
-						EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
-						EMIT(K, 4);
-					}
-				} else {
-					EMIT2(0x89,0xde); /* mov %ebx,%esi */
-				}
-				EMIT1_off32(0xe8, t_offset);	/* call sk_load_xxx_ind */
-				break;
-			case BPF_S_LD_H_IND:
-				func = sk_load_half;
-				goto common_load_ind;
-			case BPF_S_LD_B_IND:
-				func = sk_load_byte;
-				goto common_load_ind;
-			case BPF_S_JMP_JA:
-				t_offset = addrs[i + K] - addrs[i];
-				EMIT_JMP(t_offset);
-				break;
-			COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE);
-			COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB);
-			COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE);
-			COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE);
-			COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE);
-			COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB);
-			COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE);
-			COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE);
-
-cond_branch:			f_offset = addrs[i + filter[i].jf] - addrs[i];
-				t_offset = addrs[i + filter[i].jt] - addrs[i];
-
-				/* same targets, can avoid doing the test :) */
-				if (filter[i].jt == filter[i].jf) {
-					EMIT_JMP(t_offset);
-					break;
-				}
+			break;
+
+		case BPF_ALU | BPF_END | BPF_FROM_LE:
+			break;
+
+			/* ST: *(u8*)(a_reg + off) = imm */
+		case BPF_ST | BPF_MEM | BPF_B:
+			if (is_ereg(a_reg))
+				EMIT2(0x41, 0xC6);
+			else
+				EMIT1(0xC6);
+			goto st;
+		case BPF_ST | BPF_MEM | BPF_H:
+			if (is_ereg(a_reg))
+				EMIT3(0x66, 0x41, 0xC7);
+			else
+				EMIT2(0x66, 0xC7);
+			goto st;
+		case BPF_ST | BPF_MEM | BPF_W:
+			if (is_ereg(a_reg))
+				EMIT2(0x41, 0xC7);
+			else
+				EMIT1(0xC7);
+			goto st;
+		case BPF_ST | BPF_MEM | BPF_DW:
+			EMIT2(add_1mod(0x48, a_reg), 0xC7);
+
+st:			if (is_imm8(insn->off))
+				EMIT2(add_1reg(0x40, a_reg), insn->off);
+			else
+				EMIT1_off32(add_1reg(0x80, a_reg), insn->off);
+
+			EMIT(K, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
+			break;
+
+			/* STX: *(u8*)(a_reg + off) = x_reg */
+		case BPF_STX | BPF_MEM | BPF_B:
+			/* emit 'mov byte ptr [rax + off], al' */
+			if (is_ereg(a_reg) || is_ereg(x_reg) ||
+			    /* have to add extra byte for x86 SIL, DIL regs */
+			    x_reg == BPF_REG_1 || x_reg == BPF_REG_2)
+				EMIT2(add_2mod(0x40, a_reg, x_reg), 0x88);
+			else
+				EMIT1(0x88);
+			goto stx;
+		case BPF_STX | BPF_MEM | BPF_H:
+			if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT3(0x66, add_2mod(0x40, a_reg, x_reg), 0x89);
+			else
+				EMIT2(0x66, 0x89);
+			goto stx;
+		case BPF_STX | BPF_MEM | BPF_W:
+			if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT2(add_2mod(0x40, a_reg, x_reg), 0x89);
+			else
+				EMIT1(0x89);
+			goto stx;
+		case BPF_STX | BPF_MEM | BPF_DW:
+			EMIT2(add_2mod(0x48, a_reg, x_reg), 0x89);
+stx:			if (is_imm8(insn->off))
+				EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
+			else
+				EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
+					    insn->off);
+			break;
+
+			/* LDX: a_reg = *(u8*)(x_reg + off) */
+		case BPF_LDX | BPF_MEM | BPF_B:
+			/* emit 'movzx rax, byte ptr [rax + off]' */
+			EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB6);
+			goto ldx;
+		case BPF_LDX | BPF_MEM | BPF_H:
+			/* emit 'movzx rax, word ptr [rax + off]' */
+			EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB7);
+			goto ldx;
+		case BPF_LDX | BPF_MEM | BPF_W:
+			/* emit 'mov eax, dword ptr [rax+0x14]' */
+			if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT2(add_2mod(0x40, x_reg, a_reg), 0x8B);
+			else
+				EMIT1(0x8B);
+			goto ldx;
+		case BPF_LDX | BPF_MEM | BPF_DW:
+			/* emit 'mov rax, qword ptr [rax+0x14]' */
+			EMIT2(add_2mod(0x48, x_reg, a_reg), 0x8B);
+ldx:			/* if insn->off == 0 we can save one extra byte, but
+			 * special case of x86 r13 which always needs an offset
+			 * is not worth the hassle
+			 */
+			if (is_imm8(insn->off))
+				EMIT2(add_2reg(0x40, x_reg, a_reg), insn->off);
+			else
+				EMIT1_off32(add_2reg(0x80, x_reg, a_reg),
+					    insn->off);
+			break;
+
+			/* STX XADD: lock *(u32*)(a_reg + off) += x_reg */
+		case BPF_STX | BPF_XADD | BPF_W:
+			/* emit 'lock add dword ptr [rax + off], eax' */
+			if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT3(0xF0, add_2mod(0x40, a_reg, x_reg), 0x01);
+			else
+				EMIT2(0xF0, 0x01);
+			goto xadd;
+		case BPF_STX | BPF_XADD | BPF_DW:
+			EMIT3(0xF0, add_2mod(0x48, a_reg, x_reg), 0x01);
+xadd:			if (is_imm8(insn->off))
+				EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
+			else
+				EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
+					    insn->off);
+			break;
+
+			/* call */
+		case BPF_JMP | BPF_CALL:
+			func = (u8 *) __bpf_call_base + K;
+			jmp_offset = func - (image + addrs[i]);
+			if (ctx->seen_ld_abs) {
+				EMIT2(0x41, 0x52); /* push %r10 */
+				EMIT2(0x41, 0x51); /* push %r9 */
+				/* need to adjust jmp offset, since
+				 * pop %r9, pop %r10 take 4 bytes after call insn
+				 */
+				jmp_offset += 4;
+			}
+			if (!K || !is_simm32(jmp_offset)) {
+				pr_err("unsupported bpf func %d addr %p image %p\n",
+				       K, func, image);
+				return -EINVAL;
+			}
+			EMIT1_off32(0xE8, jmp_offset);
+			if (ctx->seen_ld_abs) {
+				EMIT2(0x41, 0x59); /* pop %r9 */
+				EMIT2(0x41, 0x5A); /* pop %r10 */
+			}
+			break;
+
+			/* cond jump */
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JNE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JSGT | BPF_X:
+		case BPF_JMP | BPF_JSGE | BPF_X:
+			/* cmp a_reg, x_reg */
+			EMIT3(add_2mod(0x48, a_reg, x_reg), 0x39,
+			      add_2reg(0xC0, a_reg, x_reg));
+			goto emit_cond_jmp;
+
+		case BPF_JMP | BPF_JSET | BPF_X:
+			/* test a_reg, x_reg */
+			EMIT3(add_2mod(0x48, a_reg, x_reg), 0x85,
+			      add_2reg(0xC0, a_reg, x_reg));
+			goto emit_cond_jmp;
+
+		case BPF_JMP | BPF_JSET | BPF_K:
+			/* test a_reg, imm32 */
+			EMIT1(add_1mod(0x48, a_reg));
+			EMIT2_off32(0xF7, add_1reg(0xC0, a_reg), K);
+			goto emit_cond_jmp;
+
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JNE | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JSGT | BPF_K:
+		case BPF_JMP | BPF_JSGE | BPF_K:
+			/* cmp a_reg, imm8/32 */
+			EMIT1(add_1mod(0x48, a_reg));
+
+			if (is_imm8(K))
+				EMIT3(0x83, add_1reg(0xF8, a_reg), K);
+			else
+				EMIT2_off32(0x81, add_1reg(0xF8, a_reg), K);
+
+emit_cond_jmp:		/* convert BPF opcode to x86 */
+			switch (BPF_OP(insn->code)) {
+			case BPF_JEQ:
+				jmp_cond = X86_JE;
+				break;
+			case BPF_JSET:
+			case BPF_JNE:
+				jmp_cond = X86_JNE;
+				break;
+			case BPF_JGT:
+				/* GT is unsigned '>', JA in x86 */
+				jmp_cond = X86_JA;
+				break;
+			case BPF_JGE:
+				/* GE is unsigned '>=', JAE in x86 */
+				jmp_cond = X86_JAE;
+				break;
+			case BPF_JSGT:
+				/* signed '>', GT in x86 */
+				jmp_cond = X86_JG;
+				break;
+			case BPF_JSGE:
+				/* signed '>=', GE in x86 */
+				jmp_cond = X86_JGE;
+				break;
+			default: /* to silence gcc warning */
+				return -EFAULT;
+			}
+			jmp_offset = addrs[i + insn->off] - addrs[i];
+			if (is_imm8(jmp_offset)) {
+				EMIT2(jmp_cond, jmp_offset);
+			} else if (is_simm32(jmp_offset)) {
+				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
+			} else {
+				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
+				return -EFAULT;
+			}
+
+			break;
 
-				switch (filter[i].code) {
-				case BPF_S_JMP_JGT_X:
-				case BPF_S_JMP_JGE_X:
-				case BPF_S_JMP_JEQ_X:
-					seen |= SEEN_XREG;
-					EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */
-					break;
-				case BPF_S_JMP_JSET_X:
-					seen |= SEEN_XREG;
-					EMIT2(0x85, 0xd8); /* test %ebx,%eax */
-					break;
-				case BPF_S_JMP_JEQ_K:
-					if (K == 0) {
-						EMIT2(0x85, 0xc0); /* test   %eax,%eax */
-						break;
-					}
-				case BPF_S_JMP_JGT_K:
-				case BPF_S_JMP_JGE_K:
-					if (K <= 127)
-						EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */
+		case BPF_JMP | BPF_JA:
+			jmp_offset = addrs[i + insn->off] - addrs[i];
+			if (!jmp_offset)
+				/* optimize out nop jumps */
+				break;
+emit_jmp:
+			if (is_imm8(jmp_offset)) {
+				EMIT2(0xEB, jmp_offset);
+			} else if (is_simm32(jmp_offset)) {
+				EMIT1_off32(0xE9, jmp_offset);
+			} else {
+				pr_err("jmp gen bug %llx\n", jmp_offset);
+				return -EFAULT;
+			}
+			break;
+
+		case BPF_LD | BPF_IND | BPF_W:
+			func = sk_load_word;
+			goto common_load;
+		case BPF_LD | BPF_ABS | BPF_W:
+			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
+common_load:		ctx->seen_ld_abs = true;
+			jmp_offset = func - (image + addrs[i]);
+			if (!func || !is_simm32(jmp_offset)) {
+				pr_err("unsupported bpf func %d addr %p image %p\n",
+				       K, func, image);
+				return -EINVAL;
+			}
+			if (BPF_MODE(insn->code) == BPF_ABS) {
+				/* mov %esi, imm32 */
+				EMIT1_off32(0xBE, K);
+			} else {
+				/* mov %rsi, x_reg */
+				EMIT_mov(BPF_REG_2, x_reg);
+				if (K) {
+					if (is_imm8(K))
+						/* add %esi, imm8 */
+						EMIT3(0x83, 0xC6, K);
 					else
-						EMIT1_off32(0x3d, K); /* cmp imm32,%eax */
-					break;
-				case BPF_S_JMP_JSET_K:
-					if (K <= 0xFF)
-						EMIT2(0xa8, K); /* test imm8,%al */
-					else if (!(K & 0xFFFF00FF))
-						EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */
-					else if (K <= 0xFFFF) {
-						EMIT2(0x66, 0xa9); /* test imm16,%ax */
-						EMIT(K, 2);
-					} else {
-						EMIT1_off32(0xa9, K); /* test imm32,%eax */
-					}
-					break;
+						/* add %esi, imm32 */
+						EMIT2_off32(0x81, 0xC6, K);
 				}
-				if (filter[i].jt != 0) {
-					if (filter[i].jf && f_offset)
-						t_offset += is_near(f_offset) ? 2 : 5;
-					EMIT_COND_JMP(t_op, t_offset);
-					if (filter[i].jf)
-						EMIT_JMP(f_offset);
-					break;
-				}
-				EMIT_COND_JMP(f_op, f_offset);
-				break;
+			}
+			/* skb pointer is in R6 (%rbx), it will be copied into
+			 * %rdi if skb_copy_bits() call is necessary.
+			 * sk_load_* helpers also use %r10 and %r9d.
+			 * See bpf_jit.S
+			 */
+			EMIT1_off32(0xE8, jmp_offset); /* call */
+			break;
+
+		case BPF_LD | BPF_IND | BPF_H:
+			func = sk_load_half;
+			goto common_load;
+		case BPF_LD | BPF_ABS | BPF_H:
+			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
+			goto common_load;
+		case BPF_LD | BPF_IND | BPF_B:
+			func = sk_load_byte;
+			goto common_load;
+		case BPF_LD | BPF_ABS | BPF_B:
+			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
+			goto common_load;
+
+		case BPF_JMP | BPF_EXIT:
+			if (i != insn_cnt - 1) {
+				jmp_offset = ctx->cleanup_addr - addrs[i];
+				goto emit_jmp;
+			}
+			/* update cleanup_addr */
+			ctx->cleanup_addr = proglen;
+			/* mov rbx, qword ptr [rbp-X] */
+			EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize);
+			/* mov r13, qword ptr [rbp-X] */
+			EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8);
+			/* mov r14, qword ptr [rbp-X] */
+			EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16);
+			/* mov r15, qword ptr [rbp-X] */
+			EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24);
+
+			EMIT1(0xC9); /* leave */
+			EMIT1(0xC3); /* ret */
+			break;
+
 		default:
-			/* hmm, too complex filter, give up with jit compiler */
+			/* By design x64 JIT should support all BPF instructions
+			 * This error will be seen if new instruction was added
+			 * to interpreter, but not to JIT
+			 * or if there is junk in sk_filter
+			 */
+			pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
 			return -EINVAL;
 		}
+
 		ilen = prog - temp;
 		if (image) {
 			if (unlikely(proglen + ilen > oldproglen)) {
-				pr_err("bpb_jit_compile fatal error\n");
+				pr_err("bpf_jit_compile fatal error\n");
 				return -EFAULT;
 			}
 			memcpy(image + proglen, temp, ilen);
@@ -726,21 +859,14 @@ cond_branch:			f_offset = addrs[i + filter[i].jf] - addrs[i];
 		addrs[i] = proglen;
 		prog = temp;
 	}
-	/* last bpf instruction is always a RET :
-	 * use it to give the cleanup instruction(s) addr
-	 */
-	ctx->cleanup_addr = proglen - 1; /* ret */
-	if (seen_or_pass0)
-		ctx->cleanup_addr -= 1; /* leaveq */
-	if (seen_or_pass0 & SEEN_XREG)
-		ctx->cleanup_addr -= 4; /* mov  -8(%rbp),%rbx */
-
-	ctx->seen = seen;
-
 	return proglen;
 }
 
 void bpf_jit_compile(struct sk_filter *prog)
+{
+}
+
+void bpf_int_jit_compile(struct sk_filter *prog)
 {
 	struct bpf_binary_header *header = NULL;
 	int proglen, oldproglen = 0;
@@ -768,8 +894,6 @@ void bpf_jit_compile(struct sk_filter *prog)
 		addrs[i] = proglen;
 	}
 	ctx.cleanup_addr = proglen;
-	ctx.seen = SEEN_XREG | SEEN_DATAREF | SEEN_MEM;
-	ctx.pc_ret0 = -1;
 
 	for (pass = 0; pass < 10; pass++) {
 		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4457b383961c..9d5ae0a2c954 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -207,6 +207,9 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
 void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 
+u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+void bpf_int_jit_compile(struct sk_filter *fp);
+
 #ifdef CONFIG_BPF_JIT
 #include <stdarg.h>
 #include <linux/linkage.h>
diff --git a/net/core/filter.c b/net/core/filter.c
index c442a0d7d0f7..32c5b44c537e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1524,6 +1524,10 @@ out_err:
 	return ERR_PTR(err);
 }
 
+void __weak bpf_int_jit_compile(struct sk_filter *prog)
+{
+}
+
 static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 					     struct sock *sk)
 {
@@ -1544,9 +1548,12 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 	/* JIT compiler couldn't process this filter, so do the
 	 * internal BPF translation for the optimized interpreter.
 	 */
-	if (!fp->jited)
+	if (!fp->jited) {
 		fp = __sk_migrate_filter(fp, sk);
 
+		/* Probe if internal BPF can be jit-ed */
+		bpf_int_jit_compile(fp);
+	}
 	return fp;
 }
 
-- 
cgit v1.2.3-71-gd317


From ad222799bec32a2db99c12b4dfa5dc19a1f6eaac Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 2 May 2014 13:22:19 +1000
Subject: drm: fix memory leak around mode_group (v2)

This mode group id_list was never being freed.

v2: take David's suggestion to free in minor_free.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc.c | 6 ++++++
 drivers/gpu/drm/drm_stub.c | 1 +
 include/drm/drm_crtc.h     | 1 +
 3 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index d8b7099abece..a3fe32439a5b 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -1378,6 +1378,12 @@ static int drm_mode_group_init(struct drm_device *dev, struct drm_mode_group *gr
 	return 0;
 }
 
+void drm_mode_group_destroy(struct drm_mode_group *group)
+{
+	kfree(group->id_list);
+	group->id_list = NULL;
+}
+
 /*
  * NOTE: Driver's shouldn't ever call drm_mode_group_init_legacy_group - it is
  * the drm core's responsibility to set up mode control groups.
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 1447b0ee3676..3727ac8bc310 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -294,6 +294,7 @@ static void drm_minor_free(struct drm_device *dev, unsigned int type)
 
 	slot = drm_minor_get_slot(dev, type);
 	if (*slot) {
+		drm_mode_group_destroy(&(*slot)->mode_group);
 		kfree(*slot);
 		*slot = NULL;
 	}
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index e55fccbe7c42..c6b9e8ab0a26 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -915,6 +915,7 @@ extern const char *drm_get_tv_subconnector_name(int val);
 extern const char *drm_get_tv_select_name(int val);
 extern void drm_fb_release(struct drm_file *file_priv);
 extern int drm_mode_group_init_legacy_group(struct drm_device *dev, struct drm_mode_group *group);
+extern void drm_mode_group_destroy(struct drm_mode_group *group);
 extern bool drm_probe_ddc(struct i2c_adapter *adapter);
 extern struct edid *drm_get_edid(struct drm_connector *connector,
 				 struct i2c_adapter *adapter);
-- 
cgit v1.2.3-71-gd317


From 31ad169148df2252a774c73c504aff43bfa4b656 Mon Sep 17 00:00:00 2001
From: Andrzej Kaczmarek <andrzej.kaczmarek@tieto.com>
Date: Wed, 14 May 2014 13:43:02 +0200
Subject: Bluetooth: Add conn info lifetime parameters to debugfs

This patch adds conn_info_min_age and conn_info_max_age parameters to
debugfs which determine lifetime of connection information. Actual
lifetime will be random value between min and max age.

Default values for min and max age are 1000ms and 3000ms respectively.

Signed-off-by: Andrzej Kaczmarek <andrzej.kaczmarek@tieto.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  6 ++++
 net/bluetooth/hci_core.c         | 63 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 211bad6a3366..4623f45c8892 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -145,6 +145,10 @@ struct oob_data {
 /* Default LE RPA expiry time, 15 minutes */
 #define HCI_DEFAULT_RPA_TIMEOUT		(15 * 60)
 
+/* Default min/max age of connection information (1s/3s) */
+#define DEFAULT_CONN_INFO_MIN_AGE	1000
+#define DEFAULT_CONN_INFO_MAX_AGE	3000
+
 struct amp_assoc {
 	__u16	len;
 	__u16	offset;
@@ -200,6 +204,8 @@ struct hci_dev {
 	__u16		le_conn_min_interval;
 	__u16		le_conn_max_interval;
 	__u16		discov_interleaved_timeout;
+	__u16		conn_info_min_age;
+	__u16		conn_info_max_age;
 	__u8		ssp_debug_mode;
 
 	__u16		devid_source;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index d31f144860d1..313bd1c164d6 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -579,6 +579,62 @@ static int sniff_max_interval_get(void *data, u64 *val)
 DEFINE_SIMPLE_ATTRIBUTE(sniff_max_interval_fops, sniff_max_interval_get,
 			sniff_max_interval_set, "%llu\n");
 
+static int conn_info_min_age_set(void *data, u64 val)
+{
+	struct hci_dev *hdev = data;
+
+	if (val == 0 || val > hdev->conn_info_max_age)
+		return -EINVAL;
+
+	hci_dev_lock(hdev);
+	hdev->conn_info_min_age = val;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+static int conn_info_min_age_get(void *data, u64 *val)
+{
+	struct hci_dev *hdev = data;
+
+	hci_dev_lock(hdev);
+	*val = hdev->conn_info_min_age;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(conn_info_min_age_fops, conn_info_min_age_get,
+			conn_info_min_age_set, "%llu\n");
+
+static int conn_info_max_age_set(void *data, u64 val)
+{
+	struct hci_dev *hdev = data;
+
+	if (val == 0 || val < hdev->conn_info_min_age)
+		return -EINVAL;
+
+	hci_dev_lock(hdev);
+	hdev->conn_info_max_age = val;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+static int conn_info_max_age_get(void *data, u64 *val)
+{
+	struct hci_dev *hdev = data;
+
+	hci_dev_lock(hdev);
+	*val = hdev->conn_info_max_age;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(conn_info_max_age_fops, conn_info_max_age_get,
+			conn_info_max_age_set, "%llu\n");
+
 static int identity_show(struct seq_file *f, void *p)
 {
 	struct hci_dev *hdev = f->private;
@@ -1754,6 +1810,11 @@ static int __hci_init(struct hci_dev *hdev)
 			    &blacklist_fops);
 	debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops);
 
+	debugfs_create_file("conn_info_min_age", 0644, hdev->debugfs, hdev,
+			    &conn_info_min_age_fops);
+	debugfs_create_file("conn_info_max_age", 0644, hdev->debugfs, hdev,
+			    &conn_info_max_age_fops);
+
 	if (lmp_bredr_capable(hdev)) {
 		debugfs_create_file("inquiry_cache", 0444, hdev->debugfs,
 				    hdev, &inquiry_cache_fops);
@@ -3789,6 +3850,8 @@ struct hci_dev *hci_alloc_dev(void)
 
 	hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT;
 	hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT;
+	hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE;
+	hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE;
 
 	mutex_init(&hdev->lock);
 	mutex_init(&hdev->req_lock);
-- 
cgit v1.2.3-71-gd317


From dd9838087b8c2b45c7976e46290749732d7af9d5 Mon Sep 17 00:00:00 2001
From: Andrzej Kaczmarek <andrzej.kaczmarek@tieto.com>
Date: Wed, 14 May 2014 13:43:03 +0200
Subject: Bluetooth: Add support to get connection information

This patch adds support for Get Connection Information mgmt command
which can be used to query for information about connection, i.e. RSSI
and local TX power level.

In general values cached in hci_conn are returned as long as they are
considered valid, i.e. do not exceed age limit set in hdev. This limit
is calculated as random value between min/max values to avoid client
trying to guess when to poll for updated information.

Signed-off-by: Andrzej Kaczmarek <andrzej.kaczmarek@tieto.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |   2 +
 include/net/bluetooth/mgmt.h     |  12 +++
 net/bluetooth/mgmt.c             | 196 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 210 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 4623f45c8892..cbbab6327621 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -384,6 +384,8 @@ struct hci_conn {
 	__s8		tx_power;
 	unsigned long	flags;
 
+	unsigned long	conn_info_timestamp;
+
 	__u8		remote_cap;
 	__u8		remote_auth;
 	__u8		remote_id;
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index d4b571c2f9fd..226ae03cafe7 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -409,6 +409,18 @@ struct mgmt_cp_load_irks {
 } __packed;
 #define MGMT_LOAD_IRKS_SIZE		2
 
+#define MGMT_OP_GET_CONN_INFO		0x0031
+struct mgmt_cp_get_conn_info {
+	struct mgmt_addr_info addr;
+} __packed;
+#define MGMT_GET_CONN_INFO_SIZE		MGMT_ADDR_INFO_SIZE
+struct mgmt_rp_get_conn_info {
+	struct mgmt_addr_info addr;
+	__s8	rssi;
+	__s8	tx_power;
+	__s8	max_tx_power;
+} __packed;
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f2a9422d6139..0e5a316fafbf 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -83,6 +83,7 @@ static const u16 mgmt_commands[] = {
 	MGMT_OP_SET_DEBUG_KEYS,
 	MGMT_OP_SET_PRIVACY,
 	MGMT_OP_LOAD_IRKS,
+	MGMT_OP_GET_CONN_INFO,
 };
 
 static const u16 mgmt_events[] = {
@@ -4557,6 +4558,200 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
 	return err;
 }
 
+struct cmd_conn_lookup {
+	struct hci_conn *conn;
+	bool valid_tx_power;
+	u8 mgmt_status;
+};
+
+static void get_conn_info_complete(struct pending_cmd *cmd, void *data)
+{
+	struct cmd_conn_lookup *match = data;
+	struct mgmt_cp_get_conn_info *cp;
+	struct mgmt_rp_get_conn_info rp;
+	struct hci_conn *conn = cmd->user_data;
+
+	if (conn != match->conn)
+		return;
+
+	cp = (struct mgmt_cp_get_conn_info *) cmd->param;
+
+	memset(&rp, 0, sizeof(rp));
+	bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
+	rp.addr.type = cp->addr.type;
+
+	if (!match->mgmt_status) {
+		rp.rssi = conn->rssi;
+
+		if (match->valid_tx_power)
+			rp.tx_power = conn->tx_power;
+		else
+			rp.tx_power = HCI_TX_POWER_INVALID;
+
+		rp.max_tx_power = HCI_TX_POWER_INVALID;
+	}
+
+	cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO,
+		     match->mgmt_status, &rp, sizeof(rp));
+
+	hci_conn_drop(conn);
+
+	mgmt_pending_remove(cmd);
+}
+
+static void conn_info_refresh_complete(struct hci_dev *hdev, u8 status)
+{
+	struct hci_cp_read_rssi *cp;
+	struct hci_conn *conn;
+	struct cmd_conn_lookup match;
+	u16 handle;
+
+	BT_DBG("status 0x%02x", status);
+
+	hci_dev_lock(hdev);
+
+	/* TX power data is valid in case request completed successfully,
+	 * otherwise we assume it's not valid.
+	 */
+	match.valid_tx_power = !status;
+
+	/* Commands sent in request are either Read RSSI or Read Transmit Power
+	 * Level so we check which one was last sent to retrieve connection
+	 * handle.  Both commands have handle as first parameter so it's safe to
+	 * cast data on the same command struct.
+	 *
+	 * First command sent is always Read RSSI and we fail only if it fails.
+	 * In other case we simply override error to indicate success as we
+	 * already remembered if TX power value is actually valid.
+	 */
+	cp = hci_sent_cmd_data(hdev, HCI_OP_READ_RSSI);
+	if (!cp) {
+		cp = hci_sent_cmd_data(hdev, HCI_OP_READ_TX_POWER);
+		status = 0;
+	}
+
+	if (!cp) {
+		BT_ERR("invalid sent_cmd in response");
+		goto unlock;
+	}
+
+	handle = __le16_to_cpu(cp->handle);
+	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	if (!conn) {
+		BT_ERR("unknown handle (%d) in response", handle);
+		goto unlock;
+	}
+
+	match.conn = conn;
+	match.mgmt_status = mgmt_status(status);
+
+	/* Cache refresh is complete, now reply for mgmt request for given
+	 * connection only.
+	 */
+	mgmt_pending_foreach(MGMT_OP_GET_CONN_INFO, hdev,
+			     get_conn_info_complete, &match);
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
+static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data,
+			 u16 len)
+{
+	struct mgmt_cp_get_conn_info *cp = data;
+	struct mgmt_rp_get_conn_info rp;
+	struct hci_conn *conn;
+	unsigned long conn_info_age;
+	int err = 0;
+
+	BT_DBG("%s", hdev->name);
+
+	memset(&rp, 0, sizeof(rp));
+	bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
+	rp.addr.type = cp->addr.type;
+
+	if (!bdaddr_type_is_valid(cp->addr.type))
+		return cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+				    MGMT_STATUS_INVALID_PARAMS,
+				    &rp, sizeof(rp));
+
+	hci_dev_lock(hdev);
+
+	if (!hdev_is_powered(hdev)) {
+		err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+				   MGMT_STATUS_NOT_POWERED, &rp, sizeof(rp));
+		goto unlock;
+	}
+
+	if (cp->addr.type == BDADDR_BREDR)
+		conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK,
+					       &cp->addr.bdaddr);
+	else
+		conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr);
+
+	if (!conn || conn->state != BT_CONNECTED) {
+		err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+				   MGMT_STATUS_NOT_CONNECTED, &rp, sizeof(rp));
+		goto unlock;
+	}
+
+	/* To avoid client trying to guess when to poll again for information we
+	 * calculate conn info age as random value between min/max set in hdev.
+	 */
+	conn_info_age = hdev->conn_info_min_age +
+			prandom_u32_max(hdev->conn_info_max_age -
+					hdev->conn_info_min_age);
+
+	/* Query controller to refresh cached values if they are too old or were
+	 * never read.
+	 */
+	if (time_after(jiffies, conn->conn_info_timestamp + conn_info_age) ||
+	    !conn->conn_info_timestamp) {
+		struct hci_request req;
+		struct hci_cp_read_tx_power req_txp_cp;
+		struct hci_cp_read_rssi req_rssi_cp;
+		struct pending_cmd *cmd;
+
+		hci_req_init(&req, hdev);
+		req_rssi_cp.handle = cpu_to_le16(conn->handle);
+		hci_req_add(&req, HCI_OP_READ_RSSI, sizeof(req_rssi_cp),
+			    &req_rssi_cp);
+
+		req_txp_cp.handle = cpu_to_le16(conn->handle);
+		req_txp_cp.type = 0x00;
+		hci_req_add(&req, HCI_OP_READ_TX_POWER,
+			    sizeof(req_txp_cp), &req_txp_cp);
+
+		err = hci_req_run(&req, conn_info_refresh_complete);
+		if (err < 0)
+			goto unlock;
+
+		cmd = mgmt_pending_add(sk, MGMT_OP_GET_CONN_INFO, hdev,
+				       data, len);
+		if (!cmd) {
+			err = -ENOMEM;
+			goto unlock;
+		}
+
+		hci_conn_hold(conn);
+		cmd->user_data = conn;
+
+		conn->conn_info_timestamp = jiffies;
+	} else {
+		/* Cache is valid, just reply with values cached in hci_conn */
+		rp.rssi = conn->rssi;
+		rp.tx_power = conn->tx_power;
+		rp.max_tx_power = HCI_TX_POWER_INVALID;
+
+		err = cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO,
+				   MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
+	}
+
+unlock:
+	hci_dev_unlock(hdev);
+	return err;
+}
+
 static const struct mgmt_handler {
 	int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
 		     u16 data_len);
@@ -4612,6 +4807,7 @@ static const struct mgmt_handler {
 	{ set_debug_keys,         false, MGMT_SETTING_SIZE },
 	{ set_privacy,            false, MGMT_SET_PRIVACY_SIZE },
 	{ load_irks,              true,  MGMT_LOAD_IRKS_SIZE },
+	{ get_conn_info,          false, MGMT_GET_CONN_INFO_SIZE },
 };
 
 
-- 
cgit v1.2.3-71-gd317


From d0455ed996df84fd2670a655fe13ab72f8264765 Mon Sep 17 00:00:00 2001
From: Andrzej Kaczmarek <andrzej.kaczmarek@tieto.com>
Date: Wed, 14 May 2014 13:43:05 +0200
Subject: Bluetooth: Store max TX power level for connection

This patch adds support to store local maximum TX power level for
connection when reply for HCI_Read_Transmit_Power_Level is received.

Signed-off-by: Andrzej Kaczmarek <andrzej.kaczmarek@tieto.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_conn.c         |  1 +
 net/bluetooth/hci_event.c        | 12 +++++++++++-
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index cbbab6327621..b386bf17e6c2 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -382,6 +382,7 @@ struct hci_conn {
 	__u16		le_conn_max_interval;
 	__s8		rssi;
 	__s8		tx_power;
+	__s8		max_tx_power;
 	unsigned long	flags;
 
 	unsigned long	conn_info_timestamp;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 74b368bfe102..a987e7def025 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -408,6 +408,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 	conn->remote_auth = 0xff;
 	conn->key_type = 0xff;
 	conn->tx_power = HCI_TX_POWER_INVALID;
+	conn->max_tx_power = HCI_TX_POWER_INVALID;
 
 	set_bit(HCI_CONN_POWER_SAVE, &conn->flags);
 	conn->disc_timeout = HCI_DISCONN_TIMEOUT;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index fa614e3430a7..492d8d5071c7 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1282,9 +1282,19 @@ static void hci_cc_read_tx_power(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_lock(hdev);
 
 	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle));
-	if (conn && sent->type == 0x00)
+	if (!conn)
+		goto unlock;
+
+	switch (sent->type) {
+	case 0x00:
 		conn->tx_power = rp->tx_power;
+		break;
+	case 0x01:
+		conn->max_tx_power = rp->tx_power;
+		break;
+	}
 
+unlock:
 	hci_dev_unlock(hdev);
 }
 
-- 
cgit v1.2.3-71-gd317


From 3b514d24e200fcdcde0a57c354a51d3677a86743 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:47 -0400
Subject: cgroup: skip refcnting on normal root csses and cgrp_dfl_root self
 css

9395a4500404 ("cgroup: enable refcnting for root csses") enabled
reference counting for root csses (cgroup_subsys_states) so that
cgroup's self csses can be used to manage the lifetime of the
containing cgroups.

Unfortunately, this change was incorrect.  During early init,
cgrp_dfl_root self css refcnt is used.  percpu_ref can't initialized
during early init and its initialization is deferred till
cgroup_init() time.  This means that cpu was using percpu_ref which
wasn't properly initialized.  Due to the way percpu variables are laid
out on x86, this didn't blow up immediately on x86 but ended up
incrementing and decrementing the percpu variable at offset zero,
whatever it may be; however, on other archs, this caused fault and
early boot failure.

As cgroup self csses for root cgroups of non-dfl hierarchies need
working refcounting, we can't revert 9395a4500404.  This patch adds
CSS_NO_REF which explicitly inhibits reference counting on the css and
sets it on all normal (non-self) csses and cgroup_dfl_root self css.

v2: cgrp_dfl_root.self is the offending one.  Set the flag on it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Stephen Warren <swarren@nvidia.com>
Tested-by: Stephen Warren <swarren@nvidia.com>
Fixes: 9395a4500404 ("cgroup: enable refcnting for root csses")
---
 include/linux/cgroup.h | 11 ++++++++---
 kernel/cgroup.c        | 11 +++++++++--
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 76dadd77a120..1737db0c63fe 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -77,6 +77,7 @@ struct cgroup_subsys_state {
 
 /* bits in struct cgroup_subsys_state flags field */
 enum {
+	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
 	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
 };
 
@@ -88,7 +89,8 @@ enum {
  */
 static inline void css_get(struct cgroup_subsys_state *css)
 {
-	percpu_ref_get(&css->refcnt);
+	if (!(css->flags & CSS_NO_REF))
+		percpu_ref_get(&css->refcnt);
 }
 
 /**
@@ -103,7 +105,9 @@ static inline void css_get(struct cgroup_subsys_state *css)
  */
 static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 {
-	return percpu_ref_tryget_live(&css->refcnt);
+	if (!(css->flags & CSS_NO_REF))
+		return percpu_ref_tryget_live(&css->refcnt);
+	return true;
 }
 
 /**
@@ -114,7 +118,8 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css)
  */
 static inline void css_put(struct cgroup_subsys_state *css)
 {
-	percpu_ref_put(&css->refcnt);
+	if (!(css->flags & CSS_NO_REF))
+		percpu_ref_put(&css->refcnt);
 }
 
 /* bits in struct cgroup flags field */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c01e8e8dfad0..0343d7ee6d62 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4593,11 +4593,17 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
 	/* We don't handle early failures gracefully */
 	BUG_ON(IS_ERR(css));
 	init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
+
+	/*
+	 * Root csses are never destroyed and we can't initialize
+	 * percpu_ref during early init.  Disable refcnting.
+	 */
+	css->flags |= CSS_NO_REF;
+
 	if (early) {
 		/* allocation can't be done safely during early init */
 		css->id = 1;
 	} else {
-		BUG_ON(percpu_ref_init(&css->refcnt, css_release));
 		css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
 		BUG_ON(css->id < 0);
 	}
@@ -4636,6 +4642,8 @@ int __init cgroup_init_early(void)
 	int i;
 
 	init_cgroup_root(&cgrp_dfl_root, &opts);
+	cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
+
 	RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
 
 	for_each_subsys(ss, i) {
@@ -4684,7 +4692,6 @@ int __init cgroup_init(void)
 			struct cgroup_subsys_state *css =
 				init_css_set.subsys[ss->id];
 
-			BUG_ON(percpu_ref_init(&css->refcnt, css_release));
 			css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
 						   GFP_KERNEL);
 			BUG_ON(css->id < 0);
-- 
cgit v1.2.3-71-gd317


From 5c9d535b893f30266ea29fe377cb9b002fcd76aa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:48 -0400
Subject: cgroup: remove css_parent()

cgroup in general is moving towards using cgroup_subsys_state as the
fundamental structural component and css_parent() was introduced to
convert from using cgroup->parent to css->parent.  It was quite some
time ago and we're moving forward with making css more prominent.

This patch drops the trivial wrapper css_parent() and let the users
dereference css->parent.  While at it, explicitly mark fields of css
which are public and immutable.

v2: New usage from device_cgroup.c converted.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: "David S. Miller" <davem@davemloft.net>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 block/blk-cgroup.h           |  2 +-
 include/linux/cgroup.h       | 29 +++++++++++------------------
 kernel/cgroup.c              |  8 ++++----
 kernel/cgroup_freezer.c      |  2 +-
 kernel/cpuset.c              |  2 +-
 kernel/sched/core.c          |  2 +-
 kernel/sched/cpuacct.c       |  2 +-
 mm/hugetlb_cgroup.c          |  2 +-
 mm/memcontrol.c              | 14 +++++++-------
 net/core/netclassid_cgroup.c |  2 +-
 net/core/netprio_cgroup.c    |  2 +-
 security/device_cgroup.c     |  8 ++++----
 12 files changed, 34 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 371fe8e92ab5..d692b29c083a 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -204,7 +204,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
  */
 static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
 {
-	return css_to_blkcg(css_parent(&blkcg->css));
+	return css_to_blkcg(blkcg->css.parent);
 }
 
 /**
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1737db0c63fe..2549493d518d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -48,22 +48,28 @@ enum cgroup_subsys_id {
 };
 #undef SUBSYS
 
-/* Per-subsystem/per-cgroup state maintained by the system. */
+/*
+ * Per-subsystem/per-cgroup state maintained by the system.  This is the
+ * fundamental structural building block that controllers deal with.
+ *
+ * Fields marked with "PI:" are public and immutable and may be accessed
+ * directly without synchronization.
+ */
 struct cgroup_subsys_state {
-	/* the cgroup that this css is attached to */
+	/* PI: the cgroup that this css is attached to */
 	struct cgroup *cgroup;
 
-	/* the cgroup subsystem that this css is attached to */
+	/* PI: the cgroup subsystem that this css is attached to */
 	struct cgroup_subsys *ss;
 
 	/* reference count - access via css_[try]get() and css_put() */
 	struct percpu_ref refcnt;
 
-	/* the parent css */
+	/* PI: the parent css */
 	struct cgroup_subsys_state *parent;
 
 	/*
-	 * Subsys-unique ID.  0 is unused and root is always 1.  The
+	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
 	 * matching css can be looked up using css_from_id().
 	 */
 	int id;
@@ -669,19 +675,6 @@ struct cgroup_subsys {
 #include <linux/cgroup_subsys.h>
 #undef SUBSYS
 
-/**
- * css_parent - find the parent css
- * @css: the target cgroup_subsys_state
- *
- * Return the parent css of @css.  This function is guaranteed to return
- * non-NULL parent as long as @css isn't the root.
- */
-static inline
-struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css)
-{
-	return css->parent;
-}
-
 /**
  * task_css_set_check - obtain a task's css_set with extra access conditions
  * @task: the task to obtain css_set for
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0343d7ee6d62..929bbbc539e9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3176,10 +3176,10 @@ css_next_descendant_pre(struct cgroup_subsys_state *pos,
 
 	/* no child, visit my or the closest ancestor's next sibling */
 	while (pos != root) {
-		next = css_next_child(pos, css_parent(pos));
+		next = css_next_child(pos, pos->parent);
 		if (next)
 			return next;
-		pos = css_parent(pos);
+		pos = pos->parent;
 	}
 
 	return NULL;
@@ -3261,12 +3261,12 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
 		return NULL;
 
 	/* if there's an unvisited sibling, visit its leftmost descendant */
-	next = css_next_child(pos, css_parent(pos));
+	next = css_next_child(pos, pos->parent);
 	if (next)
 		return css_leftmost_descendant(next);
 
 	/* no sibling left, visit parent */
-	return css_parent(pos);
+	return pos->parent;
 }
 
 static bool cgroup_has_live_children(struct cgroup *cgrp)
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 6b4e60e33a9a..a79e40f9d700 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -59,7 +59,7 @@ static inline struct freezer *task_freezer(struct task_struct *task)
 
 static struct freezer *parent_freezer(struct freezer *freezer)
 {
-	return css_freezer(css_parent(&freezer->css));
+	return css_freezer(freezer->css.parent);
 }
 
 bool cgroup_freezing(struct task_struct *task)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2f4b08b8db24..5b2a31082f4f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -124,7 +124,7 @@ static inline struct cpuset *task_cs(struct task_struct *task)
 
 static inline struct cpuset *parent_cs(struct cpuset *cs)
 {
-	return css_cs(css_parent(&cs->css));
+	return css_cs(cs->css.parent);
 }
 
 #ifdef CONFIG_NUMA
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 268a45ea238c..ac61ad1a5f9f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7586,7 +7586,7 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
-	struct task_group *parent = css_tg(css_parent(css));
+	struct task_group *parent = css_tg(css->parent);
 
 	if (parent)
 		sched_online_group(tg, parent);
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index c143ee380e3a..9cf350c94ec4 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -46,7 +46,7 @@ static inline struct cpuacct *task_ca(struct task_struct *tsk)
 
 static inline struct cpuacct *parent_ca(struct cpuacct *ca)
 {
-	return css_ca(css_parent(&ca->css));
+	return css_ca(ca->css.parent);
 }
 
 static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index a380681ab3cf..493f758445e7 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -52,7 +52,7 @@ static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
 static inline struct hugetlb_cgroup *
 parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
 {
-	return hugetlb_cgroup_from_css(css_parent(&h_cg->css));
+	return hugetlb_cgroup_from_css(h_cg->css.parent);
 }
 
 static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b638a79209ee..a5e0417b4f9a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1540,7 +1540,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
 int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 {
 	/* root ? */
-	if (!css_parent(&memcg->css))
+	if (!memcg->css.parent)
 		return vm_swappiness;
 
 	return memcg->swappiness;
@@ -4909,7 +4909,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
 {
 	int retval = 0;
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css));
+	struct mem_cgroup *parent_memcg = mem_cgroup_from_css(memcg->css.parent);
 
 	mutex_lock(&memcg_create_mutex);
 
@@ -5207,8 +5207,8 @@ static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
 	if (!memcg->use_hierarchy)
 		goto out;
 
-	while (css_parent(&memcg->css)) {
-		memcg = mem_cgroup_from_css(css_parent(&memcg->css));
+	while (memcg->css.parent) {
+		memcg = mem_cgroup_from_css(memcg->css.parent);
 		if (!memcg->use_hierarchy)
 			break;
 		tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
@@ -5443,7 +5443,7 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
 				       struct cftype *cft, u64 val)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
+	struct mem_cgroup *parent = mem_cgroup_from_css(memcg->css.parent);
 
 	if (val > 100 || !parent)
 		return -EINVAL;
@@ -5790,7 +5790,7 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
 	struct cftype *cft, u64 val)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
+	struct mem_cgroup *parent = mem_cgroup_from_css(memcg->css.parent);
 
 	/* cannot set to root cgroup and only 0 and 1 are allowed */
 	if (!parent || !((val == 0) || (val == 1)))
@@ -6407,7 +6407,7 @@ static int
 mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
+	struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
 
 	if (css->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 22931e1b99b4..30d903b19c62 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -42,7 +42,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
 static int cgrp_css_online(struct cgroup_subsys_state *css)
 {
 	struct cgroup_cls_state *cs = css_cls_state(css);
-	struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
+	struct cgroup_cls_state *parent = css_cls_state(css->parent);
 
 	if (parent)
 		cs->classid = parent->classid;
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index b990cefd906b..2f385b9bccc0 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -140,7 +140,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
 
 static int cgrp_css_online(struct cgroup_subsys_state *css)
 {
-	struct cgroup_subsys_state *parent_css = css_parent(css);
+	struct cgroup_subsys_state *parent_css = css->parent;
 	struct net_device *dev;
 	int ret = 0;
 
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 7dbac4061b1c..ce14a31b1337 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -182,7 +182,7 @@ static inline bool is_devcg_online(const struct dev_cgroup *devcg)
 static int devcgroup_online(struct cgroup_subsys_state *css)
 {
 	struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
-	struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css_parent(css));
+	struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css->parent);
 	int ret = 0;
 
 	mutex_lock(&devcgroup_mutex);
@@ -455,7 +455,7 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
 static int parent_has_perm(struct dev_cgroup *childcg,
 				  struct dev_exception_item *ex)
 {
-	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));
+	struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
 
 	if (!parent)
 		return 1;
@@ -476,7 +476,7 @@ static int parent_has_perm(struct dev_cgroup *childcg,
 static bool parent_allows_removal(struct dev_cgroup *childcg,
 				  struct dev_exception_item *ex)
 {
-	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));
+	struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
 
 	if (!parent)
 		return true;
@@ -614,7 +614,7 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	char temp[12];		/* 11 + 1 characters needed for a u32 */
 	int count, rc = 0;
 	struct dev_exception_item ex;
-	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&devcgroup->css));
+	struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent);
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-- 
cgit v1.2.3-71-gd317


From d51f39b05ce0008118c45945e681b20484990571 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:48 -0400
Subject: cgroup: remove cgroup->parent

cgroup->parent is redundant as cgroup->self.parent can also be used to
determine the parent cgroup and we're moving towards using
cgroup_subsys_states as the fundamental structural blocks.  This patch
introduces cgroup_parent() which follows cgroup->self.parent and
removes cgroup->parent.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  1 -
 kernel/cgroup.c        | 52 +++++++++++++++++++++++++++++---------------------
 2 files changed, 30 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2549493d518d..fd538f4c2bb6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -178,7 +178,6 @@ struct cgroup {
 	struct list_head sibling;	/* my parent's children */
 	struct list_head children;	/* my children */
 
-	struct cgroup *parent;		/* my parent */
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
 	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 929bbbc539e9..8c67a739aea4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -218,6 +218,15 @@ static void cgroup_idr_remove(struct idr *idr, int id)
 	spin_unlock_bh(&cgroup_idr_lock);
 }
 
+static struct cgroup *cgroup_parent(struct cgroup *cgrp)
+{
+	struct cgroup_subsys_state *parent_css = cgrp->self.parent;
+
+	if (parent_css)
+		return container_of(parent_css, struct cgroup, self);
+	return NULL;
+}
+
 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
  * @cgrp: the cgroup of interest
@@ -260,9 +269,9 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 	if (!(cgrp->root->subsys_mask & (1 << ss->id)))
 		return NULL;
 
-	while (cgrp->parent &&
-	       !(cgrp->parent->child_subsys_mask & (1 << ss->id)))
-		cgrp = cgrp->parent;
+	while (cgroup_parent(cgrp) &&
+	       !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id)))
+		cgrp = cgroup_parent(cgrp);
 
 	return cgroup_css(cgrp, ss);
 }
@@ -307,7 +316,7 @@ bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor)
 	while (cgrp) {
 		if (cgrp == ancestor)
 			return true;
-		cgrp = cgrp->parent;
+		cgrp = cgroup_parent(cgrp);
 	}
 	return false;
 }
@@ -454,7 +463,7 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
 
 		if (cgrp->populated_kn)
 			kernfs_notify(cgrp->populated_kn);
-		cgrp = cgrp->parent;
+		cgrp = cgroup_parent(cgrp);
 	} while (cgrp);
 }
 
@@ -2018,7 +2027,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
 	 * Except for the root, child_subsys_mask must be zero for a cgroup
 	 * with tasks so that child cgroups don't compete against tasks.
 	 */
-	if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && dst_cgrp->parent &&
+	if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) &&
 	    dst_cgrp->child_subsys_mask)
 		return -EBUSY;
 
@@ -2427,7 +2436,7 @@ static int cgroup_controllers_show(struct seq_file *seq, void *v)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
 
-	cgroup_print_ss_mask(seq, cgrp->parent->child_subsys_mask);
+	cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->child_subsys_mask);
 	return 0;
 }
 
@@ -2610,8 +2619,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 
 			/* unavailable or not enabled on the parent? */
 			if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
-			    (cgrp->parent &&
-			     !(cgrp->parent->child_subsys_mask & (1 << ssid)))) {
+			    (cgroup_parent(cgrp) &&
+			     !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) {
 				ret = -ENOENT;
 				goto out_unlock;
 			}
@@ -2640,7 +2649,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 	 * Except for the root, child_subsys_mask must be zero for a cgroup
 	 * with tasks so that child cgroups don't compete against tasks.
 	 */
-	if (enable && cgrp->parent && !list_empty(&cgrp->cset_links)) {
+	if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -2898,9 +2907,9 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			continue;
 		if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
 			continue;
-		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
+		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
 			continue;
-		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
+		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
 			continue;
 
 		if (is_add) {
@@ -4092,14 +4101,14 @@ static void css_free_work_fn(struct work_struct *work)
 		atomic_dec(&cgrp->root->nr_cgrps);
 		cgroup_pidlist_destroy_all(cgrp);
 
-		if (cgrp->parent) {
+		if (cgroup_parent(cgrp)) {
 			/*
 			 * We get a ref to the parent, and put the ref when
 			 * this cgroup is being freed, so it's guaranteed
 			 * that the parent won't be destroyed before its
 			 * children.
 			 */
-			cgroup_put(cgrp->parent);
+			cgroup_put(cgroup_parent(cgrp));
 			kernfs_put(cgrp->kn);
 			kfree(cgrp);
 		} else {
@@ -4163,8 +4172,8 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	css->ss = ss;
 	css->flags = 0;
 
-	if (cgrp->parent) {
-		css->parent = cgroup_css(cgrp->parent, ss);
+	if (cgroup_parent(cgrp)) {
+		css->parent = cgroup_css(cgroup_parent(cgrp), ss);
 		css_get(css->parent);
 	}
 
@@ -4218,7 +4227,7 @@ static void offline_css(struct cgroup_subsys_state *css)
  */
 static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 {
-	struct cgroup *parent = cgrp->parent;
+	struct cgroup *parent = cgroup_parent(cgrp);
 	struct cgroup_subsys_state *css;
 	int err;
 
@@ -4251,7 +4260,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 		goto err_clear_dir;
 
 	if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
-	    parent->parent) {
+	    cgroup_parent(parent)) {
 		pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
 			current->comm, current->pid, ss->name);
 		if (!strcmp(ss->name, "memory"))
@@ -4309,7 +4318,6 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 
 	init_cgroup_housekeeping(cgrp);
 
-	cgrp->parent = parent;
 	cgrp->self.parent = &parent->self;
 	cgrp->root = root;
 
@@ -4336,7 +4344,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	cgrp->serial_nr = cgroup_serial_nr_next++;
 
 	/* allocation complete, commit to creation */
-	list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
+	list_add_tail_rcu(&cgrp->sibling, &cgroup_parent(cgrp)->children);
 	atomic_inc(&root->nr_cgrps);
 	cgroup_get(parent);
 
@@ -4531,8 +4539,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 */
 	kernfs_remove(cgrp->kn);
 
-	set_bit(CGRP_RELEASABLE, &cgrp->parent->flags);
-	check_for_release(cgrp->parent);
+	set_bit(CGRP_RELEASABLE, &cgroup_parent(cgrp)->flags);
+	check_for_release(cgroup_parent(cgrp));
 
 	/* put the base reference */
 	percpu_ref_kill(&cgrp->self.refcnt);
-- 
cgit v1.2.3-71-gd317


From d5c419b68e368fdd9f1857bf8d4bb4480edb9b80 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:48 -0400
Subject: cgroup: move cgroup->sibling and ->children into cgroup_subsys_state

We're moving towards using cgroup_subsys_states as the fundamental
structural blocks.  Let's move cgroup->sibling and ->children into
cgroup_subsys_state.  This is pure move without functional change and
only cgroup->self's fields are actually used.  Other csses will make
use of the fields later.

While at it, update init_and_link_css() so that it zeroes the whole
css before initializing it and remove explicit zeroing of ->flags.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 11 ++++-------
 kernel/cgroup.c        | 38 ++++++++++++++++++++------------------
 2 files changed, 24 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index fd538f4c2bb6..cf8ba26b7c6e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -68,6 +68,10 @@ struct cgroup_subsys_state {
 	/* PI: the parent css */
 	struct cgroup_subsys_state *parent;
 
+	/* siblings list anchored at the parent's ->children */
+	struct list_head sibling;
+	struct list_head children;
+
 	/*
 	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
 	 * matching css can be looked up using css_from_id().
@@ -171,13 +175,6 @@ struct cgroup {
 	 */
 	int populated_cnt;
 
-	/*
-	 * We link our 'sibling' struct into our parent's 'children'.
-	 * Our children link their 'sibling' into our 'children'.
-	 */
-	struct list_head sibling;	/* my parent's children */
-	struct list_head children;	/* my children */
-
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
 	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c67a739aea4..5385839e727b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -378,7 +378,7 @@ static int notify_on_release(const struct cgroup *cgrp)
 
 /* iterate over child cgrps, lock should be held throughout iteration */
 #define cgroup_for_each_live_child(child, cgrp)				\
-	list_for_each_entry((child), &(cgrp)->children, sibling)	\
+	list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
 		if (({ lockdep_assert_held(&cgroup_mutex);		\
 		       cgroup_is_dead(child); }))			\
 			;						\
@@ -870,7 +870,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 	mutex_lock(&cgroup_mutex);
 
 	BUG_ON(atomic_read(&root->nr_cgrps));
-	BUG_ON(!list_empty(&cgrp->children));
+	BUG_ON(!list_empty(&cgrp->self.children));
 
 	/* Rebind all subsystems back to the default hierarchy */
 	rebind_subsystems(&cgrp_dfl_root, root->subsys_mask);
@@ -1432,7 +1432,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	}
 
 	/* remounting is not allowed for populated hierarchies */
-	if (!list_empty(&root->cgrp.children)) {
+	if (!list_empty(&root->cgrp.self.children)) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -1512,8 +1512,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	struct cgroup_subsys *ss;
 	int ssid;
 
-	INIT_LIST_HEAD(&cgrp->sibling);
-	INIT_LIST_HEAD(&cgrp->children);
+	INIT_LIST_HEAD(&cgrp->self.sibling);
+	INIT_LIST_HEAD(&cgrp->self.children);
 	INIT_LIST_HEAD(&cgrp->cset_links);
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
@@ -1612,7 +1612,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 		link_css_set(&tmp_links, cset, root_cgrp);
 	up_write(&css_set_rwsem);
 
-	BUG_ON(!list_empty(&root_cgrp->children));
+	BUG_ON(!list_empty(&root_cgrp->self.children));
 	BUG_ON(atomic_read(&root->nr_cgrps) != 1);
 
 	kernfs_activate(root_cgrp->kn);
@@ -3128,11 +3128,11 @@ css_next_child(struct cgroup_subsys_state *pos_css,
 	 * cgroup is removed or iteration and removal race.
 	 */
 	if (!pos) {
-		next = list_entry_rcu(cgrp->children.next, struct cgroup, sibling);
+		next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling);
 	} else if (likely(!cgroup_is_dead(pos))) {
-		next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
+		next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling);
 	} else {
-		list_for_each_entry_rcu(next, &cgrp->children, sibling)
+		list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling)
 			if (next->serial_nr > pos->serial_nr)
 				break;
 	}
@@ -3142,12 +3142,12 @@ css_next_child(struct cgroup_subsys_state *pos_css,
 	 * the next sibling; however, it might have @ss disabled.  If so,
 	 * fast-forward to the next enabled one.
 	 */
-	while (&next->sibling != &cgrp->children) {
+	while (&next->self.sibling != &cgrp->self.children) {
 		struct cgroup_subsys_state *next_css = cgroup_css(next, parent_css->ss);
 
 		if (next_css)
 			return next_css;
-		next = list_entry_rcu(next->sibling.next, struct cgroup, sibling);
+		next = list_entry_rcu(next->self.sibling.next, struct cgroup, self.sibling);
 	}
 	return NULL;
 }
@@ -3283,7 +3283,7 @@ static bool cgroup_has_live_children(struct cgroup *cgrp)
 	struct cgroup *child;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(child, &cgrp->children, sibling) {
+	list_for_each_entry_rcu(child, &cgrp->self.children, self.sibling) {
 		if (!cgroup_is_dead(child)) {
 			rcu_read_unlock();
 			return true;
@@ -4144,7 +4144,7 @@ static void css_release_work_fn(struct work_struct *work)
 	} else {
 		/* cgroup release path */
 		mutex_lock(&cgroup_mutex);
-		list_del_rcu(&cgrp->sibling);
+		list_del_rcu(&cgrp->self.sibling);
 		mutex_unlock(&cgroup_mutex);
 
 		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
@@ -4168,9 +4168,11 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 {
 	cgroup_get(cgrp);
 
+	memset(css, 0, sizeof(*css));
 	css->cgroup = cgrp;
 	css->ss = ss;
-	css->flags = 0;
+	INIT_LIST_HEAD(&css->sibling);
+	INIT_LIST_HEAD(&css->children);
 
 	if (cgroup_parent(cgrp)) {
 		css->parent = cgroup_css(cgroup_parent(cgrp), ss);
@@ -4344,7 +4346,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	cgrp->serial_nr = cgroup_serial_nr_next++;
 
 	/* allocation complete, commit to creation */
-	list_add_tail_rcu(&cgrp->sibling, &cgroup_parent(cgrp)->children);
+	list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
 	atomic_inc(&root->nr_cgrps);
 	cgroup_get(parent);
 
@@ -4507,9 +4509,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 		return -EBUSY;
 
 	/*
-	 * Make sure there's no live children.  We can't test ->children
-	 * emptiness as dead children linger on it while being destroyed;
-	 * otherwise, "rmdir parent/child parent" may fail with -EBUSY.
+	 * Make sure there's no live children.  We can't test emptiness of
+	 * ->self.children as dead children linger on it while being
+	 * drained; otherwise, "rmdir parent/child parent" may fail.
 	 */
 	if (cgroup_has_live_children(cgrp))
 		return -EBUSY;
-- 
cgit v1.2.3-71-gd317


From 0cb51d71c1fa9234afe4213089844be76ec1765a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:49 -0400
Subject: cgroup: move cgroup->serial_nr into cgroup_subsys_state

We're moving towards using cgroup_subsys_states as the fundamental
structural blocks.  All csses including the cgroup->self and actual
ones now form trees through css->children and ->sibling which follow
the same rules as what cgroup->children and ->sibling followed.  This
patch moves cgroup->serial_nr which is used to implement css iteration
into css.

Note that all csses, regardless of their types, allocate their serial
numbers from the same monotonically increasing counter.  This doesn't
affect the ordering needed by css iteration or cause any other
material behavior changes.  This will be used to update css iteration.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 16 ++++++++--------
 kernel/cgroup.c        | 20 +++++++++++---------
 2 files changed, 19 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index cf8ba26b7c6e..ebe7ce49f4b7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -80,6 +80,14 @@ struct cgroup_subsys_state {
 
 	unsigned int flags;
 
+	/*
+	 * Monotonically increasing unique serial number which defines a
+	 * uniform order among all csses.  It's guaranteed that all
+	 * ->children lists are in the ascending order of ->serial_nr and
+	 * used to allow interrupting and resuming iterations.
+	 */
+	u64 serial_nr;
+
 	/* percpu_ref killing and RCU release */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
@@ -178,14 +186,6 @@ struct cgroup {
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
 	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
-	/*
-	 * Monotonically increasing unique serial number which defines a
-	 * uniform order among all cgroups.  It's guaranteed that all
-	 * ->children lists are in the ascending order of ->serial_nr.
-	 * It's used to allow interrupting and resuming iterations.
-	 */
-	u64 serial_nr;
-
 	/* the bitmask of subsystems enabled on the child cgroups */
 	unsigned int child_subsys_mask;
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index dcb06e181ce4..d5af128ec1ec 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -157,14 +157,13 @@ static int cgroup_root_count;
 static DEFINE_IDR(cgroup_hierarchy_idr);
 
 /*
- * Assign a monotonically increasing serial number to cgroups.  It
- * guarantees cgroups with bigger numbers are newer than those with smaller
- * numbers.  Also, as cgroups are always appended to the parent's
- * ->children list, it guarantees that sibling cgroups are always sorted in
- * the ascending serial number order on the list.  Protected by
- * cgroup_mutex.
+ * Assign a monotonically increasing serial number to csses.  It guarantees
+ * cgroups with bigger numbers are newer than those with smaller numbers.
+ * Also, as csses are always appended to the parent's ->children list, it
+ * guarantees that sibling csses are always sorted in the ascending serial
+ * number order on the list.  Protected by cgroup_mutex.
  */
-static u64 cgroup_serial_nr_next = 1;
+static u64 css_serial_nr_next = 1;
 
 /* This flag indicates whether tasks in the fork and exit paths should
  * check for fork/exit handlers to call. This avoids us having to do
@@ -3133,7 +3132,7 @@ css_next_child(struct cgroup_subsys_state *pos_css,
 		next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling);
 	} else {
 		list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling)
-			if (next->serial_nr > pos->serial_nr)
+			if (next->self.serial_nr > pos->self.serial_nr)
 				break;
 	}
 
@@ -4168,6 +4167,8 @@ static void css_release(struct percpu_ref *ref)
 static void init_and_link_css(struct cgroup_subsys_state *css,
 			      struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
+	lockdep_assert_held(&cgroup_mutex);
+
 	cgroup_get(cgrp);
 
 	memset(css, 0, sizeof(*css));
@@ -4175,6 +4176,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	css->ss = ss;
 	INIT_LIST_HEAD(&css->sibling);
 	INIT_LIST_HEAD(&css->children);
+	css->serial_nr = css_serial_nr_next++;
 
 	if (cgroup_parent(cgrp)) {
 		css->parent = cgroup_css(cgroup_parent(cgrp), ss);
@@ -4348,7 +4350,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	 */
 	kernfs_get(kn);
 
-	cgrp->serial_nr = cgroup_serial_nr_next++;
+	cgrp->self.serial_nr = css_serial_nr_next++;
 
 	/* allocation complete, commit to creation */
 	list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
-- 
cgit v1.2.3-71-gd317


From de3f034182ecbf0efbcef7ab8b253c6c3049a592 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:49 -0400
Subject: cgroup: introduce CSS_RELEASED and reduce css iteration fallback
 window

css iterations allow the caller to drop RCU read lock.  As long as the
caller keeps the current position accessible, it can simply re-grab
RCU read lock later and continue iteration.  This is achieved by using
CGRP_DEAD to detect whether the current positions next pointer is safe
to dereference and if not re-iterate from the beginning to the next
position using ->serial_nr.

CGRP_DEAD is used as the marker to invalidate the next pointer and the
only requirement is that the marker is set before the next sibling
starts its RCU grace period.  Because CGRP_DEAD is set at the end of
cgroup_destroy_locked() but the cgroup is unlinked when the reference
count reaches zero, we currently have a rather large window where this
fallback re-iteration logic can be triggered.

This patch introduces CSS_RELEASED which is set when a css is unlinked
from its sibling list.  This still keeps the re-iteration logic
working while drastically reducing the window of its activation.
While at it, rewrite the comment in css_next_child() to reflect the
new flag and better explain the synchronization.

This will also enable iterating csses directly instead of through
cgroups.

v2: CSS_RELEASED now assigned to 1 << 2 as 1 << 0 is used by
    CSS_NO_REF.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  1 +
 kernel/cgroup.c        | 41 ++++++++++++++++++++---------------------
 2 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ebe7ce49f4b7..5375582ea5f6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -97,6 +97,7 @@ struct cgroup_subsys_state {
 enum {
 	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
 	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
+	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
 };
 
 /**
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d5af128ec1ec..5544e685f2da 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3108,27 +3108,28 @@ css_next_child(struct cgroup_subsys_state *pos_css,
 	cgroup_assert_mutex_or_rcu_locked();
 
 	/*
-	 * @pos could already have been removed.  Once a cgroup is removed,
-	 * its ->sibling.next is no longer updated when its next sibling
-	 * changes.  As CGRP_DEAD assertion is serialized and happens
-	 * before the cgroup is taken off the ->sibling list, if we see it
-	 * unasserted, it's guaranteed that the next sibling hasn't
-	 * finished its grace period even if it's already removed, and thus
-	 * safe to dereference from this RCU critical section.  If
-	 * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
-	 * to be visible as %true here.
+	 * @pos could already have been unlinked from the sibling list.
+	 * Once a cgroup is removed, its ->sibling.next is no longer
+	 * updated when its next sibling changes.  CSS_RELEASED is set when
+	 * @pos is taken off list, at which time its next pointer is valid,
+	 * and, as releases are serialized, the one pointed to by the next
+	 * pointer is guaranteed to not have started release yet.  This
+	 * implies that if we observe !CSS_RELEASED on @pos in this RCU
+	 * critical section, the one pointed to by its next pointer is
+	 * guaranteed to not have finished its RCU grace period even if we
+	 * have dropped rcu_read_lock() inbetween iterations.
 	 *
-	 * If @pos is dead, its next pointer can't be dereferenced;
-	 * however, as each cgroup is given a monotonically increasing
-	 * unique serial number and always appended to the sibling list,
-	 * the next one can be found by walking the parent's children until
-	 * we see a cgroup with higher serial number than @pos's.  While
-	 * this path can be slower, it's taken only when either the current
-	 * cgroup is removed or iteration and removal race.
+	 * If @pos has CSS_RELEASED set, its next pointer can't be
+	 * dereferenced; however, as each css is given a monotonically
+	 * increasing unique serial number and always appended to the
+	 * sibling list, the next one can be found by walking the parent's
+	 * children until the first css with higher serial number than
+	 * @pos's.  While this path can be slower, it happens iff iteration
+	 * races against release and the race window is very small.
 	 */
 	if (!pos) {
 		next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling);
-	} else if (likely(!cgroup_is_dead(pos))) {
+	} else if (likely(!(pos->self.flags & CSS_RELEASED))) {
 		next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling);
 	} else {
 		list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling)
@@ -4139,6 +4140,7 @@ static void css_release_work_fn(struct work_struct *work)
 
 	mutex_lock(&cgroup_mutex);
 
+	css->flags |= CSS_RELEASED;
 	list_del_rcu(&css->sibling);
 
 	if (ss) {
@@ -4525,10 +4527,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 
 	/*
 	 * Mark @cgrp dead.  This prevents further task migration and child
-	 * creation by disabling cgroup_lock_live_group().  Note that
-	 * CGRP_DEAD assertion is depended upon by css_next_child() to
-	 * resume iteration after dropping RCU read lock.  See
-	 * css_next_child() for details.
+	 * creation by disabling cgroup_lock_live_group().
 	 */
 	set_bit(CGRP_DEAD, &cgrp->flags);
 
-- 
cgit v1.2.3-71-gd317


From c2931b70a32c705b9bd5762f5044f9eac8a52bb3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:51 -0400
Subject: cgroup: iterate cgroup_subsys_states directly

Currently, css_next_child() is implemented as finding the next child
cgroup which has the css enabled, which used to be the only way to do
it as only cgroups participated in sibling lists and thus could be
iteratd.  This works as long as what's required during iteration is
not missing online csses; however, it turns out that there are use
cases where offlined but not yet released csses need to be iterated.
This is difficult to implement through cgroup iteration the unified
hierarchy as there may be multiple dying csses for the same subsystem
associated with single cgroup.

After the recent changes, the cgroup self and regular csses behave
identically in how they're linked and unlinked from the sibling lists
including assertion of CSS_RELEASED and css_next_child() can simply
switch to iterating csses directly.  This both simplifies the logic
and ensures that all visible non-released csses are included in the
iteration whether there are multiple dying csses for a subsystem or
not.

As all other iterators depend on css_next_child() for sibling
iteration, this changes behaviors of all css iterators.  Add and
update explanations on the css states which are included in traversal
to all iterators.

As css iteration could always contain offlined csses, this shouldn't
break any of the current users and new usages which need iteration of
all on and offline csses can make use of the new semantics.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/cgroup.h | 44 ++++++++++++++++++++---------------
 kernel/cgroup.c        | 62 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 63 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 5375582ea5f6..f2ff578fc03a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -764,14 +764,14 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
  * @pos: the css * to use as the loop cursor
  * @parent: css whose children to walk
  *
- * Walk @parent's children.  Must be called under rcu_read_lock().  A child
- * css which hasn't finished ->css_online() or already has finished
- * ->css_offline() may show up during traversal and it's each subsystem's
- * responsibility to verify that each @pos is alive.
+ * Walk @parent's children.  Must be called under rcu_read_lock().
  *
- * If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, a css which finished ->css_online() is
- * guaranteed to be visible in the future iterations.
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  *
  * It is allowed to temporarily drop RCU read lock during iteration.  The
  * caller is responsible for ensuring that @pos remains accessible until
@@ -794,17 +794,16 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos);
  * @root: css whose descendants to walk
  *
  * Walk @root's descendants.  @root is included in the iteration and the
- * first node to be visited.  Must be called under rcu_read_lock().  A
- * descendant css which hasn't finished ->css_online() or already has
- * finished ->css_offline() may show up during traversal and it's each
- * subsystem's responsibility to verify that each @pos is alive.
+ * first node to be visited.  Must be called under rcu_read_lock().
  *
- * If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, and synchronizes against @pos on each
- * iteration, any descendant css which finished ->css_online() is
- * guaranteed to be visible in the future iterations.
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  *
- * In other words, the following guarantees that a descendant can't escape
+ * For example, the following guarantees that a descendant can't escape
  * state updates of its ancestors.
  *
  * my_online(@css)
@@ -860,8 +859,17 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
  *
  * Similar to css_for_each_descendant_pre() but performs post-order
  * traversal instead.  @root is included in the iteration and the last
- * node to be visited.  Note that the walk visibility guarantee described
- * in pre-order walk doesn't apply the same to post-order walks.
+ * node to be visited.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ *
+ * Note that the walk visibility guarantee example described in pre-order
+ * walk doesn't apply the same to post-order walks.
  */
 #define css_for_each_descendant_post(pos, css)				\
 	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5544e685f2da..097a1fc1e1e8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3089,21 +3089,25 @@ static int cgroup_task_count(const struct cgroup *cgrp)
 
 /**
  * css_next_child - find the next child of a given css
- * @pos_css: the current position (%NULL to initiate traversal)
- * @parent_css: css whose children to walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @parent: css whose children to walk
  *
- * This function returns the next child of @parent_css and should be called
+ * This function returns the next child of @parent and should be called
  * under either cgroup_mutex or RCU read lock.  The only requirement is
- * that @parent_css and @pos_css are accessible.  The next sibling is
- * guaranteed to be returned regardless of their states.
+ * that @parent and @pos are accessible.  The next sibling is guaranteed to
+ * be returned regardless of their states.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  */
-struct cgroup_subsys_state *
-css_next_child(struct cgroup_subsys_state *pos_css,
-	       struct cgroup_subsys_state *parent_css)
+struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
+					   struct cgroup_subsys_state *parent)
 {
-	struct cgroup *pos = pos_css ? pos_css->cgroup : NULL;
-	struct cgroup *cgrp = parent_css->cgroup;
-	struct cgroup *next;
+	struct cgroup_subsys_state *next;
 
 	cgroup_assert_mutex_or_rcu_locked();
 
@@ -3128,27 +3132,21 @@ css_next_child(struct cgroup_subsys_state *pos_css,
 	 * races against release and the race window is very small.
 	 */
 	if (!pos) {
-		next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling);
-	} else if (likely(!(pos->self.flags & CSS_RELEASED))) {
-		next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling);
+		next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
+	} else if (likely(!(pos->flags & CSS_RELEASED))) {
+		next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
 	} else {
-		list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling)
-			if (next->self.serial_nr > pos->self.serial_nr)
+		list_for_each_entry_rcu(next, &parent->children, sibling)
+			if (next->serial_nr > pos->serial_nr)
 				break;
 	}
 
 	/*
 	 * @next, if not pointing to the head, can be dereferenced and is
-	 * the next sibling; however, it might have @ss disabled.  If so,
-	 * fast-forward to the next enabled one.
+	 * the next sibling.
 	 */
-	while (&next->self.sibling != &cgrp->self.children) {
-		struct cgroup_subsys_state *next_css = cgroup_css(next, parent_css->ss);
-
-		if (next_css)
-			return next_css;
-		next = list_entry_rcu(next->self.sibling.next, struct cgroup, self.sibling);
-	}
+	if (&next->sibling != &parent->children)
+		return next;
 	return NULL;
 }
 
@@ -3165,6 +3163,13 @@ css_next_child(struct cgroup_subsys_state *pos_css,
  * doesn't require the whole traversal to be contained in a single critical
  * section.  This function will return the correct next descendant as long
  * as both @pos and @root are accessible and @pos is a descendant of @root.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  */
 struct cgroup_subsys_state *
 css_next_descendant_pre(struct cgroup_subsys_state *pos,
@@ -3252,6 +3257,13 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos)
  * section.  This function will return the correct next descendant as long
  * as both @pos and @cgroup are accessible and @pos is a descendant of
  * @cgroup.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  */
 struct cgroup_subsys_state *
 css_next_descendant_post(struct cgroup_subsys_state *pos,
-- 
cgit v1.2.3-71-gd317


From 184faf32328c65c9d86b19577b8d8b90bdd2cd2e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:51 -0400
Subject: cgroup: use CSS_ONLINE instead of CGRP_DEAD

Use CSS_ONLINE on the self css to indicate whether a cgroup has been
killed instead of CGRP_DEAD.  This will allow re-using css online test
for cgroup liveliness test.  This doesn't introduce any functional
change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 2 --
 kernel/cgroup.c        | 7 ++++---
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f2ff578fc03a..51a339c99eb6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -143,8 +143,6 @@ static inline void css_put(struct cgroup_subsys_state *css)
 
 /* bits in struct cgroup flags field */
 enum {
-	/* Control Group is dead */
-	CGRP_DEAD,
 	/*
 	 * Control Group has previously had a child cgroup or a task,
 	 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 097a1fc1e1e8..004004fd0ded 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -278,7 +278,7 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 /* convenient tests for these bits */
 static inline bool cgroup_is_dead(const struct cgroup *cgrp)
 {
-	return test_bit(CGRP_DEAD, &cgrp->flags);
+	return !(cgrp->self.flags & CSS_ONLINE);
 }
 
 struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
@@ -1518,6 +1518,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
 	cgrp->self.cgroup = cgrp;
+	cgrp->self.flags |= CSS_ONLINE;
 
 	for_each_subsys(ss, ssid)
 		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
@@ -4541,13 +4542,13 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 * Mark @cgrp dead.  This prevents further task migration and child
 	 * creation by disabling cgroup_lock_live_group().
 	 */
-	set_bit(CGRP_DEAD, &cgrp->flags);
+	cgrp->self.flags &= ~CSS_ONLINE;
 
 	/* initiate massacre of all css's */
 	for_each_css(css, ssid, cgrp)
 		kill_css(css);
 
-	/* CGRP_DEAD is set, remove from ->release_list for the last time */
+	/* CSS_ONLINE is clear, remove from ->release_list for the last time */
 	raw_spin_lock(&release_list_lock);
 	if (!list_empty(&cgrp->release_list))
 		list_del_init(&cgrp->release_list);
-- 
cgit v1.2.3-71-gd317


From f3d4650015301d1c880df4523f7e7ef320a38aab Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:52 -0400
Subject: cgroup: convert cgroup_has_live_children() into
 css_has_online_children()

Now that cgroup liveliness and css onliness are the same state,
convert cgroup_has_live_children() into css_has_online_children() so
that it can be used for actual csses too.  The function now uses
css_for_each_child() for iteration and is published.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  2 ++
 kernel/cgroup.c        | 32 ++++++++++++++++++++------------
 2 files changed, 22 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 51a339c99eb6..b76999954beb 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -873,6 +873,8 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
 	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
 	     (pos) = css_next_descendant_post((pos), (css)))
 
+bool css_has_online_children(struct cgroup_subsys_state *css);
+
 /* A css_task_iter should be treated as an opaque object */
 struct css_task_iter {
 	struct cgroup_subsys		*ss;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 004004fd0ded..082bb842b11a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -175,7 +175,6 @@ static int need_forkexit_callback __read_mostly;
 static struct cftype cgroup_base_files[];
 
 static void cgroup_put(struct cgroup *cgrp);
-static bool cgroup_has_live_children(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned int ss_mask);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
@@ -1769,7 +1768,7 @@ static void cgroup_kill_sb(struct super_block *sb)
 	 * This prevents new mounts by disabling percpu_ref_tryget_live().
 	 * cgroup_mount() may wait for @root's release.
 	 */
-	if (cgroup_has_live_children(&root->cgrp))
+	if (css_has_online_children(&root->cgrp.self))
 		cgroup_put(&root->cgrp);
 	else
 		percpu_ref_kill(&root->cgrp.self.refcnt);
@@ -3291,19 +3290,28 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
 	return pos->parent;
 }
 
-static bool cgroup_has_live_children(struct cgroup *cgrp)
+/**
+ * css_has_online_children - does a css have online children
+ * @css: the target css
+ *
+ * Returns %true if @css has any online children; otherwise, %false.  This
+ * function can be called from any context but the caller is responsible
+ * for synchronizing against on/offlining as necessary.
+ */
+bool css_has_online_children(struct cgroup_subsys_state *css)
 {
-	struct cgroup *child;
+	struct cgroup_subsys_state *child;
+	bool ret = false;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(child, &cgrp->self.children, self.sibling) {
-		if (!cgroup_is_dead(child)) {
-			rcu_read_unlock();
-			return true;
+	css_for_each_child(child, css) {
+		if (css->flags & CSS_ONLINE) {
+			ret = true;
+			break;
 		}
 	}
 	rcu_read_unlock();
-	return false;
+	return ret;
 }
 
 /**
@@ -4535,7 +4543,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 * ->self.children as dead children linger on it while being
 	 * drained; otherwise, "rmdir parent/child parent" may fail.
 	 */
-	if (cgroup_has_live_children(cgrp))
+	if (css_has_online_children(&cgrp->self))
 		return -EBUSY;
 
 	/*
@@ -5014,8 +5022,8 @@ void cgroup_exit(struct task_struct *tsk)
 
 static void check_for_release(struct cgroup *cgrp)
 {
-	if (cgroup_is_releasable(cgrp) &&
-	    list_empty(&cgrp->cset_links) && !cgroup_has_live_children(cgrp)) {
+	if (cgroup_is_releasable(cgrp) && list_empty(&cgrp->cset_links) &&
+	    !css_has_online_children(&cgrp->self)) {
 		/*
 		 * Control Group is currently removeable. If it's not
 		 * already queued for a userspace notification, queue
-- 
cgit v1.2.3-71-gd317


From 6f4524d355a86769b65d5420a6ef47fb0bba9b72 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:52 -0400
Subject: cgroup: implement css_tryget()

Implement css_tryget() which tries to grab a cgroup_subsys_state's
reference as long as it already hasn't reached zero.  Combined with
the recent css iterator changes to include offline && !released csses
during traversal, this can be used to access csses regardless of its
online state.

v2: Take the new flag CSS_NO_REF into account.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/cgroup.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b76999954beb..4afe544d3547 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -112,6 +112,24 @@ static inline void css_get(struct cgroup_subsys_state *css)
 		percpu_ref_get(&css->refcnt);
 }
 
+/**
+ * css_tryget - try to obtain a reference on the specified css
+ * @css: target css
+ *
+ * Obtain a reference on @css unless it already has reached zero and is
+ * being released.  This function doesn't care whether @css is on or
+ * offline.  The caller naturally needs to ensure that @css is accessible
+ * but doesn't have to be holding a reference on it - IOW, RCU protected
+ * access is good enough for this function.  Returns %true if a reference
+ * count was successfully obtained; %false otherwise.
+ */
+static inline bool css_tryget(struct cgroup_subsys_state *css)
+{
+	if (!(css->flags & CSS_NO_REF))
+		return percpu_ref_tryget(&css->refcnt);
+	return true;
+}
+
 /**
  * css_tryget_online - try to obtain a reference on the specified css if online
  * @css: target css
-- 
cgit v1.2.3-71-gd317


From 5cc9ed4b9a7ac579362ccebac67f7a4cdb36de06 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 16 May 2014 14:22:37 +0100
Subject: drm/i915: Introduce mapping of user pages into video memory (userptr)
 ioctl

By exporting the ability to map user address and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications from
faster rendering of client-side software rasterisers (chromium),
mitigation of stalls due to read back (firefox) and to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck after reacquire mutex for lost mmu.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forwarding porting the back port.
v9: Limit the userptr to page aligned entries. We now expect userspace
    to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible.
     Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifer
     with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects
     within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and
     rearrange error path to destroy the mmu_notifier locklessly.
     Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh
     allocations of the same userptr range - and notice that
     struct_mutex was presumed to be held when during creation it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory
     for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the
     hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required
     infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to
     be used to fix up the kernel thread's current->mm for use with
     copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode
     userptr details elsewhere, and clean up comments based on
     suggestions by Bradley.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: "Gong, Zhipeng" <zhipeng.gong@intel.com>
Cc: Akash Goel <akash.goel@intel.com>
Cc: "Volkin, Bradley D" <bradley.d.volkin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Brad Volkin <bradley.d.volkin@intel.com>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit
of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/Kconfig            |   1 +
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_dma.c         |   1 +
 drivers/gpu/drm/i915/i915_drv.h         |  25 +-
 drivers/gpu/drm/i915/i915_gem.c         |   4 +
 drivers/gpu/drm/i915/i915_gem_dmabuf.c  |   8 +
 drivers/gpu/drm/i915/i915_gem_userptr.c | 711 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c   |   2 +
 include/uapi/drm/i915_drm.h             |  16 +
 9 files changed, 768 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index e4e3c01b8cbc..437e1824d0bf 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -5,6 +5,7 @@ config DRM_I915
 	depends on (AGP || AGP=n)
 	select INTEL_GTT
 	select AGP_INTEL if AGP
+	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 24469168d550..7b2f3bee3518 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -27,6 +27,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_gpu_error.o \
 	  i915_irq.o \
 	  i915_trace_points.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 46f1decab76f..dea455bd889a 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1983,6 +1983,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9e9e8166a1f3..a270e0773e2d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -41,6 +41,7 @@
 #include <linux/i2c-algo-bit.h>
 #include <drm/intel-gtt.h>
 #include <linux/backlight.h>
+#include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
@@ -178,6 +179,7 @@ enum hpd_pin {
 		if ((intel_connector)->base.encoder == (__encoder))
 
 struct drm_i915_private;
+struct i915_mmu_object;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -403,6 +405,7 @@ struct drm_i915_error_state {
 		u32 tiling:2;
 		u32 dirty:1;
 		u32 purgeable:1;
+		u32 userptr:1;
 		s32 ring:4;
 		u32 cache_level:3;
 	} **active_bo, **pinned_bo;
@@ -1447,6 +1450,9 @@ struct drm_i915_private {
 	struct i915_gtt gtt; /* VM representing the global address space */
 
 	struct i915_gem_mm mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	DECLARE_HASHTABLE(mmu_notifiers, 7);
+#endif
 
 	/* Kernel Modesetting */
 
@@ -1580,6 +1586,8 @@ struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	int (*dmabuf_export)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1693,8 +1701,20 @@ struct drm_i915_gem_object {
 
 	/** for phy allocated objects */
 	struct drm_i915_gem_phys_object *phys_obj;
-};
 
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+			unsigned read_only :1;
+			unsigned workers :4;
+#define I915_GEM_USERPTR_MAX_WORKERS 15
+
+			struct mm_struct *mm;
+			struct i915_mmu_object *mn;
+			struct work_struct *work;
+		} userptr;
+	};
+};
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -2119,6 +2139,9 @@ int i915_gem_set_tiling(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_get_tiling(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
+int i915_gem_init_userptr(struct drm_device *dev);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f6c03513de52..d5aad0bc71f7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4263,6 +4263,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4542,6 +4545,7 @@ int i915_gem_init(struct drm_device *dev)
 			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
 	}
 
+	i915_gem_init_userptr(dev);
 	i915_gem_init_global_gtt(dev);
 
 	ret = i915_gem_context_init(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 321102a8374b..580aa42443ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -229,6 +229,14 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				      struct drm_gem_object *gem_obj, int flags)
 {
+	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+
+	if (obj->ops->dmabuf_export) {
+		int ret = obj->ops->dmabuf_export(obj);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
 	return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 000000000000..21ea92886a56
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,711 @@
+/*
+ * Copyright © 2012-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_context.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+
+#if defined(CONFIG_MMU_NOTIFIER)
+#include <linux/interval_tree.h>
+
+struct i915_mmu_notifier {
+	spinlock_t lock;
+	struct hlist_node node;
+	struct mmu_notifier mn;
+	struct rb_root objects;
+	struct drm_device *dev;
+	struct mm_struct *mm;
+	struct work_struct work;
+	unsigned long count;
+	unsigned long serial;
+};
+
+struct i915_mmu_object {
+	struct i915_mmu_notifier *mmu;
+	struct interval_tree_node it;
+	struct drm_i915_gem_object *obj;
+};
+
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
+	struct interval_tree_node *it = NULL;
+	unsigned long serial = 0;
+
+	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
+	while (start < end) {
+		struct drm_i915_gem_object *obj;
+
+		obj = NULL;
+		spin_lock(&mn->lock);
+		if (serial == mn->serial)
+			it = interval_tree_iter_next(it, start, end);
+		else
+			it = interval_tree_iter_first(&mn->objects, start, end);
+		if (it != NULL) {
+			obj = container_of(it, struct i915_mmu_object, it)->obj;
+			drm_gem_object_reference(&obj->base);
+			serial = mn->serial;
+		}
+		spin_unlock(&mn->lock);
+		if (obj == NULL)
+			return;
+
+		mutex_lock(&mn->dev->struct_mutex);
+		/* Cancel any active worker and force us to re-evaluate gup */
+		obj->userptr.work = NULL;
+
+		if (obj->pages != NULL) {
+			struct drm_i915_private *dev_priv = to_i915(mn->dev);
+			struct i915_vma *vma, *tmp;
+			bool was_interruptible;
+
+			was_interruptible = dev_priv->mm.interruptible;
+			dev_priv->mm.interruptible = false;
+
+			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
+				int ret = i915_vma_unbind(vma);
+				WARN_ON(ret && ret != -EIO);
+			}
+			WARN_ON(i915_gem_object_put_pages(obj));
+
+			dev_priv->mm.interruptible = was_interruptible;
+		}
+
+		start = obj->userptr.ptr + obj->base.size;
+
+		drm_gem_object_unreference(&obj->base);
+		mutex_unlock(&mn->dev->struct_mutex);
+	}
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+};
+
+static struct i915_mmu_notifier *
+__i915_mmu_notifier_lookup(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+
+	/* Protected by dev->struct_mutex */
+	hash_for_each_possible(dev_priv->mmu_notifiers, mmu, node, (unsigned long)mm)
+		if (mmu->mm == mm)
+			return mmu;
+
+	return NULL;
+}
+
+static struct i915_mmu_notifier *
+i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_mmu_notifier *mmu;
+	int ret;
+
+	lockdep_assert_held(&dev->struct_mutex);
+
+	mmu = __i915_mmu_notifier_lookup(dev, mm);
+	if (mmu)
+		return mmu;
+
+	mmu = kmalloc(sizeof(*mmu), GFP_KERNEL);
+	if (mmu == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&mmu->lock);
+	mmu->dev = dev;
+	mmu->mn.ops = &i915_gem_userptr_notifier;
+	mmu->mm = mm;
+	mmu->objects = RB_ROOT;
+	mmu->count = 0;
+	mmu->serial = 0;
+
+	/* Protected by mmap_sem (write-lock) */
+	ret = __mmu_notifier_register(&mmu->mn, mm);
+	if (ret) {
+		kfree(mmu);
+		return ERR_PTR(ret);
+	}
+
+	/* Protected by dev->struct_mutex */
+	hash_add(dev_priv->mmu_notifiers, &mmu->node, (unsigned long)mm);
+	return mmu;
+}
+
+static void
+__i915_mmu_notifier_destroy_worker(struct work_struct *work)
+{
+	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
+	mmu_notifier_unregister(&mmu->mn, mmu->mm);
+	kfree(mmu);
+}
+
+static void
+__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	/* Protected by dev->struct_mutex */
+	hash_del(&mmu->node);
+
+	/* Our lock ordering is: mmap_sem, mmu_notifier_scru, struct_mutex.
+	 * We enter the function holding struct_mutex, therefore we need
+	 * to drop our mutex prior to calling mmu_notifier_unregister in
+	 * order to prevent lock inversion (and system-wide deadlock)
+	 * between the mmap_sem and struct-mutex. Hence we defer the
+	 * unregistration to a workqueue where we hold no locks.
+	 */
+	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
+	schedule_work(&mmu->work);
+}
+
+static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
+{
+	if (++mmu->serial == 0)
+		mmu->serial = 1;
+}
+
+static void
+i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	lockdep_assert_held(&mmu->dev->struct_mutex);
+
+	spin_lock(&mmu->lock);
+	interval_tree_remove(&mn->it, &mmu->objects);
+	__i915_mmu_notifier_update_serial(mmu);
+	spin_unlock(&mmu->lock);
+
+	/* Protected against _add() by dev->struct_mutex */
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+}
+
+static int
+i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
+		      struct i915_mmu_object *mn)
+{
+	struct interval_tree_node *it;
+	int ret;
+
+	ret = i915_mutex_lock_interruptible(mmu->dev);
+	if (ret)
+		return ret;
+
+	/* Make sure we drop the final active reference (and thereby
+	 * remove the objects from the interval tree) before we do
+	 * the check for overlapping objects.
+	 */
+	i915_gem_retire_requests(mmu->dev);
+
+	/* Disallow overlapping userptr objects */
+	spin_lock(&mmu->lock);
+	it = interval_tree_iter_first(&mmu->objects,
+				      mn->it.start, mn->it.last);
+	if (it) {
+		struct drm_i915_gem_object *obj;
+
+		/* We only need to check the first object in the range as it
+		 * either has cancelled gup work queued and we need to
+		 * return back to the user to give time for the gup-workers
+		 * to flush their object references upon which the object will
+		 * be removed from the interval-tree, or the the range is
+		 * still in use by another client and the overlap is invalid.
+		 */
+
+		obj = container_of(it, struct i915_mmu_object, it)->obj;
+		ret = obj->userptr.workers ? -EAGAIN : -EINVAL;
+	} else {
+		interval_tree_insert(&mn->it, &mmu->objects);
+		__i915_mmu_notifier_update_serial(mmu);
+		ret = 0;
+	}
+	spin_unlock(&mmu->lock);
+	mutex_unlock(&mmu->dev->struct_mutex);
+
+	return ret;
+}
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+	struct i915_mmu_object *mn;
+
+	mn = obj->userptr.mn;
+	if (mn == NULL)
+		return;
+
+	i915_mmu_notifier_del(mn->mmu, mn);
+	obj->userptr.mn = NULL;
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	struct i915_mmu_notifier *mmu;
+	struct i915_mmu_object *mn;
+	int ret;
+
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	down_write(&obj->userptr.mm->mmap_sem);
+	ret = i915_mutex_lock_interruptible(obj->base.dev);
+	if (ret == 0) {
+		mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
+		if (!IS_ERR(mmu))
+			mmu->count++; /* preemptive add to act as a refcount */
+		else
+			ret = PTR_ERR(mmu);
+		mutex_unlock(&obj->base.dev->struct_mutex);
+	}
+	up_write(&obj->userptr.mm->mmap_sem);
+	if (ret)
+		return ret;
+
+	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
+	if (mn == NULL) {
+		ret = -ENOMEM;
+		goto destroy_mmu;
+	}
+
+	mn->mmu = mmu;
+	mn->it.start = obj->userptr.ptr;
+	mn->it.last = mn->it.start + obj->base.size - 1;
+	mn->obj = obj;
+
+	ret = i915_mmu_notifier_add(mmu, mn);
+	if (ret)
+		goto free_mn;
+
+	obj->userptr.mn = mn;
+	return 0;
+
+free_mn:
+	kfree(mn);
+destroy_mmu:
+	mutex_lock(&obj->base.dev->struct_mutex);
+	if (--mmu->count == 0)
+		__i915_mmu_notifier_destroy(mmu);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+	return ret;
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+#endif
+
+struct get_pages_work {
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+	struct task_struct *task;
+};
+
+
+#if IS_ENABLED(CONFIG_SWIOTLB)
+#define swiotlb_active() swiotlb_nr_tbl()
+#else
+#define swiotlb_active() 0
+#endif
+
+static int
+st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
+{
+	struct scatterlist *sg;
+	int ret, n;
+
+	*st = kmalloc(sizeof(**st), GFP_KERNEL);
+	if (*st == NULL)
+		return -ENOMEM;
+
+	if (swiotlb_active()) {
+		ret = sg_alloc_table(*st, num_pages, GFP_KERNEL);
+		if (ret)
+			goto err;
+
+		for_each_sg((*st)->sgl, sg, num_pages, n)
+			sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+	} else {
+		ret = sg_alloc_table_from_pages(*st, pvec, num_pages,
+						0, num_pages << PAGE_SHIFT,
+						GFP_KERNEL);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	kfree(*st);
+	*st = NULL;
+	return ret;
+}
+
+static void
+__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct drm_i915_gem_object *obj = work->obj;
+	struct drm_device *dev = obj->base.dev;
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	ret = -ENOMEM;
+	pinned = 0;
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL)
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+	if (pvec != NULL) {
+		struct mm_struct *mm = obj->userptr.mm;
+
+		down_read(&mm->mmap_sem);
+		while (pinned < num_pages) {
+			ret = get_user_pages(work->task, mm,
+					     obj->userptr.ptr + pinned * PAGE_SIZE,
+					     num_pages - pinned,
+					     !obj->userptr.read_only, 0,
+					     pvec + pinned, NULL);
+			if (ret < 0)
+				break;
+
+			pinned += ret;
+		}
+		up_read(&mm->mmap_sem);
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	if (obj->userptr.work != &work->work) {
+		ret = 0;
+	} else if (pinned == num_pages) {
+		ret = st_set_pages(&obj->pages, pvec, num_pages);
+		if (ret == 0) {
+			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
+			pinned = 0;
+		}
+	}
+
+	obj->userptr.work = ERR_PTR(ret);
+	obj->userptr.workers--;
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+
+	put_task_struct(work->task);
+	kfree(work);
+}
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	const int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	int pinned, ret;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = NULL;
+	pinned = 0;
+	if (obj->userptr.mm == current->mm) {
+		pvec = kmalloc(num_pages*sizeof(struct page *),
+			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+		if (pvec == NULL) {
+			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+			if (pvec == NULL)
+				return -ENOMEM;
+		}
+
+		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
+					       !obj->userptr.read_only, pvec);
+	}
+	if (pinned < num_pages) {
+		if (pinned < 0) {
+			ret = pinned;
+			pinned = 0;
+		} else {
+			/* Spawn a worker so that we can acquire the
+			 * user pages without holding our mutex. Access
+			 * to the user pages requires mmap_sem, and we have
+			 * a strict lock ordering of mmap_sem, struct_mutex -
+			 * we already hold struct_mutex here and so cannot
+			 * call gup without encountering a lock inversion.
+			 *
+			 * Userspace will keep on repeating the operation
+			 * (thanks to EAGAIN) until either we hit the fast
+			 * path or the worker completes. If the worker is
+			 * cancelled or superseded, the task is still run
+			 * but the results ignored. (This leads to
+			 * complications that we may have a stray object
+			 * refcount that we need to be wary of when
+			 * checking for existing objects during creation.)
+			 * If the worker encounters an error, it reports
+			 * that error back to this function through
+			 * obj->userptr.work = ERR_PTR.
+			 */
+			ret = -EAGAIN;
+			if (obj->userptr.work == NULL &&
+			    obj->userptr.workers < I915_GEM_USERPTR_MAX_WORKERS) {
+				struct get_pages_work *work;
+
+				work = kmalloc(sizeof(*work), GFP_KERNEL);
+				if (work != NULL) {
+					obj->userptr.work = &work->work;
+					obj->userptr.workers++;
+
+					work->obj = obj;
+					drm_gem_object_reference(&obj->base);
+
+					work->task = current;
+					get_task_struct(work->task);
+
+					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
+					schedule_work(&work->work);
+				} else
+					ret = -ENOMEM;
+			} else {
+				if (IS_ERR(obj->userptr.work)) {
+					ret = PTR_ERR(obj->userptr.work);
+					obj->userptr.work = NULL;
+				}
+			}
+		}
+	} else {
+		ret = st_set_pages(&obj->pages, pvec, num_pages);
+		if (ret == 0) {
+			obj->userptr.work = NULL;
+			pinned = 0;
+		}
+	}
+
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(obj->userptr.work != NULL);
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	i915_gem_userptr_release__mmu_notifier(obj);
+
+	if (obj->userptr.mm) {
+		mmput(obj->userptr.mm);
+		obj->userptr.mm = NULL;
+	}
+}
+
+static int
+i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
+{
+	if (obj->userptr.mn)
+		return 0;
+
+	return i915_gem_userptr_init__mmu_notifier(obj, 0);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.dmabuf_export = i915_gem_userptr_dmabuf_export,
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some normal memory from the process
+ * context - user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
+ * 2. It cannot overlap any other userptr object in the same address space.
+ * 3. It must be normal system memory, not a pointer into another map of IO
+ *    space (e.g. it must not be a GTT mmapping of another object).
+ * 4. We only allow a bo as large as we could in theory map into the GTT,
+ *    that is we limit the size to the total size of the GTT.
+ * 5. The bo is marked as being snoopable. The backing pages are left
+ *    accessible directly by the CPU, but reads and writes by the GPU may
+ *    incur the cost of a snoop (unless you have an LLC architecture).
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned back to the system
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * Also note, that the object created here is not currently a "first class"
+ * object, in that several ioctls are banned. These are the CPU access
+ * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
+ * direct access via your pointer rather than use those ioctls.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY |
+			    I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	if (offset_in_page(args->user_ptr | args->user_size))
+		return -EINVAL;
+
+	if (args->user_size > dev_priv->gtt.base.total)
+		return -E2BIG;
+
+	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
+		return -EFAULT;
+
+	if (args->flags & I915_USERPTR_READ_ONLY) {
+		/* On almost all of the current hw, we cannot tell the GPU that a
+		 * page is readonly, so this is just a placeholder in the uAPI.
+		 */
+		return -ENODEV;
+	}
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	drm_gem_private_object_init(dev, &obj->base, args->user_size);
+	i915_gem_object_init(obj, &i915_gem_userptr_ops);
+	obj->cache_level = I915_CACHE_LLC;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+
+	obj->userptr.ptr = args->user_ptr;
+	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	ret = -ENOMEM;
+	if ((obj->userptr.mm = get_task_mm(current)))
+		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret == 0)
+		ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
+
+int
+i915_gem_init_userptr(struct drm_device *dev)
+{
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	hash_init(dev_priv->mmu_notifiers);
+#endif
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2d819858c19b..dcbec692f60f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -205,6 +205,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 		err_puts(m, tiling_flag(err->tiling));
 		err_puts(m, dirty_flag(err->dirty));
 		err_puts(m, purgeable_flag(err->purgeable));
+		err_puts(m, err->userptr ? " userptr" : "");
 		err_puts(m, err->ring != -1 ? " " : "");
 		err_puts(m, ring_str(err->ring));
 		err_puts(m, i915_cache_level_str(err->cache_level));
@@ -641,6 +642,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->tiling = obj->tiling_mode;
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->userptr = obj->userptr.mm != NULL;
 	err->ring = obj->ring ? obj->ring->id : -1;
 	err->cache_level = obj->cache_level;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8a3e4ef00c3d..ff57f07c3249 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_GET_CACHING	0x30
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GET_RESET_STATS	0x32
+#define DRM_I915_GEM_USERPTR		0x33
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1050,4 +1052,18 @@ struct drm_i915_reset_stats {
 	__u32 pad;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 #endif /* _UAPI_I915_DRM_H_ */
-- 
cgit v1.2.3-71-gd317


From 1836eea209546b870dd83f3f4ef234d6598a560d Mon Sep 17 00:00:00 2001
From: George Spelvin <linux@horizon.com>
Date: Sat, 10 May 2014 10:32:57 -0400
Subject: lib/crc7: Shift crc7() output left 1 bit

This eliminates a 1-bit left shift in every single caller,
and makes the inner loop of the CRC computation more efficient.

Renamed crc7 to crc7_be (big-endian) since the interface changed.

Also purged #include <linux/crc7.h> from files that don't use it at all.

Signed-off-by: George Spelvin <linux@horizon.com>
Reviewed-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/mmc/host/mmc_spi.c           |  2 +-
 drivers/net/wireless/ti/wl1251/acx.c |  1 -
 drivers/net/wireless/ti/wl1251/cmd.c |  1 -
 drivers/net/wireless/ti/wl1251/spi.c |  3 +-
 drivers/net/wireless/ti/wlcore/spi.c |  3 +-
 include/linux/crc7.h                 |  8 ++--
 lib/crc7.c                           | 84 ++++++++++++++++++++----------------
 7 files changed, 53 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 0a87e5691341..338e2202eaaa 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -472,7 +472,7 @@ mmc_spi_command_send(struct mmc_spi_host *host,
 	*cp++ = (u8)(arg >> 16);
 	*cp++ = (u8)(arg >> 8);
 	*cp++ = (u8)arg;
-	*cp++ = (crc7(0, &data->status[1], 5) << 1) | 0x01;
+	*cp++ = crc7_be(0, &data->status[1], 5) | 0x01;
 
 	/* Then, read up to 13 bytes (while writing all-ones):
 	 *  - N(CR) (== 1..8) bytes of all-ones
diff --git a/drivers/net/wireless/ti/wl1251/acx.c b/drivers/net/wireless/ti/wl1251/acx.c
index 5a4ec56c83d0..5695628757ee 100644
--- a/drivers/net/wireless/ti/wl1251/acx.c
+++ b/drivers/net/wireless/ti/wl1251/acx.c
@@ -2,7 +2,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/crc7.h>
 
 #include "wl1251.h"
 #include "reg.h"
diff --git a/drivers/net/wireless/ti/wl1251/cmd.c b/drivers/net/wireless/ti/wl1251/cmd.c
index bf1fa18b9786..ede31f048ef9 100644
--- a/drivers/net/wireless/ti/wl1251/cmd.c
+++ b/drivers/net/wireless/ti/wl1251/cmd.c
@@ -2,7 +2,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/crc7.h>
 #include <linux/etherdevice.h>
 
 #include "wl1251.h"
diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c
index b06d36d99362..e94b57cd5a22 100644
--- a/drivers/net/wireless/ti/wl1251/spi.c
+++ b/drivers/net/wireless/ti/wl1251/spi.c
@@ -122,8 +122,7 @@ static void wl1251_spi_wake(struct wl1251 *wl)
 	crc[3] = cmd[6];
 	crc[4] = cmd[5];
 
-	cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1;
-	cmd[4] |= WSPI_INIT_CMD_END;
+	cmd[4] = crc7_be(0, crc, WSPI_INIT_CMD_CRC_LEN) | WSPI_INIT_CMD_END;
 
 	t.tx_buf = cmd;
 	t.len = WSPI_INIT_CMD_LEN;
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index 5f3a389dd74c..1d4ddabe6063 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -151,8 +151,7 @@ static void wl12xx_spi_init(struct device *child)
 	crc[3] = cmd[6];
 	crc[4] = cmd[5];
 
-	cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1;
-	cmd[4] |= WSPI_INIT_CMD_END;
+	cmd[4] = crc7_be(0, crc, WSPI_INIT_CMD_CRC_LEN) | WSPI_INIT_CMD_END;
 
 	t.tx_buf = cmd;
 	t.len = WSPI_INIT_CMD_LEN;
diff --git a/include/linux/crc7.h b/include/linux/crc7.h
index 1786e772d5c6..d590765106f3 100644
--- a/include/linux/crc7.h
+++ b/include/linux/crc7.h
@@ -2,13 +2,13 @@
 #define _LINUX_CRC7_H
 #include <linux/types.h>
 
-extern const u8 crc7_syndrome_table[256];
+extern const u8 crc7_be_syndrome_table[256];
 
-static inline u8 crc7_byte(u8 crc, u8 data)
+static inline u8 crc7_be_byte(u8 crc, u8 data)
 {
-	return crc7_syndrome_table[(crc << 1) ^ data];
+	return crc7_be_syndrome_table[crc ^ data];
 }
 
-extern u8 crc7(u8 crc, const u8 *buffer, size_t len);
+extern u8 crc7_be(u8 crc, const u8 *buffer, size_t len);
 
 #endif
diff --git a/lib/crc7.c b/lib/crc7.c
index f1c3a144cec1..bf6255e23919 100644
--- a/lib/crc7.c
+++ b/lib/crc7.c
@@ -10,42 +10,47 @@
 #include <linux/crc7.h>
 
 
-/* Table for CRC-7 (polynomial x^7 + x^3 + 1) */
-const u8 crc7_syndrome_table[256] = {
-	0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f,
-	0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
-	0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26,
-	0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e,
-	0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d,
-	0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45,
-	0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14,
-	0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c,
-	0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b,
-	0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13,
-	0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42,
-	0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a,
-	0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69,
-	0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21,
-	0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70,
-	0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38,
-	0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e,
-	0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36,
-	0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67,
-	0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f,
-	0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
-	0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04,
-	0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55,
-	0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d,
-	0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a,
-	0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52,
-	0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03,
-	0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b,
-	0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28,
-	0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60,
-	0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31,
-	0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79
+/*
+ * Table for CRC-7 (polynomial x^7 + x^3 + 1).
+ * This is a big-endian CRC (msbit is highest power of x),
+ * aligned so the msbit of the byte is the x^6 coefficient
+ * and the lsbit is not used.
+ */
+const u8 crc7_be_syndrome_table[256] = {
+	0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,
+	0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
+	0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c,
+	0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc,
+	0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,
+	0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
+	0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28,
+	0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8,
+	0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,
+	0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
+	0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84,
+	0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14,
+	0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,
+	0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
+	0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0,
+	0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70,
+	0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc,
+	0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c,
+	0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce,
+	0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e,
+	0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98,
+	0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08,
+	0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa,
+	0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a,
+	0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34,
+	0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4,
+	0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06,
+	0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96,
+	0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50,
+	0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0,
+	0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62,
+	0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
 };
-EXPORT_SYMBOL(crc7_syndrome_table);
+EXPORT_SYMBOL(crc7_be_syndrome_table);
 
 /**
  * crc7 - update the CRC7 for the data buffer
@@ -55,14 +60,17 @@ EXPORT_SYMBOL(crc7_syndrome_table);
  * Context: any
  *
  * Returns the updated CRC7 value.
+ * The CRC7 is left-aligned in the byte (the lsbit is always 0), as that
+ * makes the computation easier, and all callers want it in that form.
+ *
  */
-u8 crc7(u8 crc, const u8 *buffer, size_t len)
+u8 crc7_be(u8 crc, const u8 *buffer, size_t len)
 {
 	while (len--)
-		crc = crc7_byte(crc, *buffer++);
+		crc = crc7_be_byte(crc, *buffer++);
 	return crc;
 }
-EXPORT_SYMBOL(crc7);
+EXPORT_SYMBOL(crc7_be);
 
 MODULE_DESCRIPTION("CRC7 calculations");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-71-gd317


From a75951217472c522c324adb0a4de3ba69d656ef5 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Fri, 16 May 2014 16:14:04 +0200
Subject: net: phy: extend fixed driver with fixed_phy_register()

The existing fixed_phy_add() function has several drawbacks that
prevents it from being used as is for OF-based declaration of fixed
PHYs:

 * The address of the PHY on the fake bus needs to be passed, while a
   dynamic allocation is desired.

 * Since the phy_device instantiation is post-poned until the next
   mdiobus scan, there is no way to associate the fixed PHY with its
   OF node, which later prevents of_phy_connect() from finding this
   fixed PHY from a given OF node.

To solve this, this commit introduces fixed_phy_register(), which will
allocate an available PHY address, add the PHY using fixed_phy_add()
and instantiate the phy_device structure associated with the provided
OF node.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/fixed.c   | 61 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/phy_fixed.h | 11 +++++++++
 2 files changed, 72 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c
index e41546da105e..d60d875cb445 100644
--- a/drivers/net/phy/fixed.c
+++ b/drivers/net/phy/fixed.c
@@ -21,6 +21,7 @@
 #include <linux/phy_fixed.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 
 #define MII_REGS_NUM 29
 
@@ -203,6 +204,66 @@ err_regs:
 }
 EXPORT_SYMBOL_GPL(fixed_phy_add);
 
+void fixed_phy_del(int phy_addr)
+{
+	struct fixed_mdio_bus *fmb = &platform_fmb;
+	struct fixed_phy *fp, *tmp;
+
+	list_for_each_entry_safe(fp, tmp, &fmb->phys, node) {
+		if (fp->addr == phy_addr) {
+			list_del(&fp->node);
+			kfree(fp);
+			return;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(fixed_phy_del);
+
+static int phy_fixed_addr;
+static DEFINE_SPINLOCK(phy_fixed_addr_lock);
+
+int fixed_phy_register(unsigned int irq,
+		       struct fixed_phy_status *status,
+		       struct device_node *np)
+{
+	struct fixed_mdio_bus *fmb = &platform_fmb;
+	struct phy_device *phy;
+	int phy_addr;
+	int ret;
+
+	/* Get the next available PHY address, up to PHY_MAX_ADDR */
+	spin_lock(&phy_fixed_addr_lock);
+	if (phy_fixed_addr == PHY_MAX_ADDR) {
+		spin_unlock(&phy_fixed_addr_lock);
+		return -ENOSPC;
+	}
+	phy_addr = phy_fixed_addr++;
+	spin_unlock(&phy_fixed_addr_lock);
+
+	ret = fixed_phy_add(PHY_POLL, phy_addr, status);
+	if (ret < 0)
+		return ret;
+
+	phy = get_phy_device(fmb->mii_bus, phy_addr, false);
+	if (!phy || IS_ERR(phy)) {
+		fixed_phy_del(phy_addr);
+		return -EINVAL;
+	}
+
+	of_node_get(np);
+	phy->dev.of_node = np;
+
+	ret = phy_device_register(phy);
+	if (ret) {
+		phy_device_free(phy);
+		of_node_put(np);
+		fixed_phy_del(phy_addr);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int __init fixed_mdio_bus_init(void)
 {
 	struct fixed_mdio_bus *fmb = &platform_fmb;
diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 509d8f5f984e..4f2478b47136 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -9,15 +9,26 @@ struct fixed_phy_status {
 	int asym_pause;
 };
 
+struct device_node;
+
 #ifdef CONFIG_FIXED_PHY
 extern int fixed_phy_add(unsigned int irq, int phy_id,
 			 struct fixed_phy_status *status);
+extern int fixed_phy_register(unsigned int irq,
+			      struct fixed_phy_status *status,
+			      struct device_node *np);
 #else
 static inline int fixed_phy_add(unsigned int irq, int phy_id,
 				struct fixed_phy_status *status)
 {
 	return -ENODEV;
 }
+static inline int fixed_phy_register(unsigned int irq,
+				     struct fixed_phy_status *status,
+				     struct device_node *np)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_FIXED_PHY */
 
 /*
-- 
cgit v1.2.3-71-gd317


From 3be2a49e5c08d268f8af0dd4fe89a24ea8cdc339 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Fri, 16 May 2014 16:14:05 +0200
Subject: of: provide a binding for fixed link PHYs

Some Ethernet MACs have a "fixed link", and are not connected to a
normal MDIO-managed PHY device. For those situations, a Device Tree
binding allows to describe a "fixed link" using a special PHY node.

This patch adds:

 * A documentation for the fixed PHY Device Tree binding.

 * An of_phy_is_fixed_link() function that an Ethernet driver can call
   on its PHY phandle to find out whether it's a fixed link PHY or
   not. It should typically be used to know if
   of_phy_register_fixed_link() should be called.

 * An of_phy_register_fixed_link() function that instantiates the
   fixed PHY into the PHY subsystem, so that when the driver calls
   of_phy_connect(), the PHY device associated to the OF node will be
   found.

These two additional functions also support the old fixed-link Device
Tree binding used on PowerPC platforms, so that ultimately, the
network device drivers for those platforms could be converted to use
of_phy_is_fixed_link() and of_phy_register_fixed_link() instead of
of_phy_connect_fixed_link(), while keeping compatibility with their
respective Device Tree bindings.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/fixed-link.txt         | 30 ++++++++++
 drivers/of/of_mdio.c                               | 67 ++++++++++++++++++++++
 include/linux/of_mdio.h                            | 15 +++++
 3 files changed, 112 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/fixed-link.txt

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/net/fixed-link.txt b/Documentation/devicetree/bindings/net/fixed-link.txt
new file mode 100644
index 000000000000..e956de1be935
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/fixed-link.txt
@@ -0,0 +1,30 @@
+Fixed link Device Tree binding
+------------------------------
+
+Some Ethernet MACs have a "fixed link", and are not connected to a
+normal MDIO-managed PHY device. For those situations, a Device Tree
+binding allows to describe a "fixed link".
+
+Such a fixed link situation is described by creating a 'fixed-link'
+sub-node of the Ethernet MAC device node, with the following
+properties:
+
+* 'speed' (integer, mandatory), to indicate the link speed. Accepted
+  values are 10, 100 and 1000
+* 'full-duplex' (boolean, optional), to indicate that full duplex is
+  used. When absent, half duplex is assumed.
+* 'pause' (boolean, optional), to indicate that pause should be
+  enabled.
+* 'asym-pause' (boolean, optional), to indicate that asym_pause should
+  be enabled.
+
+Example:
+
+ethernet@0 {
+	...
+	fixed-link {
+	      speed = <1000>;
+	      full-duplex;
+	};
+	...
+};
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 9a95831bd065..1def0bb5cb37 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -14,6 +14,7 @@
 #include <linux/netdevice.h>
 #include <linux/err.h>
 #include <linux/phy.h>
+#include <linux/phy_fixed.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
@@ -301,3 +302,69 @@ struct phy_device *of_phy_attach(struct net_device *dev,
 	return phy_attach_direct(dev, phy, flags, iface) ? NULL : phy;
 }
 EXPORT_SYMBOL(of_phy_attach);
+
+#if defined(CONFIG_FIXED_PHY)
+/*
+ * of_phy_is_fixed_link() and of_phy_register_fixed_link() must
+ * support two DT bindings:
+ * - the old DT binding, where 'fixed-link' was a property with 5
+ *   cells encoding various informations about the fixed PHY
+ * - the new DT binding, where 'fixed-link' is a sub-node of the
+ *   Ethernet device.
+ */
+bool of_phy_is_fixed_link(struct device_node *np)
+{
+	struct device_node *dn;
+	int len;
+
+	/* New binding */
+	dn = of_get_child_by_name(np, "fixed-link");
+	if (dn) {
+		of_node_put(dn);
+		return true;
+	}
+
+	/* Old binding */
+	if (of_get_property(np, "fixed-link", &len) &&
+	    len == (5 * sizeof(__be32)))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(of_phy_is_fixed_link);
+
+int of_phy_register_fixed_link(struct device_node *np)
+{
+	struct fixed_phy_status status = {};
+	struct device_node *fixed_link_node;
+	const __be32 *fixed_link_prop;
+	int len;
+
+	/* New binding */
+	fixed_link_node = of_get_child_by_name(np, "fixed-link");
+	if (fixed_link_node) {
+		status.link = 1;
+		status.duplex = of_property_read_bool(np, "full-duplex");
+		if (of_property_read_u32(fixed_link_node, "speed", &status.speed))
+			return -EINVAL;
+		status.pause = of_property_read_bool(np, "pause");
+		status.asym_pause = of_property_read_bool(np, "asym-pause");
+		of_node_put(fixed_link_node);
+		return fixed_phy_register(PHY_POLL, &status, np);
+	}
+
+	/* Old binding */
+	fixed_link_prop = of_get_property(np, "fixed-link", &len);
+	if (fixed_link_prop && len == (5 * sizeof(__be32))) {
+		status.link = 1;
+		status.duplex = be32_to_cpu(fixed_link_prop[1]);
+		status.speed = be32_to_cpu(fixed_link_prop[2]);
+		status.pause = be32_to_cpu(fixed_link_prop[3]);
+		status.asym_pause = be32_to_cpu(fixed_link_prop[4]);
+		return fixed_phy_register(PHY_POLL, &status, np);
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL(of_phy_register_fixed_link);
+#endif
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 881a7c3571f4..0aa367e316cb 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -72,4 +72,19 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
 }
 #endif /* CONFIG_OF */
 
+#if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY)
+extern int of_phy_register_fixed_link(struct device_node *np);
+extern bool of_phy_is_fixed_link(struct device_node *np);
+#else
+static inline int of_phy_register_fixed_link(struct device_node *np)
+{
+	return -ENOSYS;
+}
+static inline bool of_phy_is_fixed_link(struct device_node *np)
+{
+	return false;
+}
+#endif
+
+
 #endif /* __LINUX_OF_MDIO_H */
-- 
cgit v1.2.3-71-gd317


From dc20759f281c0f9e6c4fca8be251deca2954862a Mon Sep 17 00:00:00 2001
From: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Date: Fri, 16 May 2014 17:46:35 +0200
Subject: ieee802154: add types for link-layer security

The added structures match 802.15.4-2011 link-layer security PIBs as
closely as is reasonable. Some lists required by the standard were
modeled as bitmaps (frame_types and command_frame_ids in *llsec_key,
802.15.4-2011 7.5/Table 61), since using lists for those seems a bit
excessive and not particularly useful. The DeviceDescriptorHandleList
was inverted and is here a per-device list, since operations on this
list are likely to have both a key and a device at hand, and per-device
lists of keys are shorter than per-key lists of devices.

Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ieee802154_netdev.h | 95 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

(limited to 'include')

diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index bc9a7475e57e..6f8f9c2f6037 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -242,6 +242,88 @@ static inline struct ieee802154_mac_cb *mac_cb_init(struct sk_buff *skb)
 	return mac_cb(skb);
 }
 
+#define IEEE802154_LLSEC_KEY_SIZE 16
+
+struct ieee802154_llsec_key_id {
+	u8 mode;
+	u8 id;
+	union {
+		struct ieee802154_addr device_addr;
+		__le32 short_source;
+		__le64 extended_source;
+	};
+};
+
+struct ieee802154_llsec_key {
+	u8 frame_types;
+	u32 cmd_frame_ids;
+	u8 key[IEEE802154_LLSEC_KEY_SIZE];
+};
+
+struct ieee802154_llsec_key_entry {
+	struct list_head list;
+
+	struct ieee802154_llsec_key_id id;
+	struct ieee802154_llsec_key *key;
+};
+
+struct ieee802154_llsec_device_key {
+	struct list_head list;
+
+	struct ieee802154_llsec_key_id key_id;
+	u32 frame_counter;
+};
+
+enum {
+	IEEE802154_LLSEC_DEVKEY_IGNORE,
+	IEEE802154_LLSEC_DEVKEY_RESTRICT,
+
+	__IEEE802154_LLSEC_DEVKEY_MAX,
+};
+
+struct ieee802154_llsec_device {
+	struct list_head list;
+
+	__le16 pan_id;
+	__le16 short_addr;
+	__le64 hwaddr;
+	u32 frame_counter;
+	bool seclevel_exempt;
+
+	u8 key_mode;
+	struct list_head keys;
+};
+
+struct ieee802154_llsec_seclevel {
+	struct list_head list;
+
+	u8 frame_type;
+	u8 cmd_frame_id;
+	bool device_override;
+	u32 sec_levels;
+};
+
+struct ieee802154_llsec_params {
+	bool enabled;
+
+	__be32 frame_counter;
+	u8 out_level;
+	struct ieee802154_llsec_key_id out_key;
+
+	__le64 default_key_source;
+
+	__le16 pan_id;
+	__le64 hwaddr;
+	__le64 coord_hwaddr;
+	__le16 coord_shortaddr;
+};
+
+struct ieee802154_llsec_table {
+	struct list_head keys;
+	struct list_head devices;
+	struct list_head security_levels;
+};
+
 #define IEEE802154_MAC_SCAN_ED		0
 #define IEEE802154_MAC_SCAN_ACTIVE	1
 #define IEEE802154_MAC_SCAN_PASSIVE	2
@@ -260,6 +342,19 @@ struct ieee802154_mac_params {
 };
 
 struct wpan_phy;
+
+enum {
+	IEEE802154_LLSEC_PARAM_ENABLED = 1 << 0,
+	IEEE802154_LLSEC_PARAM_FRAME_COUNTER = 1 << 1,
+	IEEE802154_LLSEC_PARAM_OUT_LEVEL = 1 << 2,
+	IEEE802154_LLSEC_PARAM_OUT_KEY = 1 << 3,
+	IEEE802154_LLSEC_PARAM_KEY_SOURCE = 1 << 4,
+	IEEE802154_LLSEC_PARAM_PAN_ID = 1 << 5,
+	IEEE802154_LLSEC_PARAM_HWADDR = 1 << 6,
+	IEEE802154_LLSEC_PARAM_COORD_HWADDR = 1 << 7,
+	IEEE802154_LLSEC_PARAM_COORD_SHORTADDR = 1 << 8,
+};
+
 /*
  * This should be located at net_device->ml_priv
  *
-- 
cgit v1.2.3-71-gd317


From f30be4d53cada48598dab0983866ae4b16af46dc Mon Sep 17 00:00:00 2001
From: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Date: Fri, 16 May 2014 17:46:40 +0200
Subject: mac802154: integrate llsec with wpan devices

Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ieee802154_netdev.h |   3 +
 net/mac802154/mac802154.h       |  10 ++++
 net/mac802154/wpan.c            | 118 ++++++++++++++++++++++++++++++----------
 3 files changed, 103 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index 6f8f9c2f6037..000c8552d5de 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -225,6 +225,9 @@ struct ieee802154_mac_cb {
 	u8 type;
 	bool ackreq;
 	bool secen;
+	bool secen_override;
+	u8 seclevel;
+	bool seclevel_override;
 	struct ieee802154_addr source;
 	struct ieee802154_addr dest;
 };
diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index e05f66e2eda3..a8d7cbc701a0 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -23,9 +23,12 @@
 #ifndef MAC802154_H
 #define MAC802154_H
 
+#include <linux/mutex.h>
 #include <net/mac802154.h>
 #include <net/ieee802154_netdev.h>
 
+#include "llsec.h"
+
 /* mac802154 device private data */
 struct mac802154_priv {
 	struct ieee802154_dev hw;
@@ -91,6 +94,13 @@ struct mac802154_sub_if_data {
 	u8 bsn;
 	/* MAC DSN field */
 	u8 dsn;
+
+	/* protects sec from concurrent access by netlink. access by
+	 * encrypt/decrypt/header_create safe without additional protection.
+	 */
+	struct mutex sec_mtx;
+
+	struct mac802154_llsec sec;
 };
 
 #define mac802154_to_priv(_hw)	container_of(_hw, struct mac802154_priv, hw)
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index ed105774c15d..00729ca1e30a 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -183,6 +183,38 @@ out:
 	return rc;
 }
 
+static int mac802154_set_header_security(struct mac802154_sub_if_data *priv,
+					 struct ieee802154_hdr *hdr,
+					 const struct ieee802154_mac_cb *cb)
+{
+	struct ieee802154_llsec_params params;
+	u8 level;
+
+	mac802154_llsec_get_params(&priv->sec, &params);
+
+	if (!params.enabled && cb->secen_override && cb->secen)
+		return -EINVAL;
+	if (!params.enabled ||
+	    (cb->secen_override && !cb->secen) ||
+	    !params.out_level)
+		return 0;
+	if (cb->seclevel_override && !cb->seclevel)
+		return -EINVAL;
+
+	level = cb->seclevel_override ? cb->seclevel : params.out_level;
+
+	hdr->fc.security_enabled = 1;
+	hdr->sec.level = level;
+	hdr->sec.key_id_mode = params.out_key.mode;
+	if (params.out_key.mode == IEEE802154_SCF_KEY_SHORT_INDEX)
+		hdr->sec.short_src = params.out_key.short_source;
+	else if (params.out_key.mode == IEEE802154_SCF_KEY_HW_INDEX)
+		hdr->sec.extended_src = params.out_key.extended_source;
+	hdr->sec.key_id = params.out_key.id;
+
+	return 0;
+}
+
 static int mac802154_header_create(struct sk_buff *skb,
 				   struct net_device *dev,
 				   unsigned short type,
@@ -204,6 +236,9 @@ static int mac802154_header_create(struct sk_buff *skb,
 	hdr.fc.ack_request = cb->ackreq;
 	hdr.seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
 
+	if (mac802154_set_header_security(priv, &hdr, cb) < 0)
+		return -EINVAL;
+
 	if (!saddr) {
 		spin_lock_bh(&priv->mib_lock);
 
@@ -259,6 +294,7 @@ mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mac802154_sub_if_data *priv;
 	u8 chan, page;
+	int rc;
 
 	priv = netdev_priv(dev);
 
@@ -274,6 +310,13 @@ mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_OK;
 	}
 
+	rc = mac802154_llsec_encrypt(&priv->sec, skb);
+	if (rc) {
+		pr_warn("encryption failed: %i\n", rc);
+		kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
 	skb->skb_iif = dev->ifindex;
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
@@ -294,6 +337,15 @@ static const struct net_device_ops mac802154_wpan_ops = {
 	.ndo_set_mac_address	= mac802154_wpan_mac_addr,
 };
 
+static void mac802154_wpan_free(struct net_device *dev)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+
+	mac802154_llsec_destroy(&priv->sec);
+
+	free_netdev(dev);
+}
+
 void mac802154_wpan_setup(struct net_device *dev)
 {
 	struct mac802154_sub_if_data *priv;
@@ -303,14 +355,14 @@ void mac802154_wpan_setup(struct net_device *dev)
 
 	dev->hard_header_len	= MAC802154_FRAME_HARD_HEADER_LEN;
 	dev->header_ops		= &mac802154_header_ops;
-	dev->needed_tailroom	= 2; /* FCS */
+	dev->needed_tailroom	= 2 + 16; /* FCS + MIC */
 	dev->mtu		= IEEE802154_MTU;
 	dev->tx_queue_len	= 300;
 	dev->type		= ARPHRD_IEEE802154;
 	dev->flags		= IFF_NOARP | IFF_BROADCAST;
 	dev->watchdog_timeo	= 0;
 
-	dev->destructor		= free_netdev;
+	dev->destructor		= mac802154_wpan_free;
 	dev->netdev_ops		= &mac802154_wpan_ops;
 	dev->ml_priv		= &mac802154_mlme_wpan;
 
@@ -321,6 +373,7 @@ void mac802154_wpan_setup(struct net_device *dev)
 	priv->page = 0;
 
 	spin_lock_init(&priv->mib_lock);
+	mutex_init(&priv->sec_mtx);
 
 	get_random_bytes(&priv->bsn, 1);
 	get_random_bytes(&priv->dsn, 1);
@@ -333,6 +386,8 @@ void mac802154_wpan_setup(struct net_device *dev)
 
 	priv->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST);
 	priv->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
+
+	mac802154_llsec_init(&priv->sec);
 }
 
 static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb)
@@ -341,9 +396,11 @@ static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb)
 }
 
 static int
-mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb)
+mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb,
+		      const struct ieee802154_hdr *hdr)
 {
 	__le16 span, sshort;
+	int rc;
 
 	pr_debug("getting packet via slave interface %s\n", sdata->dev->name);
 
@@ -390,6 +447,12 @@ mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb)
 
 	skb->dev = sdata->dev;
 
+	rc = mac802154_llsec_decrypt(&sdata->sec, skb);
+	if (rc) {
+		pr_debug("decryption failed: %i\n", rc);
+		return NET_RX_DROP;
+	}
+
 	sdata->dev->stats.rx_packets++;
 	sdata->dev->stats.rx_bytes += skb->len;
 
@@ -421,60 +484,58 @@ static void mac802154_print_addr(const char *name,
 	}
 }
 
-static int mac802154_parse_frame_start(struct sk_buff *skb)
+static int mac802154_parse_frame_start(struct sk_buff *skb,
+				       struct ieee802154_hdr *hdr)
 {
 	int hlen;
-	struct ieee802154_hdr hdr;
 	struct ieee802154_mac_cb *cb = mac_cb_init(skb);
 
-	hlen = ieee802154_hdr_pull(skb, &hdr);
+	hlen = ieee802154_hdr_pull(skb, hdr);
 	if (hlen < 0)
 		return -EINVAL;
 
 	skb->mac_len = hlen;
 
-	pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr.fc),
-		 hdr.seq);
+	pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr->fc),
+		 hdr->seq);
 
-	cb->type = hdr.fc.type;
-	cb->ackreq = hdr.fc.ack_request;
-	cb->secen = hdr.fc.security_enabled;
+	cb->type = hdr->fc.type;
+	cb->ackreq = hdr->fc.ack_request;
+	cb->secen = hdr->fc.security_enabled;
 
-	mac802154_print_addr("destination", &hdr.dest);
-	mac802154_print_addr("source", &hdr.source);
+	mac802154_print_addr("destination", &hdr->dest);
+	mac802154_print_addr("source", &hdr->source);
 
-	cb->source = hdr.source;
-	cb->dest = hdr.dest;
+	cb->source = hdr->source;
+	cb->dest = hdr->dest;
 
-	if (hdr.fc.security_enabled) {
+	if (hdr->fc.security_enabled) {
 		u64 key;
 
-		pr_debug("seclevel %i\n", hdr.sec.level);
+		pr_debug("seclevel %i\n", hdr->sec.level);
 
-		switch (hdr.sec.key_id_mode) {
+		switch (hdr->sec.key_id_mode) {
 		case IEEE802154_SCF_KEY_IMPLICIT:
 			pr_debug("implicit key\n");
 			break;
 
 		case IEEE802154_SCF_KEY_INDEX:
-			pr_debug("key %02x\n", hdr.sec.key_id);
+			pr_debug("key %02x\n", hdr->sec.key_id);
 			break;
 
 		case IEEE802154_SCF_KEY_SHORT_INDEX:
 			pr_debug("key %04x:%04x %02x\n",
-				 le32_to_cpu(hdr.sec.short_src) >> 16,
-				 le32_to_cpu(hdr.sec.short_src) & 0xffff,
-				 hdr.sec.key_id);
+				 le32_to_cpu(hdr->sec.short_src) >> 16,
+				 le32_to_cpu(hdr->sec.short_src) & 0xffff,
+				 hdr->sec.key_id);
 			break;
 
 		case IEEE802154_SCF_KEY_HW_INDEX:
-			key = swab64((__force u64) hdr.sec.extended_src);
+			key = swab64((__force u64) hdr->sec.extended_src);
 			pr_debug("key source %8phC %02x\n", &key,
-				 hdr.sec.key_id);
+				 hdr->sec.key_id);
 			break;
 		}
-
-		return -EINVAL;
 	}
 
 	return 0;
@@ -485,8 +546,9 @@ void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb)
 	int ret;
 	struct sk_buff *sskb;
 	struct mac802154_sub_if_data *sdata;
+	struct ieee802154_hdr hdr;
 
-	ret = mac802154_parse_frame_start(skb);
+	ret = mac802154_parse_frame_start(skb, &hdr);
 	if (ret) {
 		pr_debug("got invalid frame\n");
 		return;
@@ -499,7 +561,7 @@ void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb)
 
 		sskb = skb_clone(skb, GFP_ATOMIC);
 		if (sskb)
-			mac802154_subif_frame(sdata, sskb);
+			mac802154_subif_frame(sdata, sskb, &hdr);
 	}
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3-71-gd317


From af9eed5bbf0fb4e398081e79a707545dcca5ebda Mon Sep 17 00:00:00 2001
From: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Date: Fri, 16 May 2014 17:46:41 +0200
Subject: ieee802154: add dgram sockopts for security control

Allow datagram sockets to override the security settings of the device
they send from on a per-socket basis. Requires CAP_NET_ADMIN or
CAP_NET_RAW, since raw sockets can send arbitrary packets anyway.

Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_ieee802154.h | 10 ++++++-
 net/ieee802154/dgram.c      | 66 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/af_ieee802154.h b/include/net/af_ieee802154.h
index f79ae2aa76d6..085940f7eeec 100644
--- a/include/net/af_ieee802154.h
+++ b/include/net/af_ieee802154.h
@@ -57,6 +57,14 @@ struct sockaddr_ieee802154 {
 /* get/setsockopt */
 #define SOL_IEEE802154	0
 
-#define WPAN_WANTACK	0
+#define WPAN_WANTACK		0
+#define WPAN_SECURITY		1
+#define WPAN_SECURITY_LEVEL	2
+
+#define WPAN_SECURITY_DEFAULT	0
+#define WPAN_SECURITY_OFF	1
+#define WPAN_SECURITY_ON	2
+
+#define WPAN_SECURITY_LEVEL_DEFAULT	(-1)
 
 #endif
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index 76c77256a56e..4f0ed8780194 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -21,6 +21,7 @@
  * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
  */
 
+#include <linux/capability.h>
 #include <linux/net.h>
 #include <linux/module.h>
 #include <linux/if_arp.h>
@@ -47,6 +48,10 @@ struct dgram_sock {
 	unsigned int bound:1;
 	unsigned int connected:1;
 	unsigned int want_ack:1;
+	unsigned int secen:1;
+	unsigned int secen_override:1;
+	unsigned int seclevel:3;
+	unsigned int seclevel_override:1;
 };
 
 static inline struct dgram_sock *dgram_sk(const struct sock *sk)
@@ -264,6 +269,11 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk,
 		dst_addr = ro->dst_addr;
 	}
 
+	cb->secen = ro->secen;
+	cb->secen_override = ro->secen_override;
+	cb->seclevel = ro->seclevel;
+	cb->seclevel_override = ro->seclevel_override;
+
 	err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr,
 			      ro->bound ? &ro->src_addr : NULL, size);
 	if (err < 0)
@@ -427,6 +437,20 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname,
 	case WPAN_WANTACK:
 		val = ro->want_ack;
 		break;
+	case WPAN_SECURITY:
+		if (!ro->secen_override)
+			val = WPAN_SECURITY_DEFAULT;
+		else if (ro->secen)
+			val = WPAN_SECURITY_ON;
+		else
+			val = WPAN_SECURITY_OFF;
+		break;
+	case WPAN_SECURITY_LEVEL:
+		if (!ro->seclevel_override)
+			val = WPAN_SECURITY_LEVEL_DEFAULT;
+		else
+			val = ro->seclevel;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -442,6 +466,7 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname,
 		    char __user *optval, unsigned int optlen)
 {
 	struct dgram_sock *ro = dgram_sk(sk);
+	struct net *net = sock_net(sk);
 	int val;
 	int err = 0;
 
@@ -457,6 +482,47 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname,
 	case WPAN_WANTACK:
 		ro->want_ack = !!val;
 		break;
+	case WPAN_SECURITY:
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN) &&
+		    !ns_capable(net->user_ns, CAP_NET_RAW)) {
+			err = -EPERM;
+			break;
+		}
+
+		switch (val) {
+		case WPAN_SECURITY_DEFAULT:
+			ro->secen_override = 0;
+			break;
+		case WPAN_SECURITY_ON:
+			ro->secen_override = 1;
+			ro->secen = 1;
+			break;
+		case WPAN_SECURITY_OFF:
+			ro->secen_override = 1;
+			ro->secen = 0;
+			break;
+		default:
+			err = -EINVAL;
+			break;
+		}
+		break;
+	case WPAN_SECURITY_LEVEL:
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN) &&
+		    !ns_capable(net->user_ns, CAP_NET_RAW)) {
+			err = -EPERM;
+			break;
+		}
+
+		if (val < WPAN_SECURITY_LEVEL_DEFAULT ||
+		    val > IEEE802154_SCF_SECLEVEL_ENC_MIC128) {
+			err = -EINVAL;
+		} else if (val == WPAN_SECURITY_LEVEL_DEFAULT) {
+			ro->seclevel_override = 0;
+		} else {
+			ro->seclevel_override = 1;
+			ro->seclevel = val;
+		}
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
-- 
cgit v1.2.3-71-gd317


From 29e023746a672e4ff702ca9dc63a06145fd8f4b0 Mon Sep 17 00:00:00 2001
From: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Date: Fri, 16 May 2014 17:46:42 +0200
Subject: mac802154: add llsec configuration functions

Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ieee802154_netdev.h |  36 ++++++++
 net/mac802154/mac802154.h       |  33 +++++++
 net/mac802154/mac_cmd.c         |  18 ++++
 net/mac802154/mib.c             | 187 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 274 insertions(+)

(limited to 'include')

diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index 000c8552d5de..eb9f850a51b6 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -358,6 +358,40 @@ enum {
 	IEEE802154_LLSEC_PARAM_COORD_SHORTADDR = 1 << 8,
 };
 
+struct ieee802154_llsec_ops {
+	int (*get_params)(struct net_device *dev,
+			  struct ieee802154_llsec_params *params);
+	int (*set_params)(struct net_device *dev,
+			  const struct ieee802154_llsec_params *params,
+			  int changed);
+
+	int (*add_key)(struct net_device *dev,
+		       const struct ieee802154_llsec_key_id *id,
+		       const struct ieee802154_llsec_key *key);
+	int (*del_key)(struct net_device *dev,
+		       const struct ieee802154_llsec_key_id *id);
+
+	int (*add_dev)(struct net_device *dev,
+		       const struct ieee802154_llsec_device *llsec_dev);
+	int (*del_dev)(struct net_device *dev, __le64 dev_addr);
+
+	int (*add_devkey)(struct net_device *dev,
+			  __le64 device_addr,
+			  const struct ieee802154_llsec_device_key *key);
+	int (*del_devkey)(struct net_device *dev,
+			  __le64 device_addr,
+			  const struct ieee802154_llsec_device_key *key);
+
+	int (*add_seclevel)(struct net_device *dev,
+			    const struct ieee802154_llsec_seclevel *sl);
+	int (*del_seclevel)(struct net_device *dev,
+			    const struct ieee802154_llsec_seclevel *sl);
+
+	void (*lock_table)(struct net_device *dev);
+	void (*get_table)(struct net_device *dev,
+			  struct ieee802154_llsec_table **t);
+	void (*unlock_table)(struct net_device *dev);
+};
 /*
  * This should be located at net_device->ml_priv
  *
@@ -388,6 +422,8 @@ struct ieee802154_mlme_ops {
 	void (*get_mac_params)(struct net_device *dev,
 			       struct ieee802154_mac_params *params);
 
+	struct ieee802154_llsec_ops *llsec;
+
 	/* The fields below are required. */
 
 	struct wpan_phy *(*get_phy)(const struct net_device *dev);
diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index a8d7cbc701a0..762a6f849c6b 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -136,4 +136,37 @@ int mac802154_set_mac_params(struct net_device *dev,
 void mac802154_get_mac_params(struct net_device *dev,
 			      struct ieee802154_mac_params *params);
 
+int mac802154_get_params(struct net_device *dev,
+			 struct ieee802154_llsec_params *params);
+int mac802154_set_params(struct net_device *dev,
+			 const struct ieee802154_llsec_params *params,
+			 int changed);
+
+int mac802154_add_key(struct net_device *dev,
+		      const struct ieee802154_llsec_key_id *id,
+		      const struct ieee802154_llsec_key *key);
+int mac802154_del_key(struct net_device *dev,
+		      const struct ieee802154_llsec_key_id *id);
+
+int mac802154_add_dev(struct net_device *dev,
+		      const struct ieee802154_llsec_device *llsec_dev);
+int mac802154_del_dev(struct net_device *dev, __le64 dev_addr);
+
+int mac802154_add_devkey(struct net_device *dev,
+			 __le64 device_addr,
+			 const struct ieee802154_llsec_device_key *key);
+int mac802154_del_devkey(struct net_device *dev,
+			 __le64 device_addr,
+			 const struct ieee802154_llsec_device_key *key);
+
+int mac802154_add_seclevel(struct net_device *dev,
+			   const struct ieee802154_llsec_seclevel *sl);
+int mac802154_del_seclevel(struct net_device *dev,
+			   const struct ieee802154_llsec_seclevel *sl);
+
+void mac802154_lock_table(struct net_device *dev);
+void mac802154_get_table(struct net_device *dev,
+			 struct ieee802154_llsec_table **t);
+void mac802154_unlock_table(struct net_device *dev);
+
 #endif /* MAC802154_H */
diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index d40c0928bc62..afb4e2cbc00a 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -64,6 +64,22 @@ static struct wpan_phy *mac802154_get_phy(const struct net_device *dev)
 	return to_phy(get_device(&priv->hw->phy->dev));
 }
 
+static struct ieee802154_llsec_ops mac802154_llsec_ops = {
+	.get_params = mac802154_get_params,
+	.set_params = mac802154_set_params,
+	.add_key = mac802154_add_key,
+	.del_key = mac802154_del_key,
+	.add_dev = mac802154_add_dev,
+	.del_dev = mac802154_del_dev,
+	.add_devkey = mac802154_add_devkey,
+	.del_devkey = mac802154_del_devkey,
+	.add_seclevel = mac802154_add_seclevel,
+	.del_seclevel = mac802154_del_seclevel,
+	.lock_table = mac802154_lock_table,
+	.get_table = mac802154_get_table,
+	.unlock_table = mac802154_unlock_table,
+};
+
 struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced = {
 	.get_phy = mac802154_get_phy,
 };
@@ -75,6 +91,8 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = {
 	.get_short_addr = mac802154_dev_get_short_addr,
 	.get_dsn = mac802154_dev_get_dsn,
 
+	.llsec = &mac802154_llsec_ops,
+
 	.set_mac_params = mac802154_set_mac_params,
 	.get_mac_params = mac802154_get_mac_params,
 };
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index f0991f2344d4..15aa2f2b03a7 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -213,3 +213,190 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
 	} else
 		mutex_unlock(&priv->hw->phy->pib_lock);
 }
+
+
+int mac802154_get_params(struct net_device *dev,
+			 struct ieee802154_llsec_params *params)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	int res;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_lock(&priv->sec_mtx);
+	res = mac802154_llsec_get_params(&priv->sec, params);
+	mutex_unlock(&priv->sec_mtx);
+
+	return res;
+}
+
+int mac802154_set_params(struct net_device *dev,
+			 const struct ieee802154_llsec_params *params,
+			 int changed)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	int res;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_lock(&priv->sec_mtx);
+	res = mac802154_llsec_set_params(&priv->sec, params, changed);
+	mutex_unlock(&priv->sec_mtx);
+
+	return res;
+}
+
+
+int mac802154_add_key(struct net_device *dev,
+		      const struct ieee802154_llsec_key_id *id,
+		      const struct ieee802154_llsec_key *key)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	int res;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_lock(&priv->sec_mtx);
+	res = mac802154_llsec_key_add(&priv->sec, id, key);
+	mutex_unlock(&priv->sec_mtx);
+
+	return res;
+}
+
+int mac802154_del_key(struct net_device *dev,
+		      const struct ieee802154_llsec_key_id *id)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	int res;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_lock(&priv->sec_mtx);
+	res = mac802154_llsec_key_del(&priv->sec, id);
+	mutex_unlock(&priv->sec_mtx);
+
+	return res;
+}
+
+
+int mac802154_add_dev(struct net_device *dev,
+		      const struct ieee802154_llsec_device *llsec_dev)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	int res;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_lock(&priv->sec_mtx);
+	res = mac802154_llsec_dev_add(&priv->sec, llsec_dev);
+	mutex_unlock(&priv->sec_mtx);
+
+	return res;
+}
+
+int mac802154_del_dev(struct net_device *dev, __le64 dev_addr)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	int res;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_lock(&priv->sec_mtx);
+	res = mac802154_llsec_dev_del(&priv->sec, dev_addr);
+	mutex_unlock(&priv->sec_mtx);
+
+	return res;
+}
+
+
+int mac802154_add_devkey(struct net_device *dev,
+			 __le64 device_addr,
+			 const struct ieee802154_llsec_device_key *key)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	int res;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_lock(&priv->sec_mtx);
+	res = mac802154_llsec_devkey_add(&priv->sec, device_addr, key);
+	mutex_unlock(&priv->sec_mtx);
+
+	return res;
+}
+
+int mac802154_del_devkey(struct net_device *dev,
+			 __le64 device_addr,
+			 const struct ieee802154_llsec_device_key *key)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	int res;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_lock(&priv->sec_mtx);
+	res = mac802154_llsec_devkey_del(&priv->sec, device_addr, key);
+	mutex_unlock(&priv->sec_mtx);
+
+	return res;
+}
+
+
+int mac802154_add_seclevel(struct net_device *dev,
+			   const struct ieee802154_llsec_seclevel *sl)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	int res;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_lock(&priv->sec_mtx);
+	res = mac802154_llsec_seclevel_add(&priv->sec, sl);
+	mutex_unlock(&priv->sec_mtx);
+
+	return res;
+}
+
+int mac802154_del_seclevel(struct net_device *dev,
+			   const struct ieee802154_llsec_seclevel *sl)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	int res;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_lock(&priv->sec_mtx);
+	res = mac802154_llsec_seclevel_del(&priv->sec, sl);
+	mutex_unlock(&priv->sec_mtx);
+
+	return res;
+}
+
+
+void mac802154_lock_table(struct net_device *dev)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_lock(&priv->sec_mtx);
+}
+
+void mac802154_get_table(struct net_device *dev,
+			 struct ieee802154_llsec_table **t)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	*t = &priv->sec.table;
+}
+
+void mac802154_unlock_table(struct net_device *dev)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	mutex_unlock(&priv->sec_mtx);
+}
-- 
cgit v1.2.3-71-gd317


From 3e9c156e2c210ab67b12b1b692983a6b97c19d3f Mon Sep 17 00:00:00 2001
From: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Date: Fri, 16 May 2014 17:46:44 +0200
Subject: ieee802154: add netlink interfaces for llsec

This patch adds user-visible interfaces for the llsec infrastructure.
For the added methods, the only major difference between all add/remove
implementation lies in how the specific object is parsed, and for dump
requests, how objects are written into netlink messages.

To save on boilerplate code, table dumps are routed through a helper
function that handles netlink dump state, leaving the actual dumping
code to care only about iterating over the table to be dumped and
filling netlink messages. For add/remove methods, the boilerplate
required to work is not quite as large, but still enough to also move
into a local helper.

Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/nl802154.h    |  31 ++
 net/ieee802154/ieee802154.h |  19 ++
 net/ieee802154/netlink.c    |  20 ++
 net/ieee802154/nl-mac.c     | 807 ++++++++++++++++++++++++++++++++++++++++++++
 net/ieee802154/nl_policy.c  |  16 +
 5 files changed, 893 insertions(+)

(limited to 'include')

diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h
index c8d7f3965fff..20163b9a0eae 100644
--- a/include/linux/nl802154.h
+++ b/include/linux/nl802154.h
@@ -80,6 +80,22 @@ enum {
 
 	IEEE802154_ATTR_FRAME_RETRIES,
 
+	IEEE802154_ATTR_LLSEC_ENABLED,
+	IEEE802154_ATTR_LLSEC_SECLEVEL,
+	IEEE802154_ATTR_LLSEC_KEY_MODE,
+	IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT,
+	IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED,
+	IEEE802154_ATTR_LLSEC_KEY_ID,
+	IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+	IEEE802154_ATTR_LLSEC_KEY_BYTES,
+	IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES,
+	IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS,
+	IEEE802154_ATTR_LLSEC_FRAME_TYPE,
+	IEEE802154_ATTR_LLSEC_CMD_FRAME_ID,
+	IEEE802154_ATTR_LLSEC_SECLEVELS,
+	IEEE802154_ATTR_LLSEC_DEV_OVERRIDE,
+	IEEE802154_ATTR_LLSEC_DEV_KEY_MODE,
+
 	__IEEE802154_ATTR_MAX,
 };
 
@@ -134,6 +150,21 @@ enum {
 
 	IEEE802154_SET_MACPARAMS,
 
+	IEEE802154_LLSEC_GETPARAMS,
+	IEEE802154_LLSEC_SETPARAMS,
+	IEEE802154_LLSEC_LIST_KEY,
+	IEEE802154_LLSEC_ADD_KEY,
+	IEEE802154_LLSEC_DEL_KEY,
+	IEEE802154_LLSEC_LIST_DEV,
+	IEEE802154_LLSEC_ADD_DEV,
+	IEEE802154_LLSEC_DEL_DEV,
+	IEEE802154_LLSEC_LIST_DEVKEY,
+	IEEE802154_LLSEC_ADD_DEVKEY,
+	IEEE802154_LLSEC_DEL_DEVKEY,
+	IEEE802154_LLSEC_LIST_SECLEVEL,
+	IEEE802154_LLSEC_ADD_SECLEVEL,
+	IEEE802154_LLSEC_DEL_SECLEVEL,
+
 	__IEEE802154_CMD_MAX,
 };
 
diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h
index 6693a5cf01ce..8b83a231299e 100644
--- a/net/ieee802154/ieee802154.h
+++ b/net/ieee802154/ieee802154.h
@@ -68,4 +68,23 @@ int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info);
 int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb);
 int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info);
 
+int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_keys(struct sk_buff *skb,
+			       struct netlink_callback *cb);
+int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_devs(struct sk_buff *skb,
+			       struct netlink_callback *cb);
+int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_devkeys(struct sk_buff *skb,
+				  struct netlink_callback *cb);
+int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_seclevels(struct sk_buff *skb,
+				    struct netlink_callback *cb);
+
 #endif
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 04b20589d97a..26efcf4fd2ff 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -124,6 +124,26 @@ static const struct genl_ops ieee8021154_ops[] = {
 	IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface,
 			ieee802154_dump_iface),
 	IEEE802154_OP(IEEE802154_SET_MACPARAMS, ieee802154_set_macparams),
+	IEEE802154_OP(IEEE802154_LLSEC_GETPARAMS, ieee802154_llsec_getparams),
+	IEEE802154_OP(IEEE802154_LLSEC_SETPARAMS, ieee802154_llsec_setparams),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_KEY, NULL,
+			ieee802154_llsec_dump_keys),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_KEY, ieee802154_llsec_add_key),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_KEY, ieee802154_llsec_del_key),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEV, NULL,
+			ieee802154_llsec_dump_devs),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_DEV, ieee802154_llsec_add_dev),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_DEV, ieee802154_llsec_del_dev),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEVKEY, NULL,
+			ieee802154_llsec_dump_devkeys),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_DEVKEY, ieee802154_llsec_add_devkey),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_DEVKEY, ieee802154_llsec_del_devkey),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_SECLEVEL, NULL,
+			ieee802154_llsec_dump_seclevels),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_SECLEVEL,
+		      ieee802154_llsec_add_seclevel),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_SECLEVEL,
+		      ieee802154_llsec_del_seclevel),
 };
 
 static const struct genl_multicast_group ieee802154_mcgrps[] = {
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 5d285498c0f6..5617b4c6d6d5 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -715,3 +715,810 @@ out:
 	dev_put(dev);
 	return rc;
 }
+
+
+
+static int
+ieee802154_llsec_parse_key_id(struct genl_info *info,
+			      struct ieee802154_llsec_key_id *desc)
+{
+	memset(desc, 0, sizeof(*desc));
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE])
+		return -EINVAL;
+
+	desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]);
+
+	if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) {
+		if (!info->attrs[IEEE802154_ATTR_PAN_ID] &&
+		    !(info->attrs[IEEE802154_ATTR_SHORT_ADDR] ||
+		      info->attrs[IEEE802154_ATTR_HW_ADDR]))
+			return -EINVAL;
+
+		desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]);
+
+		if (info->attrs[IEEE802154_ATTR_SHORT_ADDR]) {
+			desc->device_addr.mode = IEEE802154_ADDR_SHORT;
+			desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]);
+		} else {
+			desc->device_addr.mode = IEEE802154_ADDR_LONG;
+			desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+		}
+	}
+
+	if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID])
+		return -EINVAL;
+
+	if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT])
+		return -EINVAL;
+
+	if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED])
+		return -EINVAL;
+
+	if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT)
+		desc->id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]);
+
+	switch (desc->mode) {
+	case IEEE802154_SCF_KEY_SHORT_INDEX:
+	{
+		u32 source = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]);
+		desc->short_source = cpu_to_le32(source);
+		break;
+	}
+	case IEEE802154_SCF_KEY_HW_INDEX:
+		desc->extended_source = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]);
+		break;
+	}
+
+	return 0;
+}
+
+static int
+ieee802154_llsec_fill_key_id(struct sk_buff *msg,
+			     const struct ieee802154_llsec_key_id *desc)
+{
+	if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_MODE, desc->mode))
+		return -EMSGSIZE;
+
+	if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) {
+		if (nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID,
+				      desc->device_addr.pan_id))
+			return -EMSGSIZE;
+
+		if (desc->device_addr.mode == IEEE802154_ADDR_SHORT &&
+		    nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR,
+				      desc->device_addr.short_addr))
+			return -EMSGSIZE;
+
+		if (desc->device_addr.mode == IEEE802154_ADDR_LONG &&
+		    nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR,
+				   desc->device_addr.extended_addr))
+			return -EMSGSIZE;
+	}
+
+	if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT &&
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_ID, desc->id))
+		return -EMSGSIZE;
+
+	if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX &&
+	    nla_put_u32(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT,
+			le32_to_cpu(desc->short_source)))
+		return -EMSGSIZE;
+
+	if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX &&
+	    nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED,
+			   desc->extended_source))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	struct net_device *dev = NULL;
+	int rc = -ENOBUFS;
+	struct ieee802154_mlme_ops *ops;
+	void *hdr;
+	struct ieee802154_llsec_params params;
+
+	pr_debug("%s\n", __func__);
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	ops = ieee802154_mlme_ops(dev);
+	if (!ops->llsec)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		goto out_dev;
+
+	hdr = genlmsg_put(msg, 0, info->snd_seq, &nl802154_family, 0,
+		IEEE802154_LLSEC_GETPARAMS);
+	if (!hdr)
+		goto out_free;
+
+	rc = ops->llsec->get_params(dev, &params);
+	if (rc < 0)
+		goto out_free;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_ENABLED, params.enabled) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+			be32_to_cpu(params.frame_counter)) ||
+	    ieee802154_llsec_fill_key_id(msg, &params.out_key))
+		goto out_free;
+
+	dev_put(dev);
+
+	return ieee802154_nl_reply(msg, info);
+out_free:
+	nlmsg_free(msg);
+out_dev:
+	dev_put(dev);
+	return rc;
+}
+
+int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net_device *dev = NULL;
+	int rc = -EINVAL;
+	struct ieee802154_mlme_ops *ops;
+	struct ieee802154_llsec_params params;
+	int changed = 0;
+
+	pr_debug("%s\n", __func__);
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_ENABLED] &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE] &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL])
+		goto out;
+
+	ops = ieee802154_mlme_ops(dev);
+	if (!ops->llsec) {
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL] &&
+	    nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) > 7)
+		goto out;
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]) {
+		params.enabled = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]);
+		changed |= IEEE802154_LLSEC_PARAM_ENABLED;
+	}
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) {
+		if (ieee802154_llsec_parse_key_id(info, &params.out_key))
+			goto out;
+
+		changed |= IEEE802154_LLSEC_PARAM_OUT_KEY;
+	}
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) {
+		params.out_level = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]);
+		changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL;
+	}
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]) {
+		u32 fc = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+
+		params.frame_counter = cpu_to_be32(fc);
+		changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER;
+	}
+
+	rc = ops->llsec->set_params(dev, &params, changed);
+
+	dev_put(dev);
+
+	return rc;
+out:
+	dev_put(dev);
+	return rc;
+}
+
+
+
+struct llsec_dump_data {
+	struct sk_buff *skb;
+	int s_idx, s_idx2;
+	int portid;
+	int nlmsg_seq;
+	struct net_device *dev;
+	struct ieee802154_mlme_ops *ops;
+	struct ieee802154_llsec_table *table;
+};
+
+static int
+ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb,
+			    int (*step)(struct llsec_dump_data*))
+{
+	struct net *net = sock_net(skb->sk);
+	struct net_device *dev;
+	struct llsec_dump_data data;
+	int idx = 0;
+	int first_dev = cb->args[0];
+	int rc;
+
+	for_each_netdev(net, dev) {
+		if (idx < first_dev || dev->type != ARPHRD_IEEE802154)
+			goto skip;
+
+		data.ops = ieee802154_mlme_ops(dev);
+		if (!data.ops->llsec)
+			goto skip;
+
+		data.skb = skb;
+		data.s_idx = cb->args[1];
+		data.s_idx2 = cb->args[2];
+		data.dev = dev;
+		data.portid = NETLINK_CB(cb->skb).portid;
+		data.nlmsg_seq = cb->nlh->nlmsg_seq;
+
+		data.ops->llsec->lock_table(dev);
+		data.ops->llsec->get_table(data.dev, &data.table);
+		rc = step(&data);
+		data.ops->llsec->unlock_table(dev);
+
+		if (rc < 0)
+			break;
+
+skip:
+		idx++;
+	}
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static int
+ieee802154_nl_llsec_change(struct sk_buff *skb, struct genl_info *info,
+			   int (*fn)(struct net_device*, struct genl_info*))
+{
+	struct net_device *dev = NULL;
+	int rc = -EINVAL;
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	if (!ieee802154_mlme_ops(dev)->llsec)
+		rc = -EOPNOTSUPP;
+	else
+		rc = fn(dev, info);
+
+	dev_put(dev);
+	return rc;
+}
+
+
+
+static int
+ieee802154_llsec_parse_key(struct genl_info *info,
+			   struct ieee802154_llsec_key *key)
+{
+	u8 frames;
+	u32 commands[256 / 32];
+
+	memset(key, 0, sizeof(*key));
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] ||
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES])
+		return -EINVAL;
+
+	frames = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES]);
+	if ((frames & BIT(IEEE802154_FC_TYPE_MAC_CMD)) &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS])
+		return -EINVAL;
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) {
+		nla_memcpy(commands,
+			   info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS],
+			   256 / 8);
+
+		if (commands[0] || commands[1] || commands[2] || commands[3] ||
+		    commands[4] || commands[5] || commands[6] ||
+		    commands[7] >= BIT(IEEE802154_CMD_GTS_REQ + 1))
+			return -EINVAL;
+
+		key->cmd_frame_ids = commands[7];
+	}
+
+	key->frame_types = frames;
+
+	nla_memcpy(key->key, info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES],
+		   IEEE802154_LLSEC_KEY_SIZE);
+
+	return 0;
+}
+
+static int llsec_add_key(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_key key;
+	struct ieee802154_llsec_key_id id;
+
+	if (ieee802154_llsec_parse_key(info, &key) ||
+	    ieee802154_llsec_parse_key_id(info, &id))
+		return -EINVAL;
+
+	return ops->llsec->add_key(dev, &id, &key);
+}
+
+int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info)
+{
+	if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+	    (NLM_F_CREATE | NLM_F_EXCL))
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_key);
+}
+
+static int llsec_remove_key(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_key_id id;
+
+	if (ieee802154_llsec_parse_key_id(info, &id))
+		return -EINVAL;
+
+	return ops->llsec->del_key(dev, &id);
+}
+
+int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_remove_key);
+}
+
+static int
+ieee802154_nl_fill_key(struct sk_buff *msg, u32 portid, u32 seq,
+		       const struct ieee802154_llsec_key_entry *key,
+		       const struct net_device *dev)
+{
+	void *hdr;
+	u32 commands[256 / 32];
+
+	hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+			  IEEE802154_LLSEC_LIST_KEY);
+	if (!hdr)
+		goto out;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+	    ieee802154_llsec_fill_key_id(msg, &key->id) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES,
+		       key->key->frame_types))
+		goto nla_put_failure;
+
+	if (key->key->frame_types & BIT(IEEE802154_FC_TYPE_MAC_CMD)) {
+		memset(commands, 0, sizeof(commands));
+		commands[7] = key->key->cmd_frame_ids;
+		if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS,
+			    sizeof(commands), commands))
+			goto nla_put_failure;
+	}
+
+	if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_BYTES,
+		    IEEE802154_LLSEC_KEY_SIZE, key->key->key))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+out:
+	return -EMSGSIZE;
+}
+
+static int llsec_iter_keys(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_key_entry *pos;
+	int rc = 0, idx = 0;
+
+	list_for_each_entry(pos, &data->table->keys, list) {
+		if (idx++ < data->s_idx)
+			continue;
+
+		if (ieee802154_nl_fill_key(data->skb, data->portid,
+					   data->nlmsg_seq, pos, data->dev)) {
+			rc = -EMSGSIZE;
+			break;
+		}
+
+		data->s_idx++;
+	}
+
+	return rc;
+}
+
+int ieee802154_llsec_dump_keys(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_keys);
+}
+
+
+
+static int
+llsec_parse_dev(struct genl_info *info,
+		struct ieee802154_llsec_device *dev)
+{
+	memset(dev, 0, sizeof(*dev));
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] ||
+	    !info->attrs[IEEE802154_ATTR_HW_ADDR] ||
+	    !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] ||
+	    !info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] ||
+	    (!!info->attrs[IEEE802154_ATTR_PAN_ID] !=
+	     !!info->attrs[IEEE802154_ATTR_SHORT_ADDR]))
+		return -EINVAL;
+
+	if (info->attrs[IEEE802154_ATTR_PAN_ID]) {
+		dev->pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]);
+		dev->short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]);
+	} else {
+		dev->short_addr = cpu_to_le16(IEEE802154_ADDR_UNDEF);
+	}
+
+	dev->hwaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+	dev->frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+	dev->seclevel_exempt = !!nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]);
+	dev->key_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE]);
+
+	if (dev->key_mode >= __IEEE802154_LLSEC_DEVKEY_MAX)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int llsec_add_dev(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_device desc;
+
+	if (llsec_parse_dev(info, &desc))
+		return -EINVAL;
+
+	return ops->llsec->add_dev(dev, &desc);
+}
+
+int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info)
+{
+	if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+	    (NLM_F_CREATE | NLM_F_EXCL))
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_dev);
+}
+
+static int llsec_del_dev(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	__le64 devaddr;
+
+	if (!info->attrs[IEEE802154_ATTR_HW_ADDR])
+		return -EINVAL;
+
+	devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+
+	return ops->llsec->del_dev(dev, devaddr);
+}
+
+int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_del_dev);
+}
+
+static int
+ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq,
+		       const struct ieee802154_llsec_device *desc,
+		       const struct net_device *dev)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+			  IEEE802154_LLSEC_LIST_DEV);
+	if (!hdr)
+		goto out;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+	    nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id) ||
+	    nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR,
+			      desc->short_addr) ||
+	    nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+			desc->frame_counter) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE,
+		       desc->seclevel_exempt) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, desc->key_mode))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+out:
+	return -EMSGSIZE;
+}
+
+static int llsec_iter_devs(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_device *pos;
+	int rc = 0, idx = 0;
+
+	list_for_each_entry(pos, &data->table->devices, list) {
+		if (idx++ < data->s_idx)
+			continue;
+
+		if (ieee802154_nl_fill_dev(data->skb, data->portid,
+					   data->nlmsg_seq, pos, data->dev)) {
+			rc = -EMSGSIZE;
+			break;
+		}
+
+		data->s_idx++;
+	}
+
+	return rc;
+}
+
+int ieee802154_llsec_dump_devs(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devs);
+}
+
+
+
+static int llsec_add_devkey(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_device_key key;
+	__le64 devaddr;
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] ||
+	    !info->attrs[IEEE802154_ATTR_HW_ADDR] ||
+	    ieee802154_llsec_parse_key_id(info, &key.key_id))
+		return -EINVAL;
+
+	devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+	key.frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+
+	return ops->llsec->add_devkey(dev, devaddr, &key);
+}
+
+int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info)
+{
+	if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+	    (NLM_F_CREATE | NLM_F_EXCL))
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_devkey);
+}
+
+static int llsec_del_devkey(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_device_key key;
+	__le64 devaddr;
+
+	if (!info->attrs[IEEE802154_ATTR_HW_ADDR] ||
+	    ieee802154_llsec_parse_key_id(info, &key.key_id))
+		return -EINVAL;
+
+	devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+
+	return ops->llsec->del_devkey(dev, devaddr, &key);
+}
+
+int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_del_devkey);
+}
+
+static int
+ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq,
+			  __le64 devaddr,
+			  const struct ieee802154_llsec_device_key *devkey,
+			  const struct net_device *dev)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+			  IEEE802154_LLSEC_LIST_DEVKEY);
+	if (!hdr)
+		goto out;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+	    nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+			devkey->frame_counter) ||
+	    ieee802154_llsec_fill_key_id(msg, &devkey->key_id))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+out:
+	return -EMSGSIZE;
+}
+
+static int llsec_iter_devkeys(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_device *dpos;
+	struct ieee802154_llsec_device_key *kpos;
+	int rc = 0, idx = 0, idx2;
+
+	list_for_each_entry(dpos, &data->table->devices, list) {
+		if (idx++ < data->s_idx)
+			continue;
+
+		idx2 = 0;
+
+		list_for_each_entry(kpos, &dpos->keys, list) {
+			if (idx2++ < data->s_idx2)
+				continue;
+
+			if (ieee802154_nl_fill_devkey(data->skb, data->portid,
+						      data->nlmsg_seq,
+						      dpos->hwaddr, kpos,
+						      data->dev)) {
+				return rc = -EMSGSIZE;
+			}
+
+			data->s_idx2++;
+		}
+
+		data->s_idx++;
+	}
+
+	return rc;
+}
+
+int ieee802154_llsec_dump_devkeys(struct sk_buff *skb,
+				  struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devkeys);
+}
+
+
+
+static int
+llsec_parse_seclevel(struct genl_info *info,
+		     struct ieee802154_llsec_seclevel *sl)
+{
+	memset(sl, 0, sizeof(*sl));
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE] ||
+	    !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS] ||
+	    !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE])
+		return -EINVAL;
+
+	sl->frame_type = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE]);
+	if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD) {
+		if (!info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID])
+			return -EINVAL;
+
+		sl->cmd_frame_id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]);
+	}
+
+	sl->sec_levels = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS]);
+	sl->device_override = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]);
+
+	return 0;
+}
+
+static int llsec_add_seclevel(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_seclevel sl;
+
+	if (llsec_parse_seclevel(info, &sl))
+		return -EINVAL;
+
+	return ops->llsec->add_seclevel(dev, &sl);
+}
+
+int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info)
+{
+	if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+	    (NLM_F_CREATE | NLM_F_EXCL))
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_seclevel);
+}
+
+static int llsec_del_seclevel(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_seclevel sl;
+
+	if (llsec_parse_seclevel(info, &sl))
+		return -EINVAL;
+
+	return ops->llsec->del_seclevel(dev, &sl);
+}
+
+int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_del_seclevel);
+}
+
+static int
+ieee802154_nl_fill_seclevel(struct sk_buff *msg, u32 portid, u32 seq,
+			    const struct ieee802154_llsec_seclevel *sl,
+			    const struct net_device *dev)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+			  IEEE802154_LLSEC_LIST_SECLEVEL);
+	if (!hdr)
+		goto out;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_FRAME_TYPE, sl->frame_type) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVELS, sl->sec_levels) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE,
+		       sl->device_override))
+		goto nla_put_failure;
+
+	if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD &&
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_CMD_FRAME_ID,
+		       sl->cmd_frame_id))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+out:
+	return -EMSGSIZE;
+}
+
+static int llsec_iter_seclevels(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_seclevel *pos;
+	int rc = 0, idx = 0;
+
+	list_for_each_entry(pos, &data->table->security_levels, list) {
+		if (idx++ < data->s_idx)
+			continue;
+
+		if (ieee802154_nl_fill_seclevel(data->skb, data->portid,
+						data->nlmsg_seq, pos,
+						data->dev)) {
+			rc = -EMSGSIZE;
+			break;
+		}
+
+		data->s_idx++;
+	}
+
+	return rc;
+}
+
+int ieee802154_llsec_dump_seclevels(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_seclevels);
+}
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index fd7be5e45cef..3a703ab88348 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c
@@ -62,5 +62,21 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
 	[IEEE802154_ATTR_CSMA_MAX_BE] = { .type = NLA_U8, },
 
 	[IEEE802154_ATTR_FRAME_RETRIES] = { .type = NLA_S8, },
+
+	[IEEE802154_ATTR_LLSEC_ENABLED] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_SECLEVEL] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_KEY_MODE] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT] = { .type = NLA_U32, },
+	[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED] = { .type = NLA_HW_ADDR, },
+	[IEEE802154_ATTR_LLSEC_KEY_ID] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] = { .type = NLA_U32 },
+	[IEEE802154_ATTR_LLSEC_KEY_BYTES] = { .len = 16, },
+	[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS] = { .len = 258 / 8 },
+	[IEEE802154_ATTR_LLSEC_FRAME_TYPE] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_SECLEVELS] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] = { .type = NLA_U8, },
 };
 
-- 
cgit v1.2.3-71-gd317


From f0f77dc6be76ed1854b08688390e156e4b351ab5 Mon Sep 17 00:00:00 2001
From: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Date: Fri, 16 May 2014 17:46:45 +0200
Subject: ieee802154, mac802154: implement devkey record option

The 802.15.4-2011 standard states that for each key, a list of devices
that use this key shall be kept. Previous patches have only considered
two options:

 * a device "uses" (or may use) all keys, rendering the list useless
 * a device is restricted to a certain set of keys

Another option would be that a device *may* use all keys, but need not
do so, and we are interested in the actual set of keys the device uses.
Recording keys used by any given device may have a noticable performance
impact and might not be needed as often. The common case, in which a
device will not switch keys too often, should still perform well.

Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ieee802154_netdev.h |  1 +
 net/mac802154/llsec.c           | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

(limited to 'include')

diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index eb9f850a51b6..3b53c8e405e4 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -280,6 +280,7 @@ struct ieee802154_llsec_device_key {
 enum {
 	IEEE802154_LLSEC_DEVKEY_IGNORE,
 	IEEE802154_LLSEC_DEVKEY_RESTRICT,
+	IEEE802154_LLSEC_DEVKEY_RECORD,
 
 	__IEEE802154_LLSEC_DEVKEY_MAX,
 };
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 392653b1b5a3..a83674edaafd 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -920,6 +920,37 @@ llsec_do_decrypt(struct sk_buff *skb, const struct mac802154_llsec *sec,
 		return llsec_do_decrypt_auth(skb, sec, hdr, key, dev_addr);
 }
 
+static int
+llsec_update_devkey_record(struct mac802154_llsec_device *dev,
+			   const struct ieee802154_llsec_key_id *in_key)
+{
+	struct mac802154_llsec_device_key *devkey;
+
+	devkey = llsec_devkey_find(dev, in_key);
+
+	if (!devkey) {
+		struct mac802154_llsec_device_key *next;
+
+		next = kzalloc(sizeof(*devkey), GFP_ATOMIC);
+		if (!next)
+			return -ENOMEM;
+
+		next->devkey.key_id = *in_key;
+
+		spin_lock_bh(&dev->lock);
+
+		devkey = llsec_devkey_find(dev, in_key);
+		if (!devkey)
+			list_add_rcu(&next->devkey.list, &dev->dev.keys);
+		else
+			kfree(next);
+
+		spin_unlock_bh(&dev->lock);
+	}
+
+	return 0;
+}
+
 static int
 llsec_update_devkey_info(struct mac802154_llsec_device *dev,
 			 const struct ieee802154_llsec_key_id *in_key,
@@ -933,6 +964,13 @@ llsec_update_devkey_info(struct mac802154_llsec_device *dev,
 			return -ENOENT;
 	}
 
+	if (dev->dev.key_mode == IEEE802154_LLSEC_DEVKEY_RECORD) {
+		int rc = llsec_update_devkey_record(dev, in_key);
+
+		if (rc < 0)
+			return rc;
+	}
+
 	spin_lock_bh(&dev->lock);
 
 	if ((!devkey && frame_counter < dev->dev.frame_counter) ||
-- 
cgit v1.2.3-71-gd317


From 6c4e548ff36672eeb78f8288a2920d66fa4a6a66 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:22 +0200
Subject: net: cdc_ncm: use ethtool to tune coalescing settings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Datagram coalescing is an integral part of the NCM and MBIM
protocols, intended to reduce the interrupt load primarily
on the device end of the USB link.  As with all coalescing
solutions, there is a trade-off between buffering and
interrupts.

The current defaults are based on the assumption that device
side buffers should be the limiting factor.  However, many
modern high speed LTE modems suffers from buffer-bloat,
making this assumption fail. This results in sub-optimal
performance due to excessive coalescing.  And in cases where
such modems are connected to cheap embedded hosts there is
often severe buffer allocation issues, giving very noticeable
performance degradation .

A start on improving this is going from build time hard
coded limits to per device user configurable limits.  The
ethtool coalescing API was selected as user interface
because, although the tuned values are buffer sizes, these
settings directly control datagram coalescing.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 71 +++++++++++++++++++++++++++++++++++++++++++--
 include/linux/usb/cdc_ncm.h |  6 +++-
 2 files changed, 74 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 2ec3790a4db8..141dbec912be 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -65,6 +65,67 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx);
 static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer);
 static struct usb_driver cdc_ncm_driver;
 
+static int cdc_ncm_get_coalesce(struct net_device *netdev,
+				struct ethtool_coalesce *ec)
+{
+	struct usbnet *dev = netdev_priv(netdev);
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+
+	/* assuming maximum sized dgrams and ignoring NDPs */
+	ec->rx_max_coalesced_frames = ctx->rx_max / ctx->max_datagram_size;
+	ec->tx_max_coalesced_frames = ctx->tx_max / ctx->max_datagram_size;
+
+	/* the timer will fire CDC_NCM_TIMER_PENDING_CNT times in a row */
+	ec->tx_coalesce_usecs = (ctx->timer_interval * CDC_NCM_TIMER_PENDING_CNT) / NSEC_PER_USEC;
+	return 0;
+}
+
+static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx);
+
+static int cdc_ncm_set_coalesce(struct net_device *netdev,
+				struct ethtool_coalesce *ec)
+{
+	struct usbnet *dev = netdev_priv(netdev);
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	u32 new_rx_max = ctx->rx_max;
+	u32 new_tx_max = ctx->tx_max;
+
+	/* assuming maximum sized dgrams and a single NDP */
+	if (ec->rx_max_coalesced_frames)
+		new_rx_max = ec->rx_max_coalesced_frames * ctx->max_datagram_size;
+	if (ec->tx_max_coalesced_frames)
+		new_tx_max = ec->tx_max_coalesced_frames * ctx->max_datagram_size;
+
+	if (ec->tx_coalesce_usecs &&
+	    (ec->tx_coalesce_usecs < CDC_NCM_TIMER_INTERVAL_MIN * CDC_NCM_TIMER_PENDING_CNT ||
+	     ec->tx_coalesce_usecs > CDC_NCM_TIMER_INTERVAL_MAX * CDC_NCM_TIMER_PENDING_CNT))
+		return -EINVAL;
+
+	spin_lock_bh(&ctx->mtx);
+	ctx->timer_interval = ec->tx_coalesce_usecs * NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT;
+	if (!ctx->timer_interval)
+		ctx->tx_timer_pending = 0;
+	spin_unlock_bh(&ctx->mtx);
+
+	/* inform device of new values */
+	if (new_rx_max != ctx->rx_max || new_tx_max != ctx->tx_max)
+		cdc_ncm_update_rxtx_max(dev, new_rx_max, new_tx_max);
+	return 0;
+}
+
+static const struct ethtool_ops cdc_ncm_ethtool_ops = {
+	.get_settings      = usbnet_get_settings,
+	.set_settings      = usbnet_set_settings,
+	.get_link          = usbnet_get_link,
+	.nway_reset        = usbnet_nway_reset,
+	.get_drvinfo       = usbnet_get_drvinfo,
+	.get_msglevel      = usbnet_get_msglevel,
+	.set_msglevel      = usbnet_set_msglevel,
+	.get_ts_info       = ethtool_op_get_ts_info,
+	.get_coalesce      = cdc_ncm_get_coalesce,
+	.set_coalesce      = cdc_ncm_set_coalesce,
+};
+
 /* handle rx_max and tx_max changes */
 static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx)
 {
@@ -257,6 +318,9 @@ static int cdc_ncm_init(struct usbnet *dev)
 			(ctx->tx_max_datagrams > CDC_NCM_DPT_DATAGRAMS_MAX))
 		ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX;
 
+	/* initial coalescing timer interval */
+	ctx->timer_interval = CDC_NCM_TIMER_INTERVAL_USEC * NSEC_PER_USEC;
+
 	return 0;
 }
 
@@ -596,6 +660,9 @@ advance:
 	/* finish setting up the device specific data */
 	cdc_ncm_setup(dev);
 
+	/* override ethtool_ops */
+	dev->net->ethtool_ops = &cdc_ncm_ethtool_ops;
+
 	return 0;
 
 error2:
@@ -863,7 +930,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		ctx->tx_curr_skb = skb_out;
 		goto exit_no_skb;
 
-	} else if ((n < ctx->tx_max_datagrams) && (ready2send == 0)) {
+	} else if ((n < ctx->tx_max_datagrams) && (ready2send == 0) && (ctx->timer_interval > 0)) {
 		/* wait for more frames */
 		/* push variables */
 		ctx->tx_curr_skb = skb_out;
@@ -915,7 +982,7 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx)
 	/* start timer, if not already started */
 	if (!(hrtimer_active(&ctx->tx_timer) || atomic_read(&ctx->stop)))
 		hrtimer_start(&ctx->tx_timer,
-				ktime_set(0, CDC_NCM_TIMER_INTERVAL),
+				ktime_set(0, ctx->timer_interval),
 				HRTIMER_MODE_REL);
 }
 
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 55b6feead93b..5c1066b4dc41 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -72,7 +72,9 @@
 /* Restart the timer, if amount of datagrams is less than given value */
 #define	CDC_NCM_RESTART_TIMER_DATAGRAM_CNT	3
 #define	CDC_NCM_TIMER_PENDING_CNT		2
-#define CDC_NCM_TIMER_INTERVAL			(400UL * NSEC_PER_USEC)
+#define CDC_NCM_TIMER_INTERVAL_USEC		400UL
+#define CDC_NCM_TIMER_INTERVAL_MIN		5UL
+#define CDC_NCM_TIMER_INTERVAL_MAX		(15UL * USEC_PER_SEC)
 
 /* The following macro defines the minimum header space */
 #define	CDC_NCM_MIN_HDR_SIZE \
@@ -107,6 +109,8 @@ struct cdc_ncm_ctx {
 	spinlock_t mtx;
 	atomic_t stop;
 
+	u64 timer_interval;
+
 	u32 tx_timer_pending;
 	u32 tx_curr_frame_num;
 	u32 rx_max;
-- 
cgit v1.2.3-71-gd317


From 70559b8970e52aa9962dc823fd4498af06809544 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:23 +0200
Subject: net: cdc_ncm: use true max dgram count for header estimates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Many newer NCM and MBIM devices will request a maximum tx
datagram count which is much smaller than our hard-coded
absolute max. We can reduce the overhead without sacrificing
any of the simplicity for these devices, by simply using the
true negotiated count in when calculated the maximum NTH and
NDP header sizes.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   |  9 ++++++---
 include/linux/usb/cdc_ncm.h | 10 +---------
 2 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 141dbec912be..b9b562b9128a 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -174,7 +174,7 @@ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx)
 	}
 
 	/* clamp new_tx to sane values */
-	min = CDC_NCM_MIN_HDR_SIZE + ctx->max_datagram_size;
+	min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth16);
 	max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
 
 	/* some devices set dwNtbOutMaxSize too low for the above default */
@@ -318,6 +318,9 @@ static int cdc_ncm_init(struct usbnet *dev)
 			(ctx->tx_max_datagrams > CDC_NCM_DPT_DATAGRAMS_MAX))
 		ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX;
 
+	/* set up maximum NDP size */
+	ctx->max_ndp_size = sizeof(struct usb_cdc_ncm_ndp16) + (ctx->tx_max_datagrams + 1) * sizeof(struct usb_cdc_ncm_dpe16);
+
 	/* initial coalescing timer interval */
 	ctx->timer_interval = CDC_NCM_TIMER_INTERVAL_USEC * NSEC_PER_USEC;
 
@@ -800,7 +803,7 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
 	cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max);
 
 	/* verify that there is room for the NDP and the datagram (reserve) */
-	if ((ctx->tx_max - skb->len - reserve) < CDC_NCM_NDP_SIZE)
+	if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size)
 		return NULL;
 
 	/* link to it */
@@ -810,7 +813,7 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
 		nth16->wNdpIndex = cpu_to_le16(skb->len);
 
 	/* push a new empty NDP */
-	ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, CDC_NCM_NDP_SIZE), 0, CDC_NCM_NDP_SIZE);
+	ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size);
 	ndp16->dwSignature = sign;
 	ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + sizeof(struct usb_cdc_ncm_dpe16));
 	return ndp16;
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 5c1066b4dc41..60a44b8a464e 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -76,15 +76,6 @@
 #define CDC_NCM_TIMER_INTERVAL_MIN		5UL
 #define CDC_NCM_TIMER_INTERVAL_MAX		(15UL * USEC_PER_SEC)
 
-/* The following macro defines the minimum header space */
-#define	CDC_NCM_MIN_HDR_SIZE \
-	(sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16) + \
-	(CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
-
-#define CDC_NCM_NDP_SIZE \
-	(sizeof(struct usb_cdc_ncm_ndp16) +				\
-	      (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
-
 #define cdc_ncm_comm_intf_is_mbim(x)  ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \
 				       (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE)
 #define cdc_ncm_data_intf_is_mbim(x)  ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB)
@@ -110,6 +101,7 @@ struct cdc_ncm_ctx {
 	atomic_t stop;
 
 	u64 timer_interval;
+	u32 max_ndp_size;
 
 	u32 tx_timer_pending;
 	u32 tx_curr_frame_num;
-- 
cgit v1.2.3-71-gd317


From 43e4c6dfc0fd781e68f20caf563a06f5c6ece995 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:24 +0200
Subject: net: cdc_ncm: set reasonable padding limits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We pad frames larger than X to maximum size for devices which
don't need a ZLP after maximum sized frames. This allows the
device to optimize its transfers for one fixed buffer size.

X was arbitrarily set at 512 bytes regardless of real buffer
maximum, causing extreme overheads due to excessive padding of
larger tx buffers. Limit the padding to at most 3 full USB
packets, still allowing the overhead to payload ratio of 3/1.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 8 ++++++--
 include/linux/usb/cdc_ncm.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index b9b562b9128a..9592d4669435 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -213,6 +213,10 @@ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx)
 
 	/* max qlen depend on hard_mtu and rx_urb_size */
 	usbnet_update_max_qlen(dev);
+
+	/* never pad more than 3 full USB packets per transfer */
+	ctx->min_tx_pkt = clamp_t(u16, ctx->tx_max - 3 * usb_maxpacket(dev->udev, dev->out, 1),
+				  CDC_NCM_MIN_TX_PKT, ctx->tx_max);
 }
 
 /* helpers for NCM and MBIM differences */
@@ -947,7 +951,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		/* variables will be reset at next call */
 	}
 
-	/* If collected data size is less or equal CDC_NCM_MIN_TX_PKT
+	/* If collected data size is less or equal ctx->min_tx_pkt
 	 * bytes, we send buffers as it is. If we get more data, it
 	 * would be more efficient for USB HS mobile device with DMA
 	 * engine to receive a full size NTB, than canceling DMA
@@ -957,7 +961,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	 * a ZLP after full sized NTBs.
 	 */
 	if (!(dev->driver_info->flags & FLAG_SEND_ZLP) &&
-	    skb_out->len > CDC_NCM_MIN_TX_PKT)
+	    skb_out->len > ctx->min_tx_pkt)
 		memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0,
 		       ctx->tx_max - skb_out->len);
 	else if (skb_out->len < ctx->tx_max && (skb_out->len % dev->maxpacket) == 0)
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 60a44b8a464e..79de6724d398 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -115,6 +115,7 @@ struct cdc_ncm_ctx {
 	u16 tx_seq;
 	u16 rx_seq;
 	u16 connected;
+	u16 min_tx_pkt;
 };
 
 u8 cdc_ncm_select_altsetting(struct usb_interface *intf);
-- 
cgit v1.2.3-71-gd317


From beeecd42c3b41d17d0bf1d839db99274c287f514 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:25 +0200
Subject: net: cdc_ncm/cdc_mbim: adding NCM protocol statistics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To have an idea of the effects of the protocol coalescing
it's useful to have some counters showing the different
aspects.

Due to the asymmetrical usbnet interface the netdev
rx_bytes counter has been counting real received payload,
while the tx_bytes counter has included the NCM/MBIM
framing overhead. This overhead can be many times the
payload because of the aggressive padding strategy of
this driver, and will vary a lot depending on device
and traffic.

With very few exceptions, users are only interested in
the payload size.  Having an somewhat accurate payload
byte counter is particularly important for mobile
broadband devices, which many NCM devices and of course
all MBIM devices are. Users and userspace applications
will use this counter to monitor account quotas.

Having protocol specific counters for the overhead, we are
now able to correct the tx_bytes netdev counter so that
it shows the real payload

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_mbim.c  |  6 +++
 drivers/net/usb/cdc_ncm.c   | 91 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/cdc_ncm.h | 11 ++++++
 3 files changed, 108 insertions(+)

(limited to 'include')

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index bc23273d0455..5ee7a1dbc023 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -420,6 +420,7 @@ static int cdc_mbim_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
 	struct usb_cdc_ncm_dpe16 *dpe16;
 	int ndpoffset;
 	int loopcount = 50; /* arbitrary max preventing infinite loop */
+	u32 payload = 0;
 	u8 *c;
 	u16 tci;
 
@@ -482,6 +483,7 @@ next_ndp:
 			if (!skb)
 				goto error;
 			usbnet_skb_return(dev, skb);
+			payload += len;	/* count payload bytes in this NTB */
 		}
 	}
 err_ndp:
@@ -490,6 +492,10 @@ err_ndp:
 	if (ndpoffset && loopcount--)
 		goto next_ndp;
 
+	/* update stats */
+	ctx->rx_overhead += skb_in->len - payload;
+	ctx->rx_ntbs++;
+
 	return 1;
 error:
 	return 0;
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 9592d4669435..f4b439847d04 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -65,6 +65,68 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx);
 static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer);
 static struct usb_driver cdc_ncm_driver;
 
+struct cdc_ncm_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define CDC_NCM_STAT(str, m) { \
+		.stat_string = str, \
+		.sizeof_stat = sizeof(((struct cdc_ncm_ctx *)0)->m), \
+		.stat_offset = offsetof(struct cdc_ncm_ctx, m) }
+#define CDC_NCM_SIMPLE_STAT(m)	CDC_NCM_STAT(__stringify(m), m)
+
+static const struct cdc_ncm_stats cdc_ncm_gstrings_stats[] = {
+	CDC_NCM_SIMPLE_STAT(tx_reason_ntb_full),
+	CDC_NCM_SIMPLE_STAT(tx_reason_ndp_full),
+	CDC_NCM_SIMPLE_STAT(tx_reason_timeout),
+	CDC_NCM_SIMPLE_STAT(tx_reason_max_datagram),
+	CDC_NCM_SIMPLE_STAT(tx_overhead),
+	CDC_NCM_SIMPLE_STAT(tx_ntbs),
+	CDC_NCM_SIMPLE_STAT(rx_overhead),
+	CDC_NCM_SIMPLE_STAT(rx_ntbs),
+};
+
+static int cdc_ncm_get_sset_count(struct net_device __always_unused *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return ARRAY_SIZE(cdc_ncm_gstrings_stats);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void cdc_ncm_get_ethtool_stats(struct net_device *netdev,
+				    struct ethtool_stats __always_unused *stats,
+				    u64 *data)
+{
+	struct usbnet *dev = netdev_priv(netdev);
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	int i;
+	char *p = NULL;
+
+	for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) {
+		p = (char *)ctx + cdc_ncm_gstrings_stats[i].stat_offset;
+		data[i] = (cdc_ncm_gstrings_stats[i].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+}
+
+static void cdc_ncm_get_strings(struct net_device __always_unused *netdev, u32 stringset, u8 *data)
+{
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) {
+			memcpy(p, cdc_ncm_gstrings_stats[i].stat_string, ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+	}
+}
+
 static int cdc_ncm_get_coalesce(struct net_device *netdev,
 				struct ethtool_coalesce *ec)
 {
@@ -122,6 +184,9 @@ static const struct ethtool_ops cdc_ncm_ethtool_ops = {
 	.get_msglevel      = usbnet_get_msglevel,
 	.set_msglevel      = usbnet_set_msglevel,
 	.get_ts_info       = ethtool_op_get_ts_info,
+	.get_sset_count    = cdc_ncm_get_sset_count,
+	.get_strings       = cdc_ncm_get_strings,
+	.get_ethtool_stats = cdc_ncm_get_ethtool_stats,
 	.get_coalesce      = cdc_ncm_get_coalesce,
 	.set_coalesce      = cdc_ncm_set_coalesce,
 };
@@ -862,6 +927,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 
 		/* count total number of frames in this NTB */
 		ctx->tx_curr_frame_num = 0;
+
+		/* recent payload counter for this skb_out */
+		ctx->tx_curr_frame_payload = 0;
 	}
 
 	for (n = ctx->tx_curr_frame_num; n < ctx->tx_max_datagrams; n++) {
@@ -899,6 +967,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 				ctx->tx_rem_sign = sign;
 				skb = NULL;
 				ready2send = 1;
+				ctx->tx_reason_ntb_full++;	/* count reason for transmitting */
 			}
 			break;
 		}
@@ -912,12 +981,14 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		ndp16->dpe16[index].wDatagramIndex = cpu_to_le16(skb_out->len);
 		ndp16->wLength = cpu_to_le16(ndplen + sizeof(struct usb_cdc_ncm_dpe16));
 		memcpy(skb_put(skb_out, skb->len), skb->data, skb->len);
+		ctx->tx_curr_frame_payload += skb->len;	/* count real tx payload data */
 		dev_kfree_skb_any(skb);
 		skb = NULL;
 
 		/* send now if this NDP is full */
 		if (index >= CDC_NCM_DPT_DATAGRAMS_MAX) {
 			ready2send = 1;
+			ctx->tx_reason_ndp_full++;	/* count reason for transmitting */
 			break;
 		}
 	}
@@ -947,6 +1018,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		goto exit_no_skb;
 
 	} else {
+		if (n == ctx->tx_max_datagrams)
+			ctx->tx_reason_max_datagram++;	/* count reason for transmitting */
 		/* frame goes out */
 		/* variables will be reset at next call */
 	}
@@ -974,6 +1047,17 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	/* return skb */
 	ctx->tx_curr_skb = NULL;
 	dev->net->stats.tx_packets += ctx->tx_curr_frame_num;
+
+	/* keep private stats: framing overhead and number of NTBs */
+	ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload;
+	ctx->tx_ntbs++;
+
+	/* usbnet has already counted all the framing overhead.
+	 * Adjust the stats so that the tx_bytes counter show real
+	 * payload data instead.
+	 */
+	dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload;
+
 	return skb_out;
 
 exit_no_skb:
@@ -1014,6 +1098,7 @@ static void cdc_ncm_txpath_bh(unsigned long param)
 		cdc_ncm_tx_timeout_start(ctx);
 		spin_unlock_bh(&ctx->mtx);
 	} else if (dev->net != NULL) {
+		ctx->tx_reason_timeout++;	/* count reason for transmitting */
 		spin_unlock_bh(&ctx->mtx);
 		netif_tx_lock_bh(dev->net);
 		usbnet_start_xmit(NULL, dev->net);
@@ -1149,6 +1234,7 @@ int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
 	struct usb_cdc_ncm_dpe16 *dpe16;
 	int ndpoffset;
 	int loopcount = 50; /* arbitrary max preventing infinite loop */
+	u32 payload = 0;
 
 	ndpoffset = cdc_ncm_rx_verify_nth16(ctx, skb_in);
 	if (ndpoffset < 0)
@@ -1201,6 +1287,7 @@ next_ndp:
 			skb->data = ((u8 *)skb_in->data) + offset;
 			skb_set_tail_pointer(skb, len);
 			usbnet_skb_return(dev, skb);
+			payload += len;	/* count payload bytes in this NTB */
 		}
 	}
 err_ndp:
@@ -1209,6 +1296,10 @@ err_ndp:
 	if (ndpoffset && loopcount--)
 		goto next_ndp;
 
+	/* update stats */
+	ctx->rx_overhead += skb_in->len - payload;
+	ctx->rx_ntbs++;
+
 	return 1;
 error:
 	return 0;
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 79de6724d398..88d2d7f1820f 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -116,6 +116,17 @@ struct cdc_ncm_ctx {
 	u16 rx_seq;
 	u16 connected;
 	u16 min_tx_pkt;
+
+	/* statistics */
+	u32 tx_curr_frame_payload;
+	u32 tx_reason_ntb_full;
+	u32 tx_reason_ndp_full;
+	u32 tx_reason_timeout;
+	u32 tx_reason_max_datagram;
+	u64 tx_overhead;
+	u64 tx_ntbs;
+	u64 rx_overhead;
+	u64 rx_ntbs;
 };
 
 u8 cdc_ncm_select_altsetting(struct usb_interface *intf);
-- 
cgit v1.2.3-71-gd317


From 50f1cb1cc8f50fa88dbeaf990ae2bae91b9ff306 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:26 +0200
Subject: net: cdc_ncm: use sane defaults for rx/tx buffers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lots of devices request much larger buffers than reasonable. This
cause real problems for users of hosts with limited resources.

Reducing the default buffer size to 16kB for such devices is
a reasonable trade-off between allowing them to aggregate traffic
and avoiding memory exhaustion on resource restrained hosts.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 12 ++++++++++--
 include/linux/usb/cdc_ncm.h |  4 ++++
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index f4b439847d04..bb53abe1f3a1 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -492,10 +492,18 @@ static void cdc_ncm_fix_modulus(struct usbnet *dev)
 static int cdc_ncm_setup(struct usbnet *dev)
 {
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	u32 def_rx, def_tx;
+
+	/* be conservative when selecting intial buffer size to
+	 * increase the number of hosts this will work for
+	 */
+	def_rx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_RX,
+		       le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize));
+	def_tx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_TX,
+		       le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
 
 	/* clamp rx_max and tx_max and inform device */
-	cdc_ncm_update_rxtx_max(dev, le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize),
-				le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
+	cdc_ncm_update_rxtx_max(dev, def_rx, def_tx);
 
 	/* sanitize the modulus and remainder values */
 	cdc_ncm_fix_modulus(dev);
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 88d2d7f1820f..cde506731c48 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -52,6 +52,10 @@
 #define	CDC_NCM_NTB_MAX_SIZE_TX			32768	/* bytes */
 #define	CDC_NCM_NTB_MAX_SIZE_RX			32768	/* bytes */
 
+/* Initial NTB length */
+#define	CDC_NCM_NTB_DEF_SIZE_TX			16384	/* bytes */
+#define	CDC_NCM_NTB_DEF_SIZE_RX			16384	/* bytes */
+
 /* Minimum value for MaxDatagramSize, ch. 6.2.9 */
 #define	CDC_NCM_MIN_DATAGRAM_SIZE		1514	/* bytes */
 
-- 
cgit v1.2.3-71-gd317


From fa83dbeee55865678025b6c1637ca08860209f87 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:28 +0200
Subject: net: cdc_ncm: remove redundant "disconnected" flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Calling netif_carrier_{on,off} is sufficient.  There is no need
to duplicate the carrier state in a driver specific flag.

Acked-by: Enrico Mioso <mrkiko.rs@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c        | 19 ++-----------------
 drivers/net/usb/huawei_cdc_ncm.c | 13 -------------
 include/linux/usb/cdc_ncm.h      |  1 -
 3 files changed, 2 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 1d1ff2fa8ae1..783c4ed96395 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1364,11 +1364,10 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 		 * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be
 		 * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE.
 		 */
-		ctx->connected = le16_to_cpu(event->wValue);
 		netif_info(dev, link, dev->net,
 			   "network connection: %sconnected\n",
-			   ctx->connected ? "" : "dis");
-		usbnet_link_change(dev, ctx->connected, 0);
+			   !!event->wValue ? "" : "dis");
+		usbnet_link_change(dev, !!event->wValue, 0);
 		break;
 
 	case USB_CDC_NOTIFY_SPEED_CHANGE:
@@ -1388,23 +1387,11 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 	}
 }
 
-static int cdc_ncm_check_connect(struct usbnet *dev)
-{
-	struct cdc_ncm_ctx *ctx;
-
-	ctx = (struct cdc_ncm_ctx *)dev->data[0];
-	if (ctx == NULL)
-		return 1;	/* disconnected */
-
-	return !ctx->connected;
-}
-
 static const struct driver_info cdc_ncm_info = {
 	.description = "CDC NCM",
 	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
-	.check_connect = cdc_ncm_check_connect,
 	.manage_power = usbnet_manage_power,
 	.status = cdc_ncm_status,
 	.rx_fixup = cdc_ncm_rx_fixup,
@@ -1418,7 +1405,6 @@ static const struct driver_info wwan_info = {
 			| FLAG_WWAN,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
-	.check_connect = cdc_ncm_check_connect,
 	.manage_power = usbnet_manage_power,
 	.status = cdc_ncm_status,
 	.rx_fixup = cdc_ncm_rx_fixup,
@@ -1432,7 +1418,6 @@ static const struct driver_info wwan_noarp_info = {
 			| FLAG_WWAN | FLAG_NOARP,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
-	.check_connect = cdc_ncm_check_connect,
 	.manage_power = usbnet_manage_power,
 	.status = cdc_ncm_status,
 	.rx_fixup = cdc_ncm_rx_fixup,
diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c
index 312178d7b698..f9822bc75425 100644
--- a/drivers/net/usb/huawei_cdc_ncm.c
+++ b/drivers/net/usb/huawei_cdc_ncm.c
@@ -172,24 +172,11 @@ err:
 	return ret;
 }
 
-static int huawei_cdc_ncm_check_connect(struct usbnet *usbnet_dev)
-{
-	struct cdc_ncm_ctx *ctx;
-
-	ctx = (struct cdc_ncm_ctx *)usbnet_dev->data[0];
-
-	if (ctx == NULL)
-		return 1; /* disconnected */
-
-	return !ctx->connected;
-}
-
 static const struct driver_info huawei_cdc_ncm_info = {
 	.description = "Huawei CDC NCM device",
 	.flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN,
 	.bind = huawei_cdc_ncm_bind,
 	.unbind = huawei_cdc_ncm_unbind,
-	.check_connect = huawei_cdc_ncm_check_connect,
 	.manage_power = huawei_cdc_ncm_manage_power,
 	.rx_fixup = cdc_ncm_rx_fixup,
 	.tx_fixup = cdc_ncm_tx_fixup,
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index cde506731c48..8c5e38819828 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -118,7 +118,6 @@ struct cdc_ncm_ctx {
 	u16 tx_ndp_modulus;
 	u16 tx_seq;
 	u16 rx_seq;
-	u16 connected;
 	u16 min_tx_pkt;
 
 	/* statistics */
-- 
cgit v1.2.3-71-gd317


From a17597d3b418ca5a394d14724ccfc295cb3186c8 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Fri, 16 May 2014 11:42:43 +0930
Subject: virtio-rng: fixes for device registration/unregistration

There are several fixes in this patch (mostly because it's hard
splitting them up):

 - Revert the name field in struct hwrng back to 'const'. Also, don't
do an extra kmalloc for the name - just wasteful.
 - Deal with allocation failures properly.
 - Use IDA to allocate device number instead of brute forcing one.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/hw_random/virtio-rng.c | 41 +++++++++++++++++++++----------------
 include/linux/hw_random.h           |  2 +-
 2 files changed, 24 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 5b25daa7f798..f3e71501de54 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -25,6 +25,7 @@
 #include <linux/virtio_rng.h>
 #include <linux/module.h>
 
+static DEFINE_IDA(rng_index_ida);
 
 struct virtrng_info {
 	struct virtio_device *vdev;
@@ -33,6 +34,8 @@ struct virtrng_info {
 	unsigned int data_avail;
 	struct completion have_data;
 	bool busy;
+	char name[25];
+	int index;
 };
 
 static void random_recv_done(struct virtqueue *vq)
@@ -92,41 +95,45 @@ static void virtio_cleanup(struct hwrng *rng)
 
 static int probe_common(struct virtio_device *vdev)
 {
-	int err, i;
+	int err, index;
 	struct virtrng_info *vi = NULL;
 
 	vi = kzalloc(sizeof(struct virtrng_info), GFP_KERNEL);
-	vi->hwrng.name = kmalloc(40, GFP_KERNEL);
+	if (!vi)
+		return -ENOMEM;
+
+	vi->index = index = ida_simple_get(&rng_index_ida, 0, 0, GFP_KERNEL);
+	if (index < 0) {
+		kfree(vi);
+		return index;
+	}
+	sprintf(vi->name, "virtio_rng.%d", index);
 	init_completion(&vi->have_data);
 
-	vi->hwrng.read = virtio_read;
-	vi->hwrng.cleanup = virtio_cleanup;
-	vi->hwrng.priv = (unsigned long)vi;
+	vi->hwrng = (struct hwrng) {
+		.read = virtio_read,
+		.cleanup = virtio_cleanup,
+		.priv = (unsigned long)vi,
+		.name = vi->name,
+	};
 	vdev->priv = vi;
 
 	/* We expect a single virtqueue. */
 	vi->vq = virtio_find_single_vq(vdev, random_recv_done, "input");
 	if (IS_ERR(vi->vq)) {
 		err = PTR_ERR(vi->vq);
-		kfree(vi->hwrng.name);
 		vi->vq = NULL;
 		kfree(vi);
-		vi = NULL;
+		ida_simple_remove(&rng_index_ida, index);
 		return err;
 	}
 
-	i = 0;
-	do {
-		sprintf(vi->hwrng.name, "virtio_rng.%d", i++);
-		err = hwrng_register(&vi->hwrng);
-	} while (err == -EEXIST);
-
+	err = hwrng_register(&vi->hwrng);
 	if (err) {
 		vdev->config->del_vqs(vdev);
-		kfree(vi->hwrng.name);
 		vi->vq = NULL;
 		kfree(vi);
-		vi = NULL;
+		ida_simple_remove(&rng_index_ida, index);
 		return err;
 	}
 
@@ -140,10 +147,8 @@ static void remove_common(struct virtio_device *vdev)
 	vi->busy = false;
 	hwrng_unregister(&vi->hwrng);
 	vdev->config->del_vqs(vdev);
-	kfree(vi->hwrng.name);
-	vi->vq = NULL;
+	ida_simple_remove(&rng_index_ida, vi->index);
 	kfree(vi);
-	vi = NULL;
 }
 
 static int virtrng_probe(struct virtio_device *vdev)
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 02d9c87be54c..b4b0eef5fddf 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -31,7 +31,7 @@
  * @priv:		Private data, for use by the RNG driver.
  */
 struct hwrng {
-	char *name;
+	const char *name;
 	int (*init)(struct hwrng *rng);
 	void (*cleanup)(struct hwrng *rng);
 	int (*data_present)(struct hwrng *rng, int wait);
-- 
cgit v1.2.3-71-gd317


From 7455fa2422898eee3464032351d20695930d9542 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Thu, 15 May 2014 01:41:23 +0100
Subject: ethtool: Name the 'no change' value for setting RSS hash key but not
 indir table

We usually allocate special values of u32 fields starting from the top
down, so also change the value to 0xffffffff.  As these operations
haven't been included in a stable release yet, it's not too late to
change.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 include/uapi/linux/ethtool.h | 12 +++++++-----
 net/core/ethtool.c           | 12 +++++++-----
 2 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index d47d31d6fa0e..cba18e3f8fab 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -850,7 +850,8 @@ struct ethtool_rxfh_indir {
  * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
  * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
  * @rss_context: RSS context identifier.
- * @indir_size: On entry, the array size of the user buffer, which may be zero.
+ * @indir_size: On entry, the array size of the user buffer, which may be zero,
+ *		or (for %ETHTOOL_SRSSH), %ETH_RXFH_INDIR_NO_CHANGE.
  *		On return from %ETHTOOL_GRSSH, the array size of the hardware
  *		indirection table.
  * @key_size:	On entry, the array size of the user buffer in bytes,
@@ -861,10 +862,10 @@ struct ethtool_rxfh_indir {
  *		of size @indir_size followed by hash key of size @key_size.
  *
  * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the
- * size should be returned.  For %ETHTOOL_SRSSH, a @indir_size of 0xDEADBEEF
- * means that indir table setting is not requested and a @indir_size of zero
- * means the indir table should be reset to default values.  This last feature
- * is not supported by the original implementations.
+ * size should be returned.  For %ETHTOOL_SRSSH, an @indir_size of
+ * %ETH_RXFH_INDIR_NO_CHANGE means that indir table setting is not requested
+ * and a @indir_size of zero means the indir table should be reset to default
+ * values.
  */
 struct ethtool_rxfh {
 	__u32   cmd;
@@ -874,6 +875,7 @@ struct ethtool_rxfh {
 	__u32	rsvd[2];
 	__u32   rss_config[0];
 };
+#define ETH_RXFH_INDIR_NO_CHANGE	0xffffffff
 
 /**
  * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index c834cb29f682..7156fe5ca876 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -803,12 +803,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 
 	/* If either indir or hash key is valid, proceed further.
 	 */
-	if ((user_indir_size && ((user_indir_size != 0xDEADBEEF) &&
-				 user_indir_size != dev_indir_size)) ||
+	if ((user_indir_size &&
+	     user_indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
+	     user_indir_size != dev_indir_size) ||
 	    (user_key_size && (user_key_size != dev_key_size)))
 		return -EINVAL;
 
-	if (user_indir_size != 0xDEADBEEF)
+	if (user_indir_size != ETH_RXFH_INDIR_NO_CHANGE)
 		indir_bytes = dev_indir_size * sizeof(indir[0]);
 
 	rss_config = kzalloc(indir_bytes + user_key_size, GFP_USER);
@@ -821,9 +822,10 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 		goto out;
 
 	/* user_indir_size == 0 means reset the indir table to default.
-	 * user_indir_size == 0xDEADBEEF means indir setting is not requested.
+	 * user_indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
 	 */
-	if (user_indir_size && user_indir_size != 0xDEADBEEF) {
+	if (user_indir_size &&
+	    user_indir_size != ETH_RXFH_INDIR_NO_CHANGE) {
 		indir = (u32 *)rss_config;
 		ret = ethtool_copy_validate_indir(indir,
 						  useraddr + rss_cfg_offset,
-- 
cgit v1.2.3-71-gd317


From 38c891a49dec43dbb1575cc40d10dbd49c4961ab Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Thu, 15 May 2014 01:07:16 +0100
Subject: ethtool: Improve explanation of the two arrays following struct
 ethtool_rxfh

The use of two variable-length arrays is unusual so deserves a bit
more explanation.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 include/uapi/linux/ethtool.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index cba18e3f8fab..e3c7a719c76b 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -850,16 +850,17 @@ struct ethtool_rxfh_indir {
  * struct ethtool_rxfh - command to get/set RX flow hash indir or/and hash key.
  * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
  * @rss_context: RSS context identifier.
- * @indir_size: On entry, the array size of the user buffer, which may be zero,
- *		or (for %ETHTOOL_SRSSH), %ETH_RXFH_INDIR_NO_CHANGE.
- *		On return from %ETHTOOL_GRSSH, the array size of the hardware
- *		indirection table.
- * @key_size:	On entry, the array size of the user buffer in bytes,
- *		which may be zero.
- *		On return from %ETHTOOL_GRSSH, the size of the RSS hash key.
+ * @indir_size: On entry, the array size of the user buffer for the
+ *	indirection table, which may be zero, or (for %ETHTOOL_SRSSH),
+ *	%ETH_RXFH_INDIR_NO_CHANGE.  On return from %ETHTOOL_GRSSH,
+ *	the array size of the hardware indirection table.
+ * @key_size: On entry, the array size of the user buffer for the hash key,
+ *	which may be zero.  On return from %ETHTOOL_GRSSH, the size of the
+ *	hardware hash key.
  * @rsvd:	Reserved for future extensions.
  * @rss_config: RX ring/queue index for each hash value i.e., indirection table
- *		of size @indir_size followed by hash key of size @key_size.
+ *	of @indir_size __u32 elements, followed by hash key of @key_size
+ *	bytes.
  *
  * For %ETHTOOL_GRSSH, a @indir_size and key_size of zero means that only the
  * size should be returned.  For %ETHTOOL_SRSSH, an @indir_size of
-- 
cgit v1.2.3-71-gd317


From 678e30df2e5664619e06fcfea5490a476826d8fe Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 19 May 2014 01:25:59 +0100
Subject: ethtool: Expand documentation of ethtool_ops::{get,set}_rxfh()

Some corner-cases are not explained properly.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 include/linux/ethtool.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 212f537fc686..886e127d51a6 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -162,15 +162,16 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
  *	Will not be called if @get_rxfh_indir_size returns zero.
  * @get_rxfh: Get the contents of the RX flow hash indirection table and hash
  *	key.
- *	Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size
- *	returns zero.
+ *	Will only be called if one or both of @get_rxfh_indir_size and
+ *	@get_rxfh_key_size are implemented and return non-zero.
  *	Returns a negative error code or zero.
  * @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
  *	Will not be called if @get_rxfh_indir_size returns zero.
- * @set_rxfh: Set the contents of the RX flow hash indirection table and
- *	hash key.
- *	Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size
- *	returns zero.
+ * @set_rxfh: Set the contents of the RX flow hash indirection table and/or
+ *	hash key.  Either or both arguments may be %NULL if that attribute
+ *	is not to be changed.
+ *	Will only be called if one or both of @get_rxfh_indir_size and
+ *	@get_rxfh_key_size are implemented and return non-zero.
  *	Returns a negative error code or zero.
  * @get_channels: Get number of channels.
  * @set_channels: Set number of channels.  Returns a negative error code or
@@ -244,8 +245,8 @@ struct ethtool_ops {
 	int	(*reset)(struct net_device *, u32 *);
 	u32	(*get_rxfh_key_size)(struct net_device *);
 	u32	(*get_rxfh_indir_size)(struct net_device *);
-	int	(*get_rxfh)(struct net_device *, u32 *, u8 *);
-	int	(*set_rxfh)(struct net_device *, u32 *, u8 *);
+	int	(*get_rxfh)(struct net_device *, u32 *indir, u8 *key);
+	int	(*set_rxfh)(struct net_device *, u32 *indir, u8 *key);
 	int	(*get_rxfh_indir)(struct net_device *, u32 *);
 	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
-- 
cgit v1.2.3-71-gd317


From 61d88c6811f216de4ec26aafe24e650dc1aeb00e Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 19 May 2014 01:29:42 +0100
Subject: ethtool: Disallow ETHTOOL_SRSSH with both indir table and hash key
 unchanged

This would be a no-op, so there is no reason to request it.

This also allows conversion of the current implementations of
ethtool_ops::{get,set}_rxfh_indir to ethtool_ops::{get,set}_rxfh
with no change other than their parameters.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 include/linux/ethtool.h | 4 ++--
 net/core/ethtool.c      | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 886e127d51a6..de687a97c6e7 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -168,8 +168,8 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
  * @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
  *	Will not be called if @get_rxfh_indir_size returns zero.
  * @set_rxfh: Set the contents of the RX flow hash indirection table and/or
- *	hash key.  Either or both arguments may be %NULL if that attribute
- *	is not to be changed.
+ *	hash key.  In case only the indirection table or hash key is to be
+ *	changed, the other argument will be %NULL.
  *	Will only be called if one or both of @get_rxfh_indir_size and
  *	@get_rxfh_key_size are implemented and return non-zero.
  *	Returns a negative error code or zero.
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 7156fe5ca876..b8857348bdf3 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -802,11 +802,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 		return -EFAULT;
 
 	/* If either indir or hash key is valid, proceed further.
+	 * It is not valid to request that both be unchanged.
 	 */
 	if ((user_indir_size &&
 	     user_indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
 	     user_indir_size != dev_indir_size) ||
-	    (user_key_size && (user_key_size != dev_key_size)))
+	    (user_key_size && (user_key_size != dev_key_size)) ||
+	    (user_indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
+	     user_key_size == 0))
 		return -EINVAL;
 
 	if (user_indir_size != ETH_RXFH_INDIR_NO_CHANGE)
-- 
cgit v1.2.3-71-gd317


From 33cb0fa7888510b5bd2096352b200cfe29db10fe Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Thu, 15 May 2014 02:01:23 +0100
Subject: ethtool, be2net: constify array pointer parameters to
 ethtool_ops::set_rxfh

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c    | 2 +-
 drivers/net/ethernet/emulex/benet/be_cmds.h    | 2 +-
 drivers/net/ethernet/emulex/benet/be_ethtool.c | 3 ++-
 include/linux/ethtool.h                        | 3 ++-
 4 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 476752d0a6a4..7b59da241ccb 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -2033,7 +2033,7 @@ int be_cmd_reset_function(struct be_adapter *adapter)
 }
 
 int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable,
-		      u32 rss_hash_opts, u16 table_size, u8 *rss_hkey)
+		      u32 rss_hash_opts, u16 table_size, const u8 *rss_hkey)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_rss_config *req;
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 228d4b611084..451f3138b8fb 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -2068,7 +2068,7 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num,
 			u32 *function_mode, u32 *function_caps, u16 *asic_rev);
 int be_cmd_reset_function(struct be_adapter *adapter);
 int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable,
-		      u32 rss_hash_opts, u16 table_size, u8 *rss_hkey);
+		      u32 rss_hash_opts, u16 table_size, const u8 *rss_hkey);
 int be_process_mcc(struct be_adapter *adapter);
 int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon,
 			    u8 status, u8 state);
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 970ae337daac..e2da4d20dd3d 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -1117,7 +1117,8 @@ static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey)
 	return 0;
 }
 
-static int be_set_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey)
+static int be_set_rxfh(struct net_device *netdev, const u32 *indir,
+		       const u8 *hkey)
 {
 	int rc = 0, i, j;
 	struct be_adapter *adapter = netdev_priv(netdev);
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index de687a97c6e7..874fde01d398 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -246,7 +246,8 @@ struct ethtool_ops {
 	u32	(*get_rxfh_key_size)(struct net_device *);
 	u32	(*get_rxfh_indir_size)(struct net_device *);
 	int	(*get_rxfh)(struct net_device *, u32 *indir, u8 *key);
-	int	(*set_rxfh)(struct net_device *, u32 *indir, u8 *key);
+	int	(*set_rxfh)(struct net_device *, const u32 *indir,
+			    const u8 *key);
 	int	(*get_rxfh_indir)(struct net_device *, u32 *);
 	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
-- 
cgit v1.2.3-71-gd317


From d3091298570006fa538ec9beacbfb1098964962e Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 16 May 2014 23:26:05 +0200
Subject: sparc: fix sparse warnings in smp_32.c + smp_64.c

Fix following warnings:
smp_32.c:177:5: warning: symbol 'setup_profiling_timer' was not declared. Should it be static?
smp_64.c:1202:5: warning: symbol 'setup_profiling_timer' was not declared. Should it be static?
smp_64.c:989:6: warning: symbol 'kgdb_roundup_cpus' was not declared. Should it be static?

Add prototype to include/linux/profile.h of setup_profiling_timer
Add missing include to smp_64.c

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/smp_32.c | 1 +
 arch/sparc/kernel/smp_64.c | 1 +
 include/linux/profile.h    | 1 +
 3 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 9d3297d8d730..7958242d63c5 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -20,6 +20,7 @@
 #include <linux/seq_file.h>
 #include <linux/cache.h>
 #include <linux/delay.h>
+#include <linux/profile.h>
 #include <linux/cpu.h>
 
 #include <asm/ptrace.h>
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index df91e78dbd95..afc71bf719b1 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -25,6 +25,7 @@
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
 #include <linux/slab.h>
+#include <linux/kgdb.h>
 
 #include <asm/head.h>
 #include <asm/ptrace.h>
diff --git a/include/linux/profile.h b/include/linux/profile.h
index aaad3861beb8..b537a25ffa17 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -44,6 +44,7 @@ extern int prof_on __read_mostly;
 int profile_init(void);
 int profile_setup(char *str);
 void profile_tick(int type);
+int setup_profiling_timer(unsigned int multiplier);
 
 /*
  * Add multiple profiler hits to a given address:
-- 
cgit v1.2.3-71-gd317


From 3b36fbb01dc50f58e7803006f5a99683daf26c8c Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Sun, 18 May 2014 22:44:35 -0700
Subject: Input: pixcir_i2c_ts - initialize interrupt mode and power mode

Introduce helper functions to configure power and interrupt registers.
Default to IDLE mode on probe as device supports auto wakeup to ACVIE mode
on detecting finger touch.

Configure interrupt mode and polarity on start up.  Power down on device
closure or module removal.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Acked-by: Mugunthan V N <mugunthanvnm@ti.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/pixcir_i2c_ts.c | 182 ++++++++++++++++++++++++++++--
 include/linux/input/pixcir_ts.h           |  42 +++++++
 2 files changed, 216 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c
index 5d36243bdc79..6c6f6dacb858 100644
--- a/drivers/input/touchscreen/pixcir_i2c_ts.c
+++ b/drivers/input/touchscreen/pixcir_i2c_ts.c
@@ -29,7 +29,7 @@ struct pixcir_i2c_ts_data {
 	struct i2c_client *client;
 	struct input_dev *input;
 	const struct pixcir_ts_platform_data *chip;
-	bool exiting;
+	bool running;
 };
 
 static void pixcir_ts_poscheck(struct pixcir_i2c_ts_data *data)
@@ -88,7 +88,7 @@ static irqreturn_t pixcir_ts_isr(int irq, void *dev_id)
 {
 	struct pixcir_i2c_ts_data *tsdata = dev_id;
 
-	while (!tsdata->exiting) {
+	while (tsdata->running) {
 		pixcir_ts_poscheck(tsdata);
 
 		if (tsdata->chip->attb_read_val())
@@ -100,6 +100,164 @@ static irqreturn_t pixcir_ts_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static int pixcir_set_power_mode(struct pixcir_i2c_ts_data *ts,
+				 enum pixcir_power_mode mode)
+{
+	struct device *dev = &ts->client->dev;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(ts->client, PIXCIR_REG_POWER_MODE);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't read reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_POWER_MODE, ret);
+		return ret;
+	}
+
+	ret &= ~PIXCIR_POWER_MODE_MASK;
+	ret |= mode;
+
+	/* Always AUTO_IDLE */
+	ret |= PIXCIR_POWER_ALLOW_IDLE;
+
+	ret = i2c_smbus_write_byte_data(ts->client, PIXCIR_REG_POWER_MODE, ret);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't write reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_POWER_MODE, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Set the interrupt mode for the device i.e. ATTB line behaviour
+ *
+ * @polarity : 1 for active high, 0 for active low.
+ */
+static int pixcir_set_int_mode(struct pixcir_i2c_ts_data *ts,
+			       enum pixcir_int_mode mode, bool polarity)
+{
+	struct device *dev = &ts->client->dev;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(ts->client, PIXCIR_REG_INT_MODE);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't read reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, ret);
+		return ret;
+	}
+
+	ret &= ~PIXCIR_INT_MODE_MASK;
+	ret |= mode;
+
+	if (polarity)
+		ret |= PIXCIR_INT_POL_HIGH;
+	else
+		ret &= ~PIXCIR_INT_POL_HIGH;
+
+	ret = i2c_smbus_write_byte_data(ts->client, PIXCIR_REG_INT_MODE, ret);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't write reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Enable/disable interrupt generation
+ */
+static int pixcir_int_enable(struct pixcir_i2c_ts_data *ts, bool enable)
+{
+	struct device *dev = &ts->client->dev;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(ts->client, PIXCIR_REG_INT_MODE);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't read reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, ret);
+		return ret;
+	}
+
+	if (enable)
+		ret |= PIXCIR_INT_ENABLE;
+	else
+		ret &= ~PIXCIR_INT_ENABLE;
+
+	ret = i2c_smbus_write_byte_data(ts->client, PIXCIR_REG_INT_MODE, ret);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't write reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int pixcir_start(struct pixcir_i2c_ts_data *ts)
+{
+	struct device *dev = &ts->client->dev;
+	int error;
+
+	/* LEVEL_TOUCH interrupt with active low polarity */
+	error = pixcir_set_int_mode(ts, PIXCIR_INT_LEVEL_TOUCH, 0);
+	if (error) {
+		dev_err(dev, "Failed to set interrupt mode: %d\n", error);
+		return error;
+	}
+
+	ts->running = true;
+	mb();	/* Update status before IRQ can fire */
+
+	/* enable interrupt generation */
+	error = pixcir_int_enable(ts, true);
+	if (error) {
+		dev_err(dev, "Failed to enable interrupt generation: %d\n",
+			error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int pixcir_stop(struct pixcir_i2c_ts_data *ts)
+{
+	int error;
+
+	/* Disable interrupt generation */
+	error = pixcir_int_enable(ts, false);
+	if (error) {
+		dev_err(&ts->client->dev,
+			"Failed to disable interrupt generation: %d\n",
+			error);
+		return error;
+	}
+
+	/* Exit ISR if running, no more report parsing */
+	ts->running = false;
+	mb();	/* update status before we synchronize irq */
+
+	/* Wait till running ISR is complete */
+	synchronize_irq(ts->client->irq);
+
+	return 0;
+}
+
+static int pixcir_input_open(struct input_dev *dev)
+{
+	struct pixcir_i2c_ts_data *ts = input_get_drvdata(dev);
+
+	return pixcir_start(ts);
+}
+
+static void pixcir_input_close(struct input_dev *dev)
+{
+	struct pixcir_i2c_ts_data *ts = input_get_drvdata(dev);
+
+	pixcir_stop(ts);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int pixcir_i2c_ts_suspend(struct device *dev)
 {
@@ -156,6 +314,8 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 
 	input->name = client->name;
 	input->id.bustype = BUS_I2C;
+	input->open = pixcir_input_open;
+	input->close = pixcir_input_close;
 	input->dev.parent = &client->dev;
 
 	__set_bit(EV_KEY, input->evbit);
@@ -176,11 +336,22 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 		return error;
 	}
 
+	/* Always be in IDLE mode to save power, device supports auto wake */
+	error = pixcir_set_power_mode(tsdata, PIXCIR_POWER_IDLE);
+	if (error) {
+		dev_err(dev, "Failed to set IDLE mode\n");
+		return error;
+	}
+
+	/* Stop device till opened */
+	error = pixcir_stop(tsdata);
+	if (error)
+		return error;
+
 	error = input_register_device(input);
 	if (error)
 		return error;
 
-	i2c_set_clientdata(client, tsdata);
 	device_init_wakeup(&client->dev, 1);
 
 	return 0;
@@ -188,13 +359,8 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 
 static int pixcir_i2c_ts_remove(struct i2c_client *client)
 {
-	struct pixcir_i2c_ts_data *tsdata = i2c_get_clientdata(client);
-
 	device_init_wakeup(&client->dev, 0);
 
-	tsdata->exiting = true;
-	mb();
-
 	return 0;
 }
 
diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h
index 7163d91c0373..7942804464d3 100644
--- a/include/linux/input/pixcir_ts.h
+++ b/include/linux/input/pixcir_ts.h
@@ -1,6 +1,48 @@
 #ifndef	_PIXCIR_I2C_TS_H
 #define	_PIXCIR_I2C_TS_H
 
+/*
+ * Register map
+ */
+#define PIXCIR_REG_POWER_MODE	51
+#define PIXCIR_REG_INT_MODE	52
+
+/*
+ * Power modes:
+ * active: max scan speed
+ * idle: lower scan speed with automatic transition to active on touch
+ * halt: datasheet says sleep but this is more like halt as the chip
+ *       clocks are cut and it can only be brought out of this mode
+ *	 using the RESET pin.
+ */
+enum pixcir_power_mode {
+	PIXCIR_POWER_ACTIVE,
+	PIXCIR_POWER_IDLE,
+	PIXCIR_POWER_HALT,
+};
+
+#define PIXCIR_POWER_MODE_MASK	0x03
+#define PIXCIR_POWER_ALLOW_IDLE (1UL << 2)
+
+/*
+ * Interrupt modes:
+ * periodical: interrupt is asserted periodicaly
+ * diff coordinates: interrupt is asserted when coordinates change
+ * level on touch: interrupt level asserted during touch
+ * pulse on touch: interrupt pulse asserted druing touch
+ *
+ */
+enum pixcir_int_mode {
+	PIXCIR_INT_PERIODICAL,
+	PIXCIR_INT_DIFF_COORD,
+	PIXCIR_INT_LEVEL_TOUCH,
+	PIXCIR_INT_PULSE_TOUCH,
+};
+
+#define PIXCIR_INT_MODE_MASK	0x03
+#define PIXCIR_INT_ENABLE	(1UL << 3)
+#define PIXCIR_INT_POL_HIGH	(1UL << 2)
+
 struct pixcir_ts_platform_data {
 	int (*attb_read_val)(void);
 	int x_max;
-- 
cgit v1.2.3-71-gd317


From 0dfc8d41bfa091a61354eea73199a5af0eaae9c0 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Sun, 18 May 2014 22:46:43 -0700
Subject: Input: pixcir_i2c_ts - get rid of pdata->attb_read_val()

Get rid of the attb_read_val() platform hook. Instead, read the ATTB gpio
directly from the driver.

Fail if valid ATTB gpio is not provided by patform data.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Acked-by: Mugunthan V N <mugunthanvnm@ti.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/pixcir_i2c_ts.c | 16 +++++++++++++++-
 include/linux/input/pixcir_ts.h           |  2 +-
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c
index 6c6f6dacb858..f2c0ae18e24a 100644
--- a/drivers/input/touchscreen/pixcir_i2c_ts.c
+++ b/drivers/input/touchscreen/pixcir_i2c_ts.c
@@ -24,6 +24,7 @@
 #include <linux/i2c.h>
 #include <linux/input.h>
 #include <linux/input/pixcir_ts.h>
+#include <linux/gpio.h>
 
 struct pixcir_i2c_ts_data {
 	struct i2c_client *client;
@@ -87,11 +88,12 @@ static void pixcir_ts_poscheck(struct pixcir_i2c_ts_data *data)
 static irqreturn_t pixcir_ts_isr(int irq, void *dev_id)
 {
 	struct pixcir_i2c_ts_data *tsdata = dev_id;
+	const struct pixcir_ts_platform_data *pdata = tsdata->chip;
 
 	while (tsdata->running) {
 		pixcir_ts_poscheck(tsdata);
 
-		if (tsdata->chip->attb_read_val())
+		if (gpio_get_value(pdata->gpio_attb))
 			break;
 
 		msleep(20);
@@ -298,6 +300,11 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 		return -EINVAL;
 	}
 
+	if (!gpio_is_valid(pdata->gpio_attb)) {
+		dev_err(dev, "Invalid gpio_attb in pdata\n");
+		return -EINVAL;
+	}
+
 	tsdata = devm_kzalloc(dev, sizeof(*tsdata), GFP_KERNEL);
 	if (!tsdata)
 		return -ENOMEM;
@@ -328,6 +335,13 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 
 	input_set_drvdata(input, tsdata);
 
+	error = devm_gpio_request_one(dev, pdata->gpio_attb,
+				      GPIOF_DIR_IN, "pixcir_i2c_attb");
+	if (error) {
+		dev_err(dev, "Failed to request ATTB gpio\n");
+		return error;
+	}
+
 	error = devm_request_threaded_irq(dev, client->irq, NULL, pixcir_ts_isr,
 					  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 					  client->name, tsdata);
diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h
index 7942804464d3..160cf353aa39 100644
--- a/include/linux/input/pixcir_ts.h
+++ b/include/linux/input/pixcir_ts.h
@@ -44,9 +44,9 @@ enum pixcir_int_mode {
 #define PIXCIR_INT_POL_HIGH	(1UL << 2)
 
 struct pixcir_ts_platform_data {
-	int (*attb_read_val)(void);
 	int x_max;
 	int y_max;
+	int gpio_attb;		/* GPIO connected to ATTB line */
 };
 
 #endif
-- 
cgit v1.2.3-71-gd317


From 2cefdb1f0a27150755ef2730bafc58bf2ed16571 Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick.dyer@itdev.co.uk>
Date: Sun, 18 May 2014 22:59:20 -0700
Subject: Input: atmel_mxt_ts - remove unnecessary platform data

It is not necessary to download these values to the maXTouch chip on every
probe, since they are stored in NVRAM. It makes life difficult when tuning
the device to keep them in sync with the config array/file, and requires a
new kernel build for minor tweaks.

These parameters only represent a tiny subset of the available
configuration options, tracking all of these options in platform data would
be a endless task. In addition, different versions of maXTouch chips may
have these values in different places or may not even have them at all.

Having these values also makes life more complex for device tree and other
platforms where having to define a static configuration isn't helpful.

Signed-off-by: Nick Dyer <nick.dyer@itdev.co.uk>
Acked-by: Benson Leung <bleung@chromium.org>
Acked-by: Yufeng Shen <miletus@chromium.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 arch/arm/mach-s5pv210/mach-goni.c         |  5 ----
 drivers/input/touchscreen/atmel_mxt_ts.c  | 50 -------------------------------
 drivers/platform/chrome/chromeos_laptop.c | 10 -------
 include/linux/i2c/atmel_mxt_ts.h          |  6 +---
 4 files changed, 1 insertion(+), 70 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c
index b41a38a75844..e549ecf0e5dc 100644
--- a/arch/arm/mach-s5pv210/mach-goni.c
+++ b/arch/arm/mach-s5pv210/mach-goni.c
@@ -239,13 +239,8 @@ static void __init goni_radio_init(void)
 
 /* TSP */
 static struct mxt_platform_data qt602240_platform_data = {
-	.x_line		= 17,
-	.y_line		= 11,
 	.x_size		= 800,
 	.y_size		= 480,
-	.blen		= 0x21,
-	.threshold	= 0x28,
-	.voltage	= 2800000,              /* 2.8V */
 	.orient		= MXT_DIAGONAL,
 	.irqflags	= IRQF_TRIGGER_FALLING,
 };
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index a70400754e92..7eb515caf215 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -685,54 +685,6 @@ static int mxt_make_highchg(struct mxt_data *data)
 	return 0;
 }
 
-static void mxt_handle_pdata(struct mxt_data *data)
-{
-	const struct mxt_platform_data *pdata = data->pdata;
-	u8 voltage;
-
-	/* Set touchscreen lines */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XSIZE,
-			pdata->x_line);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YSIZE,
-			pdata->y_line);
-
-	/* Set touchscreen orient */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_ORIENT,
-			pdata->orient);
-
-	/* Set touchscreen burst length */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_BLEN, pdata->blen);
-
-	/* Set touchscreen threshold */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_TCHTHR, pdata->threshold);
-
-	/* Set touchscreen resolution */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_XRANGE_LSB, (pdata->x_size - 1) & 0xff);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_XRANGE_MSB, (pdata->x_size - 1) >> 8);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_YRANGE_LSB, (pdata->y_size - 1) & 0xff);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_YRANGE_MSB, (pdata->y_size - 1) >> 8);
-
-	/* Set touchscreen voltage */
-	if (pdata->voltage) {
-		if (pdata->voltage < MXT_VOLTAGE_DEFAULT) {
-			voltage = (MXT_VOLTAGE_DEFAULT - pdata->voltage) /
-				MXT_VOLTAGE_STEP;
-			voltage = 0xff - voltage + 1;
-		} else
-			voltage = (pdata->voltage - MXT_VOLTAGE_DEFAULT) /
-				MXT_VOLTAGE_STEP;
-
-		mxt_write_object(data, MXT_SPT_CTECONFIG_T28,
-				MXT_CTE_VOLTAGE, voltage);
-	}
-}
-
 static int mxt_get_info(struct mxt_data *data)
 {
 	struct i2c_client *client = data->client;
@@ -840,8 +792,6 @@ static int mxt_initialize(struct mxt_data *data)
 	if (error)
 		goto err_free_object_table;
 
-	mxt_handle_pdata(data);
-
 	/* Backup to memory */
 	mxt_write_object(data, MXT_GEN_COMMAND_T6,
 			MXT_COMMAND_BACKUPNV,
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index 7f3aad0e115c..2559a0407c58 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -85,13 +85,8 @@ static struct i2c_board_info tsl2563_als_device = {
 };
 
 static struct mxt_platform_data atmel_224s_tp_platform_data = {
-	.x_line			= 18,
-	.y_line			= 12,
 	.x_size			= 102*20,
 	.y_size			= 68*20,
-	.blen			= 0x80,	/* Gain setting is in upper 4 bits */
-	.threshold		= 0x32,
-	.voltage		= 0,	/* 3.3V */
 	.orient			= MXT_VERTICAL_FLIP,
 	.irqflags		= IRQF_TRIGGER_FALLING,
 	.is_tp			= true,
@@ -110,13 +105,8 @@ static struct i2c_board_info atmel_224s_tp_device = {
 };
 
 static struct mxt_platform_data atmel_1664s_platform_data = {
-	.x_line			= 32,
-	.y_line			= 50,
 	.x_size			= 1700,
 	.y_size			= 2560,
-	.blen			= 0x89,	/* Gain setting is in upper 4 bits */
-	.threshold		= 0x28,
-	.voltage		= 0,	/* 3.3V */
 	.orient			= MXT_ROTATED_90_COUNTER,
 	.irqflags		= IRQF_TRIGGER_FALLING,
 	.is_tp			= false,
diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h
index 99e379b74398..eff0cdc08843 100644
--- a/include/linux/i2c/atmel_mxt_ts.h
+++ b/include/linux/i2c/atmel_mxt_ts.h
@@ -33,14 +33,10 @@ struct mxt_platform_data {
 	const u8 *config;
 	size_t config_length;
 
-	unsigned int x_line;
-	unsigned int y_line;
 	unsigned int x_size;
 	unsigned int y_size;
-	unsigned int blen;
-	unsigned int threshold;
-	unsigned int voltage;
 	unsigned char orient;
+
 	unsigned long irqflags;
 	bool is_tp;
 	const unsigned int key_map[MXT_NUM_GPIO];
-- 
cgit v1.2.3-71-gd317


From fb5e4c3ee140b29e1935b4bbb19c319177bed231 Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick.dyer@itdev.co.uk>
Date: Sun, 18 May 2014 23:00:15 -0700
Subject: Input: atmel_mxt_ts - improve T19 GPIO keys handling

 * The mapping of the GPIO numbers into the T19 status byte varies between
   different maXTouch chips. Some have up to 7 GPIOs. Allowing a keycode array
   of up to 8 items is simpler and more generic. So replace #define with
   configurable number of keys which also allows the removal of is_tp.
 * Rename platform data parameters to include "t19" to prevent confusion with
   T15 key array.
 * Probe aborts early on when pdata is NULL, so no need to check.
 * Move "int i" to beginning of function (mixed declarations and code)
 * Use API calls rather than __set_bit()
 * Remove unused dev variable.

Signed-off-by: Nick Dyer <nick.dyer@itdev.co.uk>
Acked-by: Yufeng Shen <miletus@chromium.org>
Reviewed-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/atmel_mxt_ts.c  | 44 ++++++++++++-------------------
 drivers/platform/chrome/chromeos_laptop.c | 17 +++++++-----
 include/linux/i2c/atmel_mxt_ts.h          |  7 ++---
 3 files changed, 30 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 7eb515caf215..65df362cf327 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -180,12 +180,6 @@
 
 #define MXT_FWRESET_TIME	175	/* msec */
 
-/* MXT_SPT_GPIOPWM_T19 field */
-#define MXT_GPIO0_MASK		0x04
-#define MXT_GPIO1_MASK		0x08
-#define MXT_GPIO2_MASK		0x10
-#define MXT_GPIO3_MASK		0x20
-
 /* Command to unlock bootloader */
 #define MXT_UNLOCK_CMD_MSB	0xaa
 #define MXT_UNLOCK_CMD_LSB	0xdc
@@ -250,7 +244,6 @@ struct mxt_data {
 	const struct mxt_platform_data *pdata;
 	struct mxt_object *object_table;
 	struct mxt_info info;
-	bool is_tp;
 
 	unsigned int irq;
 	unsigned int max_x;
@@ -515,15 +508,16 @@ static int mxt_write_object(struct mxt_data *data,
 static void mxt_input_button(struct mxt_data *data, struct mxt_message *message)
 {
 	struct input_dev *input = data->input_dev;
+	const struct mxt_platform_data *pdata = data->pdata;
 	bool button;
 	int i;
 
 	/* Active-low switch */
-	for (i = 0; i < MXT_NUM_GPIO; i++) {
-		if (data->pdata->key_map[i] == KEY_RESERVED)
+	for (i = 0; i < pdata->t19_num_keys; i++) {
+		if (pdata->t19_keymap[i] == KEY_RESERVED)
 			continue;
-		button = !(message->message[0] & MXT_GPIO0_MASK << i);
-		input_report_key(input, data->pdata->key_map[i], button);
+		button = !(message->message[0] & (1 << i));
+		input_report_key(input, pdata->t19_keymap[i], button);
 	}
 }
 
@@ -1084,6 +1078,8 @@ static int mxt_probe(struct i2c_client *client,
 	struct input_dev *input_dev;
 	int error;
 	unsigned int num_mt_slots;
+	unsigned int mt_flags = 0;
+	int i;
 
 	if (!pdata)
 		return -EINVAL;
@@ -1096,10 +1092,7 @@ static int mxt_probe(struct i2c_client *client,
 		goto err_free_mem;
 	}
 
-	data->is_tp = pdata && pdata->is_tp;
-
-	input_dev->name = (data->is_tp) ? "Atmel maXTouch Touchpad" :
-					  "Atmel maXTouch Touchscreen";
+	input_dev->name = "Atmel maXTouch Touchscreen";
 	snprintf(data->phys, sizeof(data->phys), "i2c-%u-%04x/input0",
 		 client->adapter->nr, client->addr);
 
@@ -1125,20 +1118,15 @@ static int mxt_probe(struct i2c_client *client,
 	__set_bit(EV_KEY, input_dev->evbit);
 	__set_bit(BTN_TOUCH, input_dev->keybit);
 
-	if (data->is_tp) {
-		int i;
-		__set_bit(INPUT_PROP_POINTER, input_dev->propbit);
+	if (pdata->t19_num_keys) {
 		__set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit);
 
-		for (i = 0; i < MXT_NUM_GPIO; i++)
-			if (pdata->key_map[i] != KEY_RESERVED)
-				__set_bit(pdata->key_map[i], input_dev->keybit);
+		for (i = 0; i < pdata->t19_num_keys; i++)
+			if (pdata->t19_keymap[i] != KEY_RESERVED)
+				input_set_capability(input_dev, EV_KEY,
+						     pdata->t19_keymap[i]);
 
-		__set_bit(BTN_TOOL_FINGER, input_dev->keybit);
-		__set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit);
-		__set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit);
-		__set_bit(BTN_TOOL_QUADTAP, input_dev->keybit);
-		__set_bit(BTN_TOOL_QUINTTAP, input_dev->keybit);
+		mt_flags |= INPUT_MT_POINTER;
 
 		input_abs_set_res(input_dev, ABS_X, MXT_PIXELS_PER_MM);
 		input_abs_set_res(input_dev, ABS_Y, MXT_PIXELS_PER_MM);
@@ -1146,6 +1134,8 @@ static int mxt_probe(struct i2c_client *client,
 				  MXT_PIXELS_PER_MM);
 		input_abs_set_res(input_dev, ABS_MT_POSITION_Y,
 				  MXT_PIXELS_PER_MM);
+
+		input_dev->name = "Atmel maXTouch Touchpad";
 	}
 
 	/* For single touch */
@@ -1158,7 +1148,7 @@ static int mxt_probe(struct i2c_client *client,
 
 	/* For multi touch */
 	num_mt_slots = data->T9_reportid_max - data->T9_reportid_min + 1;
-	error = input_mt_init_slots(input_dev, num_mt_slots, 0);
+	error = input_mt_init_slots(input_dev, num_mt_slots, mt_flags);
 	if (error)
 		goto err_free_object;
 	input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR,
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index 2559a0407c58..8b7523ab62e5 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -84,16 +84,22 @@ static struct i2c_board_info tsl2563_als_device = {
 	I2C_BOARD_INFO("tsl2563", TAOS_ALS_I2C_ADDR),
 };
 
+static int mxt_t19_keys[] = {
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	BTN_LEFT
+};
+
 static struct mxt_platform_data atmel_224s_tp_platform_data = {
 	.x_size			= 102*20,
 	.y_size			= 68*20,
 	.orient			= MXT_VERTICAL_FLIP,
 	.irqflags		= IRQF_TRIGGER_FALLING,
-	.is_tp			= true,
-	.key_map		= { KEY_RESERVED,
-				    KEY_RESERVED,
-				    KEY_RESERVED,
-				    BTN_LEFT },
+	.t19_num_keys		= ARRAY_SIZE(mxt_t19_keys),
+	.t19_keymap		= mxt_t19_keys,
 	.config			= NULL,
 	.config_length		= 0,
 };
@@ -109,7 +115,6 @@ static struct mxt_platform_data atmel_1664s_platform_data = {
 	.y_size			= 2560,
 	.orient			= MXT_ROTATED_90_COUNTER,
 	.irqflags		= IRQF_TRIGGER_FALLING,
-	.is_tp			= false,
 	.config			= NULL,
 	.config_length		= 0,
 };
diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h
index eff0cdc08843..d26080dc606c 100644
--- a/include/linux/i2c/atmel_mxt_ts.h
+++ b/include/linux/i2c/atmel_mxt_ts.h
@@ -15,9 +15,6 @@
 
 #include <linux/types.h>
 
-/* For key_map array */
-#define MXT_NUM_GPIO		4
-
 /* Orient */
 #define MXT_NORMAL		0x0
 #define MXT_DIAGONAL		0x1
@@ -38,8 +35,8 @@ struct mxt_platform_data {
 	unsigned char orient;
 
 	unsigned long irqflags;
-	bool is_tp;
-	const unsigned int key_map[MXT_NUM_GPIO];
+	u8 t19_num_keys;
+	const unsigned int *t19_keymap;
 };
 
 #endif /* __LINUX_ATMEL_MXT_TS_H */
-- 
cgit v1.2.3-71-gd317


From c3f78043d5aea39205a14c580babd87fbdcfa148 Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick.dyer@itdev.co.uk>
Date: Sun, 18 May 2014 23:04:46 -0700
Subject: Input: atmel_mxt_ts - implement CRC check for configuration data

The configuration is stored in NVRAM on the maXTouch chip. When the device
is reset it reports a CRC of the stored configuration values. Therefore it
isn't necessary to send the configuration on each probe - we can check the
CRC matches and avoid a timeconsuming backup/reset cycle.

Signed-off-by: Nick Dyer <nick.dyer@itdev.co.uk>
Acked-by: Benson Leung <bleung@chromium.org>
Acked-by: Yufeng Shen <miletus@chromium.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/atmel_mxt_ts.c | 60 +++++++++++++++++++++++++++-----
 include/linux/i2c/atmel_mxt_ts.h         |  1 +
 2 files changed, 53 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 278f364ec6e1..61f9ef221d12 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -188,6 +188,7 @@
 #define MXT_BACKUP_TIME		50	/* msec */
 #define MXT_RESET_TIME		200	/* msec */
 #define MXT_RESET_TIMEOUT	3000	/* msec */
+#define MXT_CRC_TIMEOUT		1000	/* msec */
 #define MXT_FW_RESET_TIME	3000	/* msec */
 #define MXT_FW_CHG_TIMEOUT	300	/* msec */
 
@@ -259,6 +260,7 @@ struct mxt_data {
 	unsigned int max_x;
 	unsigned int max_y;
 	bool in_bootloader;
+	u32 config_crc;
 
 	/* Cached parameters from object table */
 	u8 T6_reportid;
@@ -272,6 +274,9 @@ struct mxt_data {
 
 	/* for reset handling */
 	struct completion reset_completion;
+
+	/* for config update handling */
+	struct completion crc_completion;
 };
 
 static size_t mxt_obj_size(const struct mxt_object *obj)
@@ -636,7 +641,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
 	}
 }
 
-static unsigned mxt_extract_T6_csum(const u8 *csum)
+static u16 mxt_extract_T6_csum(const u8 *csum)
 {
 	return csum[0] | (csum[1] << 8) | (csum[2] << 16);
 }
@@ -654,6 +659,7 @@ static irqreturn_t mxt_process_messages_until_invalid(struct mxt_data *data)
 	struct device *dev = &data->client->dev;
 	u8 reportid;
 	bool update_input = false;
+	u32 crc;
 
 	do {
 		if (mxt_read_message(data, &message)) {
@@ -665,9 +671,15 @@ static irqreturn_t mxt_process_messages_until_invalid(struct mxt_data *data)
 
 		if (reportid == data->T6_reportid) {
 			u8 status = payload[0];
-			unsigned csum = mxt_extract_T6_csum(&payload[1]);
+
+			crc = mxt_extract_T6_csum(&payload[1]);
+			if (crc != data->config_crc) {
+				data->config_crc = crc;
+				complete(&data->crc_completion);
+			}
+
 			dev_dbg(dev, "Status: %02x Config Checksum: %06x\n",
-				status, csum);
+				status, data->config_crc);
 
 			if (status & MXT_T6_STATUS_RESET)
 				complete(&data->reset_completion);
@@ -757,6 +769,24 @@ static int mxt_soft_reset(struct mxt_data *data)
 	return 0;
 }
 
+static void mxt_update_crc(struct mxt_data *data, u8 cmd, u8 value)
+{
+	/*
+	 * On failure, CRC is set to 0 and config will always be
+	 * downloaded.
+	 */
+	data->config_crc = 0;
+	reinit_completion(&data->crc_completion);
+
+	mxt_t6_command(data, cmd, value, true);
+
+	/*
+	 * Wait for crc message. On failure, CRC is set to 0 and config will
+	 * always be downloaded.
+	 */
+	mxt_wait_for_completion(data, &data->crc_completion, MXT_CRC_TIMEOUT);
+}
+
 static int mxt_check_reg_init(struct mxt_data *data)
 {
 	const struct mxt_platform_data *pdata = data->pdata;
@@ -771,6 +801,16 @@ static int mxt_check_reg_init(struct mxt_data *data)
 		return 0;
 	}
 
+	mxt_update_crc(data, MXT_COMMAND_REPORTALL, 1);
+
+	if (data->config_crc == pdata->config_crc) {
+		dev_info(dev, "Config CRC 0x%06X: OK\n", data->config_crc);
+		return 0;
+	}
+
+	dev_info(dev, "Config CRC 0x%06X: does not match 0x%06X\n",
+		 data->config_crc, pdata->config_crc);
+
 	for (i = 0; i < data->info.object_num; i++) {
 		object = data->object_table + i;
 
@@ -790,6 +830,14 @@ static int mxt_check_reg_init(struct mxt_data *data)
 		index += size;
 	}
 
+	mxt_update_crc(data, MXT_COMMAND_BACKUPNV, MXT_BACKUP_VALUE);
+
+	ret = mxt_soft_reset(data);
+	if (ret)
+		return ret;
+
+	dev_info(dev, "Config successfully updated\n");
+
 	return 0;
 }
 
@@ -929,11 +977,6 @@ static int mxt_initialize(struct mxt_data *data)
 		goto err_free_object_table;
 	}
 
-	error = mxt_t6_command(data, MXT_COMMAND_BACKUPNV,
-			       MXT_BACKUP_VALUE, false);
-	if (!error)
-		mxt_soft_reset(data);
-
 	/* Update matrix size at info struct */
 	error = mxt_read_reg(client, MXT_MATRIX_X_SIZE, &val);
 	if (error)
@@ -1263,6 +1306,7 @@ static int mxt_probe(struct i2c_client *client,
 
 	init_completion(&data->bl_completion);
 	init_completion(&data->reset_completion);
+	init_completion(&data->crc_completion);
 
 	mxt_calc_resolution(data);
 
diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h
index d26080dc606c..9f92135b6620 100644
--- a/include/linux/i2c/atmel_mxt_ts.h
+++ b/include/linux/i2c/atmel_mxt_ts.h
@@ -29,6 +29,7 @@
 struct mxt_platform_data {
 	const u8 *config;
 	size_t config_length;
+	u32 config_crc;
 
 	unsigned int x_size;
 	unsigned int y_size;
-- 
cgit v1.2.3-71-gd317


From fd1159318e55e901cf269de90163b19fd62938cb Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 17 May 2014 00:03:54 +0400
Subject: can: add Renesas R-Car CAN driver

Add support for the CAN controller found in Renesas R-Car SoCs.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/Kconfig               |  10 +
 drivers/net/can/Makefile              |   1 +
 drivers/net/can/rcar_can.c            | 876 ++++++++++++++++++++++++++++++++++
 include/linux/can/platform/rcar_can.h |  17 +
 4 files changed, 904 insertions(+)
 create mode 100644 drivers/net/can/rcar_can.c
 create mode 100644 include/linux/can/platform/rcar_can.h

(limited to 'include')

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index ac67afa7735c..714b18790caf 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -119,6 +119,16 @@ config CAN_GRCAN
 	  endian syntheses of the cores would need some modifications on
 	  the hardware level to work.
 
+config CAN_RCAR
+	tristate "Renesas R-Car CAN controller"
+	depends on ARM
+	---help---
+	  Say Y here if you want to use CAN controller found on Renesas R-Car
+	  SoCs.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called rcar_can.
+
 source "drivers/net/can/mscan/Kconfig"
 
 source "drivers/net/can/sja1000/Kconfig"
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index c42058868b0f..90f538c73f8c 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -25,5 +25,6 @@ obj-$(CONFIG_CAN_JANZ_ICAN3)	+= janz-ican3.o
 obj-$(CONFIG_CAN_FLEXCAN)	+= flexcan.o
 obj-$(CONFIG_PCH_CAN)		+= pch_can.o
 obj-$(CONFIG_CAN_GRCAN)		+= grcan.o
+obj-$(CONFIG_CAN_RCAR)		+= rcar_can.o
 
 ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c
new file mode 100644
index 000000000000..5268d216ecfa
--- /dev/null
+++ b/drivers/net/can/rcar_can.c
@@ -0,0 +1,876 @@
+/* Renesas R-Car CAN device driver
+ *
+ * Copyright (C) 2013 Cogent Embedded, Inc. <source@cogentembedded.com>
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/can/led.h>
+#include <linux/can/dev.h>
+#include <linux/clk.h>
+#include <linux/can/platform/rcar_can.h>
+
+#define RCAR_CAN_DRV_NAME	"rcar_can"
+
+/* Mailbox configuration:
+ * mailbox 60 - 63 - Rx FIFO mailboxes
+ * mailbox 56 - 59 - Tx FIFO mailboxes
+ * non-FIFO mailboxes are not used
+ */
+#define RCAR_CAN_N_MBX		64 /* Number of mailboxes in non-FIFO mode */
+#define RCAR_CAN_RX_FIFO_MBX	60 /* Mailbox - window to Rx FIFO */
+#define RCAR_CAN_TX_FIFO_MBX	56 /* Mailbox - window to Tx FIFO */
+#define RCAR_CAN_FIFO_DEPTH	4
+
+/* Mailbox registers structure */
+struct rcar_can_mbox_regs {
+	u32 id;		/* IDE and RTR bits, SID and EID */
+	u8 stub;	/* Not used */
+	u8 dlc;		/* Data Length Code - bits [0..3] */
+	u8 data[8];	/* Data Bytes */
+	u8 tsh;		/* Time Stamp Higher Byte */
+	u8 tsl;		/* Time Stamp Lower Byte */
+};
+
+struct rcar_can_regs {
+	struct rcar_can_mbox_regs mb[RCAR_CAN_N_MBX]; /* Mailbox registers */
+	u32 mkr_2_9[8];	/* Mask Registers 2-9 */
+	u32 fidcr[2];	/* FIFO Received ID Compare Register */
+	u32 mkivlr1;	/* Mask Invalid Register 1 */
+	u32 mier1;	/* Mailbox Interrupt Enable Register 1 */
+	u32 mkr_0_1[2];	/* Mask Registers 0-1 */
+	u32 mkivlr0;    /* Mask Invalid Register 0*/
+	u32 mier0;      /* Mailbox Interrupt Enable Register 0 */
+	u8 pad_440[0x3c0];
+	u8 mctl[64];	/* Message Control Registers */
+	u16 ctlr;	/* Control Register */
+	u16 str;	/* Status register */
+	u8 bcr[3];	/* Bit Configuration Register */
+	u8 clkr;	/* Clock Select Register */
+	u8 rfcr;	/* Receive FIFO Control Register */
+	u8 rfpcr;	/* Receive FIFO Pointer Control Register */
+	u8 tfcr;	/* Transmit FIFO Control Register */
+	u8 tfpcr;       /* Transmit FIFO Pointer Control Register */
+	u8 eier;	/* Error Interrupt Enable Register */
+	u8 eifr;	/* Error Interrupt Factor Judge Register */
+	u8 recr;	/* Receive Error Count Register */
+	u8 tecr;        /* Transmit Error Count Register */
+	u8 ecsr;	/* Error Code Store Register */
+	u8 cssr;	/* Channel Search Support Register */
+	u8 mssr;	/* Mailbox Search Status Register */
+	u8 msmr;	/* Mailbox Search Mode Register */
+	u16 tsr;	/* Time Stamp Register */
+	u8 afsr;	/* Acceptance Filter Support Register */
+	u8 pad_857;
+	u8 tcr;		/* Test Control Register */
+	u8 pad_859[7];
+	u8 ier;		/* Interrupt Enable Register */
+	u8 isr;		/* Interrupt Status Register */
+	u8 pad_862;
+	u8 mbsmr;	/* Mailbox Search Mask Register */
+};
+
+struct rcar_can_priv {
+	struct can_priv can;	/* Must be the first member! */
+	struct net_device *ndev;
+	struct napi_struct napi;
+	struct rcar_can_regs __iomem *regs;
+	struct clk *clk;
+	u8 tx_dlc[RCAR_CAN_FIFO_DEPTH];
+	u32 tx_head;
+	u32 tx_tail;
+	u8 clock_select;
+	u8 ier;
+};
+
+static const struct can_bittiming_const rcar_can_bittiming_const = {
+	.name = RCAR_CAN_DRV_NAME,
+	.tseg1_min = 4,
+	.tseg1_max = 16,
+	.tseg2_min = 2,
+	.tseg2_max = 8,
+	.sjw_max = 4,
+	.brp_min = 1,
+	.brp_max = 1024,
+	.brp_inc = 1,
+};
+
+/* Control Register bits */
+#define RCAR_CAN_CTLR_BOM	(3 << 11) /* Bus-Off Recovery Mode Bits */
+#define RCAR_CAN_CTLR_BOM_ENT	(1 << 11) /* Entry to halt mode */
+					/* at bus-off entry */
+#define RCAR_CAN_CTLR_SLPM	(1 << 10)
+#define RCAR_CAN_CTLR_CANM	(3 << 8) /* Operating Mode Select Bit */
+#define RCAR_CAN_CTLR_CANM_HALT	(1 << 9)
+#define RCAR_CAN_CTLR_CANM_RESET (1 << 8)
+#define RCAR_CAN_CTLR_CANM_FORCE_RESET (3 << 8)
+#define RCAR_CAN_CTLR_MLM	(1 << 3) /* Message Lost Mode Select */
+#define RCAR_CAN_CTLR_IDFM	(3 << 1) /* ID Format Mode Select Bits */
+#define RCAR_CAN_CTLR_IDFM_MIXED (1 << 2) /* Mixed ID mode */
+#define RCAR_CAN_CTLR_MBM	(1 << 0) /* Mailbox Mode select */
+
+/* Status Register bits */
+#define RCAR_CAN_STR_RSTST	(1 << 8) /* Reset Status Bit */
+
+/* FIFO Received ID Compare Registers 0 and 1 bits */
+#define RCAR_CAN_FIDCR_IDE	(1 << 31) /* ID Extension Bit */
+#define RCAR_CAN_FIDCR_RTR	(1 << 30) /* Remote Transmission Request Bit */
+
+/* Receive FIFO Control Register bits */
+#define RCAR_CAN_RFCR_RFEST	(1 << 7) /* Receive FIFO Empty Status Flag */
+#define RCAR_CAN_RFCR_RFE	(1 << 0) /* Receive FIFO Enable */
+
+/* Transmit FIFO Control Register bits */
+#define RCAR_CAN_TFCR_TFUST	(7 << 1) /* Transmit FIFO Unsent Message */
+					/* Number Status Bits */
+#define RCAR_CAN_TFCR_TFUST_SHIFT 1	/* Offset of Transmit FIFO Unsent */
+					/* Message Number Status Bits */
+#define RCAR_CAN_TFCR_TFE	(1 << 0) /* Transmit FIFO Enable */
+
+#define RCAR_CAN_N_RX_MKREGS1	2	/* Number of mask registers */
+					/* for Rx mailboxes 0-31 */
+#define RCAR_CAN_N_RX_MKREGS2	8
+
+/* Bit Configuration Register settings */
+#define RCAR_CAN_BCR_TSEG1(x)	(((x) & 0x0f) << 20)
+#define RCAR_CAN_BCR_BPR(x)	(((x) & 0x3ff) << 8)
+#define RCAR_CAN_BCR_SJW(x)	(((x) & 0x3) << 4)
+#define RCAR_CAN_BCR_TSEG2(x)	((x) & 0x07)
+
+/* Mailbox and Mask Registers bits */
+#define RCAR_CAN_IDE		(1 << 31)
+#define RCAR_CAN_RTR		(1 << 30)
+#define RCAR_CAN_SID_SHIFT	18
+
+/* Mailbox Interrupt Enable Register 1 bits */
+#define RCAR_CAN_MIER1_RXFIE	(1 << 28) /* Receive  FIFO Interrupt Enable */
+#define RCAR_CAN_MIER1_TXFIE	(1 << 24) /* Transmit FIFO Interrupt Enable */
+
+/* Interrupt Enable Register bits */
+#define RCAR_CAN_IER_ERSIE	(1 << 5) /* Error (ERS) Interrupt Enable Bit */
+#define RCAR_CAN_IER_RXFIE	(1 << 4) /* Reception FIFO Interrupt */
+					/* Enable Bit */
+#define RCAR_CAN_IER_TXFIE	(1 << 3) /* Transmission FIFO Interrupt */
+					/* Enable Bit */
+/* Interrupt Status Register bits */
+#define RCAR_CAN_ISR_ERSF	(1 << 5) /* Error (ERS) Interrupt Status Bit */
+#define RCAR_CAN_ISR_RXFF	(1 << 4) /* Reception FIFO Interrupt */
+					/* Status Bit */
+#define RCAR_CAN_ISR_TXFF	(1 << 3) /* Transmission FIFO Interrupt */
+					/* Status Bit */
+
+/* Error Interrupt Enable Register bits */
+#define RCAR_CAN_EIER_BLIE	(1 << 7) /* Bus Lock Interrupt Enable */
+#define RCAR_CAN_EIER_OLIE	(1 << 6) /* Overload Frame Transmit */
+					/* Interrupt Enable */
+#define RCAR_CAN_EIER_ORIE	(1 << 5) /* Receive Overrun  Interrupt Enable */
+#define RCAR_CAN_EIER_BORIE	(1 << 4) /* Bus-Off Recovery Interrupt Enable */
+#define RCAR_CAN_EIER_BOEIE	(1 << 3) /* Bus-Off Entry Interrupt Enable */
+#define RCAR_CAN_EIER_EPIE	(1 << 2) /* Error Passive Interrupt Enable */
+#define RCAR_CAN_EIER_EWIE	(1 << 1) /* Error Warning Interrupt Enable */
+#define RCAR_CAN_EIER_BEIE	(1 << 0) /* Bus Error Interrupt Enable */
+
+/* Error Interrupt Factor Judge Register bits */
+#define RCAR_CAN_EIFR_BLIF	(1 << 7) /* Bus Lock Detect Flag */
+#define RCAR_CAN_EIFR_OLIF	(1 << 6) /* Overload Frame Transmission */
+					 /* Detect Flag */
+#define RCAR_CAN_EIFR_ORIF	(1 << 5) /* Receive Overrun Detect Flag */
+#define RCAR_CAN_EIFR_BORIF	(1 << 4) /* Bus-Off Recovery Detect Flag */
+#define RCAR_CAN_EIFR_BOEIF	(1 << 3) /* Bus-Off Entry Detect Flag */
+#define RCAR_CAN_EIFR_EPIF	(1 << 2) /* Error Passive Detect Flag */
+#define RCAR_CAN_EIFR_EWIF	(1 << 1) /* Error Warning Detect Flag */
+#define RCAR_CAN_EIFR_BEIF	(1 << 0) /* Bus Error Detect Flag */
+
+/* Error Code Store Register bits */
+#define RCAR_CAN_ECSR_EDPM	(1 << 7) /* Error Display Mode Select Bit */
+#define RCAR_CAN_ECSR_ADEF	(1 << 6) /* ACK Delimiter Error Flag */
+#define RCAR_CAN_ECSR_BE0F	(1 << 5) /* Bit Error (dominant) Flag */
+#define RCAR_CAN_ECSR_BE1F	(1 << 4) /* Bit Error (recessive) Flag */
+#define RCAR_CAN_ECSR_CEF	(1 << 3) /* CRC Error Flag */
+#define RCAR_CAN_ECSR_AEF	(1 << 2) /* ACK Error Flag */
+#define RCAR_CAN_ECSR_FEF	(1 << 1) /* Form Error Flag */
+#define RCAR_CAN_ECSR_SEF	(1 << 0) /* Stuff Error Flag */
+
+#define RCAR_CAN_NAPI_WEIGHT	4
+#define MAX_STR_READS		0x100
+
+static void tx_failure_cleanup(struct net_device *ndev)
+{
+	int i;
+
+	for (i = 0; i < RCAR_CAN_FIFO_DEPTH; i++)
+		can_free_echo_skb(ndev, i);
+}
+
+static void rcar_can_error(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u8 eifr, txerr = 0, rxerr = 0;
+
+	/* Propagate the error condition to the CAN stack */
+	skb = alloc_can_err_skb(ndev, &cf);
+
+	eifr = readb(&priv->regs->eifr);
+	if (eifr & (RCAR_CAN_EIFR_EWIF | RCAR_CAN_EIFR_EPIF)) {
+		txerr = readb(&priv->regs->tecr);
+		rxerr = readb(&priv->regs->recr);
+		if (skb) {
+			cf->can_id |= CAN_ERR_CRTL;
+			cf->data[6] = txerr;
+			cf->data[7] = rxerr;
+		}
+	}
+	if (eifr & RCAR_CAN_EIFR_BEIF) {
+		int rx_errors = 0, tx_errors = 0;
+		u8 ecsr;
+
+		netdev_dbg(priv->ndev, "Bus error interrupt:\n");
+		if (skb) {
+			cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
+			cf->data[2] = CAN_ERR_PROT_UNSPEC;
+		}
+		ecsr = readb(&priv->regs->ecsr);
+		if (ecsr & RCAR_CAN_ECSR_ADEF) {
+			netdev_dbg(priv->ndev, "ACK Delimiter Error\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_ADEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[3] |= CAN_ERR_PROT_LOC_ACK_DEL;
+		}
+		if (ecsr & RCAR_CAN_ECSR_BE0F) {
+			netdev_dbg(priv->ndev, "Bit Error (dominant)\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_BE0F, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_BIT0;
+		}
+		if (ecsr & RCAR_CAN_ECSR_BE1F) {
+			netdev_dbg(priv->ndev, "Bit Error (recessive)\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_BE1F, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_BIT1;
+		}
+		if (ecsr & RCAR_CAN_ECSR_CEF) {
+			netdev_dbg(priv->ndev, "CRC Error\n");
+			rx_errors++;
+			writeb(~RCAR_CAN_ECSR_CEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+		}
+		if (ecsr & RCAR_CAN_ECSR_AEF) {
+			netdev_dbg(priv->ndev, "ACK Error\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_AEF, &priv->regs->ecsr);
+			if (skb) {
+				cf->can_id |= CAN_ERR_ACK;
+				cf->data[3] |= CAN_ERR_PROT_LOC_ACK;
+			}
+		}
+		if (ecsr & RCAR_CAN_ECSR_FEF) {
+			netdev_dbg(priv->ndev, "Form Error\n");
+			rx_errors++;
+			writeb(~RCAR_CAN_ECSR_FEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_FORM;
+		}
+		if (ecsr & RCAR_CAN_ECSR_SEF) {
+			netdev_dbg(priv->ndev, "Stuff Error\n");
+			rx_errors++;
+			writeb(~RCAR_CAN_ECSR_SEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_STUFF;
+		}
+
+		priv->can.can_stats.bus_error++;
+		ndev->stats.rx_errors += rx_errors;
+		ndev->stats.tx_errors += tx_errors;
+		writeb(~RCAR_CAN_EIFR_BEIF, &priv->regs->eifr);
+	}
+	if (eifr & RCAR_CAN_EIFR_EWIF) {
+		netdev_dbg(priv->ndev, "Error warning interrupt\n");
+		priv->can.state = CAN_STATE_ERROR_WARNING;
+		priv->can.can_stats.error_warning++;
+		/* Clear interrupt condition */
+		writeb(~RCAR_CAN_EIFR_EWIF, &priv->regs->eifr);
+		if (skb)
+			cf->data[1] = txerr > rxerr ? CAN_ERR_CRTL_TX_WARNING :
+					      CAN_ERR_CRTL_RX_WARNING;
+	}
+	if (eifr & RCAR_CAN_EIFR_EPIF) {
+		netdev_dbg(priv->ndev, "Error passive interrupt\n");
+		priv->can.state = CAN_STATE_ERROR_PASSIVE;
+		priv->can.can_stats.error_passive++;
+		/* Clear interrupt condition */
+		writeb(~RCAR_CAN_EIFR_EPIF, &priv->regs->eifr);
+		if (skb)
+			cf->data[1] = txerr > rxerr ? CAN_ERR_CRTL_TX_PASSIVE :
+					      CAN_ERR_CRTL_RX_PASSIVE;
+	}
+	if (eifr & RCAR_CAN_EIFR_BOEIF) {
+		netdev_dbg(priv->ndev, "Bus-off entry interrupt\n");
+		tx_failure_cleanup(ndev);
+		priv->ier = RCAR_CAN_IER_ERSIE;
+		writeb(priv->ier, &priv->regs->ier);
+		priv->can.state = CAN_STATE_BUS_OFF;
+		/* Clear interrupt condition */
+		writeb(~RCAR_CAN_EIFR_BOEIF, &priv->regs->eifr);
+		can_bus_off(ndev);
+		if (skb)
+			cf->can_id |= CAN_ERR_BUSOFF;
+	}
+	if (eifr & RCAR_CAN_EIFR_ORIF) {
+		netdev_dbg(priv->ndev, "Receive overrun error interrupt\n");
+		ndev->stats.rx_over_errors++;
+		ndev->stats.rx_errors++;
+		writeb(~RCAR_CAN_EIFR_ORIF, &priv->regs->eifr);
+		if (skb) {
+			cf->can_id |= CAN_ERR_CRTL;
+			cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+		}
+	}
+	if (eifr & RCAR_CAN_EIFR_OLIF) {
+		netdev_dbg(priv->ndev,
+			   "Overload Frame Transmission error interrupt\n");
+		ndev->stats.rx_over_errors++;
+		ndev->stats.rx_errors++;
+		writeb(~RCAR_CAN_EIFR_OLIF, &priv->regs->eifr);
+		if (skb) {
+			cf->can_id |= CAN_ERR_PROT;
+			cf->data[2] |= CAN_ERR_PROT_OVERLOAD;
+		}
+	}
+
+	if (skb) {
+		stats->rx_packets++;
+		stats->rx_bytes += cf->can_dlc;
+		netif_rx(skb);
+	}
+}
+
+static void rcar_can_tx_done(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	u8 isr;
+
+	while (1) {
+		u8 unsent = readb(&priv->regs->tfcr);
+
+		unsent = (unsent & RCAR_CAN_TFCR_TFUST) >>
+			  RCAR_CAN_TFCR_TFUST_SHIFT;
+		if (priv->tx_head - priv->tx_tail <= unsent)
+			break;
+		stats->tx_packets++;
+		stats->tx_bytes += priv->tx_dlc[priv->tx_tail %
+						RCAR_CAN_FIFO_DEPTH];
+		priv->tx_dlc[priv->tx_tail % RCAR_CAN_FIFO_DEPTH] = 0;
+		can_get_echo_skb(ndev, priv->tx_tail % RCAR_CAN_FIFO_DEPTH);
+		priv->tx_tail++;
+		netif_wake_queue(ndev);
+	}
+	/* Clear interrupt */
+	isr = readb(&priv->regs->isr);
+	writeb(isr & ~RCAR_CAN_ISR_TXFF, &priv->regs->isr);
+	can_led_event(ndev, CAN_LED_EVENT_TX);
+}
+
+static irqreturn_t rcar_can_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = dev_id;
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u8 isr;
+
+	isr = readb(&priv->regs->isr);
+	if (!(isr & priv->ier))
+		return IRQ_NONE;
+
+	if (isr & RCAR_CAN_ISR_ERSF)
+		rcar_can_error(ndev);
+
+	if (isr & RCAR_CAN_ISR_TXFF)
+		rcar_can_tx_done(ndev);
+
+	if (isr & RCAR_CAN_ISR_RXFF) {
+		if (napi_schedule_prep(&priv->napi)) {
+			/* Disable Rx FIFO interrupts */
+			priv->ier &= ~RCAR_CAN_IER_RXFIE;
+			writeb(priv->ier, &priv->regs->ier);
+			__napi_schedule(&priv->napi);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void rcar_can_set_bittiming(struct net_device *dev)
+{
+	struct rcar_can_priv *priv = netdev_priv(dev);
+	struct can_bittiming *bt = &priv->can.bittiming;
+	u32 bcr;
+
+	bcr = RCAR_CAN_BCR_TSEG1(bt->phase_seg1 + bt->prop_seg - 1) |
+	      RCAR_CAN_BCR_BPR(bt->brp - 1) | RCAR_CAN_BCR_SJW(bt->sjw - 1) |
+	      RCAR_CAN_BCR_TSEG2(bt->phase_seg2 - 1);
+	/* Don't overwrite CLKR with 32-bit BCR access; CLKR has 8-bit access.
+	 * All the registers are big-endian but they get byte-swapped on 32-bit
+	 * read/write (but not on 8-bit, contrary to the manuals)...
+	 */
+	writel((bcr << 8) | priv->clock_select, &priv->regs->bcr);
+}
+
+static void rcar_can_start(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u16 ctlr;
+	int i;
+
+	/* Set controller to known mode:
+	 * - FIFO mailbox mode
+	 * - accept all messages
+	 * - overrun mode
+	 * CAN is in sleep mode after MCU hardware or software reset.
+	 */
+	ctlr = readw(&priv->regs->ctlr);
+	ctlr &= ~RCAR_CAN_CTLR_SLPM;
+	writew(ctlr, &priv->regs->ctlr);
+	/* Go to reset mode */
+	ctlr |= RCAR_CAN_CTLR_CANM_FORCE_RESET;
+	writew(ctlr, &priv->regs->ctlr);
+	for (i = 0; i < MAX_STR_READS; i++) {
+		if (readw(&priv->regs->str) & RCAR_CAN_STR_RSTST)
+			break;
+	}
+	rcar_can_set_bittiming(ndev);
+	ctlr |= RCAR_CAN_CTLR_IDFM_MIXED; /* Select mixed ID mode */
+	ctlr |= RCAR_CAN_CTLR_BOM_ENT;	/* Entry to halt mode automatically */
+					/* at bus-off */
+	ctlr |= RCAR_CAN_CTLR_MBM;	/* Select FIFO mailbox mode */
+	ctlr |= RCAR_CAN_CTLR_MLM;	/* Overrun mode */
+	writew(ctlr, &priv->regs->ctlr);
+
+	/* Accept all SID and EID */
+	writel(0, &priv->regs->mkr_2_9[6]);
+	writel(0, &priv->regs->mkr_2_9[7]);
+	/* In FIFO mailbox mode, write "0" to bits 24 to 31 */
+	writel(0, &priv->regs->mkivlr1);
+	/* Accept all frames */
+	writel(0, &priv->regs->fidcr[0]);
+	writel(RCAR_CAN_FIDCR_IDE | RCAR_CAN_FIDCR_RTR, &priv->regs->fidcr[1]);
+	/* Enable and configure FIFO mailbox interrupts */
+	writel(RCAR_CAN_MIER1_RXFIE | RCAR_CAN_MIER1_TXFIE, &priv->regs->mier1);
+
+	priv->ier = RCAR_CAN_IER_ERSIE | RCAR_CAN_IER_RXFIE |
+		    RCAR_CAN_IER_TXFIE;
+	writeb(priv->ier, &priv->regs->ier);
+
+	/* Accumulate error codes */
+	writeb(RCAR_CAN_ECSR_EDPM, &priv->regs->ecsr);
+	/* Enable error interrupts */
+	writeb(RCAR_CAN_EIER_EWIE | RCAR_CAN_EIER_EPIE | RCAR_CAN_EIER_BOEIE |
+	       (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING ?
+	       RCAR_CAN_EIER_BEIE : 0) | RCAR_CAN_EIER_ORIE |
+	       RCAR_CAN_EIER_OLIE, &priv->regs->eier);
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+
+	/* Go to operation mode */
+	writew(ctlr & ~RCAR_CAN_CTLR_CANM, &priv->regs->ctlr);
+	for (i = 0; i < MAX_STR_READS; i++) {
+		if (!(readw(&priv->regs->str) & RCAR_CAN_STR_RSTST))
+			break;
+	}
+	/* Enable Rx and Tx FIFO */
+	writeb(RCAR_CAN_RFCR_RFE, &priv->regs->rfcr);
+	writeb(RCAR_CAN_TFCR_TFE, &priv->regs->tfcr);
+}
+
+static int rcar_can_open(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	int err;
+
+	err = clk_prepare_enable(priv->clk);
+	if (err) {
+		netdev_err(ndev, "clk_prepare_enable() failed, error %d\n",
+			   err);
+		goto out;
+	}
+	err = open_candev(ndev);
+	if (err) {
+		netdev_err(ndev, "open_candev() failed, error %d\n", err);
+		goto out_clock;
+	}
+	napi_enable(&priv->napi);
+	err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev);
+	if (err) {
+		netdev_err(ndev, "error requesting interrupt %x\n", ndev->irq);
+		goto out_close;
+	}
+	can_led_event(ndev, CAN_LED_EVENT_OPEN);
+	rcar_can_start(ndev);
+	netif_start_queue(ndev);
+	return 0;
+out_close:
+	napi_disable(&priv->napi);
+	close_candev(ndev);
+out_clock:
+	clk_disable_unprepare(priv->clk);
+out:
+	return err;
+}
+
+static void rcar_can_stop(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u16 ctlr;
+	int i;
+
+	/* Go to (force) reset mode */
+	ctlr = readw(&priv->regs->ctlr);
+	ctlr |= RCAR_CAN_CTLR_CANM_FORCE_RESET;
+	writew(ctlr, &priv->regs->ctlr);
+	for (i = 0; i < MAX_STR_READS; i++) {
+		if (readw(&priv->regs->str) & RCAR_CAN_STR_RSTST)
+			break;
+	}
+	writel(0, &priv->regs->mier0);
+	writel(0, &priv->regs->mier1);
+	writeb(0, &priv->regs->ier);
+	writeb(0, &priv->regs->eier);
+	/* Go to sleep mode */
+	ctlr |= RCAR_CAN_CTLR_SLPM;
+	writew(ctlr, &priv->regs->ctlr);
+	priv->can.state = CAN_STATE_STOPPED;
+}
+
+static int rcar_can_close(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+
+	netif_stop_queue(ndev);
+	rcar_can_stop(ndev);
+	free_irq(ndev->irq, ndev);
+	napi_disable(&priv->napi);
+	clk_disable_unprepare(priv->clk);
+	close_candev(ndev);
+	can_led_event(ndev, CAN_LED_EVENT_STOP);
+	return 0;
+}
+
+static netdev_tx_t rcar_can_start_xmit(struct sk_buff *skb,
+				       struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	u32 data, i;
+
+	if (can_dropped_invalid_skb(ndev, skb))
+		return NETDEV_TX_OK;
+
+	if (cf->can_id & CAN_EFF_FLAG)	/* Extended frame format */
+		data = (cf->can_id & CAN_EFF_MASK) | RCAR_CAN_IDE;
+	else				/* Standard frame format */
+		data = (cf->can_id & CAN_SFF_MASK) << RCAR_CAN_SID_SHIFT;
+
+	if (cf->can_id & CAN_RTR_FLAG) { /* Remote transmission request */
+		data |= RCAR_CAN_RTR;
+	} else {
+		for (i = 0; i < cf->can_dlc; i++)
+			writeb(cf->data[i],
+			       &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX].data[i]);
+	}
+
+	writel(data, &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX].id);
+
+	writeb(cf->can_dlc, &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX].dlc);
+
+	priv->tx_dlc[priv->tx_head % RCAR_CAN_FIFO_DEPTH] = cf->can_dlc;
+	can_put_echo_skb(skb, ndev, priv->tx_head % RCAR_CAN_FIFO_DEPTH);
+	priv->tx_head++;
+	/* Start Tx: write 0xff to the TFPCR register to increment
+	 * the CPU-side pointer for the transmit FIFO to the next
+	 * mailbox location
+	 */
+	writeb(0xff, &priv->regs->tfpcr);
+	/* Stop the queue if we've filled all FIFO entries */
+	if (priv->tx_head - priv->tx_tail >= RCAR_CAN_FIFO_DEPTH)
+		netif_stop_queue(ndev);
+
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops rcar_can_netdev_ops = {
+	.ndo_open = rcar_can_open,
+	.ndo_stop = rcar_can_close,
+	.ndo_start_xmit = rcar_can_start_xmit,
+};
+
+static void rcar_can_rx_pkt(struct rcar_can_priv *priv)
+{
+	struct net_device_stats *stats = &priv->ndev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u32 data;
+	u8 dlc;
+
+	skb = alloc_can_skb(priv->ndev, &cf);
+	if (!skb) {
+		stats->rx_dropped++;
+		return;
+	}
+
+	data = readl(&priv->regs->mb[RCAR_CAN_RX_FIFO_MBX].id);
+	if (data & RCAR_CAN_IDE)
+		cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG;
+	else
+		cf->can_id = (data >> RCAR_CAN_SID_SHIFT) & CAN_SFF_MASK;
+
+	dlc = readb(&priv->regs->mb[RCAR_CAN_RX_FIFO_MBX].dlc);
+	cf->can_dlc = get_can_dlc(dlc);
+	if (data & RCAR_CAN_RTR) {
+		cf->can_id |= CAN_RTR_FLAG;
+	} else {
+		for (dlc = 0; dlc < cf->can_dlc; dlc++)
+			cf->data[dlc] =
+			readb(&priv->regs->mb[RCAR_CAN_RX_FIFO_MBX].data[dlc]);
+	}
+
+	can_led_event(priv->ndev, CAN_LED_EVENT_RX);
+
+	stats->rx_bytes += cf->can_dlc;
+	stats->rx_packets++;
+	netif_receive_skb(skb);
+}
+
+static int rcar_can_rx_poll(struct napi_struct *napi, int quota)
+{
+	struct rcar_can_priv *priv = container_of(napi,
+						  struct rcar_can_priv, napi);
+	int num_pkts;
+
+	for (num_pkts = 0; num_pkts < quota; num_pkts++) {
+		u8 rfcr, isr;
+
+		isr = readb(&priv->regs->isr);
+		/* Clear interrupt bit */
+		if (isr & RCAR_CAN_ISR_RXFF)
+			writeb(isr & ~RCAR_CAN_ISR_RXFF, &priv->regs->isr);
+		rfcr = readb(&priv->regs->rfcr);
+		if (rfcr & RCAR_CAN_RFCR_RFEST)
+			break;
+		rcar_can_rx_pkt(priv);
+		/* Write 0xff to the RFPCR register to increment
+		 * the CPU-side pointer for the receive FIFO
+		 * to the next mailbox location
+		 */
+		writeb(0xff, &priv->regs->rfpcr);
+	}
+	/* All packets processed */
+	if (num_pkts < quota) {
+		napi_complete(napi);
+		priv->ier |= RCAR_CAN_IER_RXFIE;
+		writeb(priv->ier, &priv->regs->ier);
+	}
+	return num_pkts;
+}
+
+static int rcar_can_do_set_mode(struct net_device *ndev, enum can_mode mode)
+{
+	switch (mode) {
+	case CAN_MODE_START:
+		rcar_can_start(ndev);
+		netif_wake_queue(ndev);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int rcar_can_get_berr_counter(const struct net_device *dev,
+				     struct can_berr_counter *bec)
+{
+	struct rcar_can_priv *priv = netdev_priv(dev);
+	int err;
+
+	err = clk_prepare_enable(priv->clk);
+	if (err)
+		return err;
+	bec->txerr = readb(&priv->regs->tecr);
+	bec->rxerr = readb(&priv->regs->recr);
+	clk_disable_unprepare(priv->clk);
+	return 0;
+}
+
+static int rcar_can_probe(struct platform_device *pdev)
+{
+	struct rcar_can_platform_data *pdata;
+	struct rcar_can_priv *priv;
+	struct net_device *ndev;
+	struct resource *mem;
+	void __iomem *addr;
+	int err = -ENODEV;
+	int irq;
+
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data provided!\n");
+		goto fail;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (!irq) {
+		dev_err(&pdev->dev, "No IRQ resource\n");
+		goto fail;
+	}
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	addr = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(addr)) {
+		err = PTR_ERR(addr);
+		goto fail;
+	}
+
+	ndev = alloc_candev(sizeof(struct rcar_can_priv), RCAR_CAN_FIFO_DEPTH);
+	if (!ndev) {
+		dev_err(&pdev->dev, "alloc_candev() failed\n");
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	priv = netdev_priv(ndev);
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		err = PTR_ERR(priv->clk);
+		dev_err(&pdev->dev, "cannot get clock: %d\n", err);
+		goto fail_clk;
+	}
+
+	ndev->netdev_ops = &rcar_can_netdev_ops;
+	ndev->irq = irq;
+	ndev->flags |= IFF_ECHO;
+	priv->ndev = ndev;
+	priv->regs = addr;
+	priv->clock_select = pdata->clock_select;
+	priv->can.clock.freq = clk_get_rate(priv->clk);
+	priv->can.bittiming_const = &rcar_can_bittiming_const;
+	priv->can.do_set_mode = rcar_can_do_set_mode;
+	priv->can.do_get_berr_counter = rcar_can_get_berr_counter;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_BERR_REPORTING;
+	platform_set_drvdata(pdev, ndev);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+
+	netif_napi_add(ndev, &priv->napi, rcar_can_rx_poll,
+		       RCAR_CAN_NAPI_WEIGHT);
+	err = register_candev(ndev);
+	if (err) {
+		dev_err(&pdev->dev, "register_candev() failed, error %d\n",
+			err);
+		goto fail_candev;
+	}
+
+	devm_can_led_init(ndev);
+
+	dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n",
+		 priv->regs, ndev->irq);
+
+	return 0;
+fail_candev:
+	netif_napi_del(&priv->napi);
+fail_clk:
+	free_candev(ndev);
+fail:
+	return err;
+}
+
+static int rcar_can_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+
+	unregister_candev(ndev);
+	netif_napi_del(&priv->napi);
+	free_candev(ndev);
+	return 0;
+}
+
+static int __maybe_unused rcar_can_suspend(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u16 ctlr;
+
+	if (netif_running(ndev)) {
+		netif_stop_queue(ndev);
+		netif_device_detach(ndev);
+	}
+	ctlr = readw(&priv->regs->ctlr);
+	ctlr |= RCAR_CAN_CTLR_CANM_HALT;
+	writew(ctlr, &priv->regs->ctlr);
+	ctlr |= RCAR_CAN_CTLR_SLPM;
+	writew(ctlr, &priv->regs->ctlr);
+	priv->can.state = CAN_STATE_SLEEPING;
+
+	clk_disable(priv->clk);
+	return 0;
+}
+
+static int __maybe_unused rcar_can_resume(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u16 ctlr;
+	int err;
+
+	err = clk_enable(priv->clk);
+	if (err) {
+		netdev_err(ndev, "clk_enable() failed, error %d\n", err);
+		return err;
+	}
+
+	ctlr = readw(&priv->regs->ctlr);
+	ctlr &= ~RCAR_CAN_CTLR_SLPM;
+	writew(ctlr, &priv->regs->ctlr);
+	ctlr &= ~RCAR_CAN_CTLR_CANM;
+	writew(ctlr, &priv->regs->ctlr);
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+
+	if (netif_running(ndev)) {
+		netif_device_attach(ndev);
+		netif_start_queue(ndev);
+	}
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(rcar_can_pm_ops, rcar_can_suspend, rcar_can_resume);
+
+static struct platform_driver rcar_can_driver = {
+	.driver = {
+		.name = RCAR_CAN_DRV_NAME,
+		.owner = THIS_MODULE,
+		.pm = &rcar_can_pm_ops,
+	},
+	.probe = rcar_can_probe,
+	.remove = rcar_can_remove,
+};
+
+module_platform_driver(rcar_can_driver);
+
+MODULE_AUTHOR("Cogent Embedded, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CAN driver for Renesas R-Car SoC");
+MODULE_ALIAS("platform:" RCAR_CAN_DRV_NAME);
diff --git a/include/linux/can/platform/rcar_can.h b/include/linux/can/platform/rcar_can.h
new file mode 100644
index 000000000000..0f4a2f3df504
--- /dev/null
+++ b/include/linux/can/platform/rcar_can.h
@@ -0,0 +1,17 @@
+#ifndef _CAN_PLATFORM_RCAR_CAN_H_
+#define _CAN_PLATFORM_RCAR_CAN_H_
+
+#include <linux/types.h>
+
+/* Clock Select Register settings */
+enum CLKR {
+	CLKR_CLKP1 = 0,	/* Peripheral clock (clkp1) */
+	CLKR_CLKP2 = 1,	/* Peripheral clock (clkp2) */
+	CLKR_CLKEXT = 3	/* Externally input clock */
+};
+
+struct rcar_can_platform_data {
+	enum CLKR clock_select;	/* Clock source select */
+};
+
+#endif	/* !_CAN_PLATFORM_RCAR_CAN_H_ */
-- 
cgit v1.2.3-71-gd317


From 42193e3efb632c84d686acacd7b2327f2b1f8c63 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Thu, 15 May 2014 20:31:56 +0200
Subject: can: unify identifiers to ensure unique include processing

Armin pointed me to the fact that the identifier which is used to ensure the
unique include processing in lunux/include/uapi/linux/can.h is CAN_H.
This clashed with his own source as includes from libraries and APIs should
use an underscore '_' at the identifier start.

This patch fixes the protection identifiers in all CAN relavant includes.

Reported-by: Armin Burchardt <armin@uni-bremen.de>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/core.h             | 6 +++---
 include/linux/can/dev.h              | 6 +++---
 include/linux/can/led.h              | 6 +++---
 include/linux/can/platform/cc770.h   | 6 +++---
 include/linux/can/platform/mcp251x.h | 6 +++---
 include/linux/can/platform/sja1000.h | 6 +++---
 include/linux/can/platform/ti_hecc.h | 6 +++---
 include/linux/can/skb.h              | 6 +++---
 include/uapi/linux/can.h             | 6 +++---
 include/uapi/linux/can/bcm.h         | 6 +++---
 include/uapi/linux/can/error.h       | 6 +++---
 include/uapi/linux/can/gw.h          | 6 +++---
 include/uapi/linux/can/netlink.h     | 6 +++---
 include/uapi/linux/can/raw.h         | 6 +++---
 14 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 78c6c52073ad..a0875001b13c 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -10,8 +10,8 @@
  *
  */
 
-#ifndef CAN_CORE_H
-#define CAN_CORE_H
+#ifndef _CAN_CORE_H
+#define _CAN_CORE_H
 
 #include <linux/can.h>
 #include <linux/skbuff.h>
@@ -58,4 +58,4 @@ extern void can_rx_unregister(struct net_device *dev, canid_t can_id,
 extern int can_send(struct sk_buff *skb, int loop);
 extern int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 
-#endif /* CAN_CORE_H */
+#endif /* !_CAN_CORE_H */
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 3ce5e526525f..6992afc6ba7f 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -10,8 +10,8 @@
  *
  */
 
-#ifndef CAN_DEV_H
-#define CAN_DEV_H
+#ifndef _CAN_DEV_H
+#define _CAN_DEV_H
 
 #include <linux/can.h>
 #include <linux/can/netlink.h>
@@ -132,4 +132,4 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev,
 struct sk_buff *alloc_can_err_skb(struct net_device *dev,
 				  struct can_frame **cf);
 
-#endif /* CAN_DEV_H */
+#endif /* !_CAN_DEV_H */
diff --git a/include/linux/can/led.h b/include/linux/can/led.h
index 9c1167baf273..e0475c5cbb92 100644
--- a/include/linux/can/led.h
+++ b/include/linux/can/led.h
@@ -6,8 +6,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef CAN_LED_H
-#define CAN_LED_H
+#ifndef _CAN_LED_H
+#define _CAN_LED_H
 
 #include <linux/if.h>
 #include <linux/leds.h>
@@ -48,4 +48,4 @@ static inline void can_led_notifier_exit(void)
 
 #endif
 
-#endif
+#endif /* !_CAN_LED_H */
diff --git a/include/linux/can/platform/cc770.h b/include/linux/can/platform/cc770.h
index 7702641f87ee..78b2d44f04cf 100644
--- a/include/linux/can/platform/cc770.h
+++ b/include/linux/can/platform/cc770.h
@@ -1,5 +1,5 @@
-#ifndef _CAN_PLATFORM_CC770_H_
-#define _CAN_PLATFORM_CC770_H_
+#ifndef _CAN_PLATFORM_CC770_H
+#define _CAN_PLATFORM_CC770_H
 
 /* CPU Interface Register (0x02) */
 #define CPUIF_CEN	0x01	/* Clock Out Enable */
@@ -30,4 +30,4 @@ struct cc770_platform_data {
 	u8 bcr;		/* Bus Configuration Register */
 };
 
-#endif	/* !_CAN_PLATFORM_CC770_H_ */
+#endif	/* !_CAN_PLATFORM_CC770_H */
diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h
index dc029dba7a03..d44fcae274ff 100644
--- a/include/linux/can/platform/mcp251x.h
+++ b/include/linux/can/platform/mcp251x.h
@@ -1,5 +1,5 @@
-#ifndef __CAN_PLATFORM_MCP251X_H__
-#define __CAN_PLATFORM_MCP251X_H__
+#ifndef _CAN_PLATFORM_MCP251X_H
+#define _CAN_PLATFORM_MCP251X_H
 
 /*
  *
@@ -18,4 +18,4 @@ struct mcp251x_platform_data {
 	unsigned long oscillator_frequency;
 };
 
-#endif /* __CAN_PLATFORM_MCP251X_H__ */
+#endif /* !_CAN_PLATFORM_MCP251X_H */
diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h
index 96f8fcc78d78..93570b61ec6c 100644
--- a/include/linux/can/platform/sja1000.h
+++ b/include/linux/can/platform/sja1000.h
@@ -1,5 +1,5 @@
-#ifndef _CAN_PLATFORM_SJA1000_H_
-#define _CAN_PLATFORM_SJA1000_H_
+#ifndef _CAN_PLATFORM_SJA1000_H
+#define _CAN_PLATFORM_SJA1000_H
 
 /* clock divider register */
 #define CDR_CLKOUT_MASK 0x07
@@ -32,4 +32,4 @@ struct sja1000_platform_data {
 	u8 cdr;		/* clock divider register */
 };
 
-#endif	/* !_CAN_PLATFORM_SJA1000_H_ */
+#endif	/* !_CAN_PLATFORM_SJA1000_H */
diff --git a/include/linux/can/platform/ti_hecc.h b/include/linux/can/platform/ti_hecc.h
index af17cb3f7a84..a52f47ca6c8a 100644
--- a/include/linux/can/platform/ti_hecc.h
+++ b/include/linux/can/platform/ti_hecc.h
@@ -1,5 +1,5 @@
-#ifndef __CAN_PLATFORM_TI_HECC_H__
-#define __CAN_PLATFORM_TI_HECC_H__
+#ifndef _CAN_PLATFORM_TI_HECC_H
+#define _CAN_PLATFORM_TI_HECC_H
 
 /*
  * TI HECC (High End CAN Controller) driver platform header
@@ -41,4 +41,4 @@ struct ti_hecc_platform_data {
 	u32 version;
 	void (*transceiver_switch) (int);
 };
-#endif
+#endif /* !_CAN_PLATFORM_TI_HECC_H */
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index f9bbbb472663..cc00d15c6107 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -7,8 +7,8 @@
  *
  */
 
-#ifndef CAN_SKB_H
-#define CAN_SKB_H
+#ifndef _CAN_SKB_H
+#define _CAN_SKB_H
 
 #include <linux/types.h>
 #include <linux/skbuff.h>
@@ -80,4 +80,4 @@ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb)
 	return skb;
 }
 
-#endif /* CAN_SKB_H */
+#endif /* !_CAN_SKB_H */
diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 5d9d1d140718..41892f720057 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -42,8 +42,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_H
-#define CAN_H
+#ifndef _UAPI_CAN_H
+#define _UAPI_CAN_H
 
 #include <linux/types.h>
 #include <linux/socket.h>
@@ -191,4 +191,4 @@ struct can_filter {
 
 #define CAN_INV_FILTER 0x20000000U /* to be set in can_filter.can_id */
 
-#endif /* CAN_H */
+#endif /* !_UAPI_CAN_H */
diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h
index 382251a1d214..89ddb9dc9bdf 100644
--- a/include/uapi/linux/can/bcm.h
+++ b/include/uapi/linux/can/bcm.h
@@ -41,8 +41,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_BCM_H
-#define CAN_BCM_H
+#ifndef _UAPI_CAN_BCM_H
+#define _UAPI_CAN_BCM_H
 
 #include <linux/types.h>
 #include <linux/can.h>
@@ -95,4 +95,4 @@ enum {
 #define TX_RESET_MULTI_IDX  0x0200
 #define RX_RTR_FRAME        0x0400
 
-#endif /* CAN_BCM_H */
+#endif /* !_UAPI_CAN_BCM_H */
diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h
index b63204545320..c247446ab25a 100644
--- a/include/uapi/linux/can/error.h
+++ b/include/uapi/linux/can/error.h
@@ -41,8 +41,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_ERROR_H
-#define CAN_ERROR_H
+#ifndef _UAPI_CAN_ERROR_H
+#define _UAPI_CAN_ERROR_H
 
 #define CAN_ERR_DLC 8 /* dlc for error message frames */
 
@@ -120,4 +120,4 @@
 
 /* controller specific additional information / data[5..7] */
 
-#endif /* CAN_ERROR_H */
+#endif /* _UAPI_CAN_ERROR_H */
diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h
index 844c8964bdfe..3e6184cf2f6d 100644
--- a/include/uapi/linux/can/gw.h
+++ b/include/uapi/linux/can/gw.h
@@ -41,8 +41,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_GW_H
-#define CAN_GW_H
+#ifndef _UAPI_CAN_GW_H
+#define _UAPI_CAN_GW_H
 
 #include <linux/types.h>
 #include <linux/can.h>
@@ -200,4 +200,4 @@ enum {
  *         Beware of sending unpacked or aligned structs!
  */
 
-#endif
+#endif /* !_UAPI_CAN_GW_H */
diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index 7e2e1863db16..813d11f54977 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -15,8 +15,8 @@
  * GNU General Public License for more details.
  */
 
-#ifndef CAN_NETLINK_H
-#define CAN_NETLINK_H
+#ifndef _UAPI_CAN_NETLINK_H
+#define _UAPI_CAN_NETLINK_H
 
 #include <linux/types.h>
 
@@ -130,4 +130,4 @@ enum {
 
 #define IFLA_CAN_MAX	(__IFLA_CAN_MAX - 1)
 
-#endif /* CAN_NETLINK_H */
+#endif /* !_UAPI_CAN_NETLINK_H */
diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h
index c7d8c334e0ce..78ec76fd89a6 100644
--- a/include/uapi/linux/can/raw.h
+++ b/include/uapi/linux/can/raw.h
@@ -42,8 +42,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_RAW_H
-#define CAN_RAW_H
+#ifndef _UAPI_CAN_RAW_H
+#define _UAPI_CAN_RAW_H
 
 #include <linux/can.h>
 
@@ -59,4 +59,4 @@ enum {
 	CAN_RAW_FD_FRAMES,	/* allow CAN FD frames (default:off) */
 };
 
-#endif
+#endif /* !_UAPI_CAN_RAW_H */
-- 
cgit v1.2.3-71-gd317


From 7c95f6d866d861268a217003c5202009fa76f252 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 4 Apr 2014 01:22:45 +0200
Subject: netfilter: nf_tables: deconstify table and chain in context structure

The new transaction infrastructure updates the family, table and chain
objects in the context structure, so let's deconstify them. While at it,
move the context structure initialization routine to the top of the
source file as it will be also used from the table and chain routines.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  6 ++--
 net/netfilter/nf_tables_api.c     | 58 +++++++++++++++++++--------------------
 2 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 29ff1dc41ef3..91505231a105 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -83,9 +83,9 @@ struct nft_ctx {
 	struct net			*net;
 	const struct sk_buff		*skb;
 	const struct nlmsghdr		*nlh;
-	const struct nft_af_info	*afi;
-	const struct nft_table		*table;
-	const struct nft_chain		*chain;
+	struct nft_af_info		*afi;
+	struct nft_table		*table;
+	struct nft_chain		*chain;
 	const struct nlattr * const 	*nla;
 };
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a5ca900912e1..3643bbc720bc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -88,6 +88,23 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
 	return ERR_PTR(-EAFNOSUPPORT);
 }
 
+static void nft_ctx_init(struct nft_ctx *ctx,
+			 const struct sk_buff *skb,
+			 const struct nlmsghdr *nlh,
+			 struct nft_af_info *afi,
+			 struct nft_table *table,
+			 struct nft_chain *chain,
+			 const struct nlattr * const *nla)
+{
+	ctx->net   = sock_net(skb->sk);
+	ctx->skb   = skb;
+	ctx->nlh   = nlh;
+	ctx->afi   = afi;
+	ctx->table = table;
+	ctx->chain = chain;
+	ctx->nla   = nla;
+}
+
 /*
  * Tables
  */
@@ -812,7 +829,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nlattr * uninitialized_var(name);
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_base_chain *basechain = NULL;
@@ -1024,7 +1041,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
 			      const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct net *net = sock_net(skb->sk);
@@ -1062,23 +1079,6 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
 	return 0;
 }
 
-static void nft_ctx_init(struct nft_ctx *ctx,
-			 const struct sk_buff *skb,
-			 const struct nlmsghdr *nlh,
-			 const struct nft_af_info *afi,
-			 const struct nft_table *table,
-			 const struct nft_chain *chain,
-			 const struct nlattr * const *nla)
-{
-	ctx->net   = sock_net(skb->sk);
-	ctx->skb   = skb;
-	ctx->nlh   = nlh;
-	ctx->afi   = afi;
-	ctx->table = table;
-	ctx->chain = chain;
-	ctx->nla   = nla;
-}
-
 /*
  * Expressions
  */
@@ -1582,7 +1582,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 			     const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct net *net = sock_net(skb->sk);
 	struct nft_table *table;
 	struct nft_chain *chain;
@@ -1763,9 +1763,9 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 			     const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct net *net = sock_net(skb->sk);
-	const struct nft_table *table;
+	struct nft_table *table;
 	struct nft_chain *chain = NULL;
 	struct nft_rule *rule;
 	int family = nfmsg->nfgen_family, err = 0;
@@ -2009,8 +2009,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
 {
 	struct net *net = sock_net(skb->sk);
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nft_af_info *afi = NULL;
-	const struct nft_table *table = NULL;
+	struct nft_af_info *afi = NULL;
+	struct nft_table *table = NULL;
 
 	if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
 		afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -2244,7 +2244,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
 {
 	const struct nft_set *set;
 	unsigned int idx, s_idx = cb->args[0];
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
 	struct net *net = sock_net(skb->sk);
 	int cur_family = cb->args[3];
@@ -2389,7 +2389,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nft_set_ops *ops;
-	const struct nft_af_info *afi;
+	struct nft_af_info *afi;
 	struct net *net = sock_net(skb->sk);
 	struct nft_table *table;
 	struct nft_set *set;
@@ -2651,8 +2651,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
 				      const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
-	const struct nft_af_info *afi;
-	const struct nft_table *table;
+	struct nft_af_info *afi;
+	struct nft_table *table;
 	struct net *net = sock_net(skb->sk);
 
 	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -2959,7 +2959,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
 			struct nft_ctx bind_ctx = {
 				.afi	= ctx->afi,
 				.table	= ctx->table,
-				.chain	= binding->chain,
+				.chain	= (struct nft_chain *)binding->chain,
 			};
 
 			err = nft_validate_data_load(&bind_ctx, dreg,
-- 
cgit v1.2.3-71-gd317


From 1081d11b086afb73e1d8f52f9047d661d8770b82 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 4 Apr 2014 01:24:07 +0200
Subject: netfilter: nf_tables: generalise transaction infrastructure

This patch generalises the existing rule transaction infrastructure
so it can be used to handle set, table and chain object transactions
as well. The transaction provides a data area that stores private
information depending on the transaction type.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  15 +++--
 net/netfilter/nf_tables_api.c     | 123 +++++++++++++++++++++-----------------
 2 files changed, 80 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 91505231a105..246dbd48825f 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -387,18 +387,25 @@ struct nft_rule {
 };
 
 /**
- *	struct nft_rule_trans - nf_tables rule update in transaction
+ *	struct nft_trans - nf_tables object update in transaction
  *
  *	@list: used internally
- *	@ctx: rule context
- *	@rule: rule that needs to be updated
+ *	@ctx: transaction context
+ *	@data: internal information related to the transaction
  */
-struct nft_rule_trans {
+struct nft_trans {
 	struct list_head		list;
 	struct nft_ctx			ctx;
+	char				data[0];
+};
+
+struct nft_trans_rule {
 	struct nft_rule			*rule;
 };
 
+#define nft_trans_rule(trans)	\
+	(((struct nft_trans_rule *)trans->data)->rule)
+
 static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
 {
 	return (struct nft_expr *)&rule->data[0];
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3643bbc720bc..424a6f3cb6c5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -105,6 +105,25 @@ static void nft_ctx_init(struct nft_ctx *ctx,
 	ctx->nla   = nla;
 }
 
+static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, u32 size)
+{
+	struct nft_trans *trans;
+
+	trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
+	if (trans == NULL)
+		return NULL;
+
+	trans->ctx	= *ctx;
+
+	return trans;
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+	list_del(&trans->list);
+	kfree(trans);
+}
+
 /*
  * Tables
  */
@@ -1557,26 +1576,25 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
 	kfree(rule);
 }
 
-#define NFT_RULE_MAXEXPRS	128
-
-static struct nft_expr_info *info;
-
-static struct nft_rule_trans *
-nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule)
+static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx,
+					    struct nft_rule *rule)
 {
-	struct nft_rule_trans *rupd;
+	struct nft_trans *trans;
 
-	rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
-	if (rupd == NULL)
-	       return NULL;
+	trans = nft_trans_alloc(ctx, sizeof(struct nft_trans_rule));
+	if (trans == NULL)
+		return NULL;
 
-	rupd->ctx = *ctx;
-	rupd->rule = rule;
-	list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
+	nft_trans_rule(trans) = rule;
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 
-	return rupd;
+	return trans;
 }
 
+#define NFT_RULE_MAXEXPRS	128
+
+static struct nft_expr_info *info;
+
 static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 			     const struct nlmsghdr *nlh,
 			     const struct nlattr * const nla[])
@@ -1587,7 +1605,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule, *old_rule = NULL;
-	struct nft_rule_trans *repl = NULL;
+	struct nft_trans *trans = NULL;
 	struct nft_expr *expr;
 	struct nft_ctx ctx;
 	struct nlattr *tmp;
@@ -1685,8 +1703,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 
 	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
 		if (nft_rule_is_active_next(net, old_rule)) {
-			repl = nf_tables_trans_add(&ctx, old_rule);
-			if (repl == NULL) {
+			trans = nft_trans_rule_add(&ctx, old_rule);
+			if (trans == NULL) {
 				err = -ENOMEM;
 				goto err2;
 			}
@@ -1708,7 +1726,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 			list_add_rcu(&rule->list, &chain->rules);
 	}
 
-	if (nf_tables_trans_add(&ctx, rule) == NULL) {
+	if (nft_trans_rule_add(&ctx, rule) == NULL) {
 		err = -ENOMEM;
 		goto err3;
 	}
@@ -1716,11 +1734,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 
 err3:
 	list_del_rcu(&rule->list);
-	if (repl) {
-		list_del_rcu(&repl->rule->list);
-		list_del(&repl->list);
-		nft_rule_clear(net, repl->rule);
-		kfree(repl);
+	if (trans) {
+		list_del_rcu(&nft_trans_rule(trans)->list);
+		nft_rule_clear(net, nft_trans_rule(trans));
+		nft_trans_destroy(trans);
 	}
 err2:
 	nf_tables_rule_destroy(&ctx, rule);
@@ -1737,7 +1754,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
 {
 	/* You cannot delete the same rule twice */
 	if (nft_rule_is_active_next(ctx->net, rule)) {
-		if (nf_tables_trans_add(ctx, rule) == NULL)
+		if (nft_trans_rule_add(ctx, rule) == NULL)
 			return -ENOMEM;
 		nft_rule_disactivate_next(ctx->net, rule);
 		return 0;
@@ -1813,7 +1830,7 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 static int nf_tables_commit(struct sk_buff *skb)
 {
 	struct net *net = sock_net(skb->sk);
-	struct nft_rule_trans *rupd, *tmp;
+	struct nft_trans *trans, *next;
 
 	/* Bump generation counter, invalidate any dump in progress */
 	net->nft.genctr++;
@@ -1826,38 +1843,38 @@ static int nf_tables_commit(struct sk_buff *skb)
 	 */
 	synchronize_rcu();
 
-	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
+	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		/* This rule was inactive in the past and just became active.
 		 * Clear the next bit of the genmask since its meaning has
 		 * changed, now it is the future.
 		 */
-		if (nft_rule_is_active(net, rupd->rule)) {
-			nft_rule_clear(net, rupd->rule);
-			nf_tables_rule_notify(skb, rupd->ctx.nlh,
-					      rupd->ctx.table, rupd->ctx.chain,
-					      rupd->rule, NFT_MSG_NEWRULE, 0,
-					      rupd->ctx.afi->family);
-			list_del(&rupd->list);
-			kfree(rupd);
+		if (nft_rule_is_active(net, nft_trans_rule(trans))) {
+			nft_rule_clear(net, nft_trans_rule(trans));
+			nf_tables_rule_notify(skb, trans->ctx.nlh,
+					      trans->ctx.table,
+					      trans->ctx.chain,
+					      nft_trans_rule(trans),
+					      NFT_MSG_NEWRULE, 0,
+					      trans->ctx.afi->family);
+			nft_trans_destroy(trans);
 			continue;
 		}
 
 		/* This rule is in the past, get rid of it */
-		list_del_rcu(&rupd->rule->list);
-		nf_tables_rule_notify(skb, rupd->ctx.nlh,
-				      rupd->ctx.table, rupd->ctx.chain,
-				      rupd->rule, NFT_MSG_DELRULE, 0,
-				      rupd->ctx.afi->family);
+		list_del_rcu(&nft_trans_rule(trans)->list);
+		nf_tables_rule_notify(skb, trans->ctx.nlh,
+				      trans->ctx.table, trans->ctx.chain,
+				      nft_trans_rule(trans), NFT_MSG_DELRULE,
+				      0, trans->ctx.afi->family);
 	}
 
 	/* Make sure we don't see any packet traversing old rules */
 	synchronize_rcu();
 
 	/* Now we can safely release unused old rules */
-	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
-		nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
-		list_del(&rupd->list);
-		kfree(rupd);
+	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+		nft_trans_destroy(trans);
 	}
 
 	return 0;
@@ -1866,27 +1883,25 @@ static int nf_tables_commit(struct sk_buff *skb)
 static int nf_tables_abort(struct sk_buff *skb)
 {
 	struct net *net = sock_net(skb->sk);
-	struct nft_rule_trans *rupd, *tmp;
+	struct nft_trans *trans, *next;
 
-	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
-		if (!nft_rule_is_active_next(net, rupd->rule)) {
-			nft_rule_clear(net, rupd->rule);
-			list_del(&rupd->list);
-			kfree(rupd);
+	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+		if (!nft_rule_is_active_next(net, nft_trans_rule(trans))) {
+			nft_rule_clear(net, nft_trans_rule(trans));
+			nft_trans_destroy(trans);
 			continue;
 		}
 
 		/* This rule is inactive, get rid of it */
-		list_del_rcu(&rupd->rule->list);
+		list_del_rcu(&nft_trans_rule(trans)->list);
 	}
 
 	/* Make sure we don't see any packet accessing aborted rules */
 	synchronize_rcu();
 
-	list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
-		nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
-		list_del(&rupd->list);
-		kfree(rupd);
+	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+		nft_trans_destroy(trans);
 	}
 
 	return 0;
-- 
cgit v1.2.3-71-gd317


From b380e5c733b9f18a6a3ebb97963b6dd037339bc0 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 4 Apr 2014 01:38:51 +0200
Subject: netfilter: nf_tables: add message type to transactions

The patch adds message type to the transaction to simplify the
commit the and abort routines. Yet another step forward in the
generalisation of the transaction infrastructure.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  2 ++
 net/netfilter/nf_tables_api.c     | 74 +++++++++++++++++++++++----------------
 2 files changed, 45 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 246dbd48825f..d8dfb2695e0f 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -390,11 +390,13 @@ struct nft_rule {
  *	struct nft_trans - nf_tables object update in transaction
  *
  *	@list: used internally
+ *	@msg_type: message type
  *	@ctx: transaction context
  *	@data: internal information related to the transaction
  */
 struct nft_trans {
 	struct list_head		list;
+	int				msg_type;
 	struct nft_ctx			ctx;
 	char				data[0];
 };
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 17df4b807f84..34aa3d507542 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -105,7 +105,8 @@ static void nft_ctx_init(struct nft_ctx *ctx,
 	ctx->nla   = nla;
 }
 
-static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, u32 size)
+static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
+					 u32 size)
 {
 	struct nft_trans *trans;
 
@@ -113,6 +114,7 @@ static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, u32 size)
 	if (trans == NULL)
 		return NULL;
 
+	trans->msg_type = msg_type;
 	trans->ctx	= *ctx;
 
 	return trans;
@@ -1576,12 +1578,12 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
 	kfree(rule);
 }
 
-static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx,
+static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
 					    struct nft_rule *rule)
 {
 	struct nft_trans *trans;
 
-	trans = nft_trans_alloc(ctx, sizeof(struct nft_trans_rule));
+	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
 	if (trans == NULL)
 		return NULL;
 
@@ -1703,7 +1705,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 
 	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
 		if (nft_rule_is_active_next(net, old_rule)) {
-			trans = nft_trans_rule_add(&ctx, old_rule);
+			trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE,
+						   old_rule);
 			if (trans == NULL) {
 				err = -ENOMEM;
 				goto err2;
@@ -1726,7 +1729,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 			list_add_rcu(&rule->list, &chain->rules);
 	}
 
-	if (nft_trans_rule_add(&ctx, rule) == NULL) {
+	if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
 		err = -ENOMEM;
 		goto err3;
 	}
@@ -1754,7 +1757,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
 {
 	/* You cannot delete the same rule twice */
 	if (nft_rule_is_active_next(ctx->net, rule)) {
-		if (nft_trans_rule_add(ctx, rule) == NULL)
+		if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL)
 			return -ENOMEM;
 		nft_rule_disactivate_next(ctx->net, rule);
 		return 0;
@@ -3114,28 +3117,26 @@ static int nf_tables_commit(struct sk_buff *skb)
 	synchronize_rcu();
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
-		/* This rule was inactive in the past and just became active.
-		 * Clear the next bit of the genmask since its meaning has
-		 * changed, now it is the future.
-		 */
-		if (nft_rule_is_active(net, nft_trans_rule(trans))) {
-			nft_rule_clear(net, nft_trans_rule(trans));
-			nf_tables_rule_notify(skb, trans->ctx.nlh,
+		switch (trans->msg_type) {
+		case NFT_MSG_NEWRULE:
+			nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
+			nf_tables_rule_notify(trans->ctx.skb, trans->ctx.nlh,
 					      trans->ctx.table,
 					      trans->ctx.chain,
 					      nft_trans_rule(trans),
 					      NFT_MSG_NEWRULE, 0,
 					      trans->ctx.afi->family);
 			nft_trans_destroy(trans);
-			continue;
+			break;
+		case NFT_MSG_DELRULE:
+			list_del_rcu(&nft_trans_rule(trans)->list);
+			nf_tables_rule_notify(trans->ctx.skb, trans->ctx.nlh,
+					      trans->ctx.table,
+					      trans->ctx.chain,
+					      nft_trans_rule(trans), NFT_MSG_DELRULE, 0,
+					      trans->ctx.afi->family);
+			break;
 		}
-
-		/* This rule is in the past, get rid of it */
-		list_del_rcu(&nft_trans_rule(trans)->list);
-		nf_tables_rule_notify(skb, trans->ctx.nlh,
-				      trans->ctx.table, trans->ctx.chain,
-				      nft_trans_rule(trans), NFT_MSG_DELRULE,
-				      0, trans->ctx.afi->family);
 	}
 
 	/* Make sure we don't see any packet traversing old rules */
@@ -3143,8 +3144,13 @@ static int nf_tables_commit(struct sk_buff *skb)
 
 	/* Now we can safely release unused old rules */
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
-		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
-		nft_trans_destroy(trans);
+		switch (trans->msg_type) {
+		case NFT_MSG_DELRULE:
+			nf_tables_rule_destroy(&trans->ctx,
+					       nft_trans_rule(trans));
+			nft_trans_destroy(trans);
+			break;
+		}
 	}
 
 	return 0;
@@ -3156,22 +3162,28 @@ static int nf_tables_abort(struct sk_buff *skb)
 	struct nft_trans *trans, *next;
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
-		if (!nft_rule_is_active_next(net, nft_trans_rule(trans))) {
-			nft_rule_clear(net, nft_trans_rule(trans));
+		switch (trans->msg_type) {
+		case NFT_MSG_NEWRULE:
+			list_del_rcu(&nft_trans_rule(trans)->list);
+			break;
+		case NFT_MSG_DELRULE:
+			nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
 			nft_trans_destroy(trans);
-			continue;
+			break;
 		}
-
-		/* This rule is inactive, get rid of it */
-		list_del_rcu(&nft_trans_rule(trans)->list);
 	}
 
 	/* Make sure we don't see any packet accessing aborted rules */
 	synchronize_rcu();
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
-		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
-		nft_trans_destroy(trans);
+		switch (trans->msg_type) {
+		case NFT_MSG_NEWRULE:
+			nf_tables_rule_destroy(&trans->ctx,
+					       nft_trans_rule(trans));
+			nft_trans_destroy(trans);
+			break;
+		}
 	}
 
 	return 0;
-- 
cgit v1.2.3-71-gd317


From 958bee14d0718ca7a5002c0f48a099d1d345812a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 3 Apr 2014 11:48:44 +0200
Subject: netfilter: nf_tables: use new transaction infrastructure to handle
 sets

This patch reworks the nf_tables API so set updates are included in
the same batch that contains rule updates. This speeds up rule-set
updates since we skip a dialog of four messages between kernel and
user-space (two on each direction), from:

 1) create the set and send netlink message to the kernel
 2) process the response from the kernel that contains the allocated name.
 3) add the set elements and send netlink message to the kernel.
 4) process the response from the kernel (to check for errors).

To:

 1) add the set to the batch.
 2) add the set elements to the batch.
 3) add the rule that points to the set.
 4) send batch to the kernel.

This also introduces an internal set ID (NFTA_SET_ID) that is unique
in the batch so set elements and rules can refer to new sets.

Backward compatibility has been only retained in userspace, this
means that new nft versions can talk to the kernel both in the new
and the old fashion.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  12 +++
 include/uapi/linux/netfilter/nf_tables.h |   6 ++
 net/netfilter/nf_tables_api.c            | 123 +++++++++++++++++++++++++++----
 net/netfilter/nft_lookup.c               |  10 ++-
 4 files changed, 133 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index d8dfb2695e0f..0f472d668cbe 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -268,6 +268,8 @@ static inline void *nft_set_priv(const struct nft_set *set)
 
 struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
 				     const struct nlattr *nla);
+struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+					  const struct nlattr *nla);
 
 /**
  *	struct nft_set_binding - nf_tables set binding
@@ -408,6 +410,16 @@ struct nft_trans_rule {
 #define nft_trans_rule(trans)	\
 	(((struct nft_trans_rule *)trans->data)->rule)
 
+struct nft_trans_set {
+	struct nft_set	*set;
+	u32		set_id;
+};
+
+#define nft_trans_set(trans)	\
+	(((struct nft_trans_set *)trans->data)->set)
+#define nft_trans_set_id(trans)	\
+	(((struct nft_trans_set *)trans->data)->set_id)
+
 static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
 {
 	return (struct nft_expr *)&rule->data[0];
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 7d6433f66bf8..2a88f645a5d8 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -246,6 +246,7 @@ enum nft_set_desc_attributes {
  * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
  * @NFTA_SET_POLICY: selection policy (NLA_U32)
  * @NFTA_SET_DESC: set description (NLA_NESTED)
+ * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
  */
 enum nft_set_attributes {
 	NFTA_SET_UNSPEC,
@@ -258,6 +259,7 @@ enum nft_set_attributes {
 	NFTA_SET_DATA_LEN,
 	NFTA_SET_POLICY,
 	NFTA_SET_DESC,
+	NFTA_SET_ID,
 	__NFTA_SET_MAX
 };
 #define NFTA_SET_MAX		(__NFTA_SET_MAX - 1)
@@ -293,12 +295,14 @@ enum nft_set_elem_attributes {
  * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING)
  * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING)
  * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes)
+ * @NFTA_SET_ELEM_LIST_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
  */
 enum nft_set_elem_list_attributes {
 	NFTA_SET_ELEM_LIST_UNSPEC,
 	NFTA_SET_ELEM_LIST_TABLE,
 	NFTA_SET_ELEM_LIST_SET,
 	NFTA_SET_ELEM_LIST_ELEMENTS,
+	NFTA_SET_ELEM_LIST_SET_ID,
 	__NFTA_SET_ELEM_LIST_MAX
 };
 #define NFTA_SET_ELEM_LIST_MAX	(__NFTA_SET_ELEM_LIST_MAX - 1)
@@ -484,12 +488,14 @@ enum nft_cmp_attributes {
  * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING)
  * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers)
  * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
  */
 enum nft_lookup_attributes {
 	NFTA_LOOKUP_UNSPEC,
 	NFTA_LOOKUP_SET,
 	NFTA_LOOKUP_SREG,
 	NFTA_LOOKUP_DREG,
+	NFTA_LOOKUP_SET_ID,
 	__NFTA_LOOKUP_MAX
 };
 #define NFTA_LOOKUP_MAX		(__NFTA_LOOKUP_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 34aa3d507542..02d3cc65690c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1934,6 +1934,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
 	[NFTA_SET_DATA_LEN]		= { .type = NLA_U32 },
 	[NFTA_SET_POLICY]		= { .type = NLA_U32 },
 	[NFTA_SET_DESC]			= { .type = NLA_NESTED },
+	[NFTA_SET_ID]			= { .type = NLA_U32 },
 };
 
 static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -1984,6 +1985,20 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
 	return ERR_PTR(-ENOENT);
 }
 
+struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
+					  const struct nlattr *nla)
+{
+	struct nft_trans *trans;
+	u32 id = ntohl(nla_get_be32(nla));
+
+	list_for_each_entry(trans, &net->nft.commit_list, list) {
+		if (trans->msg_type == NFT_MSG_NEWSET &&
+		    id == nft_trans_set_id(trans))
+			return nft_trans_set(trans);
+	}
+	return ERR_PTR(-ENOENT);
+}
+
 static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
 				    const char *name)
 {
@@ -2259,6 +2274,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
 	return ret;
 }
 
+#define NFT_SET_INACTIVE	(1 << 15)	/* Internal set flag */
+
 static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
 			    const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[])
@@ -2288,6 +2305,8 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
+	if (set->flags & NFT_SET_INACTIVE)
+		return -ENOENT;
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb2 == NULL)
@@ -2321,6 +2340,26 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
 	return 0;
 }
 
+static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+			     struct nft_set *set)
+{
+	struct nft_trans *trans;
+
+	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
+	if (trans == NULL)
+		return -ENOMEM;
+
+	if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+		nft_trans_set_id(trans) =
+			ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
+		set->flags |= NFT_SET_INACTIVE;
+	}
+	nft_trans_set(trans) = set;
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+	return 0;
+}
+
 static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 			    const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[])
@@ -2341,7 +2380,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 
 	if (nla[NFTA_SET_TABLE] == NULL ||
 	    nla[NFTA_SET_NAME] == NULL ||
-	    nla[NFTA_SET_KEY_LEN] == NULL)
+	    nla[NFTA_SET_KEY_LEN] == NULL ||
+	    nla[NFTA_SET_ID] == NULL)
 		return -EINVAL;
 
 	memset(&desc, 0, sizeof(desc));
@@ -2458,8 +2498,11 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	if (err < 0)
 		goto err2;
 
+	err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
+	if (err < 0)
+		goto err2;
+
 	list_add_tail(&set->list, &table->sets);
-	nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
 	return 0;
 
 err2:
@@ -2469,16 +2512,20 @@ err1:
 	return err;
 }
 
-static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+static void nft_set_destroy(struct nft_set *set)
 {
-	list_del(&set->list);
-	nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
-
 	set->ops->destroy(set);
 	module_put(set->ops->owner);
 	kfree(set);
 }
 
+static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+{
+	list_del(&set->list);
+	nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
+	nft_set_destroy(set);
+}
+
 static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
 			    const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[])
@@ -2500,10 +2547,16 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
+	if (set->flags & NFT_SET_INACTIVE)
+		return -ENOENT;
 	if (!list_empty(&set->bindings))
 		return -EBUSY;
 
-	nf_tables_set_destroy(&ctx, set);
+	err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set);
+	if (err < 0)
+		return err;
+
+	list_del(&set->list);
 	return 0;
 }
 
@@ -2563,7 +2616,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 {
 	list_del(&binding->list);
 
-	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
+	if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
+	    !(set->flags & NFT_SET_INACTIVE))
 		nf_tables_set_destroy(ctx, set);
 }
 
@@ -2581,6 +2635,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
 	[NFTA_SET_ELEM_LIST_TABLE]	= { .type = NLA_STRING },
 	[NFTA_SET_ELEM_LIST_SET]	= { .type = NLA_STRING },
 	[NFTA_SET_ELEM_LIST_ELEMENTS]	= { .type = NLA_NESTED },
+	[NFTA_SET_ELEM_LIST_SET_ID]	= { .type = NLA_U32 },
 };
 
 static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
@@ -2680,6 +2735,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
+	if (set->flags & NFT_SET_INACTIVE)
+		return -ENOENT;
 
 	event  = NFT_MSG_NEWSETELEM;
 	event |= NFNL_SUBSYS_NFTABLES << 8;
@@ -2743,6 +2800,8 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
 	if (IS_ERR(set))
 		return PTR_ERR(set);
+	if (set->flags & NFT_SET_INACTIVE)
+		return -ENOENT;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
@@ -2928,6 +2987,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
 				const struct nlmsghdr *nlh,
 				const struct nlattr * const nla[])
 {
+	struct net *net = sock_net(skb->sk);
 	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
@@ -2938,8 +2998,15 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
 		return err;
 
 	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
-	if (IS_ERR(set))
-		return PTR_ERR(set);
+	if (IS_ERR(set)) {
+		if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
+			set = nf_tables_set_lookup_byid(net,
+					nla[NFTA_SET_ELEM_LIST_SET_ID]);
+		}
+		if (IS_ERR(set))
+			return PTR_ERR(set);
+	}
+
 	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
 		return -EBUSY;
 
@@ -3069,7 +3136,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_rule_policy,
 	},
 	[NFT_MSG_NEWSET] = {
-		.call		= nf_tables_newset,
+		.call_batch	= nf_tables_newset,
 		.attr_count	= NFTA_SET_MAX,
 		.policy		= nft_set_policy,
 	},
@@ -3079,12 +3146,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_set_policy,
 	},
 	[NFT_MSG_DELSET] = {
-		.call		= nf_tables_delset,
+		.call_batch	= nf_tables_delset,
 		.attr_count	= NFTA_SET_MAX,
 		.policy		= nft_set_policy,
 	},
 	[NFT_MSG_NEWSETELEM] = {
-		.call		= nf_tables_newsetelem,
+		.call_batch	= nf_tables_newsetelem,
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
 	},
@@ -3094,7 +3161,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_set_elem_list_policy,
 	},
 	[NFT_MSG_DELSETELEM] = {
-		.call		= nf_tables_delsetelem,
+		.call_batch	= nf_tables_delsetelem,
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
 	},
@@ -3136,6 +3203,16 @@ static int nf_tables_commit(struct sk_buff *skb)
 					      nft_trans_rule(trans), NFT_MSG_DELRULE, 0,
 					      trans->ctx.afi->family);
 			break;
+		case NFT_MSG_NEWSET:
+			nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE;
+			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+					     NFT_MSG_NEWSET);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELSET:
+			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
+					     NFT_MSG_DELSET);
+			break;
 		}
 	}
 
@@ -3148,9 +3225,12 @@ static int nf_tables_commit(struct sk_buff *skb)
 		case NFT_MSG_DELRULE:
 			nf_tables_rule_destroy(&trans->ctx,
 					       nft_trans_rule(trans));
-			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELSET:
+			nft_set_destroy(nft_trans_set(trans));
 			break;
 		}
+		nft_trans_destroy(trans);
 	}
 
 	return 0;
@@ -3170,6 +3250,14 @@ static int nf_tables_abort(struct sk_buff *skb)
 			nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
 			nft_trans_destroy(trans);
 			break;
+		case NFT_MSG_NEWSET:
+			list_del(&nft_trans_set(trans)->list);
+			break;
+		case NFT_MSG_DELSET:
+			list_add_tail(&nft_trans_set(trans)->list,
+				      &trans->ctx.table->sets);
+			nft_trans_destroy(trans);
+			break;
 		}
 	}
 
@@ -3181,9 +3269,12 @@ static int nf_tables_abort(struct sk_buff *skb)
 		case NFT_MSG_NEWRULE:
 			nf_tables_rule_destroy(&trans->ctx,
 					       nft_trans_rule(trans));
-			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_NEWSET:
+			nft_set_destroy(nft_trans_set(trans));
 			break;
 		}
+		nft_trans_destroy(trans);
 	}
 
 	return 0;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 7fd2bea8aa23..6404a726d17b 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -56,8 +56,14 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
 		return -EINVAL;
 
 	set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
-	if (IS_ERR(set))
-		return PTR_ERR(set);
+	if (IS_ERR(set)) {
+		if (tb[NFTA_LOOKUP_SET_ID]) {
+			set = nf_tables_set_lookup_byid(ctx->net,
+							tb[NFTA_LOOKUP_SET_ID]);
+		}
+		if (IS_ERR(set))
+			return PTR_ERR(set);
+	}
 
 	priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
 	err = nft_validate_input_register(priv->sreg);
-- 
cgit v1.2.3-71-gd317


From 91c7b38dc9f0de4f7f444b796d14476bc12df7bc Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 9 Apr 2014 11:58:08 +0200
Subject: netfilter: nf_tables: use new transaction infrastructure to handle
 chain

This patch speeds up rule-set updates and it also introduces a way to
revert chain updates if the batch is aborted. The idea is to store the
changes in the transaction to apply that in the commit step.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  17 ++++
 net/netfilter/nf_tables_api.c     | 203 +++++++++++++++++++++++++++++---------
 2 files changed, 175 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 0f472d668cbe..7b2361c559b5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -420,6 +420,22 @@ struct nft_trans_set {
 #define nft_trans_set_id(trans)	\
 	(((struct nft_trans_set *)trans->data)->set_id)
 
+struct nft_trans_chain {
+	bool		update;
+	char		name[NFT_CHAIN_MAXNAMELEN];
+	struct nft_stats __percpu *stats;
+	u8		policy;
+};
+
+#define nft_trans_chain_update(trans)	\
+	(((struct nft_trans_chain *)trans->data)->update)
+#define nft_trans_chain_name(trans)	\
+	(((struct nft_trans_chain *)trans->data)->name)
+#define nft_trans_chain_stats(trans)	\
+	(((struct nft_trans_chain *)trans->data)->stats)
+#define nft_trans_chain_policy(trans)	\
+	(((struct nft_trans_chain *)trans->data)->policy)
+
 static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
 {
 	return (struct nft_expr *)&rule->data[0];
@@ -452,6 +468,7 @@ static inline void *nft_userdata(const struct nft_rule *rule)
 
 enum nft_chain_flags {
 	NFT_BASE_CHAIN			= 0x1,
+	NFT_CHAIN_INACTIVE		= 0x2,
 };
 
 /**
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 92cec68c03ec..7a1149fe2100 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -782,6 +782,8 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
 	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
+	if (chain->flags & NFT_CHAIN_INACTIVE)
+		return -ENOENT;
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2)
@@ -847,6 +849,34 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain,
 		rcu_assign_pointer(chain->stats, newstats);
 }
 
+static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+	struct nft_trans *trans;
+
+	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
+	if (trans == NULL)
+		return -ENOMEM;
+
+	if (msg_type == NFT_MSG_NEWCHAIN)
+		ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	return 0;
+}
+
+static void nf_tables_chain_destroy(struct nft_chain *chain)
+{
+	BUG_ON(chain->use > 0);
+
+	if (chain->flags & NFT_BASE_CHAIN) {
+		module_put(nft_base_chain(chain)->type->owner);
+		free_percpu(nft_base_chain(chain)->stats);
+		kfree(nft_base_chain(chain));
+	} else {
+		kfree(chain);
+	}
+}
+
 static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 			      const struct nlmsghdr *nlh,
 			      const struct nlattr * const nla[])
@@ -866,6 +896,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_stats __percpu *stats;
 	int err;
 	bool create;
+	struct nft_ctx ctx;
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
@@ -911,6 +942,11 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 	}
 
 	if (chain != NULL) {
+		struct nft_stats *stats = NULL;
+		struct nft_trans *trans;
+
+		if (chain->flags & NFT_CHAIN_INACTIVE)
+			return -ENOENT;
 		if (nlh->nlmsg_flags & NLM_F_EXCL)
 			return -EEXIST;
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
@@ -927,17 +963,28 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 			stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
 			if (IS_ERR(stats))
 				return PTR_ERR(stats);
-
-			nft_chain_stats_replace(nft_base_chain(chain), stats);
 		}
 
-		if (nla[NFTA_CHAIN_POLICY])
-			nft_base_chain(chain)->policy = policy;
+		nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+		trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
+					sizeof(struct nft_trans_chain));
+		if (trans == NULL)
+			return -ENOMEM;
 
-		if (nla[NFTA_CHAIN_HANDLE] && name)
-			nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+		nft_trans_chain_stats(trans) = stats;
+		nft_trans_chain_update(trans) = true;
 
-		goto notify;
+		if (nla[NFTA_CHAIN_POLICY])
+			nft_trans_chain_policy(trans) = policy;
+		else
+			nft_trans_chain_policy(trans) = -1;
+
+		if (nla[NFTA_CHAIN_HANDLE] && name) {
+			nla_strlcpy(nft_trans_chain_name(trans), name,
+				    NFT_CHAIN_MAXNAMELEN);
+		}
+		list_add_tail(&trans->list, &net->nft.commit_list);
+		return 0;
 	}
 
 	if (table->use == UINT_MAX)
@@ -1033,31 +1080,26 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
 	    chain->flags & NFT_BASE_CHAIN) {
 		err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
-		if (err < 0) {
-			module_put(basechain->type->owner);
-			free_percpu(basechain->stats);
-			kfree(basechain);
-			return err;
-		}
+		if (err < 0)
+			goto err1;
 	}
-	list_add_tail(&chain->list, &table->chains);
-	table->use++;
-notify:
-	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
-			       family);
-	return 0;
-}
 
-static void nf_tables_chain_destroy(struct nft_chain *chain)
-{
-	BUG_ON(chain->use > 0);
+	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+	err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
+	if (err < 0)
+		goto err2;
 
-	if (chain->flags & NFT_BASE_CHAIN) {
-		module_put(nft_base_chain(chain)->type->owner);
-		free_percpu(nft_base_chain(chain)->stats);
-		kfree(nft_base_chain(chain));
-	} else
-		kfree(chain);
+	list_add_tail(&chain->list, &table->chains);
+	return 0;
+err2:
+	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+	    chain->flags & NFT_BASE_CHAIN) {
+		nf_unregister_hooks(nft_base_chain(chain)->ops,
+				    afi->nops);
+	}
+err1:
+	nf_tables_chain_destroy(chain);
+	return err;
 }
 
 static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
@@ -1070,6 +1112,8 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_chain *chain;
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
+	struct nft_ctx ctx;
+	int err;
 
 	afi = nf_tables_afinfo_lookup(net, family, false);
 	if (IS_ERR(afi))
@@ -1082,24 +1126,17 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
 	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
-
+	if (chain->flags & NFT_CHAIN_INACTIVE)
+		return -ENOENT;
 	if (!list_empty(&chain->rules) || chain->use > 0)
 		return -EBUSY;
 
-	list_del(&chain->list);
-	table->use--;
-
-	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
-	    chain->flags & NFT_BASE_CHAIN)
-		nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops);
-
-	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
-			       family);
-
-	/* Make sure all rule references are gone before this is released */
-	synchronize_rcu();
+	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+	err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN);
+	if (err < 0)
+		return err;
 
-	nf_tables_chain_destroy(chain);
+	list_del(&chain->list);
 	return 0;
 }
 
@@ -1542,6 +1579,8 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
 	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
 	if (IS_ERR(chain))
 		return PTR_ERR(chain);
+	if (chain->flags & NFT_CHAIN_INACTIVE)
+		return -ENOENT;
 
 	rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
 	if (IS_ERR(rule))
@@ -3109,7 +3148,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_table_policy,
 	},
 	[NFT_MSG_NEWCHAIN] = {
-		.call		= nf_tables_newchain,
+		.call_batch	= nf_tables_newchain,
 		.attr_count	= NFTA_CHAIN_MAX,
 		.policy		= nft_chain_policy,
 	},
@@ -3119,7 +3158,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_chain_policy,
 	},
 	[NFT_MSG_DELCHAIN] = {
-		.call		= nf_tables_delchain,
+		.call_batch	= nf_tables_delchain,
 		.attr_count	= NFTA_CHAIN_MAX,
 		.policy		= nft_chain_policy,
 	},
@@ -3170,6 +3209,27 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 	},
 };
 
+static void nft_chain_commit_update(struct nft_trans *trans)
+{
+	struct nft_base_chain *basechain;
+
+	if (nft_trans_chain_name(trans)[0])
+		strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
+
+	if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN))
+		return;
+
+	basechain = nft_base_chain(trans->ctx.chain);
+	nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans));
+
+	switch (nft_trans_chain_policy(trans)) {
+	case NF_DROP:
+	case NF_ACCEPT:
+		basechain->policy = nft_trans_chain_policy(trans);
+		break;
+	}
+}
+
 static int nf_tables_commit(struct sk_buff *skb)
 {
 	struct net *net = sock_net(skb->sk);
@@ -3188,6 +3248,33 @@ static int nf_tables_commit(struct sk_buff *skb)
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
+		case NFT_MSG_NEWCHAIN:
+			if (nft_trans_chain_update(trans))
+				nft_chain_commit_update(trans);
+			else {
+				trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE;
+				trans->ctx.table->use++;
+			}
+			nf_tables_chain_notify(trans->ctx.skb, trans->ctx.nlh,
+					       trans->ctx.table,
+					       trans->ctx.chain,
+					       NFT_MSG_NEWCHAIN,
+					       trans->ctx.afi->family);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELCHAIN:
+			trans->ctx.table->use--;
+			nf_tables_chain_notify(trans->ctx.skb, trans->ctx.nlh,
+					       trans->ctx.table,
+					       trans->ctx.chain,
+					       NFT_MSG_DELCHAIN,
+					       trans->ctx.afi->family);
+			if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+			    trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+				nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+						    trans->ctx.afi->nops);
+			}
+			break;
 		case NFT_MSG_NEWRULE:
 			nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
 			nf_tables_rule_notify(trans->ctx.skb, trans->ctx.nlh,
@@ -3225,6 +3312,9 @@ static int nf_tables_commit(struct sk_buff *skb)
 	/* Now we can safely release unused old rules */
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
+		case NFT_MSG_DELCHAIN:
+			nf_tables_chain_destroy(trans->ctx.chain);
+			break;
 		case NFT_MSG_DELRULE:
 			nf_tables_rule_destroy(&trans->ctx,
 					       nft_trans_rule(trans));
@@ -3246,6 +3336,26 @@ static int nf_tables_abort(struct sk_buff *skb)
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
+		case NFT_MSG_NEWCHAIN:
+			if (nft_trans_chain_update(trans)) {
+				if (nft_trans_chain_stats(trans))
+					free_percpu(nft_trans_chain_stats(trans));
+
+				nft_trans_destroy(trans);
+			} else {
+				list_del(&trans->ctx.chain->list);
+				if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
+				    trans->ctx.chain->flags & NFT_BASE_CHAIN) {
+					nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
+							    trans->ctx.afi->nops);
+				}
+			}
+			break;
+		case NFT_MSG_DELCHAIN:
+			list_add_tail(&trans->ctx.chain->list,
+				      &trans->ctx.table->chains);
+			nft_trans_destroy(trans);
+			break;
 		case NFT_MSG_NEWRULE:
 			list_del_rcu(&nft_trans_rule(trans)->list);
 			break;
@@ -3269,6 +3379,9 @@ static int nf_tables_abort(struct sk_buff *skb)
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
+		case NFT_MSG_NEWCHAIN:
+			nf_tables_chain_destroy(trans->ctx.chain);
+			break;
 		case NFT_MSG_NEWRULE:
 			nf_tables_rule_destroy(&trans->ctx,
 					       nft_trans_rule(trans));
-- 
cgit v1.2.3-71-gd317


From 55dd6f93076bb82aa8911191125418dcfcbf2c9b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 3 Apr 2014 11:53:37 +0200
Subject: netfilter: nf_tables: use new transaction infrastructure to handle
 table

This patch speeds up rule-set updates and it also provides a way
to revert updates and leave things in consistent state in case that
the batch needs to be aborted.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  10 +++
 net/netfilter/nf_tables_api.c     | 145 +++++++++++++++++++++++++++++++++-----
 2 files changed, 136 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 7b2361c559b5..15bf745f198d 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -436,6 +436,16 @@ struct nft_trans_chain {
 #define nft_trans_chain_policy(trans)	\
 	(((struct nft_trans_chain *)trans->data)->policy)
 
+struct nft_trans_table {
+	bool		update;
+	bool		enable;
+};
+
+#define nft_trans_table_update(trans)	\
+	(((struct nft_trans_table *)trans->data)->update)
+#define nft_trans_table_enable(trans)	\
+	(((struct nft_trans_table *)trans->data)->enable)
+
 static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
 {
 	return (struct nft_expr *)&rule->data[0];
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d71125a0a341..3e685dbb2921 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -307,6 +307,9 @@ done:
 	return skb->len;
 }
 
+/* Internal table flags */
+#define NFT_TABLE_INACTIVE	(1 << 15)
+
 static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
 			      const struct nlmsghdr *nlh,
 			      const struct nlattr * const nla[])
@@ -333,6 +336,8 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
 	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
 
 	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb2)
@@ -395,9 +400,9 @@ static void nf_tables_table_disable(const struct nft_af_info *afi,
 
 static int nf_tables_updtable(struct nft_ctx *ctx)
 {
-	const struct nfgenmsg *nfmsg = nlmsg_data(ctx->nlh);
-	int family = nfmsg->nfgen_family, ret = 0;
+	struct nft_trans *trans;
 	u32 flags;
+	int ret = 0;
 
 	if (!ctx->nla[NFTA_TABLE_FLAGS])
 		return 0;
@@ -406,25 +411,48 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 	if (flags & ~NFT_TABLE_F_DORMANT)
 		return -EINVAL;
 
+	trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
+				sizeof(struct nft_trans_table));
+	if (trans == NULL)
+		return -ENOMEM;
+
 	if ((flags & NFT_TABLE_F_DORMANT) &&
 	    !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
-		nf_tables_table_disable(ctx->afi, ctx->table);
-		ctx->table->flags |= NFT_TABLE_F_DORMANT;
+		nft_trans_table_enable(trans) = false;
 	} else if (!(flags & NFT_TABLE_F_DORMANT) &&
 		   ctx->table->flags & NFT_TABLE_F_DORMANT) {
 		ret = nf_tables_table_enable(ctx->afi, ctx->table);
-		if (ret >= 0)
+		if (ret >= 0) {
 			ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+			nft_trans_table_enable(trans) = true;
+		}
 	}
 	if (ret < 0)
 		goto err;
 
-	nf_tables_table_notify(ctx->skb, ctx->nlh, ctx->table,
-			       NFT_MSG_NEWTABLE, family);
+	nft_trans_table_update(trans) = true;
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	return 0;
 err:
+	nft_trans_destroy(trans);
 	return ret;
 }
 
+static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+{
+	struct nft_trans *trans;
+
+	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
+	if (trans == NULL)
+		return -ENOMEM;
+
+	if (msg_type == NFT_MSG_NEWTABLE)
+		ctx->table->flags |= NFT_TABLE_INACTIVE;
+
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	return 0;
+}
+
 static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 			      const struct nlmsghdr *nlh,
 			      const struct nlattr * const nla[])
@@ -437,6 +465,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 	int family = nfmsg->nfgen_family;
 	u32 flags = 0;
 	struct nft_ctx ctx;
+	int err;
 
 	afi = nf_tables_afinfo_lookup(net, family, true);
 	if (IS_ERR(afi))
@@ -451,6 +480,8 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 	}
 
 	if (table != NULL) {
+		if (table->flags & NFT_TABLE_INACTIVE)
+			return -ENOENT;
 		if (nlh->nlmsg_flags & NLM_F_EXCL)
 			return -EEXIST;
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
@@ -480,8 +511,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 	INIT_LIST_HEAD(&table->sets);
 	table->flags = flags;
 
+	nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+	err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
+	if (err < 0) {
+		kfree(table);
+		module_put(afi->owner);
+		return err;
+	}
 	list_add_tail(&table->list, &afi->tables);
-	nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
 	return 0;
 }
 
@@ -493,7 +530,8 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_af_info *afi;
 	struct nft_table *table;
 	struct net *net = sock_net(skb->sk);
-	int family = nfmsg->nfgen_family;
+	int family = nfmsg->nfgen_family, err;
+	struct nft_ctx ctx;
 
 	afi = nf_tables_afinfo_lookup(net, family, false);
 	if (IS_ERR(afi))
@@ -502,17 +540,27 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
 	table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
 
 	if (!list_empty(&table->chains) || !list_empty(&table->sets))
 		return -EBUSY;
 
+	nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+	err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE);
+	if (err < 0)
+		return err;
+
 	list_del(&table->list);
-	nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
-	kfree(table);
-	module_put(afi->owner);
 	return 0;
 }
 
+static void nf_tables_table_destroy(struct nft_ctx *ctx)
+{
+	kfree(ctx->table);
+	module_put(ctx->afi->owner);
+}
+
 int nft_register_chain_type(const struct nf_chain_type *ctype)
 {
 	int err = 0;
@@ -776,6 +824,8 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
 	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
 
 	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
 	if (IS_ERR(chain))
@@ -1120,6 +1170,8 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
 	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
 
 	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
 	if (IS_ERR(chain))
@@ -1573,6 +1625,8 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
 	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
 
 	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
 	if (IS_ERR(chain))
@@ -1838,6 +1892,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (table->flags & NFT_TABLE_INACTIVE)
+		return -ENOENT;
 
 	if (nla[NFTA_RULE_CHAIN]) {
 		chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
@@ -2004,6 +2060,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
 		table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
 		if (IS_ERR(table))
 			return PTR_ERR(table);
+		if (table->flags & NFT_TABLE_INACTIVE)
+			return -ENOENT;
 	}
 
 	nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
@@ -2681,7 +2739,8 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
 static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
 				      const struct sk_buff *skb,
 				      const struct nlmsghdr *nlh,
-				      const struct nlattr * const nla[])
+				      const struct nlattr * const nla[],
+				      bool trans)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	struct nft_af_info *afi;
@@ -2695,6 +2754,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
 	table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
 	if (IS_ERR(table))
 		return PTR_ERR(table);
+	if (!trans && (table->flags & NFT_TABLE_INACTIVE))
+		return -ENOENT;
 
 	nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
 	return 0;
@@ -2768,7 +2829,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	if (err < 0)
 		return err;
 
-	err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla);
+	err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
+					 false);
 	if (err < 0)
 		return err;
 
@@ -2833,7 +2895,7 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_ctx ctx;
 	int err;
 
-	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
 	if (err < 0)
 		return err;
 
@@ -3033,7 +3095,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_ctx ctx;
 	int rem, err;
 
-	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
 	if (err < 0)
 		return err;
 
@@ -3111,7 +3173,7 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_ctx ctx;
 	int rem, err;
 
-	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
 	if (err < 0)
 		return err;
 
@@ -3131,7 +3193,7 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
 
 static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 	[NFT_MSG_NEWTABLE] = {
-		.call		= nf_tables_newtable,
+		.call_batch	= nf_tables_newtable,
 		.attr_count	= NFTA_TABLE_MAX,
 		.policy		= nft_table_policy,
 	},
@@ -3141,7 +3203,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.policy		= nft_table_policy,
 	},
 	[NFT_MSG_DELTABLE] = {
-		.call		= nf_tables_deltable,
+		.call_batch	= nf_tables_deltable,
 		.attr_count	= NFTA_TABLE_MAX,
 		.policy		= nft_table_policy,
 	},
@@ -3246,6 +3308,28 @@ static int nf_tables_commit(struct sk_buff *skb)
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
+		case NFT_MSG_NEWTABLE:
+			if (nft_trans_table_update(trans)) {
+				if (!nft_trans_table_enable(trans)) {
+					nf_tables_table_disable(trans->ctx.afi,
+								trans->ctx.table);
+					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+				}
+			} else {
+				trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE;
+			}
+			nf_tables_table_notify(trans->ctx.skb, trans->ctx.nlh,
+					       trans->ctx.table,
+					       NFT_MSG_NEWTABLE,
+					       trans->ctx.afi->family);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELTABLE:
+			nf_tables_table_notify(trans->ctx.skb, trans->ctx.nlh,
+					       trans->ctx.table,
+					       NFT_MSG_DELTABLE,
+					       trans->ctx.afi->family);
+			break;
 		case NFT_MSG_NEWCHAIN:
 			if (nft_trans_chain_update(trans))
 				nft_chain_commit_update(trans);
@@ -3310,6 +3394,9 @@ static int nf_tables_commit(struct sk_buff *skb)
 	/* Now we can safely release unused old rules */
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
+		case NFT_MSG_DELTABLE:
+			nf_tables_table_destroy(&trans->ctx);
+			break;
 		case NFT_MSG_DELCHAIN:
 			nf_tables_chain_destroy(trans->ctx.chain);
 			break;
@@ -3334,6 +3421,23 @@ static int nf_tables_abort(struct sk_buff *skb)
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
+		case NFT_MSG_NEWTABLE:
+			if (nft_trans_table_update(trans)) {
+				if (nft_trans_table_enable(trans)) {
+					nf_tables_table_disable(trans->ctx.afi,
+								trans->ctx.table);
+					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+				}
+				nft_trans_destroy(trans);
+			} else {
+				list_del(&trans->ctx.table->list);
+			}
+			break;
+		case NFT_MSG_DELTABLE:
+			list_add_tail(&trans->ctx.table->list,
+				      &trans->ctx.afi->tables);
+			nft_trans_destroy(trans);
+			break;
 		case NFT_MSG_NEWCHAIN:
 			if (nft_trans_chain_update(trans)) {
 				if (nft_trans_chain_stats(trans))
@@ -3377,6 +3481,9 @@ static int nf_tables_abort(struct sk_buff *skb)
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
+		case NFT_MSG_NEWTABLE:
+			nf_tables_table_destroy(&trans->ctx);
+			break;
 		case NFT_MSG_NEWCHAIN:
 			nf_tables_chain_destroy(trans->ctx.chain);
 			break;
-- 
cgit v1.2.3-71-gd317


From 60319eb1ca351aa36e29d58d2e60ba9a9836265a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 4 Apr 2014 03:36:42 +0200
Subject: netfilter: nf_tables: use new transaction infrastructure to handle
 elements

Leave the set content in consistent state if we fail to load the
batch. Use the new generic transaction infrastructure to achieve
this.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 10 +++++
 net/netfilter/nf_tables_api.c     | 82 ++++++++++++++++++++++++++++++++-------
 2 files changed, 78 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 15bf745f198d..b08f2a941007 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -446,6 +446,16 @@ struct nft_trans_table {
 #define nft_trans_table_enable(trans)	\
 	(((struct nft_trans_table *)trans->data)->enable)
 
+struct nft_trans_elem {
+	struct nft_set		*set;
+	struct nft_set_elem	elem;
+};
+
+#define nft_trans_elem_set(trans)	\
+	(((struct nft_trans_elem *)trans->data)->set)
+#define nft_trans_elem(trans)	\
+	(((struct nft_trans_elem *)trans->data)->elem)
+
 static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
 {
 	return (struct nft_expr *)&rule->data[0];
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3e685dbb2921..cd002935e6b2 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2993,7 +2993,21 @@ err:
 	return err;
 }
 
-static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
+static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
+					      int msg_type,
+					      struct nft_set *set)
+{
+	struct nft_trans *trans;
+
+	trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
+	if (trans == NULL)
+		return NULL;
+
+	nft_trans_elem_set(trans) = set;
+	return trans;
+}
+
+static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			    const struct nlattr *attr)
 {
 	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
@@ -3001,6 +3015,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_set_elem elem;
 	struct nft_set_binding *binding;
 	enum nft_registers dreg;
+	struct nft_trans *trans;
 	int err;
 
 	if (set->size && set->nelems == set->size)
@@ -3068,14 +3083,20 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
 		}
 	}
 
+	trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
+	if (trans == NULL)
+		goto err3;
+
 	err = set->ops->insert(set, &elem);
 	if (err < 0)
-		goto err3;
-	set->nelems++;
+		goto err4;
 
-	nf_tables_setelem_notify(ctx, set, &elem, NFT_MSG_NEWSETELEM, 0);
+	nft_trans_elem(trans) = elem;
+	list_add(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
 
+err4:
+	kfree(trans);
 err3:
 	if (nla[NFTA_SET_ELEM_DATA] != NULL)
 		nft_data_uninit(&elem.data, d2.type);
@@ -3093,7 +3114,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
 	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
-	int rem, err;
+	int rem, err = 0;
 
 	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
 	if (err < 0)
@@ -3115,17 +3136,18 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
 	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
 		err = nft_add_set_elem(&ctx, set, attr);
 		if (err < 0)
-			return err;
+			break;
 	}
-	return 0;
+	return err;
 }
 
-static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
+static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 			   const struct nlattr *attr)
 {
 	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
 	struct nft_data_desc desc;
 	struct nft_set_elem elem;
+	struct nft_trans *trans;
 	int err;
 
 	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
@@ -3149,10 +3171,12 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
 	if (err < 0)
 		goto err2;
 
-	set->ops->remove(set, &elem);
-	set->nelems--;
+	trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
+	if (trans == NULL)
+		goto err2;
 
-	nf_tables_setelem_notify(ctx, set, &elem, NFT_MSG_DELSETELEM, 0);
+	nft_trans_elem(trans) = elem;
+	list_add(&trans->list, &ctx->net->nft.commit_list);
 
 	nft_data_uninit(&elem.key, NFT_DATA_VALUE);
 	if (set->flags & NFT_SET_MAP)
@@ -3171,7 +3195,7 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
 	const struct nlattr *attr;
 	struct nft_set *set;
 	struct nft_ctx ctx;
-	int rem, err;
+	int rem, err = 0;
 
 	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
 	if (err < 0)
@@ -3186,9 +3210,9 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
 	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
 		err = nft_del_setelem(&ctx, set, attr);
 		if (err < 0)
-			return err;
+			break;
 	}
-	return 0;
+	return err;
 }
 
 static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
@@ -3294,6 +3318,7 @@ static int nf_tables_commit(struct sk_buff *skb)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nft_trans *trans, *next;
+	struct nft_set *set;
 
 	/* Bump generation counter, invalidate any dump in progress */
 	net->nft.genctr++;
@@ -3385,6 +3410,25 @@ static int nf_tables_commit(struct sk_buff *skb)
 			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
 					     NFT_MSG_DELSET);
 			break;
+		case NFT_MSG_NEWSETELEM:
+			nft_trans_elem_set(trans)->nelems++;
+			nf_tables_setelem_notify(&trans->ctx,
+						 nft_trans_elem_set(trans),
+						 &nft_trans_elem(trans),
+						 NFT_MSG_NEWSETELEM, 0);
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELSETELEM:
+			nft_trans_elem_set(trans)->nelems--;
+			nf_tables_setelem_notify(&trans->ctx,
+						 nft_trans_elem_set(trans),
+						 &nft_trans_elem(trans),
+						 NFT_MSG_DELSETELEM, 0);
+			set = nft_trans_elem_set(trans);
+			set->ops->get(set, &nft_trans_elem(trans));
+			set->ops->remove(set, &nft_trans_elem(trans));
+			nft_trans_destroy(trans);
+			break;
 		}
 	}
 
@@ -3418,6 +3462,7 @@ static int nf_tables_abort(struct sk_buff *skb)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nft_trans *trans, *next;
+	struct nft_set *set;
 
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
 		switch (trans->msg_type) {
@@ -3473,6 +3518,15 @@ static int nf_tables_abort(struct sk_buff *skb)
 				      &trans->ctx.table->sets);
 			nft_trans_destroy(trans);
 			break;
+		case NFT_MSG_NEWSETELEM:
+			set = nft_trans_elem_set(trans);
+			set->ops->get(set, &nft_trans_elem(trans));
+			set->ops->remove(set, &nft_trans_elem(trans));
+			nft_trans_destroy(trans);
+			break;
+		case NFT_MSG_DELSETELEM:
+			nft_trans_destroy(trans);
+			break;
 		}
 	}
 
-- 
cgit v1.2.3-71-gd317


From 128ad3322ba5de8fa346203c9931d1fdcab8da87 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 9 May 2014 17:14:24 +0200
Subject: netfilter: nf_tables: remove skb and nlh from context structure

Instead of caching the original skbuff that contains the netlink
messages, this stores the netlink message sequence number, the
netlink portID and the report flag. This helps to prepare the
introduction of the object release via call_rcu.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  10 ++--
 net/netfilter/nf_tables_api.c     | 101 +++++++++++++++++---------------------
 2 files changed, 52 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index b08f2a941007..1ed2797fb964 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -72,21 +72,23 @@ static inline void nft_data_debug(const struct nft_data *data)
  *	struct nft_ctx - nf_tables rule/set context
  *
  *	@net: net namespace
- * 	@skb: netlink skb
- * 	@nlh: netlink message header
  * 	@afi: address family info
  * 	@table: the table the chain is contained in
  * 	@chain: the chain the rule is contained in
  *	@nla: netlink attributes
+ *	@portid: netlink portID of the original message
+ *	@seq: netlink sequence number
+ *	@report: notify via unicast netlink message
  */
 struct nft_ctx {
 	struct net			*net;
-	const struct sk_buff		*skb;
-	const struct nlmsghdr		*nlh;
 	struct nft_af_info		*afi;
 	struct nft_table		*table;
 	struct nft_chain		*chain;
 	const struct nlattr * const 	*nla;
+	u32				portid;
+	u32				seq;
+	bool				report;
 };
 
 struct nft_data_desc {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 86d055a51f0b..4147166ad17c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -96,13 +96,14 @@ static void nft_ctx_init(struct nft_ctx *ctx,
 			 struct nft_chain *chain,
 			 const struct nlattr * const *nla)
 {
-	ctx->net   = sock_net(skb->sk);
-	ctx->skb   = skb;
-	ctx->nlh   = nlh;
-	ctx->afi   = afi;
-	ctx->table = table;
-	ctx->chain = chain;
-	ctx->nla   = nla;
+	ctx->net	= sock_net(skb->sk);
+	ctx->afi	= afi;
+	ctx->table	= table;
+	ctx->chain	= chain;
+	ctx->nla   	= nla;
+	ctx->portid	= NETLINK_CB(skb).portid;
+	ctx->report	= nlmsg_report(nlh);
+	ctx->seq	= nlh->nlmsg_seq;
 }
 
 static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
@@ -238,14 +239,10 @@ nla_put_failure:
 static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
 {
 	struct sk_buff *skb;
-	u32 portid = NETLINK_CB(ctx->skb).portid;
-	u32 seq = ctx->nlh->nlmsg_seq;
-	struct net *net = sock_net(ctx->skb->sk);
-	bool report;
 	int err;
 
-	report = nlmsg_report(ctx->nlh);
-	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+	if (!ctx->report &&
+	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
 		return 0;
 
 	err = -ENOBUFS;
@@ -253,18 +250,20 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
 	if (skb == NULL)
 		goto err;
 
-	err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
+	err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
 					ctx->afi->family, ctx->table);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto err;
 	}
 
-	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
-			     GFP_KERNEL);
+	err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+			     ctx->report, GFP_KERNEL);
 err:
-	if (err < 0)
-		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	if (err < 0) {
+		nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+				  err);
+	}
 	return err;
 }
 
@@ -721,14 +720,10 @@ nla_put_failure:
 static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
 {
 	struct sk_buff *skb;
-	u32 portid = NETLINK_CB(ctx->skb).portid;
-	struct net *net = sock_net(ctx->skb->sk);
-	u32 seq = ctx->nlh->nlmsg_seq;
-	bool report;
 	int err;
 
-	report = nlmsg_report(ctx->nlh);
-	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+	if (!ctx->report &&
+	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
 		return 0;
 
 	err = -ENOBUFS;
@@ -736,7 +731,7 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
 	if (skb == NULL)
 		goto err;
 
-	err = nf_tables_fill_chain_info(skb, portid, seq, event, 0,
+	err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
 					ctx->afi->family, ctx->table,
 					ctx->chain);
 	if (err < 0) {
@@ -744,11 +739,13 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
 		goto err;
 	}
 
-	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
-			     GFP_KERNEL);
+	err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+			     ctx->report, GFP_KERNEL);
 err:
-	if (err < 0)
-		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	if (err < 0) {
+		nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+				  err);
+	}
 	return err;
 }
 
@@ -1473,16 +1470,11 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx,
 				 const struct nft_rule *rule,
 				 int event)
 {
-	const struct sk_buff *oskb = ctx->skb;
 	struct sk_buff *skb;
-	u32 portid = NETLINK_CB(oskb).portid;
-	struct net *net = sock_net(oskb->sk);
-	u32 seq = ctx->nlh->nlmsg_seq;
-	bool report;
 	int err;
 
-	report = nlmsg_report(ctx->nlh);
-	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+	if (!ctx->report &&
+	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
 		return 0;
 
 	err = -ENOBUFS;
@@ -1490,7 +1482,7 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx,
 	if (skb == NULL)
 		goto err;
 
-	err = nf_tables_fill_rule_info(skb, portid, seq, event, 0,
+	err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
 				       ctx->afi->family, ctx->table,
 				       ctx->chain, rule);
 	if (err < 0) {
@@ -1498,11 +1490,13 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
-			     GFP_KERNEL);
+	err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+			     ctx->report, GFP_KERNEL);
 err:
-	if (err < 0)
-		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	if (err < 0) {
+		nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+				  err);
+	}
 	return err;
 }
 
@@ -2141,8 +2135,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 	struct nfgenmsg *nfmsg;
 	struct nlmsghdr *nlh;
 	struct nlattr *desc;
-	u32 portid = NETLINK_CB(ctx->skb).portid;
-	u32 seq = ctx->nlh->nlmsg_seq;
+	u32 portid = ctx->portid;
+	u32 seq = ctx->seq;
 
 	event |= NFNL_SUBSYS_NFTABLES << 8;
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
@@ -2194,12 +2188,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
 				int event)
 {
 	struct sk_buff *skb;
-	u32 portid = NETLINK_CB(ctx->skb).portid;
-	bool report;
+	u32 portid = ctx->portid;
 	int err;
 
-	report = nlmsg_report(ctx->nlh);
-	if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
+	if (!ctx->report &&
+	    !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
 		return 0;
 
 	err = -ENOBUFS;
@@ -2213,8 +2206,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report,
-			     GFP_KERNEL);
+	err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES,
+			     ctx->report, GFP_KERNEL);
 err:
 	if (err < 0)
 		nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
@@ -2956,14 +2949,12 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
 				    const struct nft_set_elem *elem,
 				    int event, u16 flags)
 {
-	const struct sk_buff *oskb = ctx->skb;
-	struct net *net = sock_net(oskb->sk);
-	u32 portid = NETLINK_CB(oskb).portid;
-	bool report = nlmsg_report(ctx->nlh);
+	struct net *net = ctx->net;
+	u32 portid = ctx->portid;
 	struct sk_buff *skb;
 	int err;
 
-	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+	if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
 		return 0;
 
 	err = -ENOBUFS;
@@ -2978,7 +2969,7 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
 			     GFP_KERNEL);
 err:
 	if (err < 0)
-- 
cgit v1.2.3-71-gd317


From c7c32e72cbe23cea97c5d87ffcf6e23cc1ec1a65 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 10 Apr 2014 00:31:10 +0200
Subject: netfilter: nf_tables: defer all object release via rcu

Now that all objects are released in the reverse order via the
transaction infrastructure, we can enqueue the release via
call_rcu to save one synchronize_rcu. For small rule-sets loaded
via nft -f, it now takes around 50ms less here.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  2 +
 net/netfilter/nf_tables_api.c     | 93 +++++++++++++++++++++++----------------
 2 files changed, 56 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 1ed2797fb964..7ee6ce6564ae 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -393,12 +393,14 @@ struct nft_rule {
 /**
  *	struct nft_trans - nf_tables object update in transaction
  *
+ *	@rcu_head: rcu head to defer release of transaction data
  *	@list: used internally
  *	@msg_type: message type
  *	@ctx: transaction context
  *	@data: internal information related to the transaction
  */
 struct nft_trans {
+	struct rcu_head			rcu_head;
 	struct list_head		list;
 	int				msg_type;
 	struct nft_ctx			ctx;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4147166ad17c..e171c635d08c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3298,6 +3298,30 @@ static void nft_chain_commit_update(struct nft_trans *trans)
 	}
 }
 
+/* Schedule objects for release via rcu to make sure no packets are accesing
+ * removed rules.
+ */
+static void nf_tables_commit_release_rcu(struct rcu_head *rt)
+{
+	struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+	switch (trans->msg_type) {
+	case NFT_MSG_DELTABLE:
+		nf_tables_table_destroy(&trans->ctx);
+		break;
+	case NFT_MSG_DELCHAIN:
+		nf_tables_chain_destroy(trans->ctx.chain);
+		break;
+	case NFT_MSG_DELRULE:
+		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+		break;
+	case NFT_MSG_DELSET:
+		nft_set_destroy(nft_trans_set(trans));
+		break;
+	}
+	kfree(trans);
+}
+
 static int nf_tables_commit(struct sk_buff *skb)
 {
 	struct net *net = sock_net(skb->sk);
@@ -3397,32 +3421,39 @@ static int nf_tables_commit(struct sk_buff *skb)
 		}
 	}
 
-	/* Make sure we don't see any packet traversing old rules */
-	synchronize_rcu();
-
-	/* Now we can safely release unused old rules */
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
-		switch (trans->msg_type) {
-		case NFT_MSG_DELTABLE:
-			nf_tables_table_destroy(&trans->ctx);
-			break;
-		case NFT_MSG_DELCHAIN:
-			nf_tables_chain_destroy(trans->ctx.chain);
-			break;
-		case NFT_MSG_DELRULE:
-			nf_tables_rule_destroy(&trans->ctx,
-					       nft_trans_rule(trans));
-			break;
-		case NFT_MSG_DELSET:
-			nft_set_destroy(nft_trans_set(trans));
-			break;
-		}
-		nft_trans_destroy(trans);
+		list_del(&trans->list);
+		trans->ctx.nla = NULL;
+		call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
 	}
 
 	return 0;
 }
 
+/* Schedule objects for release via rcu to make sure no packets are accesing
+ * aborted rules.
+ */
+static void nf_tables_abort_release_rcu(struct rcu_head *rt)
+{
+	struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
+
+	switch (trans->msg_type) {
+	case NFT_MSG_NEWTABLE:
+		nf_tables_table_destroy(&trans->ctx);
+		break;
+	case NFT_MSG_NEWCHAIN:
+		nf_tables_chain_destroy(trans->ctx.chain);
+		break;
+	case NFT_MSG_NEWRULE:
+		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
+		break;
+	case NFT_MSG_NEWSET:
+		nft_set_destroy(nft_trans_set(trans));
+		break;
+	}
+	kfree(trans);
+}
+
 static int nf_tables_abort(struct sk_buff *skb)
 {
 	struct net *net = sock_net(skb->sk);
@@ -3495,26 +3526,10 @@ static int nf_tables_abort(struct sk_buff *skb)
 		}
 	}
 
-	/* Make sure we don't see any packet accessing aborted rules */
-	synchronize_rcu();
-
 	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
-		switch (trans->msg_type) {
-		case NFT_MSG_NEWTABLE:
-			nf_tables_table_destroy(&trans->ctx);
-			break;
-		case NFT_MSG_NEWCHAIN:
-			nf_tables_chain_destroy(trans->ctx.chain);
-			break;
-		case NFT_MSG_NEWRULE:
-			nf_tables_rule_destroy(&trans->ctx,
-					       nft_trans_rule(trans));
-			break;
-		case NFT_MSG_NEWSET:
-			nft_set_destroy(nft_trans_set(trans));
-			break;
-		}
-		nft_trans_destroy(trans);
+		list_del(&trans->list);
+		trans->ctx.nla = NULL;
+		call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu);
 	}
 
 	return 0;
-- 
cgit v1.2.3-71-gd317


From a1b73fc194e73ed33c8b77bf09374cb05b58151b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 1 May 2014 16:51:04 +0200
Subject: scsi: reintroduce scsi_driver.init_command

Instead of letting the ULD play games with the prep_fn move back to
the model of a central prep_fn with a callback to the ULD.  This
already cleans up and shortens the code by itself, and will be required
to properly support blk-mq in the SCSI midlayer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Nicholas Bellinger <nab@linux-iscsi.org>
Reviewed-by: Mike Christie <michaelc@cs.wisc.edu>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/scsi_lib.c    | 66 +++++++++++++++++++++++++++-------------------
 drivers/scsi/sd.c          | 44 ++++++++++---------------------
 drivers/scsi/sr.c          | 19 +++++--------
 include/scsi/scsi_driver.h |  9 +++----
 4 files changed, 62 insertions(+), 76 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 045822befad9..9f841df6add8 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1073,15 +1073,7 @@ static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev,
 
 int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 {
-	struct scsi_cmnd *cmd;
-	int ret = scsi_prep_state_check(sdev, req);
-
-	if (ret != BLKPREP_OK)
-		return ret;
-
-	cmd = scsi_get_cmd_from_req(sdev, req);
-	if (unlikely(!cmd))
-		return BLKPREP_DEFER;
+	struct scsi_cmnd *cmd = req->special;
 
 	/*
 	 * BLOCK_PC requests may transfer data, in which case they must
@@ -1125,15 +1117,11 @@ EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd);
  */
 int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
 {
-	struct scsi_cmnd *cmd;
-	int ret = scsi_prep_state_check(sdev, req);
-
-	if (ret != BLKPREP_OK)
-		return ret;
+	struct scsi_cmnd *cmd = req->special;
 
 	if (unlikely(sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh
 			 && sdev->scsi_dh_data->scsi_dh->prep_fn)) {
-		ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req);
+		int ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req);
 		if (ret != BLKPREP_OK)
 			return ret;
 	}
@@ -1143,16 +1131,13 @@ int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
 	 */
 	BUG_ON(!req->nr_phys_segments);
 
-	cmd = scsi_get_cmd_from_req(sdev, req);
-	if (unlikely(!cmd))
-		return BLKPREP_DEFER;
-
 	memset(cmd->cmnd, 0, BLK_MAX_CDB);
 	return scsi_init_io(cmd, GFP_ATOMIC);
 }
 EXPORT_SYMBOL(scsi_setup_fs_cmnd);
 
-int scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
+static int
+scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
 {
 	int ret = BLKPREP_OK;
 
@@ -1204,9 +1189,9 @@ int scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
 	}
 	return ret;
 }
-EXPORT_SYMBOL(scsi_prep_state_check);
 
-int scsi_prep_return(struct request_queue *q, struct request *req, int ret)
+static int
+scsi_prep_return(struct request_queue *q, struct request *req, int ret)
 {
 	struct scsi_device *sdev = q->queuedata;
 
@@ -1237,18 +1222,44 @@ int scsi_prep_return(struct request_queue *q, struct request *req, int ret)
 
 	return ret;
 }
-EXPORT_SYMBOL(scsi_prep_return);
 
-int scsi_prep_fn(struct request_queue *q, struct request *req)
+static int scsi_prep_fn(struct request_queue *q, struct request *req)
 {
 	struct scsi_device *sdev = q->queuedata;
-	int ret = BLKPREP_KILL;
+	struct scsi_cmnd *cmd;
+	int ret;
 
-	if (req->cmd_type == REQ_TYPE_BLOCK_PC)
+	ret = scsi_prep_state_check(sdev, req);
+	if (ret != BLKPREP_OK)
+		goto out;
+
+	cmd = scsi_get_cmd_from_req(sdev, req);
+	if (unlikely(!cmd)) {
+		ret = BLKPREP_DEFER;
+		goto out;
+	}
+
+	if (req->cmd_type == REQ_TYPE_FS)
+		ret = scsi_cmd_to_driver(cmd)->init_command(cmd);
+	else if (req->cmd_type == REQ_TYPE_BLOCK_PC)
 		ret = scsi_setup_blk_pc_cmnd(sdev, req);
+	else
+		ret = BLKPREP_KILL;
+
+out:
 	return scsi_prep_return(q, req, ret);
 }
-EXPORT_SYMBOL(scsi_prep_fn);
+
+static void scsi_unprep_fn(struct request_queue *q, struct request *req)
+{
+	if (req->cmd_type == REQ_TYPE_FS) {
+		struct scsi_cmnd *cmd = req->special;
+		struct scsi_driver *drv = scsi_cmd_to_driver(cmd);
+
+		if (drv->uninit_command)
+			drv->uninit_command(cmd);
+	}
+}
 
 /*
  * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
@@ -1669,6 +1680,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
 		return NULL;
 
 	blk_queue_prep_rq(q, scsi_prep_fn);
+	blk_queue_unprep_rq(q, scsi_unprep_fn);
 	blk_queue_softirq_done(q, scsi_softirq_done);
 	blk_queue_rq_timed_out(q, scsi_times_out);
 	blk_queue_lld_busy(q, scsi_lld_busy);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index efcbcd182863..aa028d8ae774 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -109,6 +109,8 @@ static int sd_suspend_system(struct device *);
 static int sd_suspend_runtime(struct device *);
 static int sd_resume(struct device *);
 static void sd_rescan(struct device *);
+static int sd_init_command(struct scsi_cmnd *SCpnt);
+static void sd_uninit_command(struct scsi_cmnd *SCpnt);
 static int sd_done(struct scsi_cmnd *);
 static int sd_eh_action(struct scsi_cmnd *, int);
 static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer);
@@ -503,6 +505,8 @@ static struct scsi_driver sd_template = {
 		.pm		= &sd_pm_ops,
 	},
 	.rescan			= sd_rescan,
+	.init_command		= sd_init_command,
+	.uninit_command		= sd_uninit_command,
 	.done			= sd_done,
 	.eh_action		= sd_eh_action,
 };
@@ -838,9 +842,9 @@ static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq)
 	return scsi_setup_blk_pc_cmnd(sdp, rq);
 }
 
-static void sd_unprep_fn(struct request_queue *q, struct request *rq)
+static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 {
-	struct scsi_cmnd *SCpnt = rq->special;
+	struct request *rq = SCpnt->request;
 
 	if (rq->cmd_flags & REQ_DISCARD) {
 		free_page((unsigned long)rq->buffer);
@@ -853,18 +857,10 @@ static void sd_unprep_fn(struct request_queue *q, struct request *rq)
 	}
 }
 
-/**
- *	sd_prep_fn - build a scsi (read or write) command from
- *	information in the request structure.
- *	@SCpnt: pointer to mid-level's per scsi command structure that
- *	contains request and into which the scsi command is written
- *
- *	Returns 1 if successful and 0 if error (or cannot be done now).
- **/
-static int sd_prep_fn(struct request_queue *q, struct request *rq)
+static int sd_init_command(struct scsi_cmnd *SCpnt)
 {
-	struct scsi_cmnd *SCpnt;
-	struct scsi_device *sdp = q->queuedata;
+	struct request *rq = SCpnt->request;
+	struct scsi_device *sdp = SCpnt->device;
 	struct gendisk *disk = rq->rq_disk;
 	struct scsi_disk *sdkp;
 	sector_t block = blk_rq_pos(rq);
@@ -886,12 +882,6 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	} else if (rq->cmd_flags & REQ_FLUSH) {
 		ret = scsi_setup_flush_cmnd(sdp, rq);
 		goto out;
-	} else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
-		ret = scsi_setup_blk_pc_cmnd(sdp, rq);
-		goto out;
-	} else if (rq->cmd_type != REQ_TYPE_FS) {
-		ret = BLKPREP_KILL;
-		goto out;
 	}
 	ret = scsi_setup_fs_cmnd(sdp, rq);
 	if (ret != BLKPREP_OK)
@@ -903,11 +893,10 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	 * is used for a killable error condition */
 	ret = BLKPREP_KILL;
 
-	SCSI_LOG_HLQUEUE(1, scmd_printk(KERN_INFO, SCpnt,
-					"sd_prep_fn: block=%llu, "
-					"count=%d\n",
-					(unsigned long long)block,
-					this_count));
+	SCSI_LOG_HLQUEUE(1,
+		scmd_printk(KERN_INFO, SCpnt,
+			"%s: block=%llu, count=%d\n",
+			__func__, (unsigned long long)block, this_count));
 
 	if (!sdp || !scsi_device_online(sdp) ||
 	    block + blk_rq_sectors(rq) > get_capacity(disk)) {
@@ -1127,7 +1116,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	 */
 	ret = BLKPREP_OK;
  out:
-	return scsi_prep_return(q, rq, ret);
+	return ret;
 }
 
 /**
@@ -2878,9 +2867,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
 
 	sd_revalidate_disk(gd);
 
-	blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
-	blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn);
-
 	gd->driverfs_dev = &sdp->sdev_gendev;
 	gd->flags = GENHD_FL_EXT_DEVT;
 	if (sdp->removable) {
@@ -3028,8 +3014,6 @@ static int sd_remove(struct device *dev)
 
 	async_synchronize_full_domain(&scsi_sd_pm_domain);
 	async_synchronize_full_domain(&scsi_sd_probe_domain);
-	blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn);
-	blk_queue_unprep_rq(sdkp->device->request_queue, NULL);
 	device_del(&sdkp->dev);
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 40d85929aefe..93cbd36c990b 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -79,6 +79,7 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_WORM);
 static DEFINE_MUTEX(sr_mutex);
 static int sr_probe(struct device *);
 static int sr_remove(struct device *);
+static int sr_init_command(struct scsi_cmnd *SCpnt);
 static int sr_done(struct scsi_cmnd *);
 static int sr_runtime_suspend(struct device *dev);
 
@@ -94,6 +95,7 @@ static struct scsi_driver sr_template = {
 		.remove		= sr_remove,
 		.pm		= &sr_pm_ops,
 	},
+	.init_command		= sr_init_command,
 	.done			= sr_done,
 };
 
@@ -378,21 +380,14 @@ static int sr_done(struct scsi_cmnd *SCpnt)
 	return good_bytes;
 }
 
-static int sr_prep_fn(struct request_queue *q, struct request *rq)
+static int sr_init_command(struct scsi_cmnd *SCpnt)
 {
 	int block = 0, this_count, s_size;
 	struct scsi_cd *cd;
-	struct scsi_cmnd *SCpnt;
-	struct scsi_device *sdp = q->queuedata;
+	struct request *rq = SCpnt->request;
+	struct scsi_device *sdp = SCpnt->device;
 	int ret;
 
-	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
-		ret = scsi_setup_blk_pc_cmnd(sdp, rq);
-		goto out;
-	} else if (rq->cmd_type != REQ_TYPE_FS) {
-		ret = BLKPREP_KILL;
-		goto out;
-	}
 	ret = scsi_setup_fs_cmnd(sdp, rq);
 	if (ret != BLKPREP_OK)
 		goto out;
@@ -517,7 +512,7 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq)
 	 */
 	ret = BLKPREP_OK;
  out:
-	return scsi_prep_return(q, rq, ret);
+	return ret;
 }
 
 static int sr_block_open(struct block_device *bdev, fmode_t mode)
@@ -718,7 +713,6 @@ static int sr_probe(struct device *dev)
 
 	/* FIXME: need to handle a get_capabilities failure properly ?? */
 	get_capabilities(cd);
-	blk_queue_prep_rq(sdev->request_queue, sr_prep_fn);
 	sr_vendor_init(cd);
 
 	disk->driverfs_dev = &sdev->sdev_gendev;
@@ -993,7 +987,6 @@ static int sr_remove(struct device *dev)
 
 	scsi_autopm_get_device(cd->device);
 
-	blk_queue_prep_rq(cd->device->request_queue, scsi_prep_fn);
 	del_gendisk(cd->disk);
 
 	mutex_lock(&sr_ref_mutex);
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 20fdfc2526ad..36c4114ed9bc 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -4,17 +4,17 @@
 #include <linux/device.h>
 
 struct module;
+struct request;
 struct scsi_cmnd;
 struct scsi_device;
-struct request;
-struct request_queue;
-
 
 struct scsi_driver {
 	struct module		*owner;
 	struct device_driver	gendrv;
 
 	void (*rescan)(struct device *);
+	int (*init_command)(struct scsi_cmnd *);
+	void (*uninit_command)(struct scsi_cmnd *);
 	int (*done)(struct scsi_cmnd *);
 	int (*eh_action)(struct scsi_cmnd *, int);
 };
@@ -31,8 +31,5 @@ extern int scsi_register_interface(struct class_interface *);
 
 int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req);
 int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req);
-int scsi_prep_state_check(struct scsi_device *sdev, struct request *req);
-int scsi_prep_return(struct request_queue *q, struct request *req, int ret);
-int scsi_prep_fn(struct request_queue *, struct request *);
 
 #endif /* _SCSI_SCSI_DRIVER_H */
-- 
cgit v1.2.3-71-gd317


From c2e4323b3316b9daec7824802ca0dd9eae4317c5 Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Thu, 15 May 2014 20:18:09 +0300
Subject: cfg80211: add documentation for max_num_csa_counters

Move the comment in the structure to a description of the
max_num_csa_counters field in the docbook area.

This fixes a warning when building htmldocs (at least):

 Warning(include/net/cfg80211.h:3064): No description found for parameter 'max_num_csa_counters'

Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 447cb58f0d77..955fdec5a1b6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2936,6 +2936,11 @@ struct wiphy_vendor_command {
  *	(including P2P GO) or 0 to indicate no such limit is advertised. The
  *	driver is allowed to advertise a theoretical limit that it can reach in
  *	some cases, but may not always reach.
+ *
+ * @max_num_csa_counters: Number of supported csa_counters in beacons
+ *	and probe responses.  This value should be set if the driver
+ *	wishes to limit the number of csa counters. Default (0) means
+ *	infinite.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -3053,11 +3058,6 @@ struct wiphy {
 
 	u16 max_ap_assoc_sta;
 
-	/*
-	 * Number of supported csa_counters in beacons and probe responses.
-	 * This value should be set if the driver wishes to limit the number of
-	 * csa counters. Default (0) means infinite.
-	 */
 	u8 max_num_csa_counters;
 
 	char priv[0] __aligned(NETDEV_ALIGN);
-- 
cgit v1.2.3-71-gd317


From 8d77ec856200df31623074de3fde44519df7725b Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Thu, 15 May 2014 20:32:08 +0300
Subject: mac80211: fix csa_counter_offs argument name in docbook

The csa_counter_offs was erroneously described as csa_offs in
the docbook section.

This fixes two warnings when making htmldocs (at least):

Warning(include/net/mac80211.h:3428): No description found for parameter 'csa_counter_offs[IEEE80211_MAX_CSA_COUNTERS_NUM]'
Warning(include/net/mac80211.h:3428): Excess struct/union/enum/typedef member 'csa_offs' description in 'ieee80211_mutable_offsets'

Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 982d2cd80166..a34f26a4ed18 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3417,8 +3417,9 @@ void ieee80211_report_low_ack(struct ieee80211_sta *sta, u32 num_packets);
  * struct ieee80211_mutable_offsets - mutable beacon offsets
  * @tim_offset: position of TIM element
  * @tim_length: size of TIM element
- * @csa_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets to CSA counters.
- *	This array can contain zero values which should be ignored.
+ * @csa_counter_offs: array of IEEE80211_MAX_CSA_COUNTERS_NUM offsets
+ *	to CSA counters.  This array can contain zero values which
+ *	should be ignored.
  */
 struct ieee80211_mutable_offsets {
 	u16 tim_offset;
-- 
cgit v1.2.3-71-gd317


From 3b3a0162fade6b83d5c83efafcd5adb9e4537047 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 19 May 2014 17:19:31 +0200
Subject: cfg80211: constify MAC addresses in cfg80211 ops

This propagates through all the drivers and mac80211.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath6kl/cfg80211.c         |  7 ++--
 drivers/net/wireless/ath/wil6210/cfg80211.c        |  4 +-
 drivers/net/wireless/ath/wil6210/main.c            |  4 +-
 drivers/net/wireless/ath/wil6210/wil6210.h         |  2 +-
 .../net/wireless/brcm80211/brcmfmac/wl_cfg80211.c  |  6 +--
 drivers/net/wireless/libertas/cfg.c                |  2 +-
 drivers/net/wireless/mwifiex/11n.h                 |  2 +-
 drivers/net/wireless/mwifiex/cfg80211.c            | 19 +++++-----
 drivers/net/wireless/mwifiex/main.h                | 22 +++++------
 drivers/net/wireless/mwifiex/tdls.c                | 44 ++++++++++++----------
 drivers/net/wireless/mwifiex/util.c                |  6 +--
 drivers/net/wireless/mwifiex/wmm.c                 | 10 ++---
 drivers/net/wireless/mwifiex/wmm.h                 |  5 ++-
 drivers/net/wireless/rndis_wlan.c                  |  4 +-
 drivers/staging/wlan-ng/cfg80211.c                 |  2 +-
 include/net/cfg80211.h                             | 29 +++++++-------
 net/mac80211/cfg.c                                 | 24 ++++++------
 net/mac80211/ieee80211_i.h                         |  4 +-
 net/mac80211/tdls.c                                | 12 +++---
 19 files changed, 106 insertions(+), 102 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index c2c6f4604958..1fffbde4b01e 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -1759,7 +1759,7 @@ static bool is_rate_ht40(s32 rate, u8 *mcs, bool *sgi)
 }
 
 static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev,
-			      u8 *mac, struct station_info *sinfo)
+			      const u8 *mac, struct station_info *sinfo)
 {
 	struct ath6kl *ar = ath6kl_priv(dev);
 	struct ath6kl_vif *vif = netdev_priv(dev);
@@ -2975,7 +2975,7 @@ static int ath6kl_stop_ap(struct wiphy *wiphy, struct net_device *dev)
 static const u8 bcast_addr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 static int ath6kl_del_station(struct wiphy *wiphy, struct net_device *dev,
-			      u8 *mac)
+			      const u8 *mac)
 {
 	struct ath6kl *ar = ath6kl_priv(dev);
 	struct ath6kl_vif *vif = netdev_priv(dev);
@@ -2986,7 +2986,8 @@ static int ath6kl_del_station(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev,
-				 u8 *mac, struct station_parameters *params)
+				 const u8 *mac,
+				 struct station_parameters *params)
 {
 	struct ath6kl *ar = ath6kl_priv(dev);
 	struct ath6kl_vif *vif = netdev_priv(dev);
diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 4806a49cb61b..6e699d050d1e 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -172,7 +172,7 @@ static int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid,
 
 static int wil_cfg80211_get_station(struct wiphy *wiphy,
 				    struct net_device *ndev,
-				    u8 *mac, struct station_info *sinfo)
+				    const u8 *mac, struct station_info *sinfo)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 	int rc;
@@ -671,7 +671,7 @@ static int wil_cfg80211_stop_ap(struct wiphy *wiphy,
 }
 
 static int wil_cfg80211_del_station(struct wiphy *wiphy,
-				    struct net_device *dev, u8 *mac)
+				    struct net_device *dev, const u8 *mac)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 
diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index 95f4efe9ef37..a6b1d5872786 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c
@@ -81,7 +81,7 @@ static void wil_disconnect_cid(struct wil6210_priv *wil, int cid)
 	memset(&sta->stats, 0, sizeof(sta->stats));
 }
 
-static void _wil6210_disconnect(struct wil6210_priv *wil, void *bssid)
+static void _wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid)
 {
 	int cid = -ENOENT;
 	struct net_device *ndev = wil_to_ndev(wil);
@@ -252,7 +252,7 @@ int wil_priv_init(struct wil6210_priv *wil)
 	return 0;
 }
 
-void wil6210_disconnect(struct wil6210_priv *wil, void *bssid)
+void wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid)
 {
 	del_timer_sync(&wil->connect_timer);
 	_wil6210_disconnect(wil, bssid);
diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index 2a2dec75f026..6b353be64579 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -507,7 +507,7 @@ void wil_wdev_free(struct wil6210_priv *wil);
 int wmi_set_mac_address(struct wil6210_priv *wil, void *addr);
 int wmi_pcp_start(struct wil6210_priv *wil, int bi, u8 wmi_nettype, u8 chan);
 int wmi_pcp_stop(struct wil6210_priv *wil);
-void wil6210_disconnect(struct wil6210_priv *wil, void *bssid);
+void wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid);
 
 int wil_rx_init(struct wil6210_priv *wil);
 void wil_rx_fini(struct wil6210_priv *wil);
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c
index afb3d15e38ff..befa9c04166d 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c
@@ -2182,7 +2182,7 @@ brcmf_cfg80211_config_default_mgmt_key(struct wiphy *wiphy,
 
 static s32
 brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
-			   u8 *mac, struct station_info *sinfo)
+			   const u8 *mac, struct station_info *sinfo)
 {
 	struct brcmf_if *ifp = netdev_priv(ndev);
 	struct brcmf_cfg80211_profile *profile = &ifp->vif->profile;
@@ -3975,7 +3975,7 @@ brcmf_cfg80211_change_beacon(struct wiphy *wiphy, struct net_device *ndev,
 
 static int
 brcmf_cfg80211_del_station(struct wiphy *wiphy, struct net_device *ndev,
-			   u8 *mac)
+			   const u8 *mac)
 {
 	struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy);
 	struct brcmf_scb_val_le scbval;
@@ -4203,7 +4203,7 @@ static int brcmf_convert_nl80211_tdls_oper(enum nl80211_tdls_operation oper)
 }
 
 static int brcmf_cfg80211_tdls_oper(struct wiphy *wiphy,
-				    struct net_device *ndev, u8 *peer,
+				    struct net_device *ndev, const u8 *peer,
 				    enum nl80211_tdls_operation oper)
 {
 	struct brcmf_if *ifp;
diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c
index 54e344aed6e0..e4d7031df05c 100644
--- a/drivers/net/wireless/libertas/cfg.c
+++ b/drivers/net/wireless/libertas/cfg.c
@@ -1610,7 +1610,7 @@ static int lbs_cfg_del_key(struct wiphy *wiphy, struct net_device *netdev,
  */
 
 static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev,
-			      u8 *mac, struct station_info *sinfo)
+			       const u8 *mac, struct station_info *sinfo)
 {
 	struct lbs_private *priv = wiphy_priv(wiphy);
 	s8 signal, noise;
diff --git a/drivers/net/wireless/mwifiex/11n.h b/drivers/net/wireless/mwifiex/11n.h
index 40b007a00f4b..0a46aab24214 100644
--- a/drivers/net/wireless/mwifiex/11n.h
+++ b/drivers/net/wireless/mwifiex/11n.h
@@ -199,7 +199,7 @@ static inline int mwifiex_is_sta_11n_enabled(struct mwifiex_private *priv,
 }
 
 static inline u8
-mwifiex_tdls_peer_11n_enabled(struct mwifiex_private *priv, u8 *ra)
+mwifiex_tdls_peer_11n_enabled(struct mwifiex_private *priv, const u8 *ra)
 {
 	struct mwifiex_sta_node *node = mwifiex_get_sta_entry(priv, ra);
 	if (node)
diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index 21ee27ab7b74..e95dec91a561 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -994,7 +994,7 @@ mwifiex_dump_station_info(struct mwifiex_private *priv,
  */
 static int
 mwifiex_cfg80211_get_station(struct wiphy *wiphy, struct net_device *dev,
-			     u8 *mac, struct station_info *sinfo)
+			     const u8 *mac, struct station_info *sinfo)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 
@@ -1270,7 +1270,7 @@ static int mwifiex_cfg80211_change_beacon(struct wiphy *wiphy,
  */
 static int
 mwifiex_cfg80211_del_station(struct wiphy *wiphy, struct net_device *dev,
-			     u8 *mac)
+			     const u8 *mac)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 	struct mwifiex_sta_node *sta_node;
@@ -2629,7 +2629,7 @@ static int mwifiex_cfg80211_set_coalesce(struct wiphy *wiphy,
  */
 static int
 mwifiex_cfg80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
-			   u8 *peer, u8 action_code, u8 dialog_token,
+			   const u8 *peer, u8 action_code, u8 dialog_token,
 			   u16 status_code, u32 peer_capability,
 			   const u8 *extra_ies, size_t extra_ies_len)
 {
@@ -2701,7 +2701,7 @@ mwifiex_cfg80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
 
 static int
 mwifiex_cfg80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
-			   u8 *peer, enum nl80211_tdls_operation action)
+			   const u8 *peer, enum nl80211_tdls_operation action)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 
@@ -2748,9 +2748,8 @@ mwifiex_cfg80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int
-mwifiex_cfg80211_add_station(struct wiphy *wiphy,
-			     struct net_device *dev,
-			     u8 *mac, struct station_parameters *params)
+mwifiex_cfg80211_add_station(struct wiphy *wiphy, struct net_device *dev,
+			     const u8 *mac, struct station_parameters *params)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 
@@ -2765,9 +2764,9 @@ mwifiex_cfg80211_add_station(struct wiphy *wiphy,
 }
 
 static int
-mwifiex_cfg80211_change_station(struct wiphy *wiphy,
-				struct net_device *dev,
-				u8 *mac, struct station_parameters *params)
+mwifiex_cfg80211_change_station(struct wiphy *wiphy, struct net_device *dev,
+				const u8 *mac,
+				struct station_parameters *params)
 {
 	int ret;
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h
index d53e1e8c9467..86c6b8cdec22 100644
--- a/drivers/net/wireless/mwifiex/main.h
+++ b/drivers/net/wireless/mwifiex/main.h
@@ -910,8 +910,6 @@ int mwifiex_handle_uap_rx_forward(struct mwifiex_private *priv,
 				  struct sk_buff *skb);
 int mwifiex_process_sta_event(struct mwifiex_private *);
 int mwifiex_process_uap_event(struct mwifiex_private *);
-struct mwifiex_sta_node *
-mwifiex_get_sta_entry(struct mwifiex_private *priv, u8 *mac);
 void mwifiex_delete_all_station_list(struct mwifiex_private *priv);
 void *mwifiex_process_sta_txpd(struct mwifiex_private *, struct sk_buff *skb);
 void *mwifiex_process_uap_txpd(struct mwifiex_private *, struct sk_buff *skb);
@@ -1220,26 +1218,26 @@ void mwifiex_dnld_txpwr_table(struct mwifiex_private *priv);
 extern const struct ethtool_ops mwifiex_ethtool_ops;
 
 void mwifiex_del_all_sta_list(struct mwifiex_private *priv);
-void mwifiex_del_sta_entry(struct mwifiex_private *priv, u8 *mac);
+void mwifiex_del_sta_entry(struct mwifiex_private *priv, const u8 *mac);
 void
 mwifiex_set_sta_ht_cap(struct mwifiex_private *priv, const u8 *ies,
 		       int ies_len, struct mwifiex_sta_node *node);
 struct mwifiex_sta_node *
-mwifiex_add_sta_entry(struct mwifiex_private *priv, u8 *mac);
+mwifiex_add_sta_entry(struct mwifiex_private *priv, const u8 *mac);
 struct mwifiex_sta_node *
-mwifiex_get_sta_entry(struct mwifiex_private *priv, u8 *mac);
-int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, u8 *peer,
+mwifiex_get_sta_entry(struct mwifiex_private *priv, const u8 *mac);
+int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, const u8 *peer,
 				 u8 action_code, u8 dialog_token,
 				 u16 status_code, const u8 *extra_ies,
 				 size_t extra_ies_len);
-int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv,
-				 u8 *peer, u8 action_code, u8 dialog_token,
-				 u16 status_code, const u8 *extra_ies,
-				 size_t extra_ies_len);
+int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv, const u8 *peer,
+				   u8 action_code, u8 dialog_token,
+				   u16 status_code, const u8 *extra_ies,
+				   size_t extra_ies_len);
 void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv,
 				       u8 *buf, int len);
-int mwifiex_tdls_oper(struct mwifiex_private *priv, u8 *peer, u8 action);
-int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, u8 *mac);
+int mwifiex_tdls_oper(struct mwifiex_private *priv, const u8 *peer, u8 action);
+int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, const u8 *mac);
 void mwifiex_disable_all_tdls_links(struct mwifiex_private *priv);
 bool mwifiex_is_bss_in_11ac_mode(struct mwifiex_private *priv);
 u8 mwifiex_get_center_freq_index(struct mwifiex_private *priv, u8 band,
diff --git a/drivers/net/wireless/mwifiex/tdls.c b/drivers/net/wireless/mwifiex/tdls.c
index 97662a1ba58c..49c8a2c64eeb 100644
--- a/drivers/net/wireless/mwifiex/tdls.c
+++ b/drivers/net/wireless/mwifiex/tdls.c
@@ -25,8 +25,8 @@
 #define TDLS_RESP_FIX_LEN     8
 #define TDLS_CONFIRM_FIX_LEN  6
 
-static void
-mwifiex_restore_tdls_packets(struct mwifiex_private *priv, u8 *mac, u8 status)
+static void mwifiex_restore_tdls_packets(struct mwifiex_private *priv,
+					 const u8 *mac, u8 status)
 {
 	struct mwifiex_ra_list_tbl *ra_list;
 	struct list_head *tid_list;
@@ -84,7 +84,8 @@ mwifiex_restore_tdls_packets(struct mwifiex_private *priv, u8 *mac, u8 status)
 	return;
 }
 
-static void mwifiex_hold_tdls_packets(struct mwifiex_private *priv, u8 *mac)
+static void mwifiex_hold_tdls_packets(struct mwifiex_private *priv,
+				      const u8 *mac)
 {
 	struct mwifiex_ra_list_tbl *ra_list;
 	struct list_head *ra_list_head;
@@ -186,7 +187,7 @@ static int mwifiex_tdls_add_vht_capab(struct mwifiex_private *priv,
 }
 
 static int mwifiex_tdls_add_vht_oper(struct mwifiex_private *priv,
-				     u8 *mac, struct sk_buff *skb)
+				     const u8 *mac, struct sk_buff *skb)
 {
 	struct mwifiex_bssdescriptor *bss_desc;
 	struct ieee80211_vht_operation *vht_oper;
@@ -325,8 +326,9 @@ static void mwifiex_tdls_add_qos_capab(struct sk_buff *skb)
 }
 
 static int mwifiex_prep_tdls_encap_data(struct mwifiex_private *priv,
-			     u8 *peer, u8 action_code, u8 dialog_token,
-			     u16 status_code, struct sk_buff *skb)
+					const u8 *peer, u8 action_code,
+					u8 dialog_token,
+					u16 status_code, struct sk_buff *skb)
 {
 	struct ieee80211_tdls_data *tf;
 	int ret;
@@ -453,7 +455,8 @@ static int mwifiex_prep_tdls_encap_data(struct mwifiex_private *priv,
 }
 
 static void
-mwifiex_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, u8 *peer, u8 *bssid)
+mwifiex_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr,
+			 const u8 *peer, const u8 *bssid)
 {
 	struct ieee80211_tdls_lnkie *lnkid;
 
@@ -467,8 +470,8 @@ mwifiex_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr, u8 *peer, u8 *bssid)
 	memcpy(lnkid->resp_sta, peer, ETH_ALEN);
 }
 
-int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv,
-				 u8 *peer, u8 action_code, u8 dialog_token,
+int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv, const u8 *peer,
+				 u8 action_code, u8 dialog_token,
 				 u16 status_code, const u8 *extra_ies,
 				 size_t extra_ies_len)
 {
@@ -560,7 +563,8 @@ int mwifiex_send_tdls_data_frame(struct mwifiex_private *priv,
 }
 
 static int
-mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv, u8 *peer,
+mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv,
+				    const u8 *peer,
 				    u8 action_code, u8 dialog_token,
 				    u16 status_code, struct sk_buff *skb)
 {
@@ -638,10 +642,10 @@ mwifiex_construct_tdls_action_frame(struct mwifiex_private *priv, u8 *peer,
 	return 0;
 }
 
-int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv,
-				 u8 *peer, u8 action_code, u8 dialog_token,
-				 u16 status_code, const u8 *extra_ies,
-				 size_t extra_ies_len)
+int mwifiex_send_tdls_action_frame(struct mwifiex_private *priv, const u8 *peer,
+				   u8 action_code, u8 dialog_token,
+				   u16 status_code, const u8 *extra_ies,
+				   size_t extra_ies_len)
 {
 	struct sk_buff *skb;
 	struct mwifiex_txinfo *tx_info;
@@ -848,7 +852,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv,
 }
 
 static int
-mwifiex_tdls_process_config_link(struct mwifiex_private *priv, u8 *peer)
+mwifiex_tdls_process_config_link(struct mwifiex_private *priv, const u8 *peer)
 {
 	struct mwifiex_sta_node *sta_ptr;
 	struct mwifiex_ds_tdls_oper tdls_oper;
@@ -869,7 +873,7 @@ mwifiex_tdls_process_config_link(struct mwifiex_private *priv, u8 *peer)
 }
 
 static int
-mwifiex_tdls_process_create_link(struct mwifiex_private *priv, u8 *peer)
+mwifiex_tdls_process_create_link(struct mwifiex_private *priv, const u8 *peer)
 {
 	struct mwifiex_sta_node *sta_ptr;
 	struct mwifiex_ds_tdls_oper tdls_oper;
@@ -896,7 +900,7 @@ mwifiex_tdls_process_create_link(struct mwifiex_private *priv, u8 *peer)
 }
 
 static int
-mwifiex_tdls_process_disable_link(struct mwifiex_private *priv, u8 *peer)
+mwifiex_tdls_process_disable_link(struct mwifiex_private *priv, const u8 *peer)
 {
 	struct mwifiex_sta_node *sta_ptr;
 	struct mwifiex_ds_tdls_oper tdls_oper;
@@ -925,7 +929,7 @@ mwifiex_tdls_process_disable_link(struct mwifiex_private *priv, u8 *peer)
 }
 
 static int
-mwifiex_tdls_process_enable_link(struct mwifiex_private *priv, u8 *peer)
+mwifiex_tdls_process_enable_link(struct mwifiex_private *priv, const u8 *peer)
 {
 	struct mwifiex_sta_node *sta_ptr;
 	struct ieee80211_mcs_info mcs;
@@ -982,7 +986,7 @@ mwifiex_tdls_process_enable_link(struct mwifiex_private *priv, u8 *peer)
 	return 0;
 }
 
-int mwifiex_tdls_oper(struct mwifiex_private *priv, u8 *peer, u8 action)
+int mwifiex_tdls_oper(struct mwifiex_private *priv, const u8 *peer, u8 action)
 {
 	switch (action) {
 	case MWIFIEX_TDLS_ENABLE_LINK:
@@ -997,7 +1001,7 @@ int mwifiex_tdls_oper(struct mwifiex_private *priv, u8 *peer, u8 action)
 	return 0;
 }
 
-int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, u8 *mac)
+int mwifiex_get_tdls_link_status(struct mwifiex_private *priv, const u8 *mac)
 {
 	struct mwifiex_sta_node *sta_ptr;
 
diff --git a/drivers/net/wireless/mwifiex/util.c b/drivers/net/wireless/mwifiex/util.c
index c3824e37f3f2..6da5abf52e61 100644
--- a/drivers/net/wireless/mwifiex/util.c
+++ b/drivers/net/wireless/mwifiex/util.c
@@ -259,7 +259,7 @@ int mwifiex_complete_cmd(struct mwifiex_adapter *adapter,
  * NULL is returned if station entry is not found in associated STA list.
  */
 struct mwifiex_sta_node *
-mwifiex_get_sta_entry(struct mwifiex_private *priv, u8 *mac)
+mwifiex_get_sta_entry(struct mwifiex_private *priv, const u8 *mac)
 {
 	struct mwifiex_sta_node *node;
 
@@ -280,7 +280,7 @@ mwifiex_get_sta_entry(struct mwifiex_private *priv, u8 *mac)
  * If received mac address is NULL, NULL is returned.
  */
 struct mwifiex_sta_node *
-mwifiex_add_sta_entry(struct mwifiex_private *priv, u8 *mac)
+mwifiex_add_sta_entry(struct mwifiex_private *priv, const u8 *mac)
 {
 	struct mwifiex_sta_node *node;
 	unsigned long flags;
@@ -332,7 +332,7 @@ mwifiex_set_sta_ht_cap(struct mwifiex_private *priv, const u8 *ies,
 }
 
 /* This function will delete a station entry from station list */
-void mwifiex_del_sta_entry(struct mwifiex_private *priv, u8 *mac)
+void mwifiex_del_sta_entry(struct mwifiex_private *priv, const u8 *mac)
 {
 	struct mwifiex_sta_node *node;
 	unsigned long flags;
diff --git a/drivers/net/wireless/mwifiex/wmm.c b/drivers/net/wireless/mwifiex/wmm.c
index 0a7cc742aed7..c6bc5b3191d6 100644
--- a/drivers/net/wireless/mwifiex/wmm.c
+++ b/drivers/net/wireless/mwifiex/wmm.c
@@ -92,7 +92,7 @@ mwifiex_wmm_ac_debug_print(const struct ieee_types_wmm_ac_parameters *ac_param)
  * The function also initializes the list with the provided RA.
  */
 static struct mwifiex_ra_list_tbl *
-mwifiex_wmm_allocate_ralist_node(struct mwifiex_adapter *adapter, u8 *ra)
+mwifiex_wmm_allocate_ralist_node(struct mwifiex_adapter *adapter, const u8 *ra)
 {
 	struct mwifiex_ra_list_tbl *ra_list;
 
@@ -139,8 +139,7 @@ static u8 mwifiex_get_random_ba_threshold(void)
  * This function allocates and adds a RA list for all TIDs
  * with the given RA.
  */
-void
-mwifiex_ralist_add(struct mwifiex_private *priv, u8 *ra)
+void mwifiex_ralist_add(struct mwifiex_private *priv, const u8 *ra)
 {
 	int i;
 	struct mwifiex_ra_list_tbl *ra_list;
@@ -575,7 +574,7 @@ mwifiex_clean_txrx(struct mwifiex_private *priv)
  */
 static struct mwifiex_ra_list_tbl *
 mwifiex_wmm_get_ralist_node(struct mwifiex_private *priv, u8 tid,
-			    u8 *ra_addr)
+			    const u8 *ra_addr)
 {
 	struct mwifiex_ra_list_tbl *ra_list;
 
@@ -596,7 +595,8 @@ mwifiex_wmm_get_ralist_node(struct mwifiex_private *priv, u8 tid,
  * retrieved.
  */
 struct mwifiex_ra_list_tbl *
-mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid, u8 *ra_addr)
+mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid,
+			    const u8 *ra_addr)
 {
 	struct mwifiex_ra_list_tbl *ra_list;
 
diff --git a/drivers/net/wireless/mwifiex/wmm.h b/drivers/net/wireless/mwifiex/wmm.h
index 83e42083ebff..eca56e371a57 100644
--- a/drivers/net/wireless/mwifiex/wmm.h
+++ b/drivers/net/wireless/mwifiex/wmm.h
@@ -99,7 +99,7 @@ mwifiex_wmm_is_ra_list_empty(struct list_head *ra_list_hhead)
 
 void mwifiex_wmm_add_buf_txqueue(struct mwifiex_private *priv,
 				 struct sk_buff *skb);
-void mwifiex_ralist_add(struct mwifiex_private *priv, u8 *ra);
+void mwifiex_ralist_add(struct mwifiex_private *priv, const u8 *ra);
 void mwifiex_rotate_priolists(struct mwifiex_private *priv,
 			      struct mwifiex_ra_list_tbl *ra, int tid);
 
@@ -123,7 +123,8 @@ void mwifiex_wmm_setup_ac_downgrade(struct mwifiex_private *priv);
 int mwifiex_ret_wmm_get_status(struct mwifiex_private *priv,
 			       const struct host_cmd_ds_command *resp);
 struct mwifiex_ra_list_tbl *
-mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid, u8 *ra_addr);
+mwifiex_wmm_get_queue_raptr(struct mwifiex_private *priv, u8 tid,
+			    const u8 *ra_addr);
 u8 mwifiex_wmm_downgrade_tid(struct mwifiex_private *priv, u32 tid);
 
 #endif /* !_MWIFIEX_WMM_H_ */
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 39d22a154341..d2a9a08210be 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -517,7 +517,7 @@ static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev,
 				 u8 key_index, bool unicast, bool multicast);
 
 static int rndis_get_station(struct wiphy *wiphy, struct net_device *dev,
-					u8 *mac, struct station_info *sinfo);
+			     const u8 *mac, struct station_info *sinfo);
 
 static int rndis_dump_station(struct wiphy *wiphy, struct net_device *dev,
 			       int idx, u8 *mac, struct station_info *sinfo);
@@ -2490,7 +2490,7 @@ static void rndis_fill_station_info(struct usbnet *usbdev,
 }
 
 static int rndis_get_station(struct wiphy *wiphy, struct net_device *dev,
-					u8 *mac, struct station_info *sinfo)
+			     const u8 *mac, struct station_info *sinfo)
 {
 	struct rndis_wlan_private *priv = wiphy_priv(wiphy);
 	struct usbnet *usbdev = priv->usbdev;
diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c
index f76f95c29617..f7eb8163d4fb 100644
--- a/drivers/staging/wlan-ng/cfg80211.c
+++ b/drivers/staging/wlan-ng/cfg80211.c
@@ -298,7 +298,7 @@ static int prism2_set_default_key(struct wiphy *wiphy, struct net_device *dev,
 
 
 static int prism2_get_station(struct wiphy *wiphy, struct net_device *dev,
-			      u8 *mac, struct station_info *sinfo)
+			      const u8 *mac, struct station_info *sinfo)
 {
 	wlandevice_t *wlandev = dev->ml_priv;
 	struct p80211msg_lnxreq_commsquality quality;
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 955fdec5a1b6..d4a602b92edf 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2345,28 +2345,29 @@ struct cfg80211_ops {
 
 
 	int	(*add_station)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *mac, struct station_parameters *params);
+			       const u8 *mac,
+			       struct station_parameters *params);
 	int	(*del_station)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *mac);
+			       const u8 *mac);
 	int	(*change_station)(struct wiphy *wiphy, struct net_device *dev,
-				  u8 *mac, struct station_parameters *params);
+				  const u8 *mac,
+				  struct station_parameters *params);
 	int	(*get_station)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *mac, struct station_info *sinfo);
+			       const u8 *mac, struct station_info *sinfo);
 	int	(*dump_station)(struct wiphy *wiphy, struct net_device *dev,
-			       int idx, u8 *mac, struct station_info *sinfo);
+				int idx, u8 *mac, struct station_info *sinfo);
 
 	int	(*add_mpath)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *dst, u8 *next_hop);
+			       const u8 *dst, const u8 *next_hop);
 	int	(*del_mpath)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *dst);
+			       const u8 *dst);
 	int	(*change_mpath)(struct wiphy *wiphy, struct net_device *dev,
-				  u8 *dst, u8 *next_hop);
+				  const u8 *dst, const u8 *next_hop);
 	int	(*get_mpath)(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *dst, u8 *next_hop,
-			       struct mpath_info *pinfo);
+			     u8 *dst, u8 *next_hop, struct mpath_info *pinfo);
 	int	(*dump_mpath)(struct wiphy *wiphy, struct net_device *dev,
-			       int idx, u8 *dst, u8 *next_hop,
-			       struct mpath_info *pinfo);
+			      int idx, u8 *dst, u8 *next_hop,
+			      struct mpath_info *pinfo);
 	int	(*get_mesh_config)(struct wiphy *wiphy,
 				struct net_device *dev,
 				struct mesh_config *conf);
@@ -2496,11 +2497,11 @@ struct cfg80211_ops {
 				  struct cfg80211_gtk_rekey_data *data);
 
 	int	(*tdls_mgmt)(struct wiphy *wiphy, struct net_device *dev,
-			     u8 *peer, u8 action_code,  u8 dialog_token,
+			     const u8 *peer, u8 action_code,  u8 dialog_token,
 			     u16 status_code, u32 peer_capability,
 			     const u8 *buf, size_t len);
 	int	(*tdls_oper)(struct wiphy *wiphy, struct net_device *dev,
-			     u8 *peer, enum nl80211_tdls_operation oper);
+			     const u8 *peer, enum nl80211_tdls_operation oper);
 
 	int	(*probe_client)(struct wiphy *wiphy, struct net_device *dev,
 				const u8 *peer, u64 *cookie);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index bfd2534e5a4d..ac45304590d8 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -777,7 +777,7 @@ static void ieee80211_get_et_strings(struct wiphy *wiphy,
 }
 
 static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
-				 int idx, u8 *mac, struct station_info *sinfo)
+				  int idx, u8 *mac, struct station_info *sinfo)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
@@ -807,7 +807,7 @@ static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
-				 u8 *mac, struct station_info *sinfo)
+				 const u8 *mac, struct station_info *sinfo)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
@@ -1457,7 +1457,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 }
 
 static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
-				 u8 *mac, struct station_parameters *params)
+				 const u8 *mac,
+				 struct station_parameters *params)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct sta_info *sta;
@@ -1526,7 +1527,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
-				 u8 *mac)
+				 const u8 *mac)
 {
 	struct ieee80211_sub_if_data *sdata;
 
@@ -1540,7 +1541,7 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int ieee80211_change_station(struct wiphy *wiphy,
-				    struct net_device *dev, u8 *mac,
+				    struct net_device *dev, const u8 *mac,
 				    struct station_parameters *params)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1665,7 +1666,7 @@ out_err:
 
 #ifdef CONFIG_MAC80211_MESH
 static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
-				 u8 *dst, u8 *next_hop)
+			       const u8 *dst, const u8 *next_hop)
 {
 	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
@@ -1693,7 +1694,7 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *dst)
+			       const u8 *dst)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
@@ -1704,9 +1705,8 @@ static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev,
 	return 0;
 }
 
-static int ieee80211_change_mpath(struct wiphy *wiphy,
-				    struct net_device *dev,
-				    u8 *dst, u8 *next_hop)
+static int ieee80211_change_mpath(struct wiphy *wiphy, struct net_device *dev,
+				  const u8 *dst, const u8 *next_hop)
 {
 	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
@@ -1798,8 +1798,8 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
-				 int idx, u8 *dst, u8 *next_hop,
-				 struct mpath_info *pinfo)
+				int idx, u8 *dst, u8 *next_hop,
+				struct mpath_info *pinfo)
 {
 	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 57e0b2682cef..ed2b817d5ece 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1839,11 +1839,11 @@ int ieee80211_max_num_channels(struct ieee80211_local *local);
 
 /* TDLS */
 int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
-			u8 *peer, u8 action_code, u8 dialog_token,
+			const u8 *peer, u8 action_code, u8 dialog_token,
 			u16 status_code, u32 peer_capability,
 			const u8 *extra_ies, size_t extra_ies_len);
 int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
-			u8 *peer, enum nl80211_tdls_operation oper);
+			const u8 *peer, enum nl80211_tdls_operation oper);
 
 
 #ifdef CONFIG_MAC80211_NOINLINE
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 8e14e2aaea11..652813b2d3df 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -40,8 +40,8 @@ static u16 ieee80211_get_tdls_sta_capab(struct ieee80211_sub_if_data *sdata)
 	return capab;
 }
 
-static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr,
-				       u8 *peer, u8 *bssid)
+static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, const u8 *src_addr,
+				       const u8 *peer, const u8 *bssid)
 {
 	struct ieee80211_tdls_lnkie *lnkid;
 
@@ -57,7 +57,7 @@ static void ieee80211_tdls_add_link_ie(struct sk_buff *skb, u8 *src_addr,
 
 static int
 ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
-			       u8 *peer, u8 action_code, u8 dialog_token,
+			       const u8 *peer, u8 action_code, u8 dialog_token,
 			       u16 status_code, struct sk_buff *skb)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -130,7 +130,7 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
 
 static int
 ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
-			   u8 *peer, u8 action_code, u8 dialog_token,
+			   const u8 *peer, u8 action_code, u8 dialog_token,
 			   u16 status_code, struct sk_buff *skb)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -169,7 +169,7 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
 }
 
 int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
-			u8 *peer, u8 action_code, u8 dialog_token,
+			const u8 *peer, u8 action_code, u8 dialog_token,
 			u16 status_code, u32 peer_capability,
 			const u8 *extra_ies, size_t extra_ies_len)
 {
@@ -285,7 +285,7 @@ fail:
 }
 
 int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
-			u8 *peer, enum nl80211_tdls_operation oper)
+			const u8 *peer, enum nl80211_tdls_operation oper)
 {
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-- 
cgit v1.2.3-71-gd317


From c1e5f4714d591cc0a5e986613fdefa61abe98ac2 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 19 May 2014 17:53:16 +0200
Subject: cfg80211: constify more pointers in the cfg80211 API

This also propagates through the drivers.

The orinoco driver uses the cfg80211 API structs for internal
bookkeeping, and so needs a (void *) cast that removes the
const - but that's OK because it allocates those pointers.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath6kl/wmi.c |  2 +-
 drivers/net/wireless/ath/ath6kl/wmi.h |  2 +-
 drivers/net/wireless/libertas/cfg.c   |  5 ++---
 drivers/net/wireless/libertas/defs.h  |  3 ++-
 drivers/net/wireless/orinoco/hw.c     |  4 ++--
 drivers/net/wireless/orinoco/hw.h     |  4 ++--
 drivers/net/wireless/orinoco/wext.c   |  4 ++--
 drivers/staging/wlan-ng/cfg80211.c    |  2 +-
 include/net/cfg80211.h                | 23 ++++++++++++-----------
 net/wireless/ibss.c                   |  2 +-
 net/wireless/sme.c                    |  2 +-
 net/wireless/util.c                   |  3 ++-
 12 files changed, 29 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c
index 8b4ce28e3ce8..051094604e21 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.c
+++ b/drivers/net/wireless/ath/ath6kl/wmi.c
@@ -2322,7 +2322,7 @@ int ath6kl_wmi_addkey_cmd(struct wmi *wmi, u8 if_idx, u8 key_index,
 	return ret;
 }
 
-int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, u8 *krk)
+int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, const u8 *krk)
 {
 	struct sk_buff *skb;
 	struct wmi_add_krk_cmd *cmd;
diff --git a/drivers/net/wireless/ath/ath6kl/wmi.h b/drivers/net/wireless/ath/ath6kl/wmi.h
index b5f226503baf..39ee35e60c1d 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.h
+++ b/drivers/net/wireless/ath/ath6kl/wmi.h
@@ -2617,7 +2617,7 @@ int ath6kl_wmi_addkey_cmd(struct wmi *wmi, u8 if_idx, u8 key_index,
 			  u8 *key_material,
 			  u8 key_op_ctrl, u8 *mac_addr,
 			  enum wmi_sync_flag sync_flag);
-int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, u8 *krk);
+int ath6kl_wmi_add_krk_cmd(struct wmi *wmi, u8 if_idx, const u8 *krk);
 int ath6kl_wmi_deletekey_cmd(struct wmi *wmi, u8 if_idx, u8 key_index);
 int ath6kl_wmi_setpmkid_cmd(struct wmi *wmi, u8 if_idx, const u8 *bssid,
 			    const u8 *pmkid, bool set);
diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c
index e4d7031df05c..47a998d8f99e 100644
--- a/drivers/net/wireless/libertas/cfg.c
+++ b/drivers/net/wireless/libertas/cfg.c
@@ -1006,9 +1006,8 @@ struct cmd_key_material {
 } __packed;
 
 static int lbs_set_key_material(struct lbs_private *priv,
-				int key_type,
-				int key_info,
-				u8 *key, u16 key_len)
+				int key_type, int key_info,
+				const u8 *key, u16 key_len)
 {
 	struct cmd_key_material cmd;
 	int ret;
diff --git a/drivers/net/wireless/libertas/defs.h b/drivers/net/wireless/libertas/defs.h
index ab966f08024a..407784aca627 100644
--- a/drivers/net/wireless/libertas/defs.h
+++ b/drivers/net/wireless/libertas/defs.h
@@ -90,7 +90,8 @@ do { if ((lbs_debug & (grp)) == (grp)) \
 #define lbs_deb_cfg80211(fmt, args...)  LBS_DEB_LL(LBS_DEB_CFG80211, " cfg80211", fmt, ##args)
 
 #ifdef DEBUG
-static inline void lbs_deb_hex(unsigned int grp, const char *prompt, u8 *buf, int len)
+static inline void lbs_deb_hex(unsigned int grp, const char *prompt,
+			       const u8 *buf, int len)
 {
 	int i = 0;
 
diff --git a/drivers/net/wireless/orinoco/hw.c b/drivers/net/wireless/orinoco/hw.c
index 49300d04efdf..e27e32851f1e 100644
--- a/drivers/net/wireless/orinoco/hw.c
+++ b/drivers/net/wireless/orinoco/hw.c
@@ -988,8 +988,8 @@ int __orinoco_hw_setup_enc(struct orinoco_private *priv)
  * tsc must be NULL or up to 8 bytes
  */
 int __orinoco_hw_set_tkip_key(struct orinoco_private *priv, int key_idx,
-			      int set_tx, u8 *key, u8 *rsc, size_t rsc_len,
-			      u8 *tsc, size_t tsc_len)
+			      int set_tx, const u8 *key, const u8 *rsc,
+			      size_t rsc_len, const u8 *tsc, size_t tsc_len)
 {
 	struct {
 		__le16 idx;
diff --git a/drivers/net/wireless/orinoco/hw.h b/drivers/net/wireless/orinoco/hw.h
index 8f6831f4e328..466d1ede76f1 100644
--- a/drivers/net/wireless/orinoco/hw.h
+++ b/drivers/net/wireless/orinoco/hw.h
@@ -38,8 +38,8 @@ int __orinoco_hw_set_wap(struct orinoco_private *priv);
 int __orinoco_hw_setup_wepkeys(struct orinoco_private *priv);
 int __orinoco_hw_setup_enc(struct orinoco_private *priv);
 int __orinoco_hw_set_tkip_key(struct orinoco_private *priv, int key_idx,
-			      int set_tx, u8 *key, u8 *rsc, size_t rsc_len,
-			      u8 *tsc, size_t tsc_len);
+			      int set_tx, const u8 *key, const u8 *rsc,
+			      size_t rsc_len, const u8 *tsc, size_t tsc_len);
 int orinoco_clear_tkip_key(struct orinoco_private *priv, int key_idx);
 int __orinoco_hw_set_multicast_list(struct orinoco_private *priv,
 				    struct net_device *dev,
diff --git a/drivers/net/wireless/orinoco/wext.c b/drivers/net/wireless/orinoco/wext.c
index b7a867b50b94..6abdaf0aa052 100644
--- a/drivers/net/wireless/orinoco/wext.c
+++ b/drivers/net/wireless/orinoco/wext.c
@@ -52,9 +52,9 @@ static int orinoco_set_key(struct orinoco_private *priv, int index,
 	priv->keys[index].seq_len = seq_len;
 
 	if (key_len)
-		memcpy(priv->keys[index].key, key, key_len);
+		memcpy((void *)priv->keys[index].key, key, key_len);
 	if (seq_len)
-		memcpy(priv->keys[index].seq, seq, seq_len);
+		memcpy((void *)priv->keys[index].seq, seq, seq_len);
 
 	switch (alg) {
 	case ORINOCO_ALG_TKIP:
diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c
index f7eb8163d4fb..723319ee08f3 100644
--- a/drivers/staging/wlan-ng/cfg80211.c
+++ b/drivers/staging/wlan-ng/cfg80211.c
@@ -84,7 +84,7 @@ static int prism2_domibset_uint32(wlandevice_t *wlandev, u32 did, u32 data)
 }
 
 static int prism2_domibset_pstr32(wlandevice_t *wlandev,
-				  u32 did, u8 len, u8 *data)
+				  u32 did, u8 len, const u8 *data)
 {
 	struct p80211msg_dot11req_mibset msg;
 	p80211item_pstr32_t *mibitem =
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d4a602b92edf..3299d1b731ef 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -341,8 +341,8 @@ struct vif_params {
  * @seq_len: length of @seq.
  */
 struct key_params {
-	u8 *key;
-	u8 *seq;
+	const u8 *key;
+	const u8 *seq;
 	int key_len;
 	int seq_len;
 	u32 cipher;
@@ -1169,7 +1169,7 @@ struct bss_parameters {
 	int use_cts_prot;
 	int use_short_preamble;
 	int use_short_slot_time;
-	u8 *basic_rates;
+	const u8 *basic_rates;
 	u8 basic_rates_len;
 	int ap_isolate;
 	int ht_opmode;
@@ -1699,10 +1699,10 @@ struct cfg80211_disassoc_request {
  * @ht_capa_mask:  The bits of ht_capa which are to be used.
  */
 struct cfg80211_ibss_params {
-	u8 *ssid;
-	u8 *bssid;
+	const u8 *ssid;
+	const u8 *bssid;
 	struct cfg80211_chan_def chandef;
-	u8 *ie;
+	const u8 *ie;
 	u8 ssid_len, ie_len;
 	u16 beacon_interval;
 	u32 basic_rates;
@@ -1811,8 +1811,8 @@ struct cfg80211_bitrate_mask {
  * @pmkid: The PMK material itself.
  */
 struct cfg80211_pmksa {
-	u8 *bssid;
-	u8 *pmkid;
+	const u8 *bssid;
+	const u8 *pmkid;
 };
 
 /**
@@ -3289,7 +3289,7 @@ struct wireless_dev {
 		struct cfg80211_ibss_params ibss;
 		struct cfg80211_connect_params connect;
 		struct cfg80211_cached_keys *keys;
-		u8 *ie;
+		const u8 *ie;
 		size_t ie_len;
 		u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
 		u8 ssid[IEEE80211_MAX_SSID_LEN];
@@ -3530,7 +3530,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
  * Return: 0 on success, or a negative error code.
  */
 int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
-			     enum nl80211_iftype iftype, u8 *bssid, bool qos);
+			     enum nl80211_iftype iftype, const u8 *bssid,
+			     bool qos);
 
 /**
  * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame
@@ -4319,7 +4320,7 @@ void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_bss *bss,
  * and not try to connect to any AP any more.
  */
 void cfg80211_disconnected(struct net_device *dev, u16 reason,
-			   u8 *ie, size_t ie_len, gfp_t gfp);
+			   const u8 *ie, size_t ie_len, gfp_t gfp);
 
 /**
  * cfg80211_ready_on_channel - notification of remain_on_channel start
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 6b50588b709f..8f345da3ea5f 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -420,8 +420,8 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev,
 	if (len > 0 && ssid[len - 1] == '\0')
 		len--;
 
+	memcpy(wdev->ssid, ssid, len);
 	wdev->wext.ibss.ssid = wdev->ssid;
-	memcpy(wdev->wext.ibss.ssid, ssid, len);
 	wdev->wext.ibss.ssid_len = len;
 
 	wdev_lock(wdev);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 4bc21a2b1989..ea701bbacc6a 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -879,7 +879,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
 }
 
 void cfg80211_disconnected(struct net_device *dev, u16 reason,
-			   u8 *ie, size_t ie_len, gfp_t gfp)
+			   const u8 *ie, size_t ie_len, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 8c61d5c6fad3..fa61ac9c9b26 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -476,7 +476,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 EXPORT_SYMBOL(ieee80211_data_to_8023);
 
 int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
-			     enum nl80211_iftype iftype, u8 *bssid, bool qos)
+			     enum nl80211_iftype iftype,
+			     const u8 *bssid, bool qos)
 {
 	struct ieee80211_hdr hdr;
 	u16 hdrlen, ethertype;
-- 
cgit v1.2.3-71-gd317


From 922bd80fc33b5b90eb34b1485ebcf3c7b2e61618 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 19 May 2014 17:59:50 +0200
Subject: cfg80211: constify wowlan/coalesce mask/pattern pointers

This requires changing the nl80211 parsing code a bit to use
intermediate pointers for the allocation, but clarifies the
API towards the drivers.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ti/wlcore/main.c     |  2 +-
 drivers/net/wireless/ti/wlcore/wlcore_i.h |  4 ++--
 include/net/cfg80211.h                    |  2 +-
 net/wireless/nl80211.c                    | 39 +++++++++++++++++--------------
 4 files changed, 26 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 077eb5b9cd74..02c91d6db753 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -1416,7 +1416,7 @@ void wl1271_rx_filter_free(struct wl12xx_rx_filter *filter)
 
 int wl1271_rx_filter_alloc_field(struct wl12xx_rx_filter *filter,
 				 u16 offset, u8 flags,
-				 u8 *pattern, u8 len)
+				 const u8 *pattern, u8 len)
 {
 	struct wl12xx_rx_filter_field *field;
 
diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h
index 756e890bc5ee..c2c34a84ff3d 100644
--- a/drivers/net/wireless/ti/wlcore/wlcore_i.h
+++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h
@@ -512,8 +512,8 @@ int wl1271_recalc_rx_streaming(struct wl1271 *wl, struct wl12xx_vif *wlvif);
 void wl12xx_queue_recovery_work(struct wl1271 *wl);
 size_t wl12xx_copy_fwlog(struct wl1271 *wl, u8 *memblock, size_t maxlen);
 int wl1271_rx_filter_alloc_field(struct wl12xx_rx_filter *filter,
-					u16 offset, u8 flags,
-					u8 *pattern, u8 len);
+				 u16 offset, u8 flags,
+				 const u8 *pattern, u8 len);
 void wl1271_rx_filter_free(struct wl12xx_rx_filter *filter);
 struct wl12xx_rx_filter *wl1271_rx_filter_alloc(void);
 int wl1271_rx_filter_get_fields_size(struct wl12xx_rx_filter *filter);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 3299d1b731ef..fe4fa287f788 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1827,7 +1827,7 @@ struct cfg80211_pmksa {
  * memory, free @mask only!
  */
 struct cfg80211_pkt_pattern {
-	u8 *mask, *pattern;
+	const u8 *mask, *pattern;
 	int pattern_len;
 	int pkt_offset;
 };
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ca19b1520389..49adf58646e6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8554,6 +8554,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 
 		nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
 				    rem) {
+			u8 *mask_pat;
+
 			nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
 				  nla_len(pat), NULL);
 			err = -EINVAL;
@@ -8577,19 +8579,18 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 				goto error;
 			new_triggers.patterns[i].pkt_offset = pkt_offset;
 
-			new_triggers.patterns[i].mask =
-				kmalloc(mask_len + pat_len, GFP_KERNEL);
-			if (!new_triggers.patterns[i].mask) {
+			mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL);
+			if (!mask_pat) {
 				err = -ENOMEM;
 				goto error;
 			}
-			new_triggers.patterns[i].pattern =
-				new_triggers.patterns[i].mask + mask_len;
-			memcpy(new_triggers.patterns[i].mask,
-			       nla_data(pat_tb[NL80211_PKTPAT_MASK]),
+			new_triggers.patterns[i].mask = mask_pat;
+			memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]),
 			       mask_len);
+			mask_pat += mask_len;
+			new_triggers.patterns[i].pattern = mask_pat;
 			new_triggers.patterns[i].pattern_len = pat_len;
-			memcpy(new_triggers.patterns[i].pattern,
+			memcpy(mask_pat,
 			       nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
 			       pat_len);
 			i++;
@@ -8781,6 +8782,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
 
 	nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
 			    rem) {
+		u8 *mask_pat;
+
 		nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
 			  nla_len(pat), NULL);
 		if (!pat_tb[NL80211_PKTPAT_MASK] ||
@@ -8802,17 +8805,19 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
 			return -EINVAL;
 		new_rule->patterns[i].pkt_offset = pkt_offset;
 
-		new_rule->patterns[i].mask =
-			kmalloc(mask_len + pat_len, GFP_KERNEL);
-		if (!new_rule->patterns[i].mask)
+		mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL);
+		if (!mask_pat)
 			return -ENOMEM;
-		new_rule->patterns[i].pattern =
-			new_rule->patterns[i].mask + mask_len;
-		memcpy(new_rule->patterns[i].mask,
-		       nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len);
+
+		new_rule->patterns[i].mask = mask_pat;
+		memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]),
+		       mask_len);
+
+		mask_pat += mask_len;
+		new_rule->patterns[i].pattern = mask_pat;
 		new_rule->patterns[i].pattern_len = pat_len;
-		memcpy(new_rule->patterns[i].pattern,
-		       nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len);
+		memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
+		       pat_len);
 		i++;
 	}
 
-- 
cgit v1.2.3-71-gd317


From b54197c43db88f4436717f554d623189cddce29e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 1 May 2014 16:51:50 +0200
Subject: virtio_scsi: use cmd_size

Taken almost entirely from Nicholas Bellinger's scsi-mq conversion.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/scsi/virtio_scsi.c | 25 +++++++------------------
 include/scsi/scsi_cmnd.h   |  9 +++++++++
 2 files changed, 16 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index fc054935eb1f..d4727b339474 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -199,7 +199,6 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf)
 			set_driver_byte(sc, DRIVER_SENSE);
 	}
 
-	mempool_free(cmd, virtscsi_cmd_pool);
 	sc->scsi_done(sc);
 
 	atomic_dec(&tgt->reqs);
@@ -242,8 +241,6 @@ static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf)
 
 	if (cmd->comp)
 		complete_all(cmd->comp);
-	else
-		mempool_free(cmd, virtscsi_cmd_pool);
 }
 
 static void virtscsi_ctrl_done(struct virtqueue *vq)
@@ -459,10 +456,9 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
 				 struct virtio_scsi_vq *req_vq,
 				 struct scsi_cmnd *sc)
 {
-	struct virtio_scsi_cmd *cmd;
-	int ret;
-
 	struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
+	struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc);
+
 	BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize);
 
 	/* TODO: check feature bit and fail if unsupported?  */
@@ -471,11 +467,6 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
 	dev_dbg(&sc->device->sdev_gendev,
 		"cmd %p CDB: %#02x\n", sc, sc->cmnd[0]);
 
-	ret = SCSI_MLQUEUE_HOST_BUSY;
-	cmd = mempool_alloc(virtscsi_cmd_pool, GFP_ATOMIC);
-	if (!cmd)
-		goto out;
-
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->sc = sc;
 	cmd->req.cmd = (struct virtio_scsi_cmd_req){
@@ -494,13 +485,9 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
 
 	if (virtscsi_kick_cmd(req_vq, cmd,
 			      sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
-			      GFP_ATOMIC) == 0)
-		ret = 0;
-	else
-		mempool_free(cmd, virtscsi_cmd_pool);
-
-out:
-	return ret;
+			      GFP_ATOMIC) != 0)
+		return SCSI_MLQUEUE_HOST_BUSY;
+	return 0;
 }
 
 static int virtscsi_queuecommand_single(struct Scsi_Host *sh,
@@ -642,6 +629,7 @@ static struct scsi_host_template virtscsi_host_template_single = {
 	.name = "Virtio SCSI HBA",
 	.proc_name = "virtio_scsi",
 	.this_id = -1,
+	.cmd_size = sizeof(struct virtio_scsi_cmd),
 	.queuecommand = virtscsi_queuecommand_single,
 	.eh_abort_handler = virtscsi_abort,
 	.eh_device_reset_handler = virtscsi_device_reset,
@@ -658,6 +646,7 @@ static struct scsi_host_template virtscsi_host_template_multi = {
 	.name = "Virtio SCSI HBA",
 	.proc_name = "virtio_scsi",
 	.this_id = -1,
+	.cmd_size = sizeof(struct virtio_scsi_cmd),
 	.queuecommand = virtscsi_queuecommand_multi,
 	.eh_abort_handler = virtscsi_abort,
 	.eh_device_reset_handler = virtscsi_device_reset,
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index dd7c998221b3..e016e2ac38df 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -133,6 +133,15 @@ struct scsi_cmnd {
 	unsigned char tag;	/* SCSI-II queued command tag */
 };
 
+/*
+ * Return the driver private allocation behind the command.
+ * Only works if cmd_size is set in the host template.
+ */
+static inline void *scsi_cmd_priv(struct scsi_cmnd *cmd)
+{
+	return cmd + 1;
+}
+
 /* make sure not to use it with REQ_TYPE_BLOCK_PC commands */
 static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd)
 {
-- 
cgit v1.2.3-71-gd317


From 5533e0114425dcdb878f11b291f2727af8667a7c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2014 19:33:07 -0400
Subject: cgroup: disallow debug controller on the default hierarchy

The debug controller, as its name suggests, exposes cgroup core
internals to userland to aid debugging.  Unfortunately, except for the
name, there's no provision to prevent its usage in production
configurations and the controller is widely enabled and mounted
leaking internal details to userland.  Like most other debug
information, the information exposed by debug isn't interesting even
for debugging itself once the related parts are working reliably.

This controller has no reason for existing.  This patch implements
cgrp_dfl_root_inhibit_ss_mask which can suppress specific subsystems
on the default hierarchy and adds the debug subsystem to it so that it
can be gradually deprecated as usages move towards the unified
hierarchy.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h        |  2 ++
 include/linux/cgroup_subsys.h | 11 +++++++----
 kernel/cgroup.c               | 21 ++++++++++++++++++---
 3 files changed, 27 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 4afe544d3547..8a111dd42d7a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -305,6 +305,8 @@ enum {
 	 *   the flag is not created.
 	 *
 	 * - blkcg: blk-throttle becomes properly hierarchical.
+	 *
+	 * - debug: disallowed on the default hierarchy.
 	 */
 	CGRP_ROOT_SANE_BEHAVIOR	= (1 << 0),
 
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 768fe44e19f0..98c4f9b12b03 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -7,10 +7,6 @@
 SUBSYS(cpuset)
 #endif
 
-#if IS_ENABLED(CONFIG_CGROUP_DEBUG)
-SUBSYS(debug)
-#endif
-
 #if IS_ENABLED(CONFIG_CGROUP_SCHED)
 SUBSYS(cpu)
 #endif
@@ -50,6 +46,13 @@ SUBSYS(net_prio)
 #if IS_ENABLED(CONFIG_CGROUP_HUGETLB)
 SUBSYS(hugetlb)
 #endif
+
+/*
+ * The following subsystems are not supported on the default hierarchy.
+ */
+#if IS_ENABLED(CONFIG_CGROUP_DEBUG)
+SUBSYS(debug)
+#endif
 /*
  * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
  */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 082bb842b11a..a5f75ac4e793 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -148,6 +148,13 @@ struct cgroup_root cgrp_dfl_root;
  */
 static bool cgrp_dfl_root_visible;
 
+/* some controllers are not supported in the default hierarchy */
+static const unsigned int cgrp_dfl_root_inhibit_ss_mask = 0
+#ifdef CONFIG_CGROUP_DEBUG
+	| (1 << debug_cgrp_id)
+#endif
+	;
+
 /* The list of hierarchy roots */
 
 static LIST_HEAD(cgroup_roots);
@@ -1126,6 +1133,7 @@ static void cgroup_clear_dir(struct cgroup *cgrp, unsigned int subsys_mask)
 static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
 {
 	struct cgroup_subsys *ss;
+	unsigned int tmp_ss_mask;
 	int ssid, i, ret;
 
 	lockdep_assert_held(&cgroup_mutex);
@@ -1143,7 +1151,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
 			return -EBUSY;
 	}
 
-	ret = cgroup_populate_dir(&dst_root->cgrp, ss_mask);
+	/* skip creating root files on dfl_root for inhibited subsystems */
+	tmp_ss_mask = ss_mask;
+	if (dst_root == &cgrp_dfl_root)
+		tmp_ss_mask &= ~cgrp_dfl_root_inhibit_ss_mask;
+
+	ret = cgroup_populate_dir(&dst_root->cgrp, tmp_ss_mask);
 	if (ret) {
 		if (dst_root != &cgrp_dfl_root)
 			return ret;
@@ -2426,7 +2439,8 @@ static int cgroup_root_controllers_show(struct seq_file *seq, void *v)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
 
-	cgroup_print_ss_mask(seq, cgrp->root->subsys_mask);
+	cgroup_print_ss_mask(seq, cgrp->root->subsys_mask &
+			     ~cgrp_dfl_root_inhibit_ss_mask);
 	return 0;
 }
 
@@ -2564,7 +2578,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 		if (tok[0] == '\0')
 			continue;
 		for_each_subsys(ss, ssid) {
-			if (ss->disabled || strcmp(tok + 1, ss->name))
+			if (ss->disabled || strcmp(tok + 1, ss->name) ||
+			    ((1 << ss->id) & cgrp_dfl_root_inhibit_ss_mask))
 				continue;
 
 			if (*tok == '+') {
-- 
cgit v1.2.3-71-gd317


From 4f4aa2ec24dc45881849833a439558d3a378028c Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Sun, 18 May 2014 00:22:38 +0200
Subject: ssb: sprom: add dev_id field for value overriding standard ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some devices may have different features despite sharing the same ID
(e.g. PCI ID). For example 14e4:4331 is usually a dual band, but this
can be "limited". Device with "pci/x/y/devid=0x4332" supports 2.4 GHz
only. Similarly 0x4333 will mean support for 5 GHz only.
Add entry in SPROM so info described above can be extracted and stored.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 arch/mips/bcm47xx/sprom.c | 1 +
 include/linux/ssb/ssb.h   | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/arch/mips/bcm47xx/sprom.c b/arch/mips/bcm47xx/sprom.c
index a8b5408dd349..da4cdb16844e 100644
--- a/arch/mips/bcm47xx/sprom.c
+++ b/arch/mips/bcm47xx/sprom.c
@@ -168,6 +168,7 @@ static void nvram_read_alpha2(const char *prefix, const char *name,
 static void bcm47xx_fill_sprom_r1234589(struct ssb_sprom *sprom,
 					const char *prefix, bool fallback)
 {
+	nvram_read_u16(prefix, NULL, "devid", &sprom->dev_id, 0, fallback);
 	nvram_read_u8(prefix, NULL, "ledbh0", &sprom->gpio0, 0xff, fallback);
 	nvram_read_u8(prefix, NULL, "ledbh1", &sprom->gpio1, 0xff, fallback);
 	nvram_read_u8(prefix, NULL, "ledbh2", &sprom->gpio2, 0xff, fallback);
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 07ef9b82b66d..4568a5cc9ab8 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -33,6 +33,7 @@ struct ssb_sprom {
 	u8 et1phyaddr;		/* MII address for enet1 */
 	u8 et0mdcport;		/* MDIO for enet0 */
 	u8 et1mdcport;		/* MDIO for enet1 */
+	u16 dev_id;		/* Device ID overriding e.g. PCI ID */
 	u16 board_rev;		/* Board revision number from SPROM. */
 	u16 board_num;		/* Board number from SPROM. */
 	u16 board_type;		/* Board type from SPROM. */
-- 
cgit v1.2.3-71-gd317


From 57be1f3f3ec1ccab6432615ca161c4c9ece2a2aa Mon Sep 17 00:00:00 2001
From: Hiren Tandel <hirent@marvell.com>
Date: Mon, 5 May 2014 19:43:31 +0900
Subject: NFC: Add RAW socket type support for SOCKPROTO_RAW

This allows for a more generic NFC sniffing by using SOCKPROTO_RAW
SOCK_RAW to read RAW NFC frames. This is for sniffing anything but LLCP
(HCI, NCI, etc...).

Signed-off-by: Hiren Tandel <hirent@marvell.com>
Signed-off-by: Rahul Tank <rahult@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/nfc.h    |  3 ++
 include/uapi/linux/nfc.h | 16 ++++++---
 net/nfc/llcp_commands.c  |  2 +-
 net/nfc/llcp_core.c      | 11 +++---
 net/nfc/nfc.h            |  6 ++++
 net/nfc/rawsock.c        | 94 +++++++++++++++++++++++++++++++++++++++++++++---
 6 files changed, 117 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 2e8b40c16274..6c583e244de2 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -264,4 +264,7 @@ int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type);
 int nfc_remove_se(struct nfc_dev *dev, u32 se_idx);
 struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx);
 
+void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb,
+			  u8 payload_type, u8 direction);
+
 #endif /* __NET_NFC_H */
diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index 9789dc95b6a8..9b19b4461928 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -273,11 +273,19 @@ struct sockaddr_nfc_llcp {
  * First byte is the adapter index
  * Second byte contains flags
  *  - 0x01 - Direction (0=RX, 1=TX)
- *  - 0x02-0x80 - Reserved
+ *  - 0x02-0x04 - Payload type (000=LLCP, 001=NCI, 010=HCI, 011=Digital,
+ *                              100=Proprietary)
+ *  - 0x05-0x80 - Reserved
  **/
-#define NFC_LLCP_RAW_HEADER_SIZE	2
-#define NFC_LLCP_DIRECTION_RX		0x00
-#define NFC_LLCP_DIRECTION_TX		0x01
+#define NFC_RAW_HEADER_SIZE	2
+#define NFC_DIRECTION_RX		0x00
+#define NFC_DIRECTION_TX		0x01
+
+#define RAW_PAYLOAD_LLCP 0
+#define RAW_PAYLOAD_NCI	1
+#define RAW_PAYLOAD_HCI	2
+#define RAW_PAYLOAD_DIGITAL	3
+#define RAW_PAYLOAD_PROPRIETARY	4
 
 /* socket option names */
 #define NFC_LLCP_RW		0
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index bec6ed15f503..a3ad69a4c648 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -387,7 +387,7 @@ int nfc_llcp_send_symm(struct nfc_dev *dev)
 
 	__net_timestamp(skb);
 
-	nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX);
+	nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX);
 
 	return nfc_data_exchange(dev, local->target_idx, skb,
 				 nfc_llcp_recv, local);
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index b4671958fcf9..f6278da68763 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -680,16 +680,17 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
 			continue;
 
 		if (skb_copy == NULL) {
-			skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE,
+			skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE,
 					       GFP_ATOMIC);
 
 			if (skb_copy == NULL)
 				continue;
 
-			data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE);
+			data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE);
 
 			data[0] = local->dev ? local->dev->idx : 0xFF;
-			data[1] = direction;
+			data[1] = direction & 0x01;
+			data[1] |= (RAW_PAYLOAD_LLCP << 1);
 		}
 
 		nskb = skb_clone(skb_copy, GFP_ATOMIC);
@@ -747,7 +748,7 @@ static void nfc_llcp_tx_work(struct work_struct *work)
 			__net_timestamp(skb);
 
 			nfc_llcp_send_to_raw_sock(local, skb,
-						  NFC_LLCP_DIRECTION_TX);
+						  NFC_DIRECTION_TX);
 
 			ret = nfc_data_exchange(local->dev, local->target_idx,
 						skb, nfc_llcp_recv, local);
@@ -1476,7 +1477,7 @@ static void nfc_llcp_rx_work(struct work_struct *work)
 
 	__net_timestamp(skb);
 
-	nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX);
+	nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_RX);
 
 	nfc_llcp_rx_skb(local, skb);
 
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 9d6e74f7e6b3..88d60064890e 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -40,6 +40,12 @@ struct nfc_rawsock {
 	struct work_struct tx_work;
 	bool tx_work_scheduled;
 };
+
+struct nfc_sock_list {
+	struct hlist_head head;
+	rwlock_t          lock;
+};
+
 #define nfc_rawsock(sk) ((struct nfc_rawsock *) sk)
 #define to_rawsock_sk(_tx_work) \
 	((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work))
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index c27a6e86cae4..8627c75063e2 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -27,6 +27,24 @@
 
 #include "nfc.h"
 
+static struct nfc_sock_list raw_sk_list = {
+	.lock = __RW_LOCK_UNLOCKED(raw_sk_list.lock)
+};
+
+void nfc_sock_link(struct nfc_sock_list *l, struct sock *sk)
+{
+	write_lock(&l->lock);
+	sk_add_node(sk, &l->head);
+	write_unlock(&l->lock);
+}
+
+void nfc_sock_unlink(struct nfc_sock_list *l, struct sock *sk)
+{
+	write_lock(&l->lock);
+	sk_del_node_init(sk);
+	write_unlock(&l->lock);
+}
+
 static void rawsock_write_queue_purge(struct sock *sk)
 {
 	pr_debug("sk=%p\n", sk);
@@ -57,6 +75,9 @@ static int rawsock_release(struct socket *sock)
 	if (!sk)
 		return 0;
 
+	if (sock->type == SOCK_RAW)
+		nfc_sock_unlink(&raw_sk_list, sk);
+
 	sock_orphan(sk);
 	sock_put(sk);
 
@@ -275,6 +296,26 @@ static const struct proto_ops rawsock_ops = {
 	.mmap           = sock_no_mmap,
 };
 
+static const struct proto_ops rawsock_raw_ops = {
+	.family         = PF_NFC,
+	.owner          = THIS_MODULE,
+	.release        = rawsock_release,
+	.bind           = sock_no_bind,
+	.connect        = sock_no_connect,
+	.socketpair     = sock_no_socketpair,
+	.accept         = sock_no_accept,
+	.getname        = sock_no_getname,
+	.poll           = datagram_poll,
+	.ioctl          = sock_no_ioctl,
+	.listen         = sock_no_listen,
+	.shutdown       = sock_no_shutdown,
+	.setsockopt     = sock_no_setsockopt,
+	.getsockopt     = sock_no_getsockopt,
+	.sendmsg        = sock_no_sendmsg,
+	.recvmsg        = rawsock_recvmsg,
+	.mmap           = sock_no_mmap,
+};
+
 static void rawsock_destruct(struct sock *sk)
 {
 	pr_debug("sk=%p\n", sk);
@@ -300,10 +341,13 @@ static int rawsock_create(struct net *net, struct socket *sock,
 
 	pr_debug("sock=%p\n", sock);
 
-	if (sock->type != SOCK_SEQPACKET)
+	if ((sock->type != SOCK_SEQPACKET) && (sock->type != SOCK_RAW))
 		return -ESOCKTNOSUPPORT;
 
-	sock->ops = &rawsock_ops;
+	if (sock->type == SOCK_RAW)
+		sock->ops = &rawsock_raw_ops;
+	else
+		sock->ops = &rawsock_ops;
 
 	sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto);
 	if (!sk)
@@ -313,13 +357,53 @@ static int rawsock_create(struct net *net, struct socket *sock,
 	sk->sk_protocol = nfc_proto->id;
 	sk->sk_destruct = rawsock_destruct;
 	sock->state = SS_UNCONNECTED;
-
-	INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work);
-	nfc_rawsock(sk)->tx_work_scheduled = false;
+	if (sock->type == SOCK_RAW)
+		nfc_sock_link(&raw_sk_list, sk);
+	else {
+		INIT_WORK(&nfc_rawsock(sk)->tx_work, rawsock_tx_work);
+		nfc_rawsock(sk)->tx_work_scheduled = false;
+	}
 
 	return 0;
 }
 
+void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb,
+			  u8 payload_type, u8 direction)
+{
+	struct sk_buff *skb_copy = NULL, *nskb;
+	struct sock *sk;
+	u8 *data;
+
+	read_lock(&raw_sk_list.lock);
+
+	sk_for_each(sk, &raw_sk_list.head) {
+		if (!skb_copy) {
+			skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE,
+				     GFP_ATOMIC);
+			if (!skb_copy)
+				continue;
+
+			data = skb_push(skb_copy, NFC_RAW_HEADER_SIZE);
+
+			data[0] = dev ? dev->idx : 0xFF;
+			data[1] = direction & 0x01;
+			data[1] |= (payload_type << 1);
+		}
+
+		nskb = skb_clone(skb_copy, GFP_ATOMIC);
+		if (!nskb)
+			continue;
+
+		if (sock_queue_rcv_skb(sk, nskb))
+			kfree_skb(nskb);
+	}
+
+	read_unlock(&raw_sk_list.lock);
+
+	kfree_skb(skb_copy);
+}
+EXPORT_SYMBOL(nfc_send_to_raw_sock);
+
 static struct proto rawsock_proto = {
 	.name     = "NFC_RAW",
 	.owner    = THIS_MODULE,
-- 
cgit v1.2.3-71-gd317


From 867d849fc844623a88ec7b380442952b5ffe5e68 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@open-mesh.com>
Date: Mon, 19 May 2014 21:53:19 +0200
Subject: cfg80211: export expected throughput through get_station()

Users may need information about the expected throughput
towards a given peer.
This value is supposed to consider the size overhead
generated by the 802.11 header.

This value is exported in kbps through the get_station() API
by including it into the station_info object.
Moreover, it is sent to user space when replying to the
nl80211 GET_STATION command.

This information will be useful to the batman-adv module
which will use it for its new metric computation.

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 62 ++++++++++++++++++++++++--------------------
 include/uapi/linux/nl80211.h |  3 +++
 net/wireless/nl80211.c       |  4 +++
 3 files changed, 41 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index fe4fa287f788..857d6476a128 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -873,36 +873,38 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
  * @STATION_INFO_NONPEER_PM: @nonpeer_pm filled
  * @STATION_INFO_CHAIN_SIGNAL: @chain_signal filled
  * @STATION_INFO_CHAIN_SIGNAL_AVG: @chain_signal_avg filled
+ * @STATION_INFO_EXPECTED_THROUGHPUT: @expected_throughput filled
  */
 enum station_info_flags {
-	STATION_INFO_INACTIVE_TIME	= 1<<0,
-	STATION_INFO_RX_BYTES		= 1<<1,
-	STATION_INFO_TX_BYTES		= 1<<2,
-	STATION_INFO_LLID		= 1<<3,
-	STATION_INFO_PLID		= 1<<4,
-	STATION_INFO_PLINK_STATE	= 1<<5,
-	STATION_INFO_SIGNAL		= 1<<6,
-	STATION_INFO_TX_BITRATE		= 1<<7,
-	STATION_INFO_RX_PACKETS		= 1<<8,
-	STATION_INFO_TX_PACKETS		= 1<<9,
-	STATION_INFO_TX_RETRIES		= 1<<10,
-	STATION_INFO_TX_FAILED		= 1<<11,
-	STATION_INFO_RX_DROP_MISC	= 1<<12,
-	STATION_INFO_SIGNAL_AVG		= 1<<13,
-	STATION_INFO_RX_BITRATE		= 1<<14,
-	STATION_INFO_BSS_PARAM          = 1<<15,
-	STATION_INFO_CONNECTED_TIME	= 1<<16,
-	STATION_INFO_ASSOC_REQ_IES	= 1<<17,
-	STATION_INFO_STA_FLAGS		= 1<<18,
-	STATION_INFO_BEACON_LOSS_COUNT	= 1<<19,
-	STATION_INFO_T_OFFSET		= 1<<20,
-	STATION_INFO_LOCAL_PM		= 1<<21,
-	STATION_INFO_PEER_PM		= 1<<22,
-	STATION_INFO_NONPEER_PM		= 1<<23,
-	STATION_INFO_RX_BYTES64		= 1<<24,
-	STATION_INFO_TX_BYTES64		= 1<<25,
-	STATION_INFO_CHAIN_SIGNAL	= 1<<26,
-	STATION_INFO_CHAIN_SIGNAL_AVG	= 1<<27,
+	STATION_INFO_INACTIVE_TIME		= BIT(0),
+	STATION_INFO_RX_BYTES			= BIT(1),
+	STATION_INFO_TX_BYTES			= BIT(2),
+	STATION_INFO_LLID			= BIT(3),
+	STATION_INFO_PLID			= BIT(4),
+	STATION_INFO_PLINK_STATE		= BIT(5),
+	STATION_INFO_SIGNAL			= BIT(6),
+	STATION_INFO_TX_BITRATE			= BIT(7),
+	STATION_INFO_RX_PACKETS			= BIT(8),
+	STATION_INFO_TX_PACKETS			= BIT(9),
+	STATION_INFO_TX_RETRIES			= BIT(10),
+	STATION_INFO_TX_FAILED			= BIT(11),
+	STATION_INFO_RX_DROP_MISC		= BIT(12),
+	STATION_INFO_SIGNAL_AVG			= BIT(13),
+	STATION_INFO_RX_BITRATE			= BIT(14),
+	STATION_INFO_BSS_PARAM			= BIT(15),
+	STATION_INFO_CONNECTED_TIME		= BIT(16),
+	STATION_INFO_ASSOC_REQ_IES		= BIT(17),
+	STATION_INFO_STA_FLAGS			= BIT(18),
+	STATION_INFO_BEACON_LOSS_COUNT		= BIT(19),
+	STATION_INFO_T_OFFSET			= BIT(20),
+	STATION_INFO_LOCAL_PM			= BIT(21),
+	STATION_INFO_PEER_PM			= BIT(22),
+	STATION_INFO_NONPEER_PM			= BIT(23),
+	STATION_INFO_RX_BYTES64			= BIT(24),
+	STATION_INFO_TX_BYTES64			= BIT(25),
+	STATION_INFO_CHAIN_SIGNAL		= BIT(26),
+	STATION_INFO_CHAIN_SIGNAL_AVG		= BIT(27),
+	STATION_INFO_EXPECTED_THROUGHPUT	= BIT(28),
 };
 
 /**
@@ -1024,6 +1026,8 @@ struct sta_bss_parameters {
  * @local_pm: local mesh STA power save mode
  * @peer_pm: peer mesh STA power save mode
  * @nonpeer_pm: non-peer mesh STA power save mode
+ * @expected_throughput: expected throughput in kbps (including 802.11 headers)
+ *	towards this station.
  */
 struct station_info {
 	u32 filled;
@@ -1062,6 +1066,8 @@ struct station_info {
 	enum nl80211_mesh_power_mode peer_pm;
 	enum nl80211_mesh_power_mode nonpeer_pm;
 
+	u32 expected_throughput;
+
 	/*
 	 * Note: Add a new enum station_info_flags value for each new field and
 	 * use it to check which fields are initialized.
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 0cfa827123ff..fb0efa1f9066 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2199,6 +2199,8 @@ enum nl80211_sta_bss_param {
  *	Contains a nested array of signal strength attributes (u8, dBm)
  * @NL80211_STA_INFO_CHAIN_SIGNAL_AVG: per-chain signal strength average
  *	Same format as NL80211_STA_INFO_CHAIN_SIGNAL.
+ * @NL80211_STA_EXPECTED_THROUGHPUT: expected throughput considering also the
+ *	802.11 header (u32, kbps)
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -2230,6 +2232,7 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_TX_BYTES64,
 	NL80211_STA_INFO_CHAIN_SIGNAL,
 	NL80211_STA_INFO_CHAIN_SIGNAL_AVG,
+	NL80211_STA_INFO_EXPECTED_THROUGHPUT,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 49adf58646e6..62bdb1adaa4d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3650,6 +3650,10 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq,
 	    nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED,
 			sinfo->tx_failed))
 		goto nla_put_failure;
+	if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) &&
+	    nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT,
+			sinfo->expected_throughput))
+		goto nla_put_failure;
 	if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) &&
 	    nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS,
 			sinfo->beacon_loss_count))
-- 
cgit v1.2.3-71-gd317


From e69595c2501094d85d5bbca87592acb8a481109a Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Wed, 19 Feb 2014 19:36:08 +0200
Subject: drm: Make the vblank disable timer per-crtc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently there's one per-device vblank disable timer, and it gets
reset wheneven the vblank refcount for any crtc drops to zero. That
means that one crtc could accidentally be keeping the vblank interrupts
for other crtcs enabled even if there are no users for them. Make the
disable timer per-crtc to avoid this issue.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_irq.c | 40 +++++++++++++++++++++-------------------
 include/drm/drmP.h        |  4 +++-
 2 files changed, 24 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index ea20c4aa1b6b..90c59a8c820f 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -140,33 +140,34 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc)
 
 static void vblank_disable_fn(unsigned long arg)
 {
-	struct drm_device *dev = (struct drm_device *)arg;
+	struct drm_vblank_crtc *vblank = (void *)arg;
+	struct drm_device *dev = vblank->dev;
 	unsigned long irqflags;
-	int i;
+	int crtc = vblank->crtc;
 
 	if (!dev->vblank_disable_allowed)
 		return;
 
-	for (i = 0; i < dev->num_crtcs; i++) {
-		spin_lock_irqsave(&dev->vbl_lock, irqflags);
-		if (atomic_read(&dev->vblank[i].refcount) == 0 &&
-		    dev->vblank[i].enabled) {
-			DRM_DEBUG("disabling vblank on crtc %d\n", i);
-			vblank_disable_and_save(dev, i);
-		}
-		spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
+	spin_lock_irqsave(&dev->vbl_lock, irqflags);
+	if (atomic_read(&vblank->refcount) == 0 && vblank->enabled) {
+		DRM_DEBUG("disabling vblank on crtc %d\n", crtc);
+		vblank_disable_and_save(dev, crtc);
 	}
+	spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 }
 
 void drm_vblank_cleanup(struct drm_device *dev)
 {
+	int crtc;
+
 	/* Bail if the driver didn't call drm_vblank_init() */
 	if (dev->num_crtcs == 0)
 		return;
 
-	del_timer_sync(&dev->vblank_disable_timer);
-
-	vblank_disable_fn((unsigned long)dev);
+	for (crtc = 0; crtc < dev->num_crtcs; crtc++) {
+		del_timer_sync(&dev->vblank[crtc].disable_timer);
+		vblank_disable_fn((unsigned long)&dev->vblank[crtc]);
+	}
 
 	kfree(dev->vblank);
 
@@ -178,8 +179,6 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs)
 {
 	int i, ret = -ENOMEM;
 
-	setup_timer(&dev->vblank_disable_timer, vblank_disable_fn,
-		    (unsigned long)dev);
 	spin_lock_init(&dev->vbl_lock);
 	spin_lock_init(&dev->vblank_time_lock);
 
@@ -189,8 +188,13 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs)
 	if (!dev->vblank)
 		goto err;
 
-	for (i = 0; i < num_crtcs; i++)
+	for (i = 0; i < num_crtcs; i++) {
+		dev->vblank[i].dev = dev;
+		dev->vblank[i].crtc = i;
 		init_waitqueue_head(&dev->vblank[i].queue);
+		setup_timer(&dev->vblank[i].disable_timer, vblank_disable_fn,
+			    (unsigned long)&dev->vblank[i]);
+	}
 
 	DRM_INFO("Supports vblank timestamp caching Rev 2 (21.10.2013).\n");
 
@@ -900,7 +904,7 @@ void drm_vblank_put(struct drm_device *dev, int crtc)
 	/* Last user schedules interrupt disable */
 	if (atomic_dec_and_test(&dev->vblank[crtc].refcount) &&
 	    (drm_vblank_offdelay > 0))
-		mod_timer(&dev->vblank_disable_timer,
+		mod_timer(&dev->vblank[crtc].disable_timer,
 			  jiffies + ((drm_vblank_offdelay * HZ)/1000));
 }
 EXPORT_SYMBOL(drm_vblank_put);
@@ -909,8 +913,6 @@ EXPORT_SYMBOL(drm_vblank_put);
  * drm_vblank_off - disable vblank events on a CRTC
  * @dev: DRM device
  * @crtc: CRTC in question
- *
- * Caller must hold event lock.
  */
 void drm_vblank_off(struct drm_device *dev, int crtc)
 {
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 12f10bc2395f..9d982d483f12 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1024,14 +1024,17 @@ struct drm_pending_vblank_event {
 };
 
 struct drm_vblank_crtc {
+	struct drm_device *dev;		/* pointer to the drm_device */
 	wait_queue_head_t queue;	/**< VBLANK wait queue */
 	struct timeval time[DRM_VBLANKTIME_RBSIZE];	/**< timestamp of current count */
+	struct timer_list disable_timer;		/* delayed disable timer */
 	atomic_t count;			/**< number of VBLANK interrupts */
 	atomic_t refcount;		/* number of users of vblank interruptsper crtc */
 	u32 last;			/* protected by dev->vbl_lock, used */
 					/* for wraparound handling */
 	u32 last_wait;			/* Last vblank seqno waited per CRTC */
 	unsigned int inmodeset;		/* Display driver is setting mode */
+	int crtc;			/* crtc index */
 	bool enabled;			/* so we don't call enable more than
 					   once per disable */
 };
@@ -1119,7 +1122,6 @@ struct drm_device {
 
 	spinlock_t vblank_time_lock;    /**< Protects vblank count and time updates during vblank enable/disable */
 	spinlock_t vbl_lock;
-	struct timer_list vblank_disable_timer;
 
 	u32 max_vblank_count;           /**< size of vblank counter register */
 
-- 
cgit v1.2.3-71-gd317


From f2752282f7f7b566b07113dd5aa130725aa95ac4 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Wed, 19 Feb 2014 21:29:49 +0200
Subject: drm: Add drm_vblank_on()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drm_vblank_off() will turn off vblank interrupts, but as long as the
refcount is elevated drm_vblank_get() will not re-enable them. This
is a problem is someone is holding a vblank reference while a modeset is
happening, and the driver requires vblank interrupt to work during that
time.

Add drm_vblank_on() as a counterpart to drm_vblank_off() which will
re-enabled vblank interrupts if the refcount is already elevated. This
will allow drivers to choose the specific places in the modeset sequence
at which vblank interrupts get disabled and enabled.

Testcase: igt/kms_flip/*-vs-suspend
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
[danvet: Add Testcase tag for the igt I've written.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_irq.c            | 72 ++++++++++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_display.c |  8 ++++
 include/drm/drmP.h                   |  1 +
 3 files changed, 62 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 13d671ed3421..dd786d84daab 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -839,6 +839,41 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc)
 	smp_mb__after_atomic_inc();
 }
 
+/**
+ * drm_vblank_enable - enable the vblank interrupt on a CRTC
+ * @dev: DRM device
+ * @crtc: CRTC in question
+ */
+static int drm_vblank_enable(struct drm_device *dev, int crtc)
+{
+	int ret = 0;
+
+	assert_spin_locked(&dev->vbl_lock);
+
+	spin_lock(&dev->vblank_time_lock);
+
+	if (!dev->vblank[crtc].enabled) {
+		/* Enable vblank irqs under vblank_time_lock protection.
+		 * All vblank count & timestamp updates are held off
+		 * until we are done reinitializing master counter and
+		 * timestamps. Filtercode in drm_handle_vblank() will
+		 * prevent double-accounting of same vblank interval.
+		 */
+		ret = dev->driver->enable_vblank(dev, crtc);
+		DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n", crtc, ret);
+		if (ret)
+			atomic_dec(&dev->vblank[crtc].refcount);
+		else {
+			dev->vblank[crtc].enabled = true;
+			drm_update_vblank_count(dev, crtc);
+		}
+	}
+
+	spin_unlock(&dev->vblank_time_lock);
+
+	return ret;
+}
+
 /**
  * drm_vblank_get - get a reference count on vblank events
  * @dev: DRM device
@@ -858,25 +893,7 @@ int drm_vblank_get(struct drm_device *dev, int crtc)
 	spin_lock_irqsave(&dev->vbl_lock, irqflags);
 	/* Going from 0->1 means we have to enable interrupts again */
 	if (atomic_add_return(1, &dev->vblank[crtc].refcount) == 1) {
-		spin_lock(&dev->vblank_time_lock);
-		if (!dev->vblank[crtc].enabled) {
-			/* Enable vblank irqs under vblank_time_lock protection.
-			 * All vblank count & timestamp updates are held off
-			 * until we are done reinitializing master counter and
-			 * timestamps. Filtercode in drm_handle_vblank() will
-			 * prevent double-accounting of same vblank interval.
-			 */
-			ret = dev->driver->enable_vblank(dev, crtc);
-			DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n",
-				  crtc, ret);
-			if (ret)
-				atomic_dec(&dev->vblank[crtc].refcount);
-			else {
-				dev->vblank[crtc].enabled = true;
-				drm_update_vblank_count(dev, crtc);
-			}
-		}
-		spin_unlock(&dev->vblank_time_lock);
+		ret = drm_vblank_enable(dev, crtc);
 	} else {
 		if (!dev->vblank[crtc].enabled) {
 			atomic_dec(&dev->vblank[crtc].refcount);
@@ -945,6 +962,23 @@ void drm_vblank_off(struct drm_device *dev, int crtc)
 }
 EXPORT_SYMBOL(drm_vblank_off);
 
+/**
+ * drm_vblank_on - enable vblank events on a CRTC
+ * @dev: DRM device
+ * @crtc: CRTC in question
+ */
+void drm_vblank_on(struct drm_device *dev, int crtc)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev->vbl_lock, irqflags);
+	/* re-enable interrupts if there's are users left */
+	if (atomic_read(&dev->vblank[crtc].refcount) != 0)
+		WARN_ON(drm_vblank_enable(dev, crtc));
+	spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
+}
+EXPORT_SYMBOL(drm_vblank_on);
+
 /**
  * drm_vblank_pre_modeset - account for vblanks across mode sets
  * @dev: DRM device
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 72b5c34d8ce7..313f29d1aae2 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3737,6 +3737,8 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
 	 * happening.
 	 */
 	intel_wait_for_vblank(dev, intel_crtc->pipe);
+
+	drm_vblank_on(dev, pipe);
 }
 
 /* IPS only exists on ULT machines and is tied to pipe A. */
@@ -3828,6 +3830,8 @@ static void haswell_crtc_enable(struct drm_crtc *crtc)
 	 * to change the workaround. */
 	haswell_mode_set_planes_workaround(intel_crtc);
 	ilk_crtc_enable_planes(crtc);
+
+	drm_vblank_on(dev, pipe);
 }
 
 static void ironlake_pfit_disable(struct intel_crtc *crtc)
@@ -4351,6 +4355,8 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc)
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		encoder->enable(encoder);
+
+	drm_vblank_on(dev, pipe);
 }
 
 static void i9xx_crtc_enable(struct drm_crtc *crtc)
@@ -4398,6 +4404,8 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc)
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		encoder->enable(encoder);
+
+	drm_vblank_on(dev, pipe);
 }
 
 static void i9xx_pfit_disable(struct intel_crtc *crtc)
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 9d982d483f12..7339b2b00724 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1360,6 +1360,7 @@ extern bool drm_handle_vblank(struct drm_device *dev, int crtc);
 extern int drm_vblank_get(struct drm_device *dev, int crtc);
 extern void drm_vblank_put(struct drm_device *dev, int crtc);
 extern void drm_vblank_off(struct drm_device *dev, int crtc);
+extern void drm_vblank_on(struct drm_device *dev, int crtc);
 extern void drm_vblank_cleanup(struct drm_device *dev);
 extern u32 drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
 				     struct timeval *tvblank, unsigned flags);
-- 
cgit v1.2.3-71-gd317


From 66507c7bc8895f0da6b4ad87e96d61a9f7d7a118 Mon Sep 17 00:00:00 2001
From: Kamal Dasu <kdasu.kdev@gmail.com>
Date: Thu, 1 May 2014 20:51:19 -0400
Subject: mtd: nand: Add support to use nand_base poi databuf as bounce buffer

nand_base can be passed a kmap()'d buffers from highmem by
filesystems like jffs2. This results in failure to map the
physical address of the DMA buffer on various contoller
driver on different platforms. This change adds a chip option
to use preallocated databuf as bounce buffers used in
nand_do_read_ops() and nand_do_write_ops().
This allows for specific nand controller driver to set this
option as needed.

Signed-off-by: Kamal Dasu <kdasu.kdev@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/nand_base.c | 37 +++++++++++++++++++++++++++++++------
 include/linux/mtd/nand.h     |  5 +++++
 2 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 7853b9b0a05e..1b844b8c621f 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -37,6 +37,7 @@
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/types.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
@@ -1500,6 +1501,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 		mtd->oobavail : mtd->oobsize;
 
 	uint8_t *bufpoi, *oob, *buf;
+	int use_bufpoi;
 	unsigned int max_bitflips = 0;
 	int retry_mode = 0;
 	bool ecc_fail = false;
@@ -1522,9 +1524,20 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 		bytes = min(mtd->writesize - col, readlen);
 		aligned = (bytes == mtd->writesize);
 
+		if (!aligned)
+			use_bufpoi = 1;
+		else if (chip->options & NAND_USE_BOUNCE_BUFFER)
+			use_bufpoi = !virt_addr_valid(buf);
+		else
+			use_bufpoi = 0;
+
 		/* Is the current page in the buffer? */
 		if (realpage != chip->pagebuf || oob) {
-			bufpoi = aligned ? buf : chip->buffers->databuf;
+			bufpoi = use_bufpoi ? chip->buffers->databuf : buf;
+
+			if (use_bufpoi && aligned)
+				pr_debug("%s: using read bounce buffer for buf@%p\n",
+						 __func__, buf);
 
 read_retry:
 			chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
@@ -1546,7 +1559,7 @@ read_retry:
 				ret = chip->ecc.read_page(mtd, chip, bufpoi,
 							  oob_required, page);
 			if (ret < 0) {
-				if (!aligned)
+				if (use_bufpoi)
 					/* Invalidate page cache */
 					chip->pagebuf = -1;
 				break;
@@ -1555,7 +1568,7 @@ read_retry:
 			max_bitflips = max_t(unsigned int, max_bitflips, ret);
 
 			/* Transfer not aligned data */
-			if (!aligned) {
+			if (use_bufpoi) {
 				if (!NAND_HAS_SUBPAGE_READ(chip) && !oob &&
 				    !(mtd->ecc_stats.failed - ecc_failures) &&
 				    (ops->mode != MTD_OPS_RAW)) {
@@ -2375,11 +2388,23 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 		int bytes = mtd->writesize;
 		int cached = writelen > bytes && page != blockmask;
 		uint8_t *wbuf = buf;
+		int use_bufpoi;
+		int part_pagewr = (column || writelen < (mtd->writesize - 1));
+
+		if (part_pagewr)
+			use_bufpoi = 1;
+		else if (chip->options & NAND_USE_BOUNCE_BUFFER)
+			use_bufpoi = !virt_addr_valid(buf);
+		else
+			use_bufpoi = 0;
 
-		/* Partial page write? */
-		if (unlikely(column || writelen < (mtd->writesize - 1))) {
+		/* Partial page write?, or need to use bounce buffer */
+		if (use_bufpoi) {
+			pr_debug("%s: using write bounce buffer for buf@%p\n",
+					 __func__, buf);
 			cached = 0;
-			bytes = min_t(int, bytes - column, (int) writelen);
+			if (part_pagewr)
+				bytes = min_t(int, bytes - column, writelen);
 			chip->pagebuf = -1;
 			memset(chip->buffers->databuf, 0xff, mtd->writesize);
 			memcpy(&chip->buffers->databuf[column], buf, bytes);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 7a922e6c4e4b..2f0af2891f0f 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -175,6 +175,11 @@ typedef enum {
 #define NAND_OWN_BUFFERS	0x00020000
 /* Chip may not exist, so silence any errors in scan */
 #define NAND_SCAN_SILENT_NODEV	0x00040000
+/*
+ * This option could be defined by controller drivers to protect against
+ * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers
+ */
+#define NAND_USE_BOUNCE_BUFFER	0x00080000
 /*
  * Autodetect nand buswidth with readid/onfi.
  * This suppose the driver will configure the hardware in 8 bits mode
-- 
cgit v1.2.3-71-gd317


From 96ba9dd65788a0bd2a7d1e57ec78b7642f0ccc25 Mon Sep 17 00:00:00 2001
From: Vincenzo Aliberti <vincenzo.aliberti@gmail.com>
Date: Thu, 15 May 2014 23:19:32 +0200
Subject: mtd: lpddr: add driver for LPDDR2-NVM PCM memories

Signed-off-by: Vincenzo Aliberti <vincenzo.aliberti@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/lpddr/Kconfig      |  12 +-
 drivers/mtd/lpddr/Makefile     |   1 +
 drivers/mtd/lpddr/lpddr2_nvm.c | 507 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/mtd/mtd-abi.h     |   1 +
 4 files changed, 519 insertions(+), 2 deletions(-)
 create mode 100644 drivers/mtd/lpddr/lpddr2_nvm.c

(limited to 'include')

diff --git a/drivers/mtd/lpddr/Kconfig b/drivers/mtd/lpddr/Kconfig
index 265f969817e3..468f06dea45d 100644
--- a/drivers/mtd/lpddr/Kconfig
+++ b/drivers/mtd/lpddr/Kconfig
@@ -1,5 +1,5 @@
-menu "LPDDR flash memory drivers"
-	depends on MTD!=n
+menu "LPDDR & LPDDR2 PCM memory drivers"
+	depends on MTD
 
 config MTD_LPDDR
 	tristate "Support for LPDDR flash chips"
@@ -17,4 +17,12 @@ config MTD_QINFO_PROBE
 	    Window QINFO interface, permits software to be used for entire
 	    families of devices. This serves similar purpose of CFI on legacy
 	    Flash products
+
+config MTD_LPDDR2_NVM
+	depends on MTD
+	tristate "Support for LPDDR2-NVM flash chips"
+	help
+	  This option enables support of PCM memories with a LPDDR2-NVM
+	  (Low power double data rate 2) interface.
+
 endmenu
diff --git a/drivers/mtd/lpddr/Makefile b/drivers/mtd/lpddr/Makefile
index da48e46b5812..881d440d483e 100644
--- a/drivers/mtd/lpddr/Makefile
+++ b/drivers/mtd/lpddr/Makefile
@@ -4,3 +4,4 @@
 
 obj-$(CONFIG_MTD_QINFO_PROBE)	+= qinfo_probe.o
 obj-$(CONFIG_MTD_LPDDR)	+= lpddr_cmds.o
+obj-$(CONFIG_MTD_LPDDR2_NVM) += lpddr2_nvm.o
diff --git a/drivers/mtd/lpddr/lpddr2_nvm.c b/drivers/mtd/lpddr/lpddr2_nvm.c
new file mode 100644
index 000000000000..063cec40d0ae
--- /dev/null
+++ b/drivers/mtd/lpddr/lpddr2_nvm.c
@@ -0,0 +1,507 @@
+/*
+ * LPDDR2-NVM MTD driver. This module provides read, write, erase, lock/unlock
+ * support for LPDDR2-NVM PCM memories
+ *
+ * Copyright © 2012 Micron Technology, Inc.
+ *
+ * Vincenzo Aliberti <vincenzo.aliberti@gmail.com>
+ * Domenico Manna <domenico.manna@gmail.com>
+ * Many thanks to Andrea Vigilante for initial enabling
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+#include <linux/err.h>
+
+/* Parameters */
+#define ERASE_BLOCKSIZE			(0x00020000/2)	/* in Word */
+#define WRITE_BUFFSIZE			(0x00000400/2)	/* in Word */
+#define OW_BASE_ADDRESS			0x00000000	/* OW offset */
+#define BUS_WIDTH			0x00000020	/* x32 devices */
+
+/* PFOW symbols address offset */
+#define PFOW_QUERY_STRING_P		(0x0000/2)	/* in Word */
+#define PFOW_QUERY_STRING_F		(0x0002/2)	/* in Word */
+#define PFOW_QUERY_STRING_O		(0x0004/2)	/* in Word */
+#define PFOW_QUERY_STRING_W		(0x0006/2)	/* in Word */
+
+/* OW registers address */
+#define CMD_CODE_OFS			(0x0080/2)	/* in Word */
+#define CMD_DATA_OFS			(0x0084/2)	/* in Word */
+#define CMD_ADD_L_OFS			(0x0088/2)	/* in Word */
+#define CMD_ADD_H_OFS			(0x008A/2)	/* in Word */
+#define MPR_L_OFS			(0x0090/2)	/* in Word */
+#define MPR_H_OFS			(0x0092/2)	/* in Word */
+#define CMD_EXEC_OFS			(0x00C0/2)	/* in Word */
+#define STATUS_REG_OFS			(0x00CC/2)	/* in Word */
+#define PRG_BUFFER_OFS			(0x0010/2)	/* in Word */
+
+/* Datamask */
+#define MR_CFGMASK			0x8000
+#define SR_OK_DATAMASK			0x0080
+
+/* LPDDR2-NVM Commands */
+#define LPDDR2_NVM_LOCK			0x0061
+#define LPDDR2_NVM_UNLOCK		0x0062
+#define LPDDR2_NVM_SW_PROGRAM		0x0041
+#define LPDDR2_NVM_SW_OVERWRITE		0x0042
+#define LPDDR2_NVM_BUF_PROGRAM		0x00E9
+#define LPDDR2_NVM_BUF_OVERWRITE	0x00EA
+#define LPDDR2_NVM_ERASE		0x0020
+
+/* LPDDR2-NVM Registers offset */
+#define LPDDR2_MODE_REG_DATA		0x0040
+#define LPDDR2_MODE_REG_CFG		0x0050
+
+/*
+ * Internal Type Definitions
+ * pcm_int_data contains memory controller details:
+ * @reg_data : LPDDR2_MODE_REG_DATA register address after remapping
+ * @reg_cfg  : LPDDR2_MODE_REG_CFG register address after remapping
+ * &bus_width: memory bus-width (eg: x16 2 Bytes, x32 4 Bytes)
+ */
+struct pcm_int_data {
+	void __iomem *ctl_regs;
+	int bus_width;
+};
+
+static DEFINE_MUTEX(lpdd2_nvm_mutex);
+
+/*
+ * Build a map_word starting from an u_long
+ */
+static inline map_word build_map_word(u_long myword)
+{
+	map_word val = { {0} };
+	val.x[0] = myword;
+	return val;
+}
+
+/*
+ * Build Mode Register Configuration DataMask based on device bus-width
+ */
+static inline u_int build_mr_cfgmask(u_int bus_width)
+{
+	u_int val = MR_CFGMASK;
+
+	if (bus_width == 0x0004)		/* x32 device */
+		val = val << 16;
+
+	return val;
+}
+
+/*
+ * Build Status Register OK DataMask based on device bus-width
+ */
+static inline u_int build_sr_ok_datamask(u_int bus_width)
+{
+	u_int val = SR_OK_DATAMASK;
+
+	if (bus_width == 0x0004)		/* x32 device */
+		val = (val << 16)+val;
+
+	return val;
+}
+
+/*
+ * Evaluates Overlay Window Control Registers address
+ */
+static inline u_long ow_reg_add(struct map_info *map, u_long offset)
+{
+	u_long val = 0;
+	struct pcm_int_data *pcm_data = map->fldrv_priv;
+
+	val = map->pfow_base + offset*pcm_data->bus_width;
+
+	return val;
+}
+
+/*
+ * Enable lpddr2-nvm Overlay Window
+ * Overlay Window is a memory mapped area containing all LPDDR2-NVM registers
+ * used by device commands as well as uservisible resources like Device Status
+ * Register, Device ID, etc
+ */
+static inline void ow_enable(struct map_info *map)
+{
+	struct pcm_int_data *pcm_data = map->fldrv_priv;
+
+	writel_relaxed(build_mr_cfgmask(pcm_data->bus_width) | 0x18,
+		pcm_data->ctl_regs + LPDDR2_MODE_REG_CFG);
+	writel_relaxed(0x01, pcm_data->ctl_regs + LPDDR2_MODE_REG_DATA);
+}
+
+/*
+ * Disable lpddr2-nvm Overlay Window
+ * Overlay Window is a memory mapped area containing all LPDDR2-NVM registers
+ * used by device commands as well as uservisible resources like Device Status
+ * Register, Device ID, etc
+ */
+static inline void ow_disable(struct map_info *map)
+{
+	struct pcm_int_data *pcm_data = map->fldrv_priv;
+
+	writel_relaxed(build_mr_cfgmask(pcm_data->bus_width) | 0x18,
+		pcm_data->ctl_regs + LPDDR2_MODE_REG_CFG);
+	writel_relaxed(0x02, pcm_data->ctl_regs + LPDDR2_MODE_REG_DATA);
+}
+
+/*
+ * Execute lpddr2-nvm operations
+ */
+static int lpddr2_nvm_do_op(struct map_info *map, u_long cmd_code,
+	u_long cmd_data, u_long cmd_add, u_long cmd_mpr, u_char *buf)
+{
+	map_word add_l = { {0} }, add_h = { {0} }, mpr_l = { {0} },
+		mpr_h = { {0} }, data_l = { {0} }, cmd = { {0} },
+		exec_cmd = { {0} }, sr;
+	map_word data_h = { {0} };	/* only for 2x x16 devices stacked */
+	u_long i, status_reg, prg_buff_ofs;
+	struct pcm_int_data *pcm_data = map->fldrv_priv;
+	u_int sr_ok_datamask = build_sr_ok_datamask(pcm_data->bus_width);
+
+	/* Builds low and high words for OW Control Registers */
+	add_l.x[0]	= cmd_add & 0x0000FFFF;
+	add_h.x[0]	= (cmd_add >> 16) & 0x0000FFFF;
+	mpr_l.x[0]	= cmd_mpr & 0x0000FFFF;
+	mpr_h.x[0]	= (cmd_mpr >> 16) & 0x0000FFFF;
+	cmd.x[0]	= cmd_code & 0x0000FFFF;
+	exec_cmd.x[0]	= 0x0001;
+	data_l.x[0]	= cmd_data & 0x0000FFFF;
+	data_h.x[0]	= (cmd_data >> 16) & 0x0000FFFF; /* only for 2x x16 */
+
+	/* Set Overlay Window Control Registers */
+	map_write(map, cmd, ow_reg_add(map, CMD_CODE_OFS));
+	map_write(map, data_l, ow_reg_add(map, CMD_DATA_OFS));
+	map_write(map, add_l, ow_reg_add(map, CMD_ADD_L_OFS));
+	map_write(map, add_h, ow_reg_add(map, CMD_ADD_H_OFS));
+	map_write(map, mpr_l, ow_reg_add(map, MPR_L_OFS));
+	map_write(map, mpr_h, ow_reg_add(map, MPR_H_OFS));
+	if (pcm_data->bus_width == 0x0004) {	/* 2x16 devices stacked */
+		map_write(map, cmd, ow_reg_add(map, CMD_CODE_OFS) + 2);
+		map_write(map, data_h, ow_reg_add(map, CMD_DATA_OFS) + 2);
+		map_write(map, add_l, ow_reg_add(map, CMD_ADD_L_OFS) + 2);
+		map_write(map, add_h, ow_reg_add(map, CMD_ADD_H_OFS) + 2);
+		map_write(map, mpr_l, ow_reg_add(map, MPR_L_OFS) + 2);
+		map_write(map, mpr_h, ow_reg_add(map, MPR_H_OFS) + 2);
+	}
+
+	/* Fill Program Buffer */
+	if ((cmd_code == LPDDR2_NVM_BUF_PROGRAM) ||
+		(cmd_code == LPDDR2_NVM_BUF_OVERWRITE)) {
+		prg_buff_ofs = (map_read(map,
+			ow_reg_add(map, PRG_BUFFER_OFS))).x[0];
+		for (i = 0; i < cmd_mpr; i++) {
+			map_write(map, build_map_word(buf[i]), map->pfow_base +
+			prg_buff_ofs + i);
+		}
+	}
+
+	/* Command Execute */
+	map_write(map, exec_cmd, ow_reg_add(map, CMD_EXEC_OFS));
+	if (pcm_data->bus_width == 0x0004)	/* 2x16 devices stacked */
+		map_write(map, exec_cmd, ow_reg_add(map, CMD_EXEC_OFS) + 2);
+
+	/* Status Register Check */
+	do {
+		sr = map_read(map, ow_reg_add(map, STATUS_REG_OFS));
+		status_reg = sr.x[0];
+		if (pcm_data->bus_width == 0x0004) {/* 2x16 devices stacked */
+			sr = map_read(map, ow_reg_add(map,
+				STATUS_REG_OFS) + 2);
+			status_reg += sr.x[0] << 16;
+		}
+	} while ((status_reg & sr_ok_datamask) != sr_ok_datamask);
+
+	return (((status_reg & sr_ok_datamask) == sr_ok_datamask) ? 0 : -EIO);
+}
+
+/*
+ * Execute lpddr2-nvm operations @ block level
+ */
+static int lpddr2_nvm_do_block_op(struct mtd_info *mtd, loff_t start_add,
+	uint64_t len, u_char block_op)
+{
+	struct map_info *map = mtd->priv;
+	u_long add, end_add;
+	int ret = 0;
+
+	mutex_lock(&lpdd2_nvm_mutex);
+
+	ow_enable(map);
+
+	add = start_add;
+	end_add = add + len;
+
+	do {
+		ret = lpddr2_nvm_do_op(map, block_op, 0x00, add, add, NULL);
+		if (ret)
+			goto out;
+		add += mtd->erasesize;
+	} while (add < end_add);
+
+out:
+	ow_disable(map);
+	mutex_unlock(&lpdd2_nvm_mutex);
+	return ret;
+}
+
+/*
+ * verify presence of PFOW string
+ */
+static int lpddr2_nvm_pfow_present(struct map_info *map)
+{
+	map_word pfow_val[4];
+	unsigned int found = 1;
+
+	mutex_lock(&lpdd2_nvm_mutex);
+
+	ow_enable(map);
+
+	/* Load string from array */
+	pfow_val[0] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_P));
+	pfow_val[1] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_F));
+	pfow_val[2] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_O));
+	pfow_val[3] = map_read(map, ow_reg_add(map, PFOW_QUERY_STRING_W));
+
+	/* Verify the string loaded vs expected */
+	if (!map_word_equal(map, build_map_word('P'), pfow_val[0]))
+		found = 0;
+	if (!map_word_equal(map, build_map_word('F'), pfow_val[1]))
+		found = 0;
+	if (!map_word_equal(map, build_map_word('O'), pfow_val[2]))
+		found = 0;
+	if (!map_word_equal(map, build_map_word('W'), pfow_val[3]))
+		found = 0;
+
+	ow_disable(map);
+
+	mutex_unlock(&lpdd2_nvm_mutex);
+
+	return found;
+}
+
+/*
+ * lpddr2_nvm driver read method
+ */
+static int lpddr2_nvm_read(struct mtd_info *mtd, loff_t start_add,
+				size_t len, size_t *retlen, u_char *buf)
+{
+	struct map_info *map = mtd->priv;
+
+	mutex_lock(&lpdd2_nvm_mutex);
+
+	*retlen = len;
+
+	map_copy_from(map, buf, start_add, *retlen);
+
+	mutex_unlock(&lpdd2_nvm_mutex);
+	return 0;
+}
+
+/*
+ * lpddr2_nvm driver write method
+ */
+static int lpddr2_nvm_write(struct mtd_info *mtd, loff_t start_add,
+				size_t len, size_t *retlen, const u_char *buf)
+{
+	struct map_info *map = mtd->priv;
+	struct pcm_int_data *pcm_data = map->fldrv_priv;
+	u_long add, current_len, tot_len, target_len, my_data;
+	u_char *write_buf = (u_char *)buf;
+	int ret = 0;
+
+	mutex_lock(&lpdd2_nvm_mutex);
+
+	ow_enable(map);
+
+	/* Set start value for the variables */
+	add = start_add;
+	target_len = len;
+	tot_len = 0;
+
+	while (tot_len < target_len) {
+		if (!(IS_ALIGNED(add, mtd->writesize))) { /* do sw program */
+			my_data = write_buf[tot_len];
+			my_data += (write_buf[tot_len+1]) << 8;
+			if (pcm_data->bus_width == 0x0004) {/* 2x16 devices */
+				my_data += (write_buf[tot_len+2]) << 16;
+				my_data += (write_buf[tot_len+3]) << 24;
+			}
+			ret = lpddr2_nvm_do_op(map, LPDDR2_NVM_SW_OVERWRITE,
+				my_data, add, 0x00, NULL);
+			if (ret)
+				goto out;
+
+			add += pcm_data->bus_width;
+			tot_len += pcm_data->bus_width;
+		} else {		/* do buffer program */
+			current_len = min(target_len - tot_len,
+				(u_long) mtd->writesize);
+			ret = lpddr2_nvm_do_op(map, LPDDR2_NVM_BUF_OVERWRITE,
+				0x00, add, current_len, write_buf + tot_len);
+			if (ret)
+				goto out;
+
+			add += current_len;
+			tot_len += current_len;
+		}
+	}
+
+out:
+	*retlen = tot_len;
+	ow_disable(map);
+	mutex_unlock(&lpdd2_nvm_mutex);
+	return ret;
+}
+
+/*
+ * lpddr2_nvm driver erase method
+ */
+static int lpddr2_nvm_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+	int ret = lpddr2_nvm_do_block_op(mtd, instr->addr, instr->len,
+		LPDDR2_NVM_ERASE);
+	if (!ret) {
+		instr->state = MTD_ERASE_DONE;
+		mtd_erase_callback(instr);
+	}
+
+	return ret;
+}
+
+/*
+ * lpddr2_nvm driver unlock method
+ */
+static int lpddr2_nvm_unlock(struct mtd_info *mtd, loff_t start_add,
+	uint64_t len)
+{
+	return lpddr2_nvm_do_block_op(mtd, start_add, len, LPDDR2_NVM_UNLOCK);
+}
+
+/*
+ * lpddr2_nvm driver lock method
+ */
+static int lpddr2_nvm_lock(struct mtd_info *mtd, loff_t start_add,
+	uint64_t len)
+{
+	return lpddr2_nvm_do_block_op(mtd, start_add, len, LPDDR2_NVM_LOCK);
+}
+
+/*
+ * lpddr2_nvm driver probe method
+ */
+static int lpddr2_nvm_probe(struct platform_device *pdev)
+{
+	struct map_info *map;
+	struct mtd_info *mtd;
+	struct resource *add_range;
+	struct resource *control_regs;
+	struct pcm_int_data *pcm_data;
+
+	/* Allocate memory control_regs data structures */
+	pcm_data = devm_kzalloc(&pdev->dev, sizeof(*pcm_data), GFP_KERNEL);
+	if (!pcm_data)
+		return -ENOMEM;
+
+	pcm_data->bus_width = BUS_WIDTH;
+
+	/* Allocate memory for map_info & mtd_info data structures */
+	map = devm_kzalloc(&pdev->dev, sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	mtd = devm_kzalloc(&pdev->dev, sizeof(*mtd), GFP_KERNEL);
+	if (!mtd)
+		return -ENOMEM;
+
+	/* lpddr2_nvm address range */
+	add_range = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	/* Populate map_info data structure */
+	*map = (struct map_info) {
+		.virt		= devm_ioremap_resource(&pdev->dev, add_range),
+		.name		= pdev->dev.init_name,
+		.phys		= add_range->start,
+		.size		= resource_size(add_range),
+		.bankwidth	= pcm_data->bus_width / 2,
+		.pfow_base	= OW_BASE_ADDRESS,
+		.fldrv_priv	= pcm_data,
+	};
+	if (IS_ERR(map->virt))
+		return PTR_ERR(map->virt);
+
+	simple_map_init(map);	/* fill with default methods */
+
+	control_regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	pcm_data->ctl_regs = devm_ioremap_resource(&pdev->dev, control_regs);
+	if (IS_ERR(pcm_data->ctl_regs))
+		return PTR_ERR(pcm_data->ctl_regs);
+
+	/* Populate mtd_info data structure */
+	*mtd = (struct mtd_info) {
+		.name		= pdev->dev.init_name,
+		.type		= MTD_RAM,
+		.priv		= map,
+		.size		= resource_size(add_range),
+		.erasesize	= ERASE_BLOCKSIZE * pcm_data->bus_width,
+		.writesize	= 1,
+		.writebufsize	= WRITE_BUFFSIZE * pcm_data->bus_width,
+		.flags		= (MTD_CAP_NVRAM | MTD_POWERUP_LOCK),
+		._read		= lpddr2_nvm_read,
+		._write		= lpddr2_nvm_write,
+		._erase		= lpddr2_nvm_erase,
+		._unlock	= lpddr2_nvm_unlock,
+		._lock		= lpddr2_nvm_lock,
+	};
+
+	/* Verify the presence of the device looking for PFOW string */
+	if (!lpddr2_nvm_pfow_present(map)) {
+		pr_err("device not recognized\n");
+		return -EINVAL;
+	}
+	/* Parse partitions and register the MTD device */
+	return mtd_device_parse_register(mtd, NULL, NULL, NULL, 0);
+}
+
+/*
+ * lpddr2_nvm driver remove method
+ */
+static int lpddr2_nvm_remove(struct platform_device *pdev)
+{
+	return mtd_device_unregister(dev_get_drvdata(&pdev->dev));
+}
+
+/* Initialize platform_driver data structure for lpddr2_nvm */
+static struct platform_driver lpddr2_nvm_drv = {
+	.driver		= {
+		.name	= "lpddr2_nvm",
+	},
+	.probe		= lpddr2_nvm_probe,
+	.remove		= lpddr2_nvm_remove,
+};
+
+module_platform_driver(lpddr2_nvm_drv);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Vincenzo Aliberti <vincenzo.aliberti@gmail.com>");
+MODULE_DESCRIPTION("MTD driver for LPDDR2-NVM PCM memories");
diff --git a/include/uapi/mtd/mtd-abi.h b/include/uapi/mtd/mtd-abi.h
index e272ea060e38..763bb6950402 100644
--- a/include/uapi/mtd/mtd-abi.h
+++ b/include/uapi/mtd/mtd-abi.h
@@ -109,6 +109,7 @@ struct mtd_write_req {
 #define MTD_CAP_RAM		(MTD_WRITEABLE | MTD_BIT_WRITEABLE | MTD_NO_ERASE)
 #define MTD_CAP_NORFLASH	(MTD_WRITEABLE | MTD_BIT_WRITEABLE)
 #define MTD_CAP_NANDFLASH	(MTD_WRITEABLE)
+#define MTD_CAP_NVRAM		(MTD_WRITEABLE | MTD_BIT_WRITEABLE | MTD_NO_ERASE)
 
 /* Obsolete ECC byte placement modes (used with obsolete MEMGETOOBSEL) */
 #define MTD_NANDECC_OFF		0	// Switch off ECC (Not recommended)
-- 
cgit v1.2.3-71-gd317


From 27c9fd607587e6c3b517590df4cd35ac85f3d0bd Mon Sep 17 00:00:00 2001
From: pekon gupta <pekon@ti.com>
Date: Mon, 19 May 2014 13:24:39 +0530
Subject: mtd: nand: omap: add support for BCH16_ECC - GPMC driver updates

This patch add support for BCH16_ECC in GPMC (controller) driver:
- extends configuration space to include BCH16 registers
- extends parsing of DT binding for selecting BCH16 ecc-scheme

Signed-off-by: Pekon Gupta <pekon@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 arch/arm/mach-omap2/gpmc.c                   | 15 +++++++++++++++
 include/linux/platform_data/mtd-nand-omap2.h |  5 +++++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index ab43755364f5..9b27773db040 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -68,6 +68,9 @@
 #define	GPMC_ECC_BCH_RESULT_1	0x244	/* not available on OMAP2 */
 #define	GPMC_ECC_BCH_RESULT_2	0x248	/* not available on OMAP2 */
 #define	GPMC_ECC_BCH_RESULT_3	0x24c	/* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_4	0x300	/* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_5	0x304	/* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_6	0x308	/* not available on OMAP2 */
 
 /* GPMC ECC control settings */
 #define GPMC_ECC_CTRL_ECCCLEAR		0x100
@@ -666,6 +669,12 @@ void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs)
 					   GPMC_BCH_SIZE * i;
 		reg->gpmc_bch_result3[i] = gpmc_base + GPMC_ECC_BCH_RESULT_3 +
 					   GPMC_BCH_SIZE * i;
+		reg->gpmc_bch_result4[i] = gpmc_base + GPMC_ECC_BCH_RESULT_4 +
+					   i * GPMC_BCH_SIZE;
+		reg->gpmc_bch_result5[i] = gpmc_base + GPMC_ECC_BCH_RESULT_5 +
+					   i * GPMC_BCH_SIZE;
+		reg->gpmc_bch_result6[i] = gpmc_base + GPMC_ECC_BCH_RESULT_6 +
+					   i * GPMC_BCH_SIZE;
 	}
 }
 
@@ -1401,6 +1410,12 @@ static int gpmc_probe_nand_child(struct platform_device *pdev,
 		else
 			gpmc_nand_data->ecc_opt =
 				OMAP_ECC_BCH8_CODE_HW_DETECTION_SW;
+	else if (!strcmp(s, "bch16"))
+		if (gpmc_nand_data->elm_of_node)
+			gpmc_nand_data->ecc_opt =
+				OMAP_ECC_BCH16_CODE_HW;
+		else
+			pr_err("%s: BCH16 requires ELM support\n", __func__);
 	else
 		pr_err("%s: ti,nand-ecc-opt invalid value\n", __func__);
 
diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h
index 3e9dd6676b97..660c029d694f 100644
--- a/include/linux/platform_data/mtd-nand-omap2.h
+++ b/include/linux/platform_data/mtd-nand-omap2.h
@@ -31,6 +31,8 @@ enum omap_ecc {
 	OMAP_ECC_BCH8_CODE_HW_DETECTION_SW,
 	/* 8-bit  ECC calculation by GPMC, Error detection by ELM */
 	OMAP_ECC_BCH8_CODE_HW,
+	/* 16-bit ECC calculation by GPMC, Error detection by ELM */
+	OMAP_ECC_BCH16_CODE_HW,
 };
 
 struct gpmc_nand_regs {
@@ -50,6 +52,9 @@ struct gpmc_nand_regs {
 	void __iomem	*gpmc_bch_result1[GPMC_BCH_NUM_REMAINDER];
 	void __iomem	*gpmc_bch_result2[GPMC_BCH_NUM_REMAINDER];
 	void __iomem	*gpmc_bch_result3[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result4[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result5[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result6[GPMC_BCH_NUM_REMAINDER];
 };
 
 struct omap_nand_platform_data {
-- 
cgit v1.2.3-71-gd317


From 2be589e4b28457f148640dc6addf6da24af64b7f Mon Sep 17 00:00:00 2001
From: pekon gupta <pekon@ti.com>
Date: Mon, 19 May 2014 13:24:40 +0530
Subject: mtd: nand: omap: add support for BCH16_ECC - ELM driver updates

ELM hardware engine is used to detect ECC errors for BCHx ecc-schemes
(like BCH4/BCH8/BCH16). This patch extends configuration of ELM registers
for adding support of BCH16_HW ecc-scheme.

Signed-off-by: Pekon Gupta <pekon@ti.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/elm.c         | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/platform_data/elm.h |  3 ++-
 2 files changed, 38 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c
index 0a037b15c11b..7df86948e6d4 100644
--- a/drivers/mtd/devices/elm.c
+++ b/drivers/mtd/devices/elm.c
@@ -213,6 +213,28 @@ static void elm_load_syndrome(struct elm_info *info,
 				val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12;
 				elm_write_reg(info, offset, val);
 				break;
+			case BCH16_ECC:
+				val = cpu_to_be32(*(u32 *) &ecc[22]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[18]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[14]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[10]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[6]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[2]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[0]) >> 16;
+				elm_write_reg(info, offset, val);
+				break;
 			default:
 				pr_err("invalid config bch_type\n");
 			}
@@ -436,6 +458,13 @@ static int elm_context_save(struct elm_info *info)
 	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
 		offset = i * SYNDROME_FRAGMENT_REG_SIZE;
 		switch (bch_type) {
+		case BCH16_ECC:
+			regs->elm_syndrome_fragment_6[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_6 + offset);
+			regs->elm_syndrome_fragment_5[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_5 + offset);
+			regs->elm_syndrome_fragment_4[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_4 + offset);
 		case BCH8_ECC:
 			regs->elm_syndrome_fragment_3[i] = elm_read_reg(info,
 					ELM_SYNDROME_FRAGMENT_3 + offset);
@@ -474,6 +503,13 @@ static int elm_context_restore(struct elm_info *info)
 	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
 		offset = i * SYNDROME_FRAGMENT_REG_SIZE;
 		switch (bch_type) {
+		case BCH16_ECC:
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_6 + offset,
+					regs->elm_syndrome_fragment_6[i]);
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_5 + offset,
+					regs->elm_syndrome_fragment_5[i]);
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_4 + offset,
+					regs->elm_syndrome_fragment_4[i]);
 		case BCH8_ECC:
 			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_3 + offset,
 					regs->elm_syndrome_fragment_3[i]);
diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h
index 4edb40676b3f..780d1e97f620 100644
--- a/include/linux/platform_data/elm.h
+++ b/include/linux/platform_data/elm.h
@@ -21,6 +21,7 @@
 enum bch_ecc {
 	BCH4_ECC = 0,
 	BCH8_ECC,
+	BCH16_ECC,
 };
 
 /* ELM support 8 error syndrome process */
@@ -38,7 +39,7 @@ struct elm_errorvec {
 	bool error_reported;
 	bool error_uncorrectable;
 	int error_count;
-	int error_loc[ERROR_VECTOR_MAX];
+	int error_loc[16];
 };
 
 void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc,
-- 
cgit v1.2.3-71-gd317


From 7d10d2610cc02d432168ca0c5d964cd9e85c1b06 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Mon, 19 May 2014 09:21:09 +0200
Subject: net: cdc_ncm: fix 64bit division build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The upper timer_interval limit is arbitrary and much higher
than anything usable in the real world.  Reducing it from 15s
to ~4s to make the timer_interval fit in an u32 does not make
much difference.  The limit is still outside the practical
bounds.

This eliminates the need for a 64bit timer_interval, fixing a
build error related to 64bit division:

 drivers/built-in.o: In function `cdc_ncm_get_coalesce':
 ak8975.c:(.text+0x1ac994): undefined reference to `__aeabi_uldivmod'

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 4 ++--
 include/linux/usb/cdc_ncm.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 2d0caf1eea25..ad2a386a6e92 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -138,7 +138,7 @@ static int cdc_ncm_get_coalesce(struct net_device *netdev,
 	ec->tx_max_coalesced_frames = ctx->tx_max / ctx->max_datagram_size;
 
 	/* the timer will fire CDC_NCM_TIMER_PENDING_CNT times in a row */
-	ec->tx_coalesce_usecs = (ctx->timer_interval * CDC_NCM_TIMER_PENDING_CNT) / NSEC_PER_USEC;
+	ec->tx_coalesce_usecs = ctx->timer_interval / (NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT);
 	return 0;
 }
 
@@ -164,7 +164,7 @@ static int cdc_ncm_set_coalesce(struct net_device *netdev,
 		return -EINVAL;
 
 	spin_lock_bh(&ctx->mtx);
-	ctx->timer_interval = ec->tx_coalesce_usecs * NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT;
+	ctx->timer_interval = ec->tx_coalesce_usecs * (NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT);
 	if (!ctx->timer_interval)
 		ctx->tx_timer_pending = 0;
 	spin_unlock_bh(&ctx->mtx);
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 8c5e38819828..7c9b484735c5 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -78,7 +78,7 @@
 #define	CDC_NCM_TIMER_PENDING_CNT		2
 #define CDC_NCM_TIMER_INTERVAL_USEC		400UL
 #define CDC_NCM_TIMER_INTERVAL_MIN		5UL
-#define CDC_NCM_TIMER_INTERVAL_MAX		(15UL * USEC_PER_SEC)
+#define CDC_NCM_TIMER_INTERVAL_MAX		(U32_MAX / NSEC_PER_USEC)
 
 #define cdc_ncm_comm_intf_is_mbim(x)  ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \
 				       (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE)
@@ -104,7 +104,7 @@ struct cdc_ncm_ctx {
 	spinlock_t mtx;
 	atomic_t stop;
 
-	u64 timer_interval;
+	u32 timer_interval;
 	u32 max_ndp_size;
 
 	u32 tx_timer_pending;
-- 
cgit v1.2.3-71-gd317


From 3796ce1d4d4b330a75005c5eda105603ce9d4071 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Mon, 19 May 2014 22:42:32 +0200
Subject: pwm: add period and polarity to struct pwm_lookup

Add period and polarity members to struct pwm_lookup so that platforms
using the lookup table can be treated the same way as those using the
device tree.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/core.c  | 8 +++++++-
 include/linux/pwm.h | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index a80471399c20..4b66bf09ee55 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -661,10 +661,16 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id)
 		}
 	}
 
+	mutex_unlock(&pwm_lookup_lock);
+
 	if (chip)
 		pwm = pwm_request_from_chip(chip, index, con_id ?: dev_id);
+	if (IS_ERR(pwm))
+		return pwm;
+
+	pwm_set_period(pwm, p->period);
+	pwm_set_polarity(pwm, p->polarity);
 
-	mutex_unlock(&pwm_lookup_lock);
 
 	return pwm;
 }
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 4717f54051cb..2f45e2fe5b93 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -274,6 +274,8 @@ struct pwm_lookup {
 	unsigned int index;
 	const char *dev_id;
 	const char *con_id;
+	unsigned int period;
+	enum pwm_polarity polarity;
 };
 
 #define PWM_LOOKUP(_provider, _index, _dev_id, _con_id)	\
-- 
cgit v1.2.3-71-gd317


From cca674d47e59665630f3005291b61bb883015fc5 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@open-mesh.com>
Date: Mon, 19 May 2014 21:53:20 +0200
Subject: mac80211: export the expected throughput

Add get_expected_throughput() API to mac80211 so that each
driver can implement its own version based on the RC
algorithm they are using (might be using an HW RC algo).
The API returns a value expressed in Kbps.

Also, add the new get_expected_throughput() member
to the rate_control_ops structure in order to be
able to query the RC algorithm (this patch provides an
implementation of this API for both minstrel and
minstrel_ht).

The related member in the station_info object is now
filled accordingly when dumping a station.

Cc: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h             |  7 +++++++
 net/mac80211/cfg.c                 | 13 +++++++++++++
 net/mac80211/driver-ops.h          | 13 +++++++++++++
 net/mac80211/rc80211_minstrel.c    | 12 ++++++++++++
 net/mac80211/rc80211_minstrel_ht.c | 17 +++++++++++++++++
 net/mac80211/trace.h               | 32 ++++++++++++++++++++++++++++++++
 6 files changed, 94 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a34f26a4ed18..2c78997bc48d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2769,6 +2769,10 @@ enum ieee80211_roc_type {
  *	information in bss_conf is set up and the beacon can be retrieved. A
  *	channel context is bound before this is called.
  * @leave_ibss: Leave the IBSS again.
+ *
+ * @get_expected_throughput: extract the expected throughput towards the
+ *	specified station. The returned value is expressed in Kbps. It returns 0
+ *	if the RC algorithm does not have proper data to provide.
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
@@ -2962,6 +2966,7 @@ struct ieee80211_ops {
 
 	int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
 	void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
+	u32 (*get_expected_throughput)(struct ieee80211_sta *sta);
 };
 
 /**
@@ -4535,6 +4540,8 @@ struct rate_control_ops {
 	void (*add_sta_debugfs)(void *priv, void *priv_sta,
 				struct dentry *dir);
 	void (*remove_sta_debugfs)(void *priv, void *priv_sta);
+
+	u32 (*get_expected_throughput)(void *priv_sta);
 };
 
 static inline int rate_supported(struct ieee80211_sta *sta,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ac45304590d8..d7513a503be1 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -472,8 +472,10 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
+	struct rate_control_ref *ref = local->rate_ctrl;
 	struct timespec uptime;
 	u64 packets = 0;
+	u32 thr = 0;
 	int i, ac;
 
 	sinfo->generation = sdata->local->sta_generation;
@@ -587,6 +589,17 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 		sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED);
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
 		sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
+
+	/* check if the driver has a SW RC implementation */
+	if (ref && ref->ops->get_expected_throughput)
+		thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
+	else
+		thr = drv_get_expected_throughput(local, &sta->sta);
+
+	if (thr != 0) {
+		sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT;
+		sinfo->expected_throughput = thr;
+	}
 }
 
 static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = {
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index df1d50291344..696ef78b1fb7 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1156,4 +1156,17 @@ static inline void drv_leave_ibss(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
+static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
+					      struct ieee80211_sta *sta)
+{
+	u32 ret = 0;
+
+	trace_drv_get_expected_throughput(sta);
+	if (local->ops->get_expected_throughput)
+		ret = local->ops->get_expected_throughput(sta);
+	trace_drv_return_u32(local, ret);
+
+	return ret;
+}
+
 #endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 26fd94fa0aed..1c1469c36dca 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -657,6 +657,17 @@ minstrel_free(void *priv)
 	kfree(priv);
 }
 
+static u32 minstrel_get_expected_throughput(void *priv_sta)
+{
+	struct minstrel_sta_info *mi = priv_sta;
+	int idx = mi->max_tp_rate[0];
+
+	/* convert pkt per sec in kbps (1200 is the average pkt size used for
+	 * computing cur_tp
+	 */
+	return MINSTREL_TRUNC(mi->r[idx].cur_tp) * 1200 * 8 / 1024;
+}
+
 const struct rate_control_ops mac80211_minstrel = {
 	.name = "minstrel",
 	.tx_status = minstrel_tx_status,
@@ -670,6 +681,7 @@ const struct rate_control_ops mac80211_minstrel = {
 	.add_sta_debugfs = minstrel_add_sta_debugfs,
 	.remove_sta_debugfs = minstrel_remove_sta_debugfs,
 #endif
+	.get_expected_throughput = minstrel_get_expected_throughput,
 };
 
 int __init
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 4e916ad51a20..85c1e74b7714 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1032,6 +1032,22 @@ minstrel_ht_free(void *priv)
 	mac80211_minstrel.free(priv);
 }
 
+static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
+{
+	struct minstrel_ht_sta_priv *msp = priv_sta;
+	struct minstrel_ht_sta *mi = &msp->ht;
+	int i, j;
+
+	if (!msp->is_ht)
+		return mac80211_minstrel.get_expected_throughput(priv_sta);
+
+	i = mi->max_tp_rate / MCS_GROUP_RATES;
+	j = mi->max_tp_rate % MCS_GROUP_RATES;
+
+	/* convert cur_tp from pkt per second in kbps */
+	return mi->groups[i].rates[j].cur_tp * AVG_PKT_SIZE * 8 / 1024;
+}
+
 static const struct rate_control_ops mac80211_minstrel_ht = {
 	.name = "minstrel_ht",
 	.tx_status = minstrel_ht_tx_status,
@@ -1046,6 +1062,7 @@ static const struct rate_control_ops mac80211_minstrel_ht = {
 	.add_sta_debugfs = minstrel_ht_add_sta_debugfs,
 	.remove_sta_debugfs = minstrel_ht_remove_sta_debugfs,
 #endif
+	.get_expected_throughput = minstrel_ht_get_expected_throughput,
 };
 
 
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index a0b0aea76525..942f64b8ce0e 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -184,6 +184,20 @@ TRACE_EVENT(drv_return_bool,
 		  "true" : "false")
 );
 
+TRACE_EVENT(drv_return_u32,
+	TP_PROTO(struct ieee80211_local *local, u32 ret),
+	TP_ARGS(local, ret),
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(u32, ret)
+	),
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->ret = ret;
+	),
+	TP_printk(LOCAL_PR_FMT " - %u", LOCAL_PR_ARG, __entry->ret)
+);
+
 TRACE_EVENT(drv_return_u64,
 	TP_PROTO(struct ieee80211_local *local, u64 ret),
 	TP_ARGS(local, ret),
@@ -1499,6 +1513,24 @@ DEFINE_EVENT(local_sdata_evt, drv_leave_ibss,
 	TP_ARGS(local, sdata)
 );
 
+TRACE_EVENT(drv_get_expected_throughput,
+	TP_PROTO(struct ieee80211_sta *sta),
+
+	TP_ARGS(sta),
+
+	TP_STRUCT__entry(
+		STA_ENTRY
+	),
+
+	TP_fast_assign(
+		STA_ASSIGN;
+	),
+
+	TP_printk(
+		STA_PR_FMT, STA_PR_ARG
+	)
+);
+
 /*
  * Tracing for API calls that drivers call.
  */
-- 
cgit v1.2.3-71-gd317


From 7406353d43c8e2faf478721e87aeb6f2f9685de0 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <antonio@open-mesh.com>
Date: Mon, 19 May 2014 21:53:21 +0200
Subject: cfg80211: implement cfg80211_get_station cfg80211 API

Implement and export the new cfg80211_get_station() API.
This utility can be used by other kernel modules to obtain
detailed information about a given wireless station.

It will be in particular useful to batman-adv which will
implement a wireless rate based metric.

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h  | 13 +++++++++++++
 net/wireless/rdev-ops.h |  2 +-
 net/wireless/util.c     | 18 ++++++++++++++++++
 3 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 857d6476a128..a75fabd18502 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1074,6 +1074,19 @@ struct station_info {
 	 */
 };
 
+/**
+ * cfg80211_get_station - retrieve information about a given station
+ * @dev: the device where the station is supposed to be connected to
+ * @mac_addr: the mac address of the station of interest
+ * @sinfo: pointer to the structure to fill with the information
+ *
+ * Returns 0 on success and sinfo is filled with the available information
+ * otherwise returns a negative error code and the content of sinfo has to be
+ * considered undefined.
+ */
+int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
+			 struct station_info *sinfo);
+
 /**
  * enum monitor_flags - monitor flags
  *
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 00cdf73ba6c4..d95bbe348138 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -199,7 +199,7 @@ static inline int rdev_change_station(struct cfg80211_registered_device *rdev,
 }
 
 static inline int rdev_get_station(struct cfg80211_registered_device *rdev,
-				   struct net_device *dev, u8 *mac,
+				   struct net_device *dev, const u8 *mac,
 				   struct station_info *sinfo)
 {
 	int ret;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index fa61ac9c9b26..728f1c0dc70d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1546,6 +1546,24 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy)
 }
 EXPORT_SYMBOL(ieee80211_get_num_supported_channels);
 
+int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
+			 struct station_info *sinfo)
+{
+	struct cfg80211_registered_device *rdev;
+	struct wireless_dev *wdev;
+
+	wdev = dev->ieee80211_ptr;
+	if (!wdev)
+		return -EOPNOTSUPP;
+
+	rdev = wiphy_to_rdev(wdev->wiphy);
+	if (!rdev->ops->get_station)
+		return -EOPNOTSUPP;
+
+	return rdev_get_station(rdev, dev, mac_addr, sinfo);
+}
+EXPORT_SYMBOL(cfg80211_get_station);
+
 /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
 /* Ethernet-II snap header (RFC1042 for most EtherTypes) */
 const unsigned char rfc1042_header[] __aligned(2) =
-- 
cgit v1.2.3-71-gd317


From dc671157139918eaf61f73db1bd6dd02960b66e2 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Mon, 19 May 2014 22:42:34 +0200
Subject: pwm: renesas-tpu: remove unused struct tpu_pwm_platform_data

The struct is not used anymore and the polarity initialization will be
done using the PWM lookup table (or device tree).

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-renesas-tpu.c                 | 19 +++----------------
 include/linux/platform_data/pwm-renesas-tpu.h | 16 ----------------
 2 files changed, 3 insertions(+), 32 deletions(-)
 delete mode 100644 include/linux/platform_data/pwm-renesas-tpu.h

(limited to 'include')

diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c
index cc13ff4cfab0..3b71b42e89d5 100644
--- a/drivers/pwm/pwm-renesas-tpu.c
+++ b/drivers/pwm/pwm-renesas-tpu.c
@@ -21,13 +21,14 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
-#include <linux/platform_data/pwm-renesas-tpu.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
+#define TPU_CHANNEL_MAX		4
+
 #define TPU_TSTR		0x00	/* Timer start register (shared) */
 
 #define TPU_TCRn		0x00	/* Timer control register */
@@ -87,7 +88,6 @@ struct tpu_pwm_device {
 
 struct tpu_device {
 	struct platform_device *pdev;
-	enum pwm_polarity polarities[TPU_CHANNEL_MAX];
 	struct pwm_chip chip;
 	spinlock_t lock;
 
@@ -229,7 +229,7 @@ static int tpu_pwm_request(struct pwm_chip *chip, struct pwm_device *_pwm)
 
 	pwm->tpu = tpu;
 	pwm->channel = _pwm->hwpwm;
-	pwm->polarity = tpu->polarities[pwm->channel];
+	pwm->polarity = PWM_POLARITY_NORMAL;
 	pwm->prescaler = 0;
 	pwm->period = 0;
 	pwm->duty = 0;
@@ -388,16 +388,6 @@ static const struct pwm_ops tpu_pwm_ops = {
  * Probe and remove
  */
 
-static void tpu_parse_pdata(struct tpu_device *tpu)
-{
-	struct tpu_pwm_platform_data *pdata = tpu->pdev->dev.platform_data;
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(tpu->polarities); ++i)
-		tpu->polarities[i] = pdata ? pdata->channels[i].polarity
-				   : PWM_POLARITY_NORMAL;
-}
-
 static int tpu_probe(struct platform_device *pdev)
 {
 	struct tpu_device *tpu;
@@ -411,9 +401,6 @@ static int tpu_probe(struct platform_device *pdev)
 	spin_lock_init(&tpu->lock);
 	tpu->pdev = pdev;
 
-	/* Initialize device configuration from platform data. */
-	tpu_parse_pdata(tpu);
-
 	/* Map memory, get clock and pin control. */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	tpu->base = devm_ioremap_resource(&pdev->dev, res);
diff --git a/include/linux/platform_data/pwm-renesas-tpu.h b/include/linux/platform_data/pwm-renesas-tpu.h
deleted file mode 100644
index a7220b10ddab..000000000000
--- a/include/linux/platform_data/pwm-renesas-tpu.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __PWM_RENESAS_TPU_H__
-#define __PWM_RENESAS_TPU_H__
-
-#include <linux/pwm.h>
-
-#define TPU_CHANNEL_MAX		4
-
-struct tpu_pwm_channel_data {
-	enum pwm_polarity polarity;
-};
-
-struct tpu_pwm_platform_data {
-	struct tpu_pwm_channel_data channels[TPU_CHANNEL_MAX];
-};
-
-#endif /* __PWM_RENESAS_TPU_H__ */
-- 
cgit v1.2.3-71-gd317


From 4284402924cc55e182008ca7e9d4fb1e891ff5ae Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Mon, 19 May 2014 22:42:37 +0200
Subject: pwm: modify PWM_LOOKUP to initialize all struct pwm_lookup members

Now that PWM_LOOKUP is not used anymore, modify it to initialize all the
members of struct pwm_lookup.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/pwm.txt | 3 ++-
 include/linux/pwm.h   | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/pwm.txt b/Documentation/pwm.txt
index 0527f615b115..ca895fd211e4 100644
--- a/Documentation/pwm.txt
+++ b/Documentation/pwm.txt
@@ -19,7 +19,8 @@ should instead register a static mapping that can be used to match PWM
 consumers to providers, as given in the following example:
 
 	static struct pwm_lookup board_pwm_lookup[] = {
-		PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL),
+		PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL,
+			   50000, PWM_POLARITY_NORMAL),
 	};
 
 	static void __init board_init(void)
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 2f45e2fe5b93..e90628cac8fa 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -278,12 +278,14 @@ struct pwm_lookup {
 	enum pwm_polarity polarity;
 };
 
-#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id)	\
+#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id, _period, _polarity) \
 	{						\
 		.provider = _provider,			\
 		.index = _index,			\
 		.dev_id = _dev_id,			\
 		.con_id = _con_id,			\
+		.period = _period,			\
+		.polarity = _polarity			\
 	}
 
 #if IS_ENABLED(CONFIG_PWM)
-- 
cgit v1.2.3-71-gd317


From 89dd6a4b34c5a7658a4017c93b2fe66c964399d0 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 15 May 2014 15:32:12 +0200
Subject: drm/irq: Add kms-native crtc interface functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to start somewhere ... With this the only places left in i915
where we use pipe integers is in the interrupt handling code. And
there it actually makes some amount of sense.

v2:
- Polish kerneldoc a bit (Thierry).
- Drop "dev" parameter since it's unecessary.
- Split out i915 changes (Thierry).

Cc: Thierry Reding <thierry.reding@gmail.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_irq.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++
 include/drm/drmP.h        |  5 +++
 2 files changed, 82 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index f3dafccca456..5ba9a614e7ad 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -916,6 +916,8 @@ static int drm_vblank_enable(struct drm_device *dev, int crtc)
  * Acquire a reference count on vblank events to avoid having them disabled
  * while in use.
  *
+ * This is the legacy version of drm_crtc_vblank_get().
+ *
  * Returns:
  * Zero on success, nonzero on failure.
  */
@@ -940,6 +942,24 @@ int drm_vblank_get(struct drm_device *dev, int crtc)
 }
 EXPORT_SYMBOL(drm_vblank_get);
 
+/**
+ * drm_crtc_vblank_get - get a reference count on vblank events
+ * @crtc: which CRTC to own
+ *
+ * Acquire a reference count on vblank events to avoid having them disabled
+ * while in use.
+ *
+ * This is the native kms version of drm_vblank_off().
+ *
+ * Returns:
+ * Zero on success, nonzero on failure.
+ */
+int drm_crtc_vblank_get(struct drm_crtc *crtc)
+{
+	return drm_vblank_get(crtc->dev, drm_crtc_index(crtc));
+}
+EXPORT_SYMBOL(drm_crtc_vblank_get);
+
 /**
  * drm_vblank_put - give up ownership of vblank events
  * @dev: DRM device
@@ -947,6 +967,8 @@ EXPORT_SYMBOL(drm_vblank_get);
  *
  * Release ownership of a given vblank counter, turning off interrupts
  * if possible. Disable interrupts after drm_vblank_offdelay milliseconds.
+ *
+ * This is the legacy version of drm_crtc_vblank_put().
  */
 void drm_vblank_put(struct drm_device *dev, int crtc)
 {
@@ -960,6 +982,21 @@ void drm_vblank_put(struct drm_device *dev, int crtc)
 }
 EXPORT_SYMBOL(drm_vblank_put);
 
+/**
+ * drm_crtc_vblank_put - give up ownership of vblank events
+ * @crtc: which counter to give up
+ *
+ * Release ownership of a given vblank counter, turning off interrupts
+ * if possible. Disable interrupts after drm_vblank_offdelay milliseconds.
+ *
+ * This is the native kms version of drm_vblank_put().
+ */
+void drm_crtc_vblank_put(struct drm_crtc *crtc)
+{
+	drm_vblank_put(crtc->dev, drm_crtc_index(crtc));
+}
+EXPORT_SYMBOL(drm_crtc_vblank_put);
+
 /**
  * drm_vblank_off - disable vblank events on a CRTC
  * @dev: DRM device
@@ -971,6 +1008,8 @@ EXPORT_SYMBOL(drm_vblank_put);
  *
  * Drivers must use this function when the hardware vblank counter can get
  * reset, e.g. when suspending.
+ *
+ * This is the legacy version of drm_crtc_vblank_off().
  */
 void drm_vblank_off(struct drm_device *dev, int crtc)
 {
@@ -1003,6 +1042,25 @@ void drm_vblank_off(struct drm_device *dev, int crtc)
 }
 EXPORT_SYMBOL(drm_vblank_off);
 
+/**
+ * drm_crtc_vblank_off - disable vblank events on a CRTC
+ * @crtc: CRTC in question
+ *
+ * Drivers can use this function to shut down the vblank interrupt handling when
+ * disabling a crtc. This function ensures that the latest vblank frame count is
+ * stored so that drm_vblank_on can restore it again.
+ *
+ * Drivers must use this function when the hardware vblank counter can get
+ * reset, e.g. when suspending.
+ *
+ * This is the native kms version of drm_vblank_off().
+ */
+void drm_crtc_vblank_off(struct drm_crtc *crtc)
+{
+	drm_vblank_off(crtc->dev, drm_crtc_index(crtc));
+}
+EXPORT_SYMBOL(drm_crtc_vblank_off);
+
 /**
  * drm_vblank_on - enable vblank events on a CRTC
  * @dev: DRM device
@@ -1012,6 +1070,8 @@ EXPORT_SYMBOL(drm_vblank_off);
  * drm_vblank_off() again. Note that calls to drm_vblank_on() and
  * drm_vblank_off() can be unbalanced and so can also be unconditionaly called
  * in driver load code to reflect the current hardware state of the crtc.
+ *
+ * This is the legacy version of drm_crtc_vblank_on().
  */
 void drm_vblank_on(struct drm_device *dev, int crtc)
 {
@@ -1025,6 +1085,23 @@ void drm_vblank_on(struct drm_device *dev, int crtc)
 }
 EXPORT_SYMBOL(drm_vblank_on);
 
+/**
+ * drm_crtc_vblank_on - enable vblank events on a CRTC
+ * @crtc: CRTC in question
+ *
+ * This functions restores the vblank interrupt state captured with
+ * drm_vblank_off() again. Note that calls to drm_vblank_on() and
+ * drm_vblank_off() can be unbalanced and so can also be unconditionaly called
+ * in driver load code to reflect the current hardware state of the crtc.
+ *
+ * This is the native kms version of drm_vblank_on().
+ */
+void drm_crtc_vblank_on(struct drm_crtc *crtc)
+{
+	drm_vblank_on(crtc->dev, drm_crtc_index(crtc));
+}
+EXPORT_SYMBOL(drm_crtc_vblank_on);
+
 /**
  * drm_vblank_pre_modeset - account for vblanks across mode sets
  * @dev: DRM device
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 7339b2b00724..76ccaabd0418 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1359,9 +1359,14 @@ extern void drm_send_vblank_event(struct drm_device *dev, int crtc,
 extern bool drm_handle_vblank(struct drm_device *dev, int crtc);
 extern int drm_vblank_get(struct drm_device *dev, int crtc);
 extern void drm_vblank_put(struct drm_device *dev, int crtc);
+extern int drm_crtc_vblank_get(struct drm_crtc *crtc);
+extern void drm_crtc_vblank_put(struct drm_crtc *crtc);
 extern void drm_vblank_off(struct drm_device *dev, int crtc);
 extern void drm_vblank_on(struct drm_device *dev, int crtc);
+extern void drm_crtc_vblank_off(struct drm_crtc *crtc);
+extern void drm_crtc_vblank_on(struct drm_crtc *crtc);
 extern void drm_vblank_cleanup(struct drm_device *dev);
+
 extern u32 drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
 				     struct timeval *tvblank, unsigned flags);
 extern int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev,
-- 
cgit v1.2.3-71-gd317


From 5b3e507820c6e120bc2680c0d35f9d9d81fcb98d Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Wed, 14 May 2014 14:58:08 -0300
Subject: mtd: nand: pxa3xx: Use ECC strength and step size devicetree binding

This commit adds support for the user to specify the ECC strength
and step size through the devicetree. We keep the previous behavior,
when there is no DT parameter provided.

Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/pxa3xx_nand.c                | 17 +++++++++++++++--
 include/linux/platform_data/mtd-nand-pxa3xx.h |  3 +++
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 3b66a6460d67..2a9add06c2d5 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -1519,8 +1519,13 @@ KEEP_CONFIG:
 		}
 	}
 
-	ecc_strength = chip->ecc_strength_ds;
-	ecc_step = chip->ecc_step_ds;
+	if (pdata->ecc_strength && pdata->ecc_step_size) {
+		ecc_strength = pdata->ecc_strength;
+		ecc_step = pdata->ecc_step_size;
+	} else {
+		ecc_strength = chip->ecc_strength_ds;
+		ecc_step = chip->ecc_step_ds;
+	}
 
 	/* Set default ECC strength requirements on non-ONFI devices */
 	if (ecc_strength < 1 && ecc_step < 1) {
@@ -1729,6 +1734,14 @@ static int pxa3xx_nand_probe_dt(struct platform_device *pdev)
 	of_property_read_u32(np, "num-cs", &pdata->num_cs);
 	pdata->flash_bbt = of_get_nand_on_flash_bbt(np);
 
+	pdata->ecc_strength = of_get_nand_ecc_strength(np);
+	if (pdata->ecc_strength < 0)
+		pdata->ecc_strength = 0;
+
+	pdata->ecc_step_size = of_get_nand_ecc_step_size(np);
+	if (pdata->ecc_step_size < 0)
+		pdata->ecc_step_size = 0;
+
 	pdev->dev.platform_data = pdata;
 
 	return 0;
diff --git a/include/linux/platform_data/mtd-nand-pxa3xx.h b/include/linux/platform_data/mtd-nand-pxa3xx.h
index a94147124929..ac4ea2e641c7 100644
--- a/include/linux/platform_data/mtd-nand-pxa3xx.h
+++ b/include/linux/platform_data/mtd-nand-pxa3xx.h
@@ -58,6 +58,9 @@ struct pxa3xx_nand_platform_data {
 	/* use an flash-based bad block table */
 	bool	flash_bbt;
 
+	/* requested ECC strength and ECC step size */
+	int ecc_strength, ecc_step_size;
+
 	const struct mtd_partition		*parts[NUM_CHIP_SELECT];
 	unsigned int				nr_parts[NUM_CHIP_SELECT];
 
-- 
cgit v1.2.3-71-gd317


From 5fe821a9dee241fa450703ab7015d970ee0cfb8d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Mon, 19 May 2014 14:56:14 -0700
Subject: net: filter: cleanup invocation of internal BPF

Kernel API for classic BPF socket filters is:

sk_unattached_filter_create() - validate classic BPF, convert, JIT
SK_RUN_FILTER() - run it
sk_unattached_filter_destroy() - destroy socket filter

Cleanup internal BPF kernel API as following:

sk_filter_select_runtime() - final step of internal BPF creation.
  Try to JIT internal BPF program, if JIT is not available select interpreter
SK_RUN_FILTER() - run it
sk_filter_free() - free internal BPF program

Disallow direct calls to BPF interpreter. Execution of the BPF program should
be done with SK_RUN_FILTER() macro.

Example of internal BPF create, run, destroy:

  struct sk_filter *fp;

  fp = kzalloc(sk_filter_size(prog_len), GFP_KERNEL);
  memcpy(fp->insni, prog, prog_len * sizeof(fp->insni[0]));
  fp->len = prog_len;

  sk_filter_select_runtime(fp);

  SK_RUN_FILTER(fp, ctx);

  sk_filter_free(fp);

Sockets, seccomp, testsuite, tracing are using different ways to populate
sk_filter, so first steps of program creation are not common.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  6 ++----
 kernel/seccomp.c       |  6 ++----
 lib/test_bpf.c         |  4 ++--
 net/core/filter.c      | 44 ++++++++++++++++++++++++++++----------------
 4 files changed, 34 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9d5ae0a2c954..7977b3958e25 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -184,10 +184,8 @@ static inline unsigned int sk_filter_size(unsigned int proglen)
 
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
-u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
-			      const struct sock_filter_int *insni);
-u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
-			  const struct sock_filter_int *insni);
+void sk_filter_select_runtime(struct sk_filter *fp);
+void sk_filter_free(struct sk_filter *fp);
 
 int sk_convert_filter(struct sock_filter *prog, int len,
 		      struct sock_filter_int *new_prog, int *new_len);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 7e02d624cc50..1036b6f2fded 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -273,10 +273,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 
 	atomic_set(&filter->usage, 1);
 	filter->prog->len = new_len;
-	filter->prog->bpf_func = (void *)sk_run_filter_int_seccomp;
 
-	/* JIT internal BPF into native HW instructions */
-	bpf_int_jit_compile(filter->prog);
+	sk_filter_select_runtime(filter->prog);
 
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
@@ -340,7 +338,7 @@ void put_seccomp_filter(struct task_struct *tsk)
 	while (orig && atomic_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
 		orig = orig->prev;
-		bpf_jit_free(freeme->prog);
+		sk_filter_free(freeme->prog);
 		kfree(freeme);
 	}
 }
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 3603ebcd5d65..e160934430eb 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1489,7 +1489,7 @@ static __init int test_bpf(void)
 			memcpy(fp_ext->insns, tests[i].insns_int,
 			       fprog.len * 8);
 			fp->len = fprog.len;
-			fp->bpf_func = sk_run_filter_int_skb;
+			sk_filter_select_runtime(fp);
 		} else {
 			err = sk_unattached_filter_create(&fp, &fprog);
 			if (tests[i].data_type == EXPECTED_FAIL) {
@@ -1516,7 +1516,7 @@ static __init int test_bpf(void)
 		if (tests[i].data_type != SKB_INT)
 			sk_unattached_filter_destroy(fp);
 		else
-			kfree(fp);
+			sk_filter_free(fp);
 
 		if (err) {
 			pr_cont("FAIL %d\n", err);
diff --git a/net/core/filter.c b/net/core/filter.c
index 32c5b44c537e..7067cb240d3e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -153,7 +153,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
  * keep, 0 for none. @ctx is the data we are operating on, @insn is the
  * array of filter instructions.
  */
-unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
+static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
 {
 	u64 stack[MAX_BPF_STACK / sizeof(u64)];
 	u64 regs[MAX_BPF_REG], tmp;
@@ -571,15 +571,6 @@ load_byte:
 		return 0;
 }
 
-u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
-			      const struct sock_filter_int *insni)
-    __attribute__ ((alias ("__sk_run_filter")));
-
-u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
-			  const struct sock_filter_int *insni)
-    __attribute__ ((alias ("__sk_run_filter")));
-EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);
-
 /* Helper to find the offset of pkt_type in sk_buff structure. We want
  * to make sure its still a 3bit field starting at a byte boundary;
  * taken from arch/x86/net/bpf_jit_comp.c.
@@ -1397,7 +1388,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
 
 	sk_release_orig_filter(fp);
-	bpf_jit_free(fp);
+	sk_filter_free(fp);
 }
 
 /**
@@ -1497,7 +1488,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
 		goto out_err_free;
 	}
 
-	fp->bpf_func = sk_run_filter_int_skb;
 	fp->len = new_len;
 
 	/* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
@@ -1510,6 +1500,8 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
 		 */
 		goto out_err_free;
 
+	sk_filter_select_runtime(fp);
+
 	kfree(old_prog);
 	return fp;
 
@@ -1528,6 +1520,29 @@ void __weak bpf_int_jit_compile(struct sk_filter *prog)
 {
 }
 
+/**
+ *	sk_filter_select_runtime - select execution runtime for BPF program
+ *	@fp: sk_filter populated with internal BPF program
+ *
+ * try to JIT internal BPF program, if JIT is not available select interpreter
+ * BPF program will be executed via SK_RUN_FILTER() macro
+ */
+void sk_filter_select_runtime(struct sk_filter *fp)
+{
+	fp->bpf_func = (void *) __sk_run_filter;
+
+	/* Probe if internal BPF can be JITed */
+	bpf_int_jit_compile(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
+
+/* free internal BPF program */
+void sk_filter_free(struct sk_filter *fp)
+{
+	bpf_jit_free(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_free);
+
 static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 					     struct sock *sk)
 {
@@ -1548,12 +1563,9 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 	/* JIT compiler couldn't process this filter, so do the
 	 * internal BPF translation for the optimized interpreter.
 	 */
-	if (!fp->jited) {
+	if (!fp->jited)
 		fp = __sk_migrate_filter(fp, sk);
 
-		/* Probe if internal BPF can be jit-ed */
-		bpf_int_jit_compile(fp);
-	}
 	return fp;
 }
 
-- 
cgit v1.2.3-71-gd317


From ba730340f96c01160b5f26f81e8fb38f8cb1821c Mon Sep 17 00:00:00 2001
From: Alexander Popov <a13xp0p0v88@gmail.com>
Date: Thu, 15 May 2014 18:15:31 +0400
Subject: dmaengine: fix comment typo

Fix comment typo.

Signed-off-by: Alexander Popov <a13xp0p0v88@gmail.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8300fb87b84a..cbb168e04dc1 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -292,7 +292,7 @@ struct dma_chan_dev {
 };
 
 /**
- * enum dma_slave_buswidth - defines bus with of the DMA slave
+ * enum dma_slave_buswidth - defines bus width of the DMA slave
  * device, source or target buses
  */
 enum dma_slave_buswidth {
-- 
cgit v1.2.3-71-gd317


From 67af9811539be83dbdc0739215d29af23c870405 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sun, 18 May 2014 10:15:24 +0300
Subject: cfg80211: allow RSSI compensation

Channels in 2.4GHz band overlap, this means that if we
send a probe request on channel 1 and then move to channel
2, we will hear the probe response on channel 2. In this
case, the RSSI will be lower than if we had heard it on
the channel on which it was sent (1 in this case).

The firmware / low level driver can parse the channel in
the DS IE or HT IE and compensate the RSSI so that it will
still have a valid value even if we heard the frame on an
adjacent channel. This can be done up to a certain offset.

Add this offset as a configuration for the low level driver.
A low level driver that can compensate the low RSSI in this
case should assign the maximal offset for which the RSSI
value is still valid.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h |  7 +++++++
 net/wireless/scan.c    | 12 ++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a75fabd18502..920ec8c1ce54 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2961,6 +2961,12 @@ struct wiphy_vendor_command {
  *	and probe responses.  This value should be set if the driver
  *	wishes to limit the number of csa counters. Default (0) means
  *	infinite.
+ * @max_adj_channel_rssi_comp: max offset of between the channel on which the
+ *	frame was sent and the channel on which the frame was heard for which
+ *	the reported rssi is still valid. If a driver is able to compensate the
+ *	low rssi when a frame is heard on different channel, then it should set
+ *	this variable to the maximal offset for which it can compensate.
+ *	This value should be set in MHz.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -3079,6 +3085,7 @@ struct wiphy {
 	u16 max_ap_assoc_sta;
 
 	u8 max_num_csa_counters;
+	u8 max_adj_channel_rssi_comp;
 
 	char priv[0] __aligned(NETDEV_ALIGN);
 };
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 0f5da18cc619..77c56eef0574 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -883,6 +883,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
 	struct cfg80211_bss_ies *ies;
 	struct ieee80211_channel *channel;
 	struct cfg80211_internal_bss tmp = {}, *res;
+	bool signal_valid;
 
 	if (WARN_ON(!wiphy))
 		return NULL;
@@ -919,8 +920,9 @@ cfg80211_inform_bss_width(struct wiphy *wiphy,
 	rcu_assign_pointer(tmp.pub.beacon_ies, ies);
 	rcu_assign_pointer(tmp.pub.ies, ies);
 
-	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp,
-				  rx_channel == channel);
+	signal_valid = abs(rx_channel->center_freq - channel->center_freq) <=
+		wiphy->max_adj_channel_rssi_comp;
+	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
 	if (!res)
 		return NULL;
 
@@ -944,6 +946,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
 	struct cfg80211_internal_bss tmp = {}, *res;
 	struct cfg80211_bss_ies *ies;
 	struct ieee80211_channel *channel;
+	bool signal_valid;
 	size_t ielen = len - offsetof(struct ieee80211_mgmt,
 				      u.probe_resp.variable);
 
@@ -991,8 +994,9 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
 	tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
 	tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
 
-	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp,
-				  rx_channel == channel);
+	signal_valid = abs(rx_channel->center_freq - channel->center_freq) <=
+		wiphy->max_adj_channel_rssi_comp;
+	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
 	if (!res)
 		return NULL;
 
-- 
cgit v1.2.3-71-gd317


From bf3b5ec66bd03d66e9ea729aaca013ea1047a797 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:55:30 +0100
Subject: mmc: sdio_irq: rework sdio irq handling

Rather than the SDIO support spawning it's own thread for handling card
interrupts, use the generic IRQ infrastructure for this, triggering it
from the host interface's interrupt handling directly.

This avoids a race between the parent thread waiting to receive an
interrupt response from the card, and the slow startup from the sdio
irq thread, which can occur as a result of high system load (eg, while
udev is running.)

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
[Ulf Hansson] Resolved conflict
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/sdio_irq.c | 41 +++++++++++++++++++++++++++++++----------
 include/linux/mmc/host.h    |  3 +++
 2 files changed, 34 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index aaa90460ed23..5cc13c8d35bb 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -90,6 +90,15 @@ static int process_sdio_pending_irqs(struct mmc_host *host)
 	return ret;
 }
 
+void sdio_run_irqs(struct mmc_host *host)
+{
+	mmc_claim_host(host);
+	host->sdio_irq_pending = true;
+	process_sdio_pending_irqs(host);
+	mmc_release_host(host);
+}
+EXPORT_SYMBOL_GPL(sdio_run_irqs);
+
 static int sdio_irq_thread(void *_host)
 {
 	struct mmc_host *host = _host;
@@ -189,14 +198,20 @@ static int sdio_card_irq_get(struct mmc_card *card)
 	WARN_ON(!host->claimed);
 
 	if (!host->sdio_irqs++) {
-		atomic_set(&host->sdio_irq_thread_abort, 0);
-		host->sdio_irq_thread =
-			kthread_run(sdio_irq_thread, host, "ksdioirqd/%s",
-				mmc_hostname(host));
-		if (IS_ERR(host->sdio_irq_thread)) {
-			int err = PTR_ERR(host->sdio_irq_thread);
-			host->sdio_irqs--;
-			return err;
+		if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) {
+			atomic_set(&host->sdio_irq_thread_abort, 0);
+			host->sdio_irq_thread =
+				kthread_run(sdio_irq_thread, host,
+					    "ksdioirqd/%s", mmc_hostname(host));
+			if (IS_ERR(host->sdio_irq_thread)) {
+				int err = PTR_ERR(host->sdio_irq_thread);
+				host->sdio_irqs--;
+				return err;
+			}
+		} else {
+			mmc_host_clk_hold(host);
+			host->ops->enable_sdio_irq(host, 1);
+			mmc_host_clk_release(host);
 		}
 	}
 
@@ -211,8 +226,14 @@ static int sdio_card_irq_put(struct mmc_card *card)
 	BUG_ON(host->sdio_irqs < 1);
 
 	if (!--host->sdio_irqs) {
-		atomic_set(&host->sdio_irq_thread_abort, 1);
-		kthread_stop(host->sdio_irq_thread);
+		if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) {
+			atomic_set(&host->sdio_irq_thread_abort, 1);
+			kthread_stop(host->sdio_irq_thread);
+		} else {
+			mmc_host_clk_hold(host);
+			host->ops->enable_sdio_irq(host, 0);
+			mmc_host_clk_release(host);
+		}
 	}
 
 	return 0;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index cd595275e118..7960424d0bc0 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -282,6 +282,7 @@ struct mmc_host {
 #define MMC_CAP2_HS400_1_2V	(1 << 16)	/* Can support HS400 1.2V */
 #define MMC_CAP2_HS400		(MMC_CAP2_HS400_1_8V | \
 				 MMC_CAP2_HS400_1_2V)
+#define MMC_CAP2_SDIO_IRQ_NOTHREAD (1 << 17)
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
@@ -397,6 +398,8 @@ static inline void mmc_signal_sdio_irq(struct mmc_host *host)
 	wake_up_process(host->sdio_irq_thread);
 }
 
+void sdio_run_irqs(struct mmc_host *host);
+
 #ifdef CONFIG_REGULATOR
 int mmc_regulator_get_ocrmask(struct regulator *supply);
 int mmc_regulator_set_ocr(struct mmc_host *mmc,
-- 
cgit v1.2.3-71-gd317


From 781e989cf593c71d26bdca74f5e77b3651fc060e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:55:46 +0100
Subject: mmc: sdhci: convert to new SDIO IRQ handling

Use a generic threaded interrupt handler for SDIO interrupt handling,
rather than allowing the SDIO core code to buggily spawn its own
thread.  This results in host drivers to be more in control of how
SDIO interrupts are acknowledged in the hardware, rather than having
the internals of the SDIO core placed upon them, possibly resulting
in sub-standard handling.

At least one SDHCI implementation specifies a very specific sequence
to deal with a card interrupt.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci.c  | 62 +++++++++++++++++++++++++++++------------------
 include/linux/mmc/sdhci.h |  2 ++
 2 files changed, 41 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 4b6cca2130bc..4a0622d52ae5 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2428,10 +2428,10 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 
 static irqreturn_t sdhci_irq(int irq, void *dev_id)
 {
-	irqreturn_t result;
+	irqreturn_t result = IRQ_NONE;
 	struct sdhci_host *host = dev_id;
 	u32 intmask, mask, unexpected = 0;
-	int cardint = 0, max_loops = 16;
+	int max_loops = 16;
 
 	spin_lock(&host->lock);
 
@@ -2490,8 +2490,11 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 			pr_err("%s: Card is consuming too much power!\n",
 				mmc_hostname(host->mmc));
 
-		if (intmask & SDHCI_INT_CARD_INT)
-			cardint = 1;
+		if (intmask & SDHCI_INT_CARD_INT) {
+			sdhci_enable_sdio_irq_nolock(host, false);
+			host->thread_isr |= SDHCI_INT_CARD_INT;
+			result = IRQ_WAKE_THREAD;
+		}
 
 		intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE |
 			     SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK |
@@ -2503,17 +2506,10 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 			sdhci_writel(host, intmask, SDHCI_INT_STATUS);
 		}
 
-		result = IRQ_HANDLED;
+		if (result == IRQ_NONE)
+			result = IRQ_HANDLED;
 
 		intmask = sdhci_readl(host, SDHCI_INT_STATUS);
-
-		/*
-		 * If we know we'll call the driver to signal SDIO IRQ,
-		 * disregard further indications of Card Interrupt in
-		 * the status to avoid a needless loop.
-		 */
-		if (cardint)
-			intmask &= ~SDHCI_INT_CARD_INT;
 	} while (intmask && --max_loops);
 out:
 	spin_unlock(&host->lock);
@@ -2523,15 +2519,33 @@ out:
 			   mmc_hostname(host->mmc), unexpected);
 		sdhci_dumpregs(host);
 	}
-	/*
-	 * We have to delay this as it calls back into the driver.
-	 */
-	if (cardint)
-		mmc_signal_sdio_irq(host->mmc);
 
 	return result;
 }
 
+static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
+{
+	struct sdhci_host *host = dev_id;
+	unsigned long flags;
+	u32 isr;
+
+	spin_lock_irqsave(&host->lock, flags);
+	isr = host->thread_isr;
+	host->thread_isr = 0;
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	if (isr & SDHCI_INT_CARD_INT) {
+		sdio_run_irqs(host->mmc);
+
+		spin_lock_irqsave(&host->lock, flags);
+		if (host->flags & SDHCI_SDIO_IRQ_ENABLED)
+			sdhci_enable_sdio_irq_nolock(host, true);
+		spin_unlock_irqrestore(&host->lock, flags);
+	}
+
+	return isr ? IRQ_HANDLED : IRQ_NONE;
+}
+
 /*****************************************************************************\
  *                                                                           *
  * Suspend/resume                                                            *
@@ -2601,8 +2615,9 @@ int sdhci_resume_host(struct sdhci_host *host)
 	}
 
 	if (!device_may_wakeup(mmc_dev(host->mmc))) {
-		ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
-				  mmc_hostname(host->mmc), host);
+		ret = request_threaded_irq(host->irq, sdhci_irq,
+					   sdhci_thread_irq, IRQF_SHARED,
+					   mmc_hostname(host->mmc), host);
 		if (ret)
 			return ret;
 	} else {
@@ -2681,7 +2696,7 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host)
 	sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	synchronize_irq(host->irq);
+	synchronize_hardirq(host->irq);
 
 	spin_lock_irqsave(&host->lock, flags);
 	host->runtime_suspended = true;
@@ -2937,6 +2952,7 @@ int sdhci_add_host(struct sdhci_host *host)
 	mmc->max_busy_timeout = (1 << 27) / host->timeout_clk;
 
 	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
+	mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
 
 	if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12)
 		host->flags |= SDHCI_AUTO_CMD12;
@@ -3226,8 +3242,8 @@ int sdhci_add_host(struct sdhci_host *host)
 
 	sdhci_init(host, 0);
 
-	ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
-		mmc_hostname(mmc), host);
+	ret = request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq,
+				   IRQF_SHARED,	mmc_hostname(mmc), host);
 	if (ret) {
 		pr_err("%s: Failed to request IRQ %d: %d\n",
 		       mmc_hostname(mmc), host->irq, ret);
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 7be12b883485..d1aa97b77dd9 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -177,6 +177,8 @@ struct sdhci_host {
 	unsigned int            ocr_avail_mmc;
 	u32 ocr_mask;		/* available voltages */
 
+	u32			thread_isr;
+
 	wait_queue_head_t	buf_ready_int;	/* Waitqueue for Buffer Read Ready interrupt */
 	unsigned int		tuning_done;	/* Condition flag set when CMD19 succeeds */
 
-- 
cgit v1.2.3-71-gd317


From 3560db8e247aa35bc6b287ec7ec51cd41abd512e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:55:51 +0100
Subject: mmc: sdhci: push card_tasklet into threaded irq handler

There's no requirement to have the card tasklet separate now that we
have a threaded interrupt handler, so kill this and move the called
code into the threaded part of the handler.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci.c  | 23 +++++++++--------------
 include/linux/mmc/sdhci.h |  3 +--
 2 files changed, 10 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 4a0622d52ae5..8def3919b32c 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2128,15 +2128,6 @@ static const struct mmc_host_ops sdhci_ops = {
  *                                                                           *
 \*****************************************************************************/
 
-static void sdhci_tasklet_card(unsigned long param)
-{
-	struct sdhci_host *host = (struct sdhci_host*)param;
-
-	sdhci_card_event(host->mmc);
-
-	mmc_detect_change(host->mmc, msecs_to_jiffies(200));
-}
-
 static void sdhci_tasklet_finish(unsigned long param)
 {
 	struct sdhci_host *host;
@@ -2477,7 +2468,10 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 
 			sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT |
 				     SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
-			tasklet_schedule(&host->card_tasklet);
+
+			host->thread_isr |= intmask & (SDHCI_INT_CARD_INSERT |
+						       SDHCI_INT_CARD_REMOVE);
+			result = IRQ_WAKE_THREAD;
 		}
 
 		if (intmask & SDHCI_INT_CMD_MASK)
@@ -2534,6 +2528,11 @@ static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
 	host->thread_isr = 0;
 	spin_unlock_irqrestore(&host->lock, flags);
 
+	if (isr & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
+		sdhci_card_event(host->mmc);
+		mmc_detect_change(host->mmc, msecs_to_jiffies(200));
+	}
+
 	if (isr & SDHCI_INT_CARD_INT) {
 		sdio_run_irqs(host->mmc);
 
@@ -3224,8 +3223,6 @@ int sdhci_add_host(struct sdhci_host *host)
 	/*
 	 * Init tasklets.
 	 */
-	tasklet_init(&host->card_tasklet,
-		sdhci_tasklet_card, (unsigned long)host);
 	tasklet_init(&host->finish_tasklet,
 		sdhci_tasklet_finish, (unsigned long)host);
 
@@ -3290,7 +3287,6 @@ reset:
 	free_irq(host->irq, host);
 #endif
 untasklet:
-	tasklet_kill(&host->card_tasklet);
 	tasklet_kill(&host->finish_tasklet);
 
 	return ret;
@@ -3334,7 +3330,6 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 
 	del_timer_sync(&host->timer);
 
-	tasklet_kill(&host->card_tasklet);
 	tasklet_kill(&host->finish_tasklet);
 
 	if (host->vmmc) {
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index d1aa97b77dd9..f1c8e14e8751 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -164,8 +164,7 @@ struct sdhci_host {
 	dma_addr_t adma_addr;	/* Mapped ADMA descr. table */
 	dma_addr_t align_addr;	/* Mapped bounce buffer */
 
-	struct tasklet_struct card_tasklet;	/* Tasklet structures */
-	struct tasklet_struct finish_tasklet;
+	struct tasklet_struct finish_tasklet;	/* Tasklet structures */
 
 	struct timer_list timer;	/* Timer for timeouts */
 
-- 
cgit v1.2.3-71-gd317


From b537f94ce19583de1882f539a5cc49aa99260aca Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:56:01 +0100
Subject: mmc: sdhci: more efficient interrupt enable register handling

Rather than wasting cycles read-modify-writing the interrupt enable
registers, cache the value locally instead.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci.c  | 98 +++++++++++++++++++++++------------------------
 include/linux/mmc/sdhci.h |  3 ++
 2 files changed, 50 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 0ecbcc4c29d2..4a98ee29d136 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -131,27 +131,6 @@ static void sdhci_dumpregs(struct sdhci_host *host)
  *                                                                           *
 \*****************************************************************************/
 
-static void sdhci_clear_set_irqs(struct sdhci_host *host, u32 clear, u32 set)
-{
-	u32 ier;
-
-	ier = sdhci_readl(host, SDHCI_INT_ENABLE);
-	ier &= ~clear;
-	ier |= set;
-	sdhci_writel(host, ier, SDHCI_INT_ENABLE);
-	sdhci_writel(host, ier, SDHCI_SIGNAL_ENABLE);
-}
-
-static void sdhci_unmask_irqs(struct sdhci_host *host, u32 irqs)
-{
-	sdhci_clear_set_irqs(host, 0, irqs);
-}
-
-static void sdhci_mask_irqs(struct sdhci_host *host, u32 irqs)
-{
-	sdhci_clear_set_irqs(host, irqs, 0);
-}
-
 static void sdhci_set_card_detection(struct sdhci_host *host, bool enable)
 {
 	u32 present, irqs;
@@ -165,9 +144,12 @@ static void sdhci_set_card_detection(struct sdhci_host *host, bool enable)
 	irqs = present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT;
 
 	if (enable)
-		sdhci_unmask_irqs(host, irqs);
+		host->ier |= irqs;
 	else
-		sdhci_mask_irqs(host, irqs);
+		host->ier &= ~irqs;
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 }
 
 static void sdhci_enable_card_detection(struct sdhci_host *host)
@@ -183,17 +165,12 @@ static void sdhci_disable_card_detection(struct sdhci_host *host)
 static void sdhci_reset(struct sdhci_host *host, u8 mask)
 {
 	unsigned long timeout;
-	u32 uninitialized_var(ier);
-
 	if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) {
 		if (!(sdhci_readl(host, SDHCI_PRESENT_STATE) &
 			SDHCI_CARD_PRESENT))
 			return;
 	}
 
-	if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
-		ier = sdhci_readl(host, SDHCI_INT_ENABLE);
-
 	if (host->ops->platform_reset_enter)
 		host->ops->platform_reset_enter(host, mask);
 
@@ -224,8 +201,10 @@ static void sdhci_reset(struct sdhci_host *host, u8 mask)
 	if (host->ops->platform_reset_exit)
 		host->ops->platform_reset_exit(host, mask);
 
-	if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
-		sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK, ier);
+	if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET) {
+		sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+		sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+	}
 
 	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
 		if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL))
@@ -242,11 +221,14 @@ static void sdhci_init(struct sdhci_host *host, int soft)
 	else
 		sdhci_reset(host, SDHCI_RESET_ALL);
 
-	sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK,
-		SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT |
-		SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_INDEX |
-		SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT |
-		SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE);
+	host->ier = SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT |
+		    SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT |
+		    SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC |
+		    SDHCI_INT_TIMEOUT | SDHCI_INT_DATA_END |
+		    SDHCI_INT_RESPONSE;
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 
 	if (soft) {
 		/* force clock reconfiguration */
@@ -721,9 +703,12 @@ static void sdhci_set_transfer_irqs(struct sdhci_host *host)
 	u32 dma_irqs = SDHCI_INT_DMA_END | SDHCI_INT_ADMA_ERROR;
 
 	if (host->flags & SDHCI_REQ_USE_DMA)
-		sdhci_clear_set_irqs(host, pio_irqs, dma_irqs);
+		host->ier = (host->ier & ~pio_irqs) | dma_irqs;
 	else
-		sdhci_clear_set_irqs(host, dma_irqs, pio_irqs);
+		host->ier = (host->ier & ~dma_irqs) | pio_irqs;
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 }
 
 static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
@@ -1713,9 +1698,12 @@ static void sdhci_enable_sdio_irq_nolock(struct sdhci_host *host, int enable)
 {
 	if (!(host->flags & SDHCI_DEVICE_DEAD)) {
 		if (enable)
-			sdhci_unmask_irqs(host, SDHCI_INT_CARD_INT);
+			host->ier |= SDHCI_INT_CARD_INT;
 		else
-			sdhci_mask_irqs(host, SDHCI_INT_CARD_INT);
+			host->ier &= ~SDHCI_INT_CARD_INT;
+
+		sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+		sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 		mmiowb();
 	}
 }
@@ -1857,7 +1845,6 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 {
 	struct sdhci_host *host;
 	u16 ctrl;
-	u32 ier;
 	int tuning_loop_counter = MAX_TUNING_LOOP;
 	unsigned long timeout;
 	int err = 0;
@@ -1911,8 +1898,8 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	 * to make sure we don't hit a controller bug, we _only_
 	 * enable Buffer Read Ready interrupt here.
 	 */
-	ier = sdhci_readl(host, SDHCI_INT_ENABLE);
-	sdhci_clear_set_irqs(host, ier, SDHCI_INT_DATA_AVAIL);
+	sdhci_writel(host, SDHCI_INT_DATA_AVAIL, SDHCI_INT_ENABLE);
+	sdhci_writel(host, SDHCI_INT_DATA_AVAIL, SDHCI_SIGNAL_ENABLE);
 
 	/*
 	 * Issue CMD19 repeatedly till Execute Tuning is set to 0 or the number
@@ -2047,7 +2034,8 @@ out:
 	if (err && (host->flags & SDHCI_USING_RETUNING_TIMER))
 		err = 0;
 
-	sdhci_clear_set_irqs(host, SDHCI_INT_DATA_AVAIL, ier);
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 	spin_unlock_irqrestore(&host->lock, flags);
 	sdhci_runtime_pm_put(host);
 
@@ -2460,10 +2448,12 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 			 * More testing are needed here to ensure it works
 			 * for other platforms though.
 			 */
-			sdhci_mask_irqs(host, present ? SDHCI_INT_CARD_INSERT :
-							SDHCI_INT_CARD_REMOVE);
-			sdhci_unmask_irqs(host, present ? SDHCI_INT_CARD_REMOVE :
-							  SDHCI_INT_CARD_INSERT);
+			host->ier &= ~(SDHCI_INT_CARD_INSERT |
+				       SDHCI_INT_CARD_REMOVE);
+			host->ier |= present ? SDHCI_INT_CARD_REMOVE :
+					       SDHCI_INT_CARD_INSERT;
+			sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+			sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 
 			sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT |
 				     SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
@@ -2592,7 +2582,9 @@ int sdhci_suspend_host(struct sdhci_host *host)
 	}
 
 	if (!device_may_wakeup(mmc_dev(host->mmc))) {
-		sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+		host->ier = 0;
+		sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+		sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
 		free_irq(host->irq, host);
 	} else {
 		sdhci_enable_irq_wakeups(host);
@@ -2691,7 +2683,9 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host)
 	}
 
 	spin_lock_irqsave(&host->lock, flags);
-	sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK & ~SDHCI_INT_CARD_INT);
+	host->ier &= SDHCI_INT_CARD_INT;
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 	spin_unlock_irqrestore(&host->lock, flags);
 
 	synchronize_hardirq(host->irq);
@@ -3282,7 +3276,8 @@ int sdhci_add_host(struct sdhci_host *host)
 #ifdef SDHCI_USE_LEDS_CLASS
 reset:
 	sdhci_reset(host, SDHCI_RESET_ALL);
-	sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+	sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+	sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
 	free_irq(host->irq, host);
 #endif
 untasklet:
@@ -3324,7 +3319,8 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 	if (!dead)
 		sdhci_reset(host, SDHCI_RESET_ALL);
 
-	sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+	sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+	sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
 	free_irq(host->irq, host);
 
 	del_timer_sync(&host->timer);
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index f1c8e14e8751..9361d8ef509d 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -178,6 +178,9 @@ struct sdhci_host {
 
 	u32			thread_isr;
 
+	/* cached registers */
+	u32			ier;
+
 	wait_queue_head_t	buf_ready_int;	/* Waitqueue for Buffer Read Ready interrupt */
 	unsigned int		tuning_done;	/* Condition flag set when CMD19 succeeds */
 
-- 
cgit v1.2.3-71-gd317


From 0718e59ae259f7c48155b4e852d8b0632d59028e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:57:18 +0100
Subject: mmc: sdhci: move FSL ESDHC reset handling quirk into esdhc code

The Freescale esdhc driver is the only driver which needs the interrupt
registers restored after a reset.  Move this quirk to be part of the
ESDHC driver implementation.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci-esdhc-imx.c | 10 +++++++++-
 drivers/mmc/host/sdhci-esdhc.h     |  3 +--
 drivers/mmc/host/sdhci.c           |  5 -----
 include/linux/mmc/sdhci.h          |  2 --
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index b1d74fa33c5f..812c5772d900 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -876,6 +876,14 @@ static int esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
 	return esdhc_change_pinstate(host, uhs);
 }
 
+static void esdhc_reset(struct sdhci_host *host, u8 mask)
+{
+	sdhci_reset(host, mask);
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+}
+
 static struct sdhci_ops sdhci_esdhc_ops = {
 	.read_l = esdhc_readl_le,
 	.read_w = esdhc_readw_le,
@@ -888,7 +896,7 @@ static struct sdhci_ops sdhci_esdhc_ops = {
 	.get_ro = esdhc_pltfm_get_ro,
 	.set_bus_width = esdhc_pltfm_set_bus_width,
 	.set_uhs_signaling = esdhc_set_uhs_signaling,
-	.reset = sdhci_reset,
+	.reset = esdhc_reset,
 };
 
 static const struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h
index a7d9f95a7b03..de69bddc3afc 100644
--- a/drivers/mmc/host/sdhci-esdhc.h
+++ b/drivers/mmc/host/sdhci-esdhc.h
@@ -22,8 +22,7 @@
 				SDHCI_QUIRK_NO_BUSY_IRQ | \
 				SDHCI_QUIRK_NONSTANDARD_CLOCK | \
 				SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | \
-				SDHCI_QUIRK_PIO_NEEDS_DELAY | \
-				SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
+				SDHCI_QUIRK_PIO_NEEDS_DELAY)
 
 #define ESDHC_SYSTEM_CONTROL	0x2c
 #define ESDHC_CLOCK_MASK	0x0000fff0
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 5e25147e92f7..074157e8e73d 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -203,11 +203,6 @@ static void sdhci_do_reset(struct sdhci_host *host, u8 mask)
 
 	host->ops->reset(host, mask);
 
-	if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET) {
-		sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
-		sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
-	}
-
 	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
 		if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL))
 			host->ops->enable_dma(host);
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 9361d8ef509d..02919ef99419 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -61,8 +61,6 @@ struct sdhci_host {
 #define SDHCI_QUIRK_NONSTANDARD_CLOCK			(1<<17)
 /* Controller does not like fast PIO transfers */
 #define SDHCI_QUIRK_PIO_NEEDS_DELAY			(1<<18)
-/* Controller losing signal/interrupt enable states after reset */
-#define SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET		(1<<19)
 /* Controller has to be forced to use block size of 2048 bytes */
 #define SDHCI_QUIRK_FORCE_BLK_SZ_2048			(1<<20)
 /* Controller cannot do multi-block transfers */
-- 
cgit v1.2.3-71-gd317


From 1771059cf5f9c09e37ef6315df8acf120f2642fc Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:58:55 +0100
Subject: mmc: sdhci: convert sdhci_set_clock() into a library function

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci-acpi.c      |  2 ++
 drivers/mmc/host/sdhci-bcm-kona.c  |  1 +
 drivers/mmc/host/sdhci-bcm2835.c   |  1 +
 drivers/mmc/host/sdhci-cns3xxx.c   |  3 +--
 drivers/mmc/host/sdhci-dove.c      |  1 +
 drivers/mmc/host/sdhci-esdhc.h     |  1 -
 drivers/mmc/host/sdhci-of-arasan.c |  1 +
 drivers/mmc/host/sdhci-of-hlwd.c   |  1 +
 drivers/mmc/host/sdhci-pci.c       |  1 +
 drivers/mmc/host/sdhci-pltfm.c     |  1 +
 drivers/mmc/host/sdhci-pxav2.c     |  1 +
 drivers/mmc/host/sdhci-pxav3.c     |  1 +
 drivers/mmc/host/sdhci-s3c.c       | 19 ++++++++++++++-----
 drivers/mmc/host/sdhci-sirf.c      |  1 +
 drivers/mmc/host/sdhci-spear.c     |  1 +
 drivers/mmc/host/sdhci-tegra.c     |  1 +
 drivers/mmc/host/sdhci.c           | 17 ++++++-----------
 drivers/mmc/host/sdhci.h           |  1 +
 include/linux/mmc/sdhci.h          |  2 --
 19 files changed, 36 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index aca84a682551..323e2a688563 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -102,12 +102,14 @@ static void sdhci_acpi_int_hw_reset(struct sdhci_host *host)
 }
 
 static const struct sdhci_ops sdhci_acpi_ops_dflt = {
+	.set_clock = sdhci_set_clock,
 	.enable_dma = sdhci_acpi_enable_dma,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 };
 
 static const struct sdhci_ops sdhci_acpi_ops_int = {
+	.set_clock = sdhci_set_clock,
 	.enable_dma = sdhci_acpi_enable_dma,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
diff --git a/drivers/mmc/host/sdhci-bcm-kona.c b/drivers/mmc/host/sdhci-bcm-kona.c
index 7b97bfab910d..e610811c09b0 100644
--- a/drivers/mmc/host/sdhci-bcm-kona.c
+++ b/drivers/mmc/host/sdhci-bcm-kona.c
@@ -206,6 +206,7 @@ static void sdhci_bcm_kona_init_74_clocks(struct sdhci_host *host,
 }
 
 static struct sdhci_ops sdhci_bcm_kona_ops = {
+	.set_clock = sdhci_set_clock,
 	.get_max_clock = sdhci_bcm_kona_get_max_clk,
 	.get_timeout_clock = sdhci_bcm_kona_get_timeout_clock,
 	.platform_send_init_74_clocks = sdhci_bcm_kona_init_74_clocks,
diff --git a/drivers/mmc/host/sdhci-bcm2835.c b/drivers/mmc/host/sdhci-bcm2835.c
index 289b1c80d5fc..74906d6008e1 100644
--- a/drivers/mmc/host/sdhci-bcm2835.c
+++ b/drivers/mmc/host/sdhci-bcm2835.c
@@ -131,6 +131,7 @@ static const struct sdhci_ops bcm2835_sdhci_ops = {
 	.read_l = bcm2835_sdhci_readl,
 	.read_w = bcm2835_sdhci_readw,
 	.read_b = bcm2835_sdhci_readb,
+	.set_clock = sdhci_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
 	.get_min_clock = bcm2835_sdhci_get_min_clock,
 	.set_bus_width = sdhci_set_bus_width,
diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c
index 416f4a4c2e35..587d73ef33ff 100644
--- a/drivers/mmc/host/sdhci-cns3xxx.c
+++ b/drivers/mmc/host/sdhci-cns3xxx.c
@@ -89,8 +89,7 @@ static const struct sdhci_pltfm_data sdhci_cns3xxx_pdata = {
 		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
 		  SDHCI_QUIRK_INVERTED_WRITE_PROTECT |
 		  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
-		  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
-		  SDHCI_QUIRK_NONSTANDARD_CLOCK,
+		  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
 };
 
 static int sdhci_cns3xxx_probe(struct platform_device *pdev)
diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
index 1408cc11d881..8ef4ab52f8e0 100644
--- a/drivers/mmc/host/sdhci-dove.c
+++ b/drivers/mmc/host/sdhci-dove.c
@@ -86,6 +86,7 @@ static u32 sdhci_dove_readl(struct sdhci_host *host, int reg)
 static const struct sdhci_ops sdhci_dove_ops = {
 	.read_w	= sdhci_dove_readw,
 	.read_l	= sdhci_dove_readl,
+	.set_clock = sdhci_set_clock,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 };
diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h
index de69bddc3afc..3497cfaf683c 100644
--- a/drivers/mmc/host/sdhci-esdhc.h
+++ b/drivers/mmc/host/sdhci-esdhc.h
@@ -20,7 +20,6 @@
 
 #define ESDHC_DEFAULT_QUIRKS	(SDHCI_QUIRK_FORCE_BLK_SZ_2048 | \
 				SDHCI_QUIRK_NO_BUSY_IRQ | \
-				SDHCI_QUIRK_NONSTANDARD_CLOCK | \
 				SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | \
 				SDHCI_QUIRK_PIO_NEEDS_DELAY)
 
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index faef21740584..f0ee594f25d1 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -52,6 +52,7 @@ static unsigned int sdhci_arasan_get_timeout_clock(struct sdhci_host *host)
 }
 
 static struct sdhci_ops sdhci_arasan_ops = {
+	.set_clock = sdhci_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
 	.get_timeout_clock = sdhci_arasan_get_timeout_clock,
 	.set_bus_width = sdhci_set_bus_width,
diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c
index fb01958cb18e..a4a1f0f2c0a0 100644
--- a/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/drivers/mmc/host/sdhci-of-hlwd.c
@@ -58,6 +58,7 @@ static const struct sdhci_ops sdhci_hlwd_ops = {
 	.write_l = sdhci_hlwd_writel,
 	.write_w = sdhci_hlwd_writew,
 	.write_b = sdhci_hlwd_writeb,
+	.set_clock = sdhci_set_clock,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 };
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 87f9dd91f68c..b3a28f6b170e 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -1078,6 +1078,7 @@ static void sdhci_pci_hw_reset(struct sdhci_host *host)
 }
 
 static const struct sdhci_ops sdhci_pci_ops = {
+	.set_clock	= sdhci_set_clock,
 	.enable_dma	= sdhci_pci_enable_dma,
 	.set_bus_width	= sdhci_pci_set_bus_width,
 	.reset		= sdhci_reset,
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index bfbf467b61c7..1fb89f44bd58 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -45,6 +45,7 @@ unsigned int sdhci_pltfm_clk_get_max_clock(struct sdhci_host *host)
 EXPORT_SYMBOL_GPL(sdhci_pltfm_clk_get_max_clock);
 
 static const struct sdhci_ops sdhci_pltfm_ops = {
+	.set_clock = sdhci_set_clock,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 };
diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index 2eee0c8b88eb..db5257bf032e 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c
@@ -112,6 +112,7 @@ static void pxav2_mmc_set_bus_width(struct sdhci_host *host, int width)
 }
 
 static const struct sdhci_ops pxav2_sdhci_ops = {
+	.set_clock     = sdhci_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
 	.set_bus_width = pxav2_mmc_set_bus_width,
 	.reset         = pxav2_reset,
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index 86564233ae93..8a40e079a57e 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -225,6 +225,7 @@ static int pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
 }
 
 static const struct sdhci_ops pxav3_sdhci_ops = {
+	.set_clock = sdhci_set_clock,
 	.set_uhs_signaling = pxav3_set_uhs_signaling,
 	.platform_send_init_74_clocks = pxav3_gen_init_74_clocks,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index 9d710b748b9c..9e6f1c52982c 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -55,6 +55,8 @@ struct sdhci_s3c {
 	struct clk		*clk_io;
 	struct clk		*clk_bus[MAX_BUS_CLK];
 	unsigned long		clk_rates[MAX_BUS_CLK];
+
+	bool			no_divider;
 };
 
 /**
@@ -67,6 +69,7 @@ struct sdhci_s3c {
  */
 struct sdhci_s3c_drv_data {
 	unsigned int	sdhci_quirks;
+	bool		no_divider;
 };
 
 static inline struct sdhci_s3c *to_s3c(struct sdhci_host *host)
@@ -116,7 +119,7 @@ static unsigned int sdhci_s3c_consider_clock(struct sdhci_s3c *ourhost,
 	 * If controller uses a non-standard clock division, find the best clock
 	 * speed possible with selected clock source and skip the division.
 	 */
-	if (ourhost->host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) {
+	if (ourhost->no_divider) {
 		rate = clk_round_rate(clksrc, wanted);
 		return wanted - rate;
 	}
@@ -161,8 +164,10 @@ static void sdhci_s3c_set_clock(struct sdhci_host *host, unsigned int clock)
 	host->mmc->actual_clock = 0;
 
 	/* don't bother if the clock is going off. */
-	if (clock == 0)
+	if (clock == 0) {
+		sdhci_set_clock(host, clock);
 		return;
+	}
 
 	for (src = 0; src < MAX_BUS_CLK; src++) {
 		delta = sdhci_s3c_consider_clock(ourhost, src, clock);
@@ -214,6 +219,8 @@ static void sdhci_s3c_set_clock(struct sdhci_host *host, unsigned int clock)
 	if (clock < 25 * 1000000)
 		ctrl |= (S3C_SDHCI_CTRL3_FCSEL3 | S3C_SDHCI_CTRL3_FCSEL2);
 	writel(ctrl, host->ioaddr + S3C_SDHCI_CONTROL3);
+
+	sdhci_set_clock(host, clock);
 }
 
 /**
@@ -603,8 +610,10 @@ static int sdhci_s3c_probe(struct platform_device *pdev)
 	/* Setup quirks for the controller */
 	host->quirks |= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC;
 	host->quirks |= SDHCI_QUIRK_NO_HISPD_BIT;
-	if (drv_data)
+	if (drv_data) {
 		host->quirks |= drv_data->sdhci_quirks;
+		sc->no_divider = drv_data->no_divider;
+	}
 
 #ifndef CONFIG_MMC_SDHCI_S3C_DMA
 
@@ -653,7 +662,7 @@ static int sdhci_s3c_probe(struct platform_device *pdev)
 	 * If controller does not have internal clock divider,
 	 * we can use overriding functions instead of default.
 	 */
-	if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) {
+	if (sc->no_divider) {
 		sdhci_s3c_ops.set_clock = sdhci_cmu_set_clock;
 		sdhci_s3c_ops.get_min_clock = sdhci_cmu_get_min_clock;
 		sdhci_s3c_ops.get_max_clock = sdhci_cmu_get_max_clock;
@@ -794,7 +803,7 @@ static const struct dev_pm_ops sdhci_s3c_pmops = {
 
 #if defined(CONFIG_CPU_EXYNOS4210) || defined(CONFIG_SOC_EXYNOS4212)
 static struct sdhci_s3c_drv_data exynos4_sdhci_drv_data = {
-	.sdhci_quirks = SDHCI_QUIRK_NONSTANDARD_CLOCK,
+	.no_divider = true,
 };
 #define EXYNOS4_SDHCI_DRV_DATA ((kernel_ulong_t)&exynos4_sdhci_drv_data)
 #else
diff --git a/drivers/mmc/host/sdhci-sirf.c b/drivers/mmc/host/sdhci-sirf.c
index 5d79e10e1ba2..3b775348b470 100644
--- a/drivers/mmc/host/sdhci-sirf.c
+++ b/drivers/mmc/host/sdhci-sirf.c
@@ -28,6 +28,7 @@ static unsigned int sdhci_sirf_get_max_clk(struct sdhci_host *host)
 }
 
 static struct sdhci_ops sdhci_sirf_ops = {
+	.set_clock = sdhci_set_clock,
 	.get_max_clock	= sdhci_sirf_get_max_clk,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index c2a2bedc8813..8bf64ab36720 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c
@@ -38,6 +38,7 @@ struct spear_sdhci {
 
 /* sdhci ops */
 static const struct sdhci_ops sdhci_pltfm_ops = {
+	.set_clock = sdhci_set_clock,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 };
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 7754c0319fda..a0a8b5cc3b0c 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -153,6 +153,7 @@ static const struct sdhci_ops tegra_sdhci_ops = {
 	.read_l     = tegra_sdhci_readl,
 	.read_w     = tegra_sdhci_readw,
 	.write_l    = tegra_sdhci_writel,
+	.set_clock  = sdhci_set_clock,
 	.set_bus_width = tegra_sdhci_set_bus_width,
 	.reset      = tegra_sdhci_reset,
 };
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index d9b91fc17bb0..69e58d071b33 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1112,19 +1112,13 @@ static u16 sdhci_get_preset_value(struct sdhci_host *host)
 	return preset;
 }
 
-static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 {
 	int div = 0; /* Initialized for compiler warning */
 	int real_div = div, clk_mul = 1;
 	u16 clk = 0;
 	unsigned long timeout;
 
-	if (host->ops->set_clock) {
-		host->ops->set_clock(host, clock);
-		if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK)
-			return;
-	}
-
 	host->mmc->actual_clock = 0;
 
 	sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
@@ -1221,6 +1215,7 @@ clock_set:
 	clk |= SDHCI_CLOCK_CARD_EN;
 	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
 }
+EXPORT_SYMBOL_GPL(sdhci_set_clock);
 
 static int sdhci_set_power(struct sdhci_host *host, unsigned short power)
 {
@@ -1439,7 +1434,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 		sdhci_enable_preset_value(host, false);
 
 	if (!ios->clock || ios->clock != host->clock) {
-		sdhci_set_clock(host, ios->clock);
+		host->ops->set_clock(host, ios->clock);
 		host->clock = ios->clock;
 	}
 
@@ -1510,7 +1505,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 			sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 
 			/* Re-enable SD Clock */
-			sdhci_set_clock(host, host->clock);
+			host->ops->set_clock(host, host->clock);
 		}
 
 
@@ -1555,7 +1550,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 		}
 
 		/* Re-enable SD Clock */
-		sdhci_set_clock(host, host->clock);
+		host->ops->set_clock(host, host->clock);
 	} else
 		sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 
@@ -2129,7 +2124,7 @@ static void sdhci_tasklet_finish(unsigned long param)
 		/* Some controllers need this kick or reset won't work here */
 		if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET)
 			/* This is to force an update */
-			sdhci_set_clock(host, host->clock);
+			host->ops->set_clock(host, host->clock);
 
 		/* Spec says we should do both at the same time, but Ricoh
 		   controllers do not like that. */
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 7d84cb3b0e00..ac20195f667b 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -400,6 +400,7 @@ static inline bool sdhci_sdio_irq_enabled(struct sdhci_host *host)
 	return !!(host->flags & SDHCI_SDIO_IRQ_ENABLED);
 }
 
+void sdhci_set_clock(struct sdhci_host *host, unsigned int clock);
 void sdhci_set_bus_width(struct sdhci_host *host, int width);
 void sdhci_reset(struct sdhci_host *host, u8 mask);
 
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 02919ef99419..72a90baf111f 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -57,8 +57,6 @@ struct sdhci_host {
 #define SDHCI_QUIRK_BROKEN_CARD_DETECTION		(1<<15)
 /* Controller reports inverted write-protect state */
 #define SDHCI_QUIRK_INVERTED_WRITE_PROTECT		(1<<16)
-/* Controller has nonstandard clock management */
-#define SDHCI_QUIRK_NONSTANDARD_CLOCK			(1<<17)
 /* Controller does not like fast PIO transfers */
 #define SDHCI_QUIRK_PIO_NEEDS_DELAY			(1<<18)
 /* Controller has to be forced to use block size of 2048 bytes */
-- 
cgit v1.2.3-71-gd317


From d975f121011a58223c7936ab483c3374a83236c3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:59:31 +0100
Subject: mmc: sdhci: cache timing information locally

Rather than reading back the timing information from the registers,
cache it locally.  This allows implementations to translate the UHS
timing by overriding the set_uhs_signaling() method as required
without also having to emulate the SDHCI_HOST_CONTROL2 register.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
[Ulf Hansson] Resolved conflict
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci.c  | 23 ++++++++++++-----------
 include/linux/mmc/sdhci.h |  2 ++
 2 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 0073aae0adcb..956799c75df2 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1083,24 +1083,23 @@ static void sdhci_finish_command(struct sdhci_host *host)
 
 static u16 sdhci_get_preset_value(struct sdhci_host *host)
 {
-	u16 ctrl, preset = 0;
+	u16 preset = 0;
 
-	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-
-	switch (ctrl & SDHCI_CTRL_UHS_MASK) {
-	case SDHCI_CTRL_UHS_SDR12:
+	switch (host->timing) {
+	case MMC_TIMING_UHS_SDR12:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR12);
 		break;
-	case SDHCI_CTRL_UHS_SDR25:
+	case MMC_TIMING_UHS_SDR25:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR25);
 		break;
-	case SDHCI_CTRL_UHS_SDR50:
+	case MMC_TIMING_UHS_SDR50:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR50);
 		break;
-	case SDHCI_CTRL_UHS_SDR104:
+	case MMC_TIMING_UHS_SDR104:
+	case MMC_TIMING_MMC_HS200:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR104);
 		break;
-	case SDHCI_CTRL_UHS_DDR50:
+	case MMC_TIMING_UHS_DDR50:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_DDR50);
 		break;
 	default:
@@ -1538,6 +1537,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 		sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
 
 		host->ops->set_uhs_signaling(host, ios->timing);
+		host->timing = ios->timing;
 
 		if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) &&
 				((ios->timing == MMC_TIMING_UHS_SDR12) ||
@@ -1842,12 +1842,13 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	 * If the Host Controller supports the HS200 mode then the
 	 * tuning function has to be executed.
 	 */
-	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) &&
+	if (host->timing == MMC_TIMING_UHS_SDR50 &&
 	    (host->flags & SDHCI_SDR50_NEEDS_TUNING ||
 	     host->flags & SDHCI_SDR104_NEEDS_TUNING))
 		requires_tuning_nonuhs = true;
 
-	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) ||
+	if (host->timing == MMC_TIMING_MMC_HS200 ||
+	    host->timing == MMC_TIMING_UHS_SDR104 ||
 	    requires_tuning_nonuhs)
 		ctrl |= SDHCI_CTRL_EXEC_TUNING;
 	else {
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 72a90baf111f..7f3efbab8732 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -172,6 +172,8 @@ struct sdhci_host {
 	unsigned int            ocr_avail_mmc;
 	u32 ocr_mask;		/* available voltages */
 
+	unsigned		timing;		/* Current timing */
+
 	u32			thread_isr;
 
 	/* cached registers */
-- 
cgit v1.2.3-71-gd317


From da91a8f9c0f56d75b35bfe2e2456187ab55b3639 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 13:00:12 +0100
Subject: mmc: sdhci: track whether preset mode is currently enabled in
 hardware

Track whether preset mode is currently enabled in hardware, and use that
when making decisions elsewhere in the code rather than reading the
register and checking the bit.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci.c  | 44 ++++++++++++++++++++++++++------------------
 include/linux/mmc/sdhci.h |  1 +
 2 files changed, 27 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index effd9e5d1d81..447eef8217c7 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -205,9 +205,14 @@ static void sdhci_do_reset(struct sdhci_host *host, u8 mask)
 
 	host->ops->reset(host, mask);
 
-	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
-		if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL))
-			host->ops->enable_dma(host);
+	if (mask & SDHCI_RESET_ALL) {
+		if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
+			if (host->ops->enable_dma)
+				host->ops->enable_dma(host);
+		}
+
+		/* Resetting the controller clears many */
+		host->preset_enabled = false;
 	}
 }
 
@@ -1126,8 +1131,7 @@ void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 		return;
 
 	if (host->version >= SDHCI_SPEC_300) {
-		if (sdhci_readw(host, SDHCI_HOST_CONTROL2) &
-			SDHCI_CTRL_PRESET_VAL_ENABLE) {
+		if (host->preset_enabled) {
 			u16 pre_val;
 
 			clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
@@ -1493,13 +1497,13 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 		    (ios->timing == MMC_TIMING_UHS_SDR25))
 			ctrl |= SDHCI_CTRL_HISPD;
 
-		ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-		if (!(ctrl_2 & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
+		if (!host->preset_enabled) {
 			sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 			/*
 			 * We only need to set Driver Strength if the
 			 * preset value enable is not set.
 			 */
+			ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 			ctrl_2 &= ~SDHCI_CTRL_DRV_TYPE_MASK;
 			if (ios->drv_type == MMC_SET_DRIVER_TYPE_A)
 				ctrl_2 |= SDHCI_CTRL_DRV_TYPE_A;
@@ -2018,26 +2022,30 @@ out:
 
 static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable)
 {
-	u16 ctrl;
-
 	/* Host Controller v3.00 defines preset value registers */
 	if (host->version < SDHCI_SPEC_300)
 		return;
 
-	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-
 	/*
 	 * We only enable or disable Preset Value if they are not already
 	 * enabled or disabled respectively. Otherwise, we bail out.
 	 */
-	if (enable && !(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
-		ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE;
-		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
-		host->flags |= SDHCI_PV_ENABLED;
-	} else if (!enable && (ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
-		ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
+	if (host->preset_enabled != enable) {
+		u16 ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+
+		if (enable)
+			ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE;
+		else
+			ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
+
 		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
-		host->flags &= ~SDHCI_PV_ENABLED;
+
+		if (enable)
+			host->flags |= SDHCI_PV_ENABLED;
+		else
+			host->flags &= ~SDHCI_PV_ENABLED;
+
+		host->preset_enabled = enable;
 	}
 }
 
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 7f3efbab8732..08abe9941884 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -143,6 +143,7 @@ struct sdhci_host {
 
 	bool runtime_suspended;	/* Host is runtime suspended */
 	bool bus_on;		/* Bus power prevents runtime suspend */
+	bool preset_enabled;	/* Preset is enabled */
 
 	struct mmc_request *mrq;	/* Current request */
 	struct mmc_command *cmd;	/* Current command */
-- 
cgit v1.2.3-71-gd317


From ee526d515ad12e9fee2d2dbfc7f626c0a5c7f417 Mon Sep 17 00:00:00 2001
From: Balaji T K <balajitk@ti.com>
Date: Fri, 9 May 2014 22:16:53 +0530
Subject: mmc: omap_hsmmc: split omap-dma header file

moving dmaengine consumer specific function to omap-dmaengine.h
to Resolve build failure seen with sh-allmodconfig:
    include/linux/omap-dma.h:171:8: error: expected identifier before numeric constant
    make[4]: *** [drivers/mmc/host/omap_hsmmc.o] Error 1

Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
Cc: Tony Lindgren <tony@atomide.com>
Signed-off-by: Balaji T K <balajitk@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/omap_hsmmc.c  |  2 +-
 include/linux/omap-dma.h       | 19 +------------------
 include/linux/omap-dmaengine.h | 21 +++++++++++++++++++++
 3 files changed, 23 insertions(+), 19 deletions(-)
 create mode 100644 include/linux/omap-dmaengine.h

(limited to 'include')

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index cba71d69a79c..6b7b75585926 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -31,7 +31,7 @@
 #include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/of_device.h>
-#include <linux/omap-dma.h>
+#include <linux/omap-dmaengine.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/core.h>
 #include <linux/mmc/mmc.h>
diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index 41a13e70f41f..999f52d3d1e7 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -1,23 +1,6 @@
-/*
- * OMAP DMA Engine support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
 #ifndef __LINUX_OMAP_DMA_H
 #define __LINUX_OMAP_DMA_H
-
-struct dma_chan;
-
-#if defined(CONFIG_DMA_OMAP) || defined(CONFIG_DMA_OMAP_MODULE)
-bool omap_dma_filter_fn(struct dma_chan *, void *);
-#else
-static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
-{
-	return false;
-}
-#endif
+#include <linux/omap-dmaengine.h>
 
 /*
  *  Legacy OMAP DMA handling defines and functions
diff --git a/include/linux/omap-dmaengine.h b/include/linux/omap-dmaengine.h
new file mode 100644
index 000000000000..2b0b6aa01922
--- /dev/null
+++ b/include/linux/omap-dmaengine.h
@@ -0,0 +1,21 @@
+/*
+ * OMAP DMA Engine support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_OMAP_DMAENGINE_H
+#define __LINUX_OMAP_DMAENGINE_H
+
+struct dma_chan;
+
+#if defined(CONFIG_DMA_OMAP) || defined(CONFIG_DMA_OMAP_MODULE)
+bool omap_dma_filter_fn(struct dma_chan *, void *);
+#else
+static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
+{
+	return false;
+}
+#endif
+#endif /* __LINUX_OMAP_DMAENGINE_H */
-- 
cgit v1.2.3-71-gd317


From 73e4354444eef5251e5cdfd388ab02ef9f2e727e Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 22 May 2014 16:42:41 +0800
Subject: workqueue: declare system_highpri_wq

system_highpri_wq is exported to modules via EXPORT_SYMBOL_GPL(),
but it was forgotten to be declared in workqueue.h. So we add the declaration
and a short description for it.

tj: Minor comment tweak.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index d93d28b2ec73..b263b29bd98b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -340,6 +340,9 @@ enum {
  * short queue flush time.  Don't queue works which can run for too
  * long.
  *
+ * system_highpri_wq is similar to system_wq but for work items which
+ * require WQ_HIGHPRI.
+ *
  * system_long_wq is similar to system_wq but may host long running
  * works.  Queue flushing might take relatively long.
  *
@@ -358,6 +361,7 @@ enum {
  * 'wq_power_efficient' is disabled.  See WQ_POWER_EFFICIENT for more info.
  */
 extern struct workqueue_struct *system_wq;
+extern struct workqueue_struct *system_highpri_wq;
 extern struct workqueue_struct *system_long_wq;
 extern struct workqueue_struct *system_unbound_wq;
 extern struct workqueue_struct *system_freezable_wq;
-- 
cgit v1.2.3-71-gd317


From 79bc251f0e0aea67bc230c530f7fa57f66f9cdf3 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 22 May 2014 16:43:44 +0800
Subject: workqueue: remove unused WORK_CPU_END

WORK_CPU_END is totally unused since 4e8b22bd1a37 ("workqueue: fix
pool ID allocation leakage and remove BUILD_BUG_ON() in
init_workqueues"). It should be removed.

After it is removed, the comment "special cpu IDs" is not precise due to
there is only one special CPU ID (WORK_CPU_UNBOUND) left, so we also
change this comment to the description for WORK_CPU_UNBOUND.

tj: Minor description and comment tweaks.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b263b29bd98b..b8aee9453f22 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -56,9 +56,8 @@ enum {
 	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS) - 1,
 	WORK_NO_COLOR		= WORK_NR_COLORS,
 
-	/* special cpu IDs */
+	/* not bound to any CPU, prefer the local CPU */
 	WORK_CPU_UNBOUND	= NR_CPUS,
-	WORK_CPU_END		= NR_CPUS + 1,
 
 	/*
 	 * Reserve 7 bits off of pwq pointer w/ debugobjects turned off.
-- 
cgit v1.2.3-71-gd317


From cafebac153ae54fd0aba5d4ad28af995532c5375 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 22 May 2014 16:43:56 +0800
Subject: workqueue: remove unused work_clear_pending()

In 8930caba3dbd ("workqueue: disable irq while manipulating PENDING"),
setting last CPU and clearing PENDING got merged into a single
operation (set_work_cpu_and_clear_pending()), which resulted that the
internal routine work_clear_pending() is not used any more.

tj: Minor description tweak.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b8aee9453f22..a0cc2e95ed1b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -273,13 +273,6 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 #define delayed_work_pending(w) \
 	work_pending(&(w)->work)
 
-/**
- * work_clear_pending - for internal use only, mark a work item as not pending
- * @work: The work item in question
- */
-#define work_clear_pending(work) \
-	clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
-
 /*
  * Workqueue flags and constants.  For details, please refer to
  * Documentation/workqueue.txt.
-- 
cgit v1.2.3-71-gd317


From ca8a22634381537c92b5a10308652e1c38fd9edf Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 22 May 2014 10:41:08 -0400
Subject: tcp: make cwnd-limited checks measurement-based, and gentler

Experience with the recent e114a710aa50 ("tcp: fix cwnd limited
checking to improve congestion control") has shown that there are
common cases where that commit can cause cwnd to be much larger than
necessary. This leads to TSO autosizing cooking skbs that are too
large, among other things.

The main problems seemed to be:

(1) That commit attempted to predict the future behavior of the
connection by looking at the write queue (if TSO or TSQ limit
sending). That prediction sometimes overestimated future outstanding
packets.

(2) That commit always allowed cwnd to grow to twice the number of
outstanding packets (even in congestion avoidance, where this is not
needed).

This commit improves both of these, by:

(1) Switching to a measurement-based approach where we explicitly
track the largest number of packets in flight during the past window
("max_packets_out"), and remember whether we were cwnd-limited at the
moment we finished sending that flight.

(2) Only allowing cwnd to grow to twice the number of outstanding
packets ("max_packets_out") in slow start. In congestion avoidance
mode we now only allow cwnd to grow if it was fully utilized.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  6 ++++--
 include/net/tcp.h     | 11 ++++++++---
 net/ipv4/tcp_output.c | 37 +++++++++++++++++++++++--------------
 3 files changed, 35 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bc35e4709e8e..a0513210798f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -197,7 +197,8 @@ struct tcp_sock {
 	u8	do_early_retrans:1,/* Enable RFC5827 early-retransmit  */
 		syn_data:1,	/* SYN includes data */
 		syn_fastopen:1,	/* SYN includes Fast Open option */
-		syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
+		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 /* RTT measurement */
@@ -209,6 +210,8 @@ struct tcp_sock {
 
 	u32	packets_out;	/* Packets which are "in flight"	*/
 	u32	retrans_out;	/* Retransmitted packets out		*/
+	u32	max_packets_out;  /* max packets_out in last window */
+	u32	max_packets_seq;  /* right edge of max_packets_out flight */
 
 	u16	urg_data;	/* Saved octet of OOB data and control flags */
 	u8	ecn_flags;	/* ECN status bits.			*/
@@ -230,7 +233,6 @@ struct tcp_sock {
 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
-	u32	lsnd_pending;	/* packets inflight or unsent since last xmit */
 	u32	prior_cwnd;	/* Congestion window at start of Recovery. */
 	u32	prr_delivered;	/* Number of newly delivered packets to
 				 * receiver in Recovery. */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f5d6ca4a9d28..e80abe4486cb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -971,8 +971,9 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
 
 /* We follow the spirit of RFC2861 to validate cwnd but implement a more
  * flexible approach. The RFC suggests cwnd should not be raised unless
- * it was fully used previously. But we allow cwnd to grow as long as the
- * application has used half the cwnd.
+ * it was fully used previously. And that's exactly what we do in
+ * congestion avoidance mode. But in slow start we allow cwnd to grow
+ * as long as the application has used half the cwnd.
  * Example :
  *    cwnd is 10 (IW10), but application sends 9 frames.
  *    We allow cwnd to reach 18 when all frames are ACKed.
@@ -985,7 +986,11 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	return tp->snd_cwnd < 2 * tp->lsnd_pending;
+	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		return tp->snd_cwnd < 2 * tp->max_packets_out;
+
+	return tp->is_cwnd_limited;
 }
 
 static inline void tcp_check_probe_timer(struct sock *sk)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3d61c52bdf79..d463c35db33d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1402,11 +1402,19 @@ static void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
+static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	tp->lsnd_pending = tp->packets_out + unsent_segs;
+	/* Track the maximum number of outstanding packets in each
+	 * window, and remember whether we were cwnd-limited then.
+	 */
+	if (!before(tp->snd_una, tp->max_packets_seq) ||
+	    tp->packets_out > tp->max_packets_out) {
+		tp->max_packets_out = tp->packets_out;
+		tp->max_packets_seq = tp->snd_nxt;
+		tp->is_cwnd_limited = is_cwnd_limited;
+	}
 
 	if (tcp_is_cwnd_limited(sk)) {
 		/* Network is feed fully. */
@@ -1660,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
  *
  * This algorithm is from John Heffner.
  */
-static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
+static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
+				 bool *is_cwnd_limited)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1724,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	if (!tp->tso_deferred)
 		tp->tso_deferred = 1 | (jiffies << 1);
 
+	if (cong_win < send_win && cong_win < skb->len)
+		*is_cwnd_limited = true;
+
 	return true;
 
 send_now:
@@ -1881,9 +1893,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	unsigned int tso_segs, sent_pkts, unsent_segs = 0;
+	unsigned int tso_segs, sent_pkts;
 	int cwnd_quota;
 	int result;
+	bool is_cwnd_limited = false;
 
 	sent_pkts = 0;
 
@@ -1908,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 		cwnd_quota = tcp_cwnd_test(tp, skb);
 		if (!cwnd_quota) {
+			is_cwnd_limited = true;
 			if (push_one == 2)
 				/* Force out a loss probe pkt. */
 				cwnd_quota = 1;
@@ -1924,8 +1938,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 						      nonagle : TCP_NAGLE_PUSH))))
 				break;
 		} else {
-			if (!push_one && tcp_tso_should_defer(sk, skb))
-				goto compute_unsent_segs;
+			if (!push_one &&
+			    tcp_tso_should_defer(sk, skb, &is_cwnd_limited))
+				break;
 		}
 
 		/* TCP Small Queues :
@@ -1950,14 +1965,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			 * there is no smp_mb__after_set_bit() yet
 			 */
 			smp_mb__after_clear_bit();
-			if (atomic_read(&sk->sk_wmem_alloc) > limit) {
-				u32 unsent_bytes;
-
-compute_unsent_segs:
-				unsent_bytes = tp->write_seq - tp->snd_nxt;
-				unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
+			if (atomic_read(&sk->sk_wmem_alloc) > limit)
 				break;
-			}
 		}
 
 		limit = mss_now;
@@ -1997,7 +2006,7 @@ repair:
 		/* Send one loss probe per tail loss episode. */
 		if (push_one != 2)
 			tcp_schedule_loss_probe(sk);
-		tcp_cwnd_validate(sk, unsent_segs);
+		tcp_cwnd_validate(sk, is_cwnd_limited);
 		return false;
 	}
 	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
-- 
cgit v1.2.3-71-gd317


From e876f208af18b074f800656e4d1b99da75b2135f Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Mon, 19 May 2014 13:59:52 -0300
Subject: net: Add a software TSO helper API

Although the implementation probably needs a lot of work, this initial API
allows to implement software TSO in mvneta and mv643xx_eth drivers in a not
so intrusive way.

Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tso.h | 20 ++++++++++++++++
 net/core/Makefile |  2 +-
 net/core/tso.c    | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 include/net/tso.h
 create mode 100644 net/core/tso.c

(limited to 'include')

diff --git a/include/net/tso.h b/include/net/tso.h
new file mode 100644
index 000000000000..47e5444f7d15
--- /dev/null
+++ b/include/net/tso.h
@@ -0,0 +1,20 @@
+#ifndef _TSO_H
+#define _TSO_H
+
+#include <net/ip.h>
+
+struct tso_t {
+	int next_frag_idx;
+	void *data;
+	size_t size;
+	u16 ip_id;
+	u32 tcp_seq;
+};
+
+int tso_count_descs(struct sk_buff *skb);
+void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+		   int size, bool is_last);
+void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size);
+void tso_start(struct sk_buff *skb, struct tso_t *tso);
+
+#endif	/* _TSO_H */
diff --git a/net/core/Makefile b/net/core/Makefile
index 826b925aa453..71093d94ad2b 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-			sock_diag.o dev_ioctl.o
+			sock_diag.o dev_ioctl.o tso.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
diff --git a/net/core/tso.c b/net/core/tso.c
new file mode 100644
index 000000000000..097821dd3a8c
--- /dev/null
+++ b/net/core/tso.c
@@ -0,0 +1,72 @@
+#include <net/ip.h>
+#include <net/tso.h>
+
+/* Calculate expected number of TX descriptors */
+int tso_count_descs(struct sk_buff *skb)
+{
+	/* The Marvell Way */
+	return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
+}
+
+void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+		   int size, bool is_last)
+{
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	int mac_hdr_len = skb_network_offset(skb);
+
+	memcpy(hdr, skb->data, hdr_len);
+	iph = (struct iphdr *)(hdr + mac_hdr_len);
+	iph->id = htons(tso->ip_id);
+	iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+	tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
+	tcph->seq = htonl(tso->tcp_seq);
+	tso->ip_id++;
+
+	if (!is_last) {
+		/* Clear all special flags for not last packet */
+		tcph->psh = 0;
+		tcph->fin = 0;
+		tcph->rst = 0;
+	}
+}
+
+void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
+{
+	tso->tcp_seq += size;
+	tso->size -= size;
+	tso->data += size;
+
+	if ((tso->size == 0) &&
+	    (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+		/* Move to next segment */
+		tso->size = frag->size;
+		tso->data = page_address(frag->page.p) + frag->page_offset;
+		tso->next_frag_idx++;
+	}
+}
+
+void tso_start(struct sk_buff *skb, struct tso_t *tso)
+{
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+	tso->ip_id = ntohs(ip_hdr(skb)->id);
+	tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
+	tso->next_frag_idx = 0;
+
+	/* Build first data */
+	tso->size = skb_headlen(skb) - hdr_len;
+	tso->data = skb->data + hdr_len;
+	if ((tso->size == 0) &&
+	    (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+		/* Move to next segment */
+		tso->size = frag->size;
+		tso->data = page_address(frag->page.p) + frag->page_offset;
+		tso->next_frag_idx++;
+	}
+}
-- 
cgit v1.2.3-71-gd317


From da08143b85203b581f4a6461b149186b0e9592df Mon Sep 17 00:00:00 2001
From: Michal Kubeček <mkubecek@suse.cz>
Date: Tue, 20 May 2014 08:29:25 +0200
Subject: vlan: more careful checksum features handling

When combining real_dev's features and vlan_features, simple
bitwise AND is used. This doesn't work well for checksum
offloading features as if one set has NETIF_F_HW_CSUM and the
other NETIF_F_IP_CSUM and/or NETIF_F_IPV6_CSUM, we end up with
no checksum offloading. However, from the logical point of view
(how can_checksum_protocol() works), NETIF_F_HW_CSUM contains
the functionality of NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM so
that the result should be IP/IPV6.

Add helper function netdev_intersect_features() implementing
this logic and use it in vlan_dev_fix_features().

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 14 ++++++++++++++
 net/8021q/vlan_dev.c      |  4 ++--
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2dea98cbbdba..f4ad247fd324 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3153,6 +3153,20 @@ const char *netdev_drivername(const struct net_device *dev);
 
 void linkwatch_run_queue(void);
 
+static inline netdev_features_t netdev_intersect_features(netdev_features_t f1,
+							  netdev_features_t f2)
+{
+	if (f1 & NETIF_F_GEN_CSUM)
+		f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+	if (f2 & NETIF_F_GEN_CSUM)
+		f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+	f1 &= f2;
+	if (f1 & NETIF_F_GEN_CSUM)
+		f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+
+	return f1;
+}
+
 static inline netdev_features_t netdev_get_wanted_features(
 	struct net_device *dev)
 {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 8f025afa29fd..4181fb71ba77 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -678,9 +678,9 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	netdev_features_t old_features = features;
 
-	features &= real_dev->vlan_features;
+	features = netdev_intersect_features(features, real_dev->vlan_features);
 	features |= NETIF_F_RXCSUM;
-	features &= real_dev->features;
+	features = netdev_intersect_features(features, real_dev->features);
 
 	features |= old_features & NETIF_F_SOFT_FEATURES;
 	features |= NETIF_F_LLTX;
-- 
cgit v1.2.3-71-gd317


From ea3429c77d4e34cb2983b90e49a5506fedf70b98 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 22 May 2014 09:47:50 -0700
Subject: of: mdio: remove of_phy_connect_fixed_link

All in-tree drivers have been converted to use the new pair of
functions: of_is_fixed_phy_link() plus of_phy_register_fixed_link(), we
can now safely remove of_phy_connect_fixed_link.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/of/of_mdio.c    | 38 --------------------------------------
 include/linux/of_mdio.h | 10 ----------
 2 files changed, 48 deletions(-)

(limited to 'include')

diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 1def0bb5cb37..4c1e01ed16dc 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -245,44 +245,6 @@ struct phy_device *of_phy_connect(struct net_device *dev,
 }
 EXPORT_SYMBOL(of_phy_connect);
 
-/**
- * of_phy_connect_fixed_link - Parse fixed-link property and return a dummy phy
- * @dev: pointer to net_device claiming the phy
- * @hndlr: Link state callback for the network device
- * @iface: PHY data interface type
- *
- * This function is a temporary stop-gap and will be removed soon.  It is
- * only to support the fs_enet, ucc_geth and gianfar Ethernet drivers.  Do
- * not call this function from new drivers.
- */
-struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
-					     void (*hndlr)(struct net_device *),
-					     phy_interface_t iface)
-{
-	struct device_node *net_np;
-	char bus_id[MII_BUS_ID_SIZE + 3];
-	struct phy_device *phy;
-	const __be32 *phy_id;
-	int sz;
-
-	if (!dev->dev.parent)
-		return NULL;
-
-	net_np = dev->dev.parent->of_node;
-	if (!net_np)
-		return NULL;
-
-	phy_id = of_get_property(net_np, "fixed-link", &sz);
-	if (!phy_id || sz < sizeof(*phy_id))
-		return NULL;
-
-	sprintf(bus_id, PHY_ID_FMT, "fixed-0", be32_to_cpu(phy_id[0]));
-
-	phy = phy_connect(dev, bus_id, hndlr, iface);
-	return IS_ERR(phy) ? NULL : phy;
-}
-EXPORT_SYMBOL(of_phy_connect_fixed_link);
-
 /**
  * of_phy_attach - Attach to a PHY without starting the state machine
  * @dev: pointer to net_device claiming the phy
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 0aa367e316cb..d449018d0726 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -22,9 +22,6 @@ extern struct phy_device *of_phy_connect(struct net_device *dev,
 struct phy_device *of_phy_attach(struct net_device *dev,
 				 struct device_node *phy_np, u32 flags,
 				 phy_interface_t iface);
-extern struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
-					 void (*hndlr)(struct net_device *),
-					 phy_interface_t iface);
 
 extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
 
@@ -59,13 +56,6 @@ static inline struct phy_device *of_phy_attach(struct net_device *dev,
 	return NULL;
 }
 
-static inline struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
-							   void (*hndlr)(struct net_device *),
-							   phy_interface_t iface)
-{
-	return NULL;
-}
-
 static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
 {
 	return NULL;
-- 
cgit v1.2.3-71-gd317


From 16e4d93f6de7063800f3f5e68f064b0ff8fae9b7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 19 May 2014 13:40:22 -0400
Subject: NFSD: Ignore client's source port on RDMA transports

An NFS/RDMA client's source port is meaningless for RDMA transports.
The transport layer typically sets the source port value on the
connection to a random ephemeral port.

Currently, NFS server administrators must specify the "insecure"
export option to enable clients to access exports via RDMA.

But this means NFS clients can access such an export via IP using an
ephemeral port, which may not be desirable.

This patch eliminates the need to specify the "insecure" export
option to allow NFS/RDMA clients access to an export.

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=250
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h          | 1 +
 net/sunrpc/svc_xprt.c                    | 2 +-
 net/sunrpc/svcsock.c                     | 9 +++++++++
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 7 +++++++
 4 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index b05963f09ebf..0cec1b94c670 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -24,6 +24,7 @@ struct svc_xprt_ops {
 	void		(*xpo_release_rqst)(struct svc_rqst *);
 	void		(*xpo_detach)(struct svc_xprt *);
 	void		(*xpo_free)(struct svc_xprt *);
+	int		(*xpo_secure_port)(struct svc_rqst *);
 };
 
 struct svc_xprt_class {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 06c6ff0cb911..614956f1777e 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -793,7 +793,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 
 	clear_bit(XPT_OLD, &xprt->xpt_flags);
 
-	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
+	rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp);
 	rqstp->rq_chandle.defer = svc_defer;
 
 	if (serv->sv_stats)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 43bcb4699d69..0cb34f5d58dc 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -400,6 +400,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
 	release_sock(sock->sk);
 #endif
 }
+
+static int svc_sock_secure_port(struct svc_rqst *rqstp)
+{
+	return svc_port_is_privileged(svc_addr(rqstp));
+}
+
 /*
  * INET callback when data has been received on the socket.
  */
@@ -678,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = {
 	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
 	.xpo_has_wspace = svc_udp_has_wspace,
 	.xpo_accept = svc_udp_accept,
+	.xpo_secure_port = svc_sock_secure_port,
 };
 
 static struct svc_xprt_class svc_udp_class = {
@@ -1234,6 +1241,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = {
 	.xpo_detach = svc_bc_tcp_sock_detach,
 	.xpo_free = svc_bc_sock_free,
 	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+	.xpo_secure_port = svc_sock_secure_port,
 };
 
 static struct svc_xprt_class svc_tcp_bc_class = {
@@ -1272,6 +1280,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
 	.xpo_has_wspace = svc_tcp_has_wspace,
 	.xpo_accept = svc_tcp_accept,
+	.xpo_secure_port = svc_sock_secure_port,
 };
 
 static struct svc_xprt_class svc_tcp_class = {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 25688fa2207f..02db8d9cc994 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -65,6 +65,7 @@ static void dto_tasklet_func(unsigned long data);
 static void svc_rdma_detach(struct svc_xprt *xprt);
 static void svc_rdma_free(struct svc_xprt *xprt);
 static int svc_rdma_has_wspace(struct svc_xprt *xprt);
+static int svc_rdma_secure_port(struct svc_rqst *);
 static void rq_cq_reap(struct svcxprt_rdma *xprt);
 static void sq_cq_reap(struct svcxprt_rdma *xprt);
 
@@ -82,6 +83,7 @@ static struct svc_xprt_ops svc_rdma_ops = {
 	.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
 	.xpo_has_wspace = svc_rdma_has_wspace,
 	.xpo_accept = svc_rdma_accept,
+	.xpo_secure_port = svc_rdma_secure_port,
 };
 
 struct svc_xprt_class svc_rdma_class = {
@@ -1207,6 +1209,11 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
 	return 1;
 }
 
+static int svc_rdma_secure_port(struct svc_rqst *rqstp)
+{
+	return 1;
+}
+
 /*
  * Attempt to register the kvec representing the RPC memory with the
  * device.
-- 
cgit v1.2.3-71-gd317


From ef11ce24875a8a540adc185e7bce3d7d49c8296f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 12 May 2014 11:22:47 +1000
Subject: SUNRPC: track whether a request is coming from a loop-back interface.

If an incoming NFS request is coming from the local host, then
nfsd will need to perform some special handling.  So detect that
possibility and make the source visible in rq_local.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h      |  1 +
 include/linux/sunrpc/svc_xprt.h |  1 +
 net/sunrpc/sunrpc.h             | 13 +++++++++++++
 net/sunrpc/svcsock.c            |  5 +++++
 4 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 04e763221246..a0dbbd1e00e9 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -254,6 +254,7 @@ struct svc_rqst {
 	u32			rq_prot;	/* IP protocol */
 	unsigned short
 				rq_secure  : 1;	/* secure port */
+	unsigned short		rq_local   : 1;	/* local request */
 
 	void *			rq_argp;	/* decoded arguments */
 	void *			rq_resp;	/* xdr'd results */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 0cec1b94c670..7235040a19b2 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -64,6 +64,7 @@ struct svc_xprt {
 #define	XPT_DETACHED	10		/* detached from tempsocks list */
 #define XPT_LISTENER	11		/* listening endpoint */
 #define XPT_CACHE_AUTH	12		/* cache auth info */
+#define XPT_LOCAL	13		/* connection from loopback interface */
 
 	struct svc_serv		*xpt_server;	/* service for transport */
 	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 14c9f6d1c5ff..f2b7cb540e61 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -43,6 +43,19 @@ static inline int rpc_reply_expected(struct rpc_task *task)
 		(task->tk_msg.rpc_proc->p_decode != NULL);
 }
 
+static inline int sock_is_loopback(struct sock *sk)
+{
+	struct dst_entry *dst;
+	int loopback = 0;
+	rcu_read_lock();
+	dst = rcu_dereference(sk->sk_dst_cache);
+	if (dst && dst->dev &&
+	    (dst->dev->features & NETIF_F_LOOPBACK))
+		loopback = 1;
+	rcu_read_unlock();
+	return loopback;
+}
+
 int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
 		    struct page *headpage, unsigned long headoffset,
 		    struct page *tailpage, unsigned long tailoffset);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0cb34f5d58dc..f3b8eb309d01 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -874,6 +874,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	}
 	svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
 
+	if (sock_is_loopback(newsock->sk))
+		set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
+	else
+		clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpconn++;
 
@@ -1119,6 +1123,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 
 	rqstp->rq_xprt_ctxt   = NULL;
 	rqstp->rq_prot	      = IPPROTO_TCP;
+	rqstp->rq_local	      = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags);
 
 	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
 	calldir = p[1];
-- 
cgit v1.2.3-71-gd317


From ecc8fb11cdb37d108d4597ba0f6bdff77c6019af Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Thu, 22 May 2014 15:55:39 +0300
Subject: net/mlx4_core: Deprecate use_prio module parameter

use_prio was added as part of an infrastructure for running FCoE in A0 mode.
FCoE didn't get into Mellanox Upstream driver, and when it will, it won't be
using A0 steering mode.

Therefore we can safely deprecate this module parameter without hurting any
existing user.

CC: Carol Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 8 ++++----
 include/linux/mlx4/device.h               | 1 -
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index a56f6012258d..08ff5dd9298f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -132,8 +132,7 @@ MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
 
 static bool use_prio;
 module_param_named(use_prio, use_prio, bool, 0444);
-MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
-		  "(0/1, default 0)");
+MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");
 
 int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
@@ -290,7 +289,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 
 	dev->caps.log_num_macs  = log_num_mac;
 	dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
-	dev->caps.log_num_prios = use_prio ? 3 : 0;
 
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
 		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
@@ -358,7 +356,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
 		(1 << dev->caps.log_num_macs) *
 		(1 << dev->caps.log_num_vlans) *
-		(1 << dev->caps.log_num_prios) *
 		dev->caps.num_ports;
 	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
 
@@ -2775,6 +2772,9 @@ static int __init mlx4_verify_params(void)
 		pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
 			   MLX4_LOG_NUM_VLANS);
 
+	if (use_prio != 0)
+		pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
+
 	if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
 		pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
 		return -1;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index c0468e6f0442..ca38871a585c 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -449,7 +449,6 @@ struct mlx4_caps {
 	int                     reserved_qps_base[MLX4_NUM_QP_REGION];
 	int                     log_num_macs;
 	int                     log_num_vlans;
-	int                     log_num_prios;
 	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
 	u8			supported_type[MLX4_MAX_PORTS + 1];
 	u8                      suggested_type[MLX4_MAX_PORTS + 1];
-- 
cgit v1.2.3-71-gd317


From be52c9e96a6657d117bb0ec6e11438fb246af5c7 Mon Sep 17 00:00:00 2001
From: Jarno Rajahalme <jrajahalme@nicira.com>
Date: Mon, 5 May 2014 09:59:40 -0700
Subject: openvswitch: Avoid assigning a NULL pointer to flow actions.

Flow SET can accept an empty set of actions, with the intended
semantics of leaving existing actions unmodified.  This seems to have
been brokin after OVS 1.7, as we have assigned the flow's actions
pointer to NULL in this case, but we never check for the NULL pointer
later on.  This patch restores the intended behavior and documents it
in the include/linux/openvswitch.h.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
---
 include/uapi/linux/openvswitch.h |  4 +++-
 net/openvswitch/datapath.c       | 14 ++++++++------
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 970553cbbc8e..0b979ee4bfc0 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -395,7 +395,9 @@ struct ovs_key_nd {
  * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
  * the actions to take for packets that match the key.  Always present in
  * notifications.  Required for %OVS_FLOW_CMD_NEW requests, optional for
- * %OVS_FLOW_CMD_SET requests.
+ * %OVS_FLOW_CMD_SET requests.  An %OVS_FLOW_CMD_SET without
+ * %OVS_FLOW_ATTR_ACTIONS will not modify the actions.  To clear the actions,
+ * an %OVS_FLOW_ATTR_ACTIONS without any nested attributes must be given.
  * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
  * flow.  Present in notifications if the stats would be nonzero.  Ignored in
  * requests.
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 8867d7e2d65b..90a1e5e66287 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -810,6 +810,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 			goto err_kfree;
 		}
 	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
+		/* OVS_FLOW_CMD_NEW must have actions. */
 		error = -EINVAL;
 		goto error;
 	}
@@ -849,8 +850,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
 	} else {
 		/* We found a matching flow. */
-		struct sw_flow_actions *old_acts;
-
 		/* Bail out if we're not allowed to modify an existing flow.
 		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
 		 * because Generic Netlink treats the latter as a dump
@@ -866,11 +865,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		if (!ovs_flow_cmp_unmasked_key(flow, &match))
 			goto err_unlock_ovs;
 
-		/* Update actions. */
-		old_acts = ovsl_dereference(flow->sf_acts);
-		rcu_assign_pointer(flow->sf_acts, acts);
-		ovs_nla_free_flow_actions(old_acts);
+		/* Update actions, if present. */
+		if (acts) {
+			struct sw_flow_actions *old_acts;
 
+			old_acts = ovsl_dereference(flow->sf_acts);
+			rcu_assign_pointer(flow->sf_acts, acts);
+			ovs_nla_free_flow_actions(old_acts);
+		}
 		reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);
 
 		/* Clear stats. */
-- 
cgit v1.2.3-71-gd317


From 3c8a0922e0b774aa394a6925f11976ccfc852808 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 2 May 2014 11:05:21 +1000
Subject: drm/dp_helper: add defines for DP 1.2 and MST support. (v2)

This just adds the defines from the DP 1.2 spec, which we
will use later.

fix some DP MST to 1.2 MST

Signed-off-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Todd Previte <tprevite@gmail.com>
Reviewed-by: Jingoo Han <jg1.han@samsung.com>
---
 include/drm/drm_dp_helper.h | 78 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index cfcacec5b89d..c8857e6159a5 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -37,6 +37,7 @@
  * eDP: Embedded DisplayPort version 1
  * DPI: DisplayPort Interoperability Guideline v1.1a
  * 1.2: DisplayPort 1.2
+ * MST: Multistream Transport - part of DP 1.2a
  *
  * 1.2 formally includes both eDP and DPI definitions.
  */
@@ -103,9 +104,14 @@
 #define DP_TRAINING_AUX_RD_INTERVAL         0x00e   /* XXX 1.2? */
 
 /* Multiple stream transport */
+#define DP_FAUX_CAP			    0x020   /* 1.2 */
+# define DP_FAUX_CAP_1			    (1 << 0)
+
 #define DP_MSTM_CAP			    0x021   /* 1.2 */
 # define DP_MST_CAP			    (1 << 0)
 
+#define DP_GUID				    0x030   /* 1.2 */
+
 #define DP_PSR_SUPPORT                      0x070   /* XXX 1.2? */
 # define DP_PSR_IS_SUPPORTED                1
 #define DP_PSR_CAPS                         0x071   /* XXX 1.2? */
@@ -221,6 +227,16 @@
 # define DP_PSR_CRC_VERIFICATION	    (1 << 2)
 # define DP_PSR_FRAME_CAPTURE		    (1 << 3)
 
+#define DP_ADAPTER_CTRL			    0x1a0
+# define DP_ADAPTER_CTRL_FORCE_LOAD_SENSE   (1 << 0)
+
+#define DP_BRANCH_DEVICE_CTRL		    0x1a1
+# define DP_BRANCH_DEVICE_IRQ_HPD	    (1 << 0)
+
+#define DP_PAYLOAD_ALLOCATE_SET		    0x1c0
+#define DP_PAYLOAD_ALLOCATE_START_TIME_SLOT 0x1c1
+#define DP_PAYLOAD_ALLOCATE_TIME_SLOT_COUNT 0x1c2
+
 #define DP_SINK_COUNT			    0x200
 /* prior to 1.2 bit 7 was reserved mbz */
 # define DP_GET_SINK_COUNT(x)		    ((((x) & 0x80) >> 1) | ((x) & 0x3f))
@@ -230,6 +246,9 @@
 # define DP_REMOTE_CONTROL_COMMAND_PENDING  (1 << 0)
 # define DP_AUTOMATED_TEST_REQUEST	    (1 << 1)
 # define DP_CP_IRQ			    (1 << 2)
+# define DP_MCCS_IRQ			    (1 << 3)
+# define DP_DOWN_REP_MSG_RDY		    (1 << 4) /* 1.2 MST */
+# define DP_UP_REQ_MSG_RDY		    (1 << 5) /* 1.2 MST */
 # define DP_SINK_SPECIFIC_IRQ		    (1 << 6)
 
 #define DP_LANE0_1_STATUS		    0x202
@@ -294,6 +313,13 @@
 #define DP_TEST_SINK			    0x270
 #define DP_TEST_SINK_START	    (1 << 0)
 
+#define DP_PAYLOAD_TABLE_UPDATE_STATUS      0x2c0   /* 1.2 MST */
+# define DP_PAYLOAD_TABLE_UPDATED           (1 << 0)
+# define DP_PAYLOAD_ACT_HANDLED             (1 << 1)
+
+#define DP_VC_PAYLOAD_ID_SLOT_1             0x2c1   /* 1.2 MST */
+/* up to ID_SLOT_63 at 0x2ff */
+
 #define DP_SOURCE_OUI			    0x300
 #define DP_SINK_OUI			    0x400
 #define DP_BRANCH_OUI			    0x500
@@ -303,6 +329,21 @@
 # define DP_SET_POWER_D3                    0x2
 # define DP_SET_POWER_MASK                  0x3
 
+#define DP_SIDEBAND_MSG_DOWN_REQ_BASE	    0x1000   /* 1.2 MST */
+#define DP_SIDEBAND_MSG_UP_REP_BASE	    0x1200   /* 1.2 MST */
+#define DP_SIDEBAND_MSG_DOWN_REP_BASE	    0x1400   /* 1.2 MST */
+#define DP_SIDEBAND_MSG_UP_REQ_BASE	    0x1600   /* 1.2 MST */
+
+#define DP_SINK_COUNT_ESI		    0x2002   /* 1.2 */
+/* 0-5 sink count */
+# define DP_SINK_COUNT_CP_READY             (1 << 6)
+
+#define DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0   0x2003   /* 1.2 */
+
+#define DP_DEVICE_SERVICE_IRQ_VECTOR_ESI1   0x2004   /* 1.2 */
+
+#define DP_LINK_SERVICE_IRQ_VECTOR_ESI0     0x2005   /* 1.2 */
+
 #define DP_PSR_ERROR_STATUS                 0x2006  /* XXX 1.2? */
 # define DP_PSR_LINK_CRC_ERROR              (1 << 0)
 # define DP_PSR_RFB_STORAGE_ERROR           (1 << 1)
@@ -319,6 +360,43 @@
 # define DP_PSR_SINK_INTERNAL_ERROR         7
 # define DP_PSR_SINK_STATE_MASK             0x07
 
+/* DP 1.2 Sideband message defines */
+/* peer device type - DP 1.2a Table 2-92 */
+#define DP_PEER_DEVICE_NONE		0x0
+#define DP_PEER_DEVICE_SOURCE_OR_SST	0x1
+#define DP_PEER_DEVICE_MST_BRANCHING	0x2
+#define DP_PEER_DEVICE_SST_SINK		0x3
+#define DP_PEER_DEVICE_DP_LEGACY_CONV	0x4
+
+/* DP 1.2 MST sideband request names DP 1.2a Table 2-80 */
+#define DP_LINK_ADDRESS			0x01
+#define DP_CONNECTION_STATUS_NOTIFY	0x02
+#define DP_ENUM_PATH_RESOURCES		0x10
+#define DP_ALLOCATE_PAYLOAD		0x11
+#define DP_QUERY_PAYLOAD		0x12
+#define DP_RESOURCE_STATUS_NOTIFY	0x13
+#define DP_CLEAR_PAYLOAD_ID_TABLE	0x14
+#define DP_REMOTE_DPCD_READ		0x20
+#define DP_REMOTE_DPCD_WRITE		0x21
+#define DP_REMOTE_I2C_READ		0x22
+#define DP_REMOTE_I2C_WRITE		0x23
+#define DP_POWER_UP_PHY			0x24
+#define DP_POWER_DOWN_PHY		0x25
+#define DP_SINK_EVENT_NOTIFY		0x30
+#define DP_QUERY_STREAM_ENC_STATUS	0x38
+
+/* DP 1.2 MST sideband nak reasons - table 2.84 */
+#define DP_NAK_WRITE_FAILURE		0x01
+#define DP_NAK_INVALID_READ		0x02
+#define DP_NAK_CRC_FAILURE		0x03
+#define DP_NAK_BAD_PARAM		0x04
+#define DP_NAK_DEFER			0x05
+#define DP_NAK_LINK_FAILURE		0x06
+#define DP_NAK_NO_RESOURCES		0x07
+#define DP_NAK_DPCD_FAIL		0x08
+#define DP_NAK_I2C_NAK			0x09
+#define DP_NAK_ALLOCATE_FAIL		0x0a
+
 #define MODE_I2C_START	1
 #define MODE_I2C_WRITE	2
 #define MODE_I2C_READ	4
-- 
cgit v1.2.3-71-gd317


From 8b2e62cc34feaaf1cac9440a93fb18ac0b1e81bc Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Wed, 21 May 2014 10:49:19 +0800
Subject: MIPS: Fix a typo error in AUDIT_ARCH definition

Missing a "|" in AUDIT_ARCH_MIPSEL64N32 macro definition.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Reviewed-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J. Hill <Steven.Hill@imgtec.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: linux-mips@linux-mips.org
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Patchwork: https://patchwork.linux-mips.org/patch/6978/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 include/uapi/linux/audit.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 1b1efddb91cd..4c31a366be16 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -357,7 +357,7 @@ enum {
 #define AUDIT_ARCH_MIPS64N32	(EM_MIPS|__AUDIT_ARCH_64BIT|\
 				 __AUDIT_ARCH_CONVENTION_MIPS64_N32)
 #define AUDIT_ARCH_MIPSEL64	(EM_MIPS|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
-#define AUDIT_ARCH_MIPSEL64N32	(EM_MIPS|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE\
+#define AUDIT_ARCH_MIPSEL64N32	(EM_MIPS|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE|\
 				 __AUDIT_ARCH_CONVENTION_MIPS64_N32)
 #define AUDIT_ARCH_OPENRISC	(EM_OPENRISC)
 #define AUDIT_ARCH_PARISC	(EM_PARISC)
-- 
cgit v1.2.3-71-gd317


From d7b2545023ecfde94d3ea9c03c5480ac18da96c9 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 23 May 2014 13:19:53 +0300
Subject: Bluetooth: Clearly distinguish mgmt LTK type from authenticated
 property

On the mgmt level we have a key type parameter which currently accepts
two possible values: 0x00 for unauthenticated and 0x01 for
authenticated. However, in the internal struct smp_ltk representation we
have an explicit "authenticated" boolean value.

To make this distinction clear, add defines for the possible mgmt values
and do conversion to and from the internal authenticated value.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h |  3 +++
 net/bluetooth/mgmt.c         | 19 ++++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 226ae03cafe7..bcffc9ae0c89 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -181,6 +181,9 @@ struct mgmt_cp_load_link_keys {
 } __packed;
 #define MGMT_LOAD_LINK_KEYS_SIZE	3
 
+#define MGMT_LTK_UNAUTHENTICATED	0x00
+#define MGMT_LTK_AUTHENTICATED		0x01
+
 struct mgmt_ltk_info {
 	struct mgmt_addr_info addr;
 	__u8	type;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f8ca69dd1984..5e9c21a5525f 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4534,7 +4534,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
 
 	for (i = 0; i < key_count; i++) {
 		struct mgmt_ltk_info *key = &cp->keys[i];
-		u8 type, addr_type;
+		u8 type, addr_type, authenticated;
 
 		if (key->addr.type == BDADDR_LE_PUBLIC)
 			addr_type = ADDR_LE_DEV_PUBLIC;
@@ -4546,8 +4546,13 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
 		else
 			type = HCI_SMP_LTK_SLAVE;
 
+		if (key->type == MGMT_LTK_UNAUTHENTICATED)
+			authenticated = 0x00;
+		else
+			authenticated = 0x01;
+
 		hci_add_ltk(hdev, &key->addr.bdaddr, addr_type, type,
-			    key->type, key->val, key->enc_size, key->ediv,
+			    authenticated, key->val, key->enc_size, key->ediv,
 			    key->rand);
 	}
 
@@ -5222,6 +5227,14 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key,
 	mgmt_event(MGMT_EV_NEW_LINK_KEY, hdev, &ev, sizeof(ev), NULL);
 }
 
+static u8 mgmt_ltk_type(struct smp_ltk *ltk)
+{
+	if (ltk->authenticated)
+		return MGMT_LTK_AUTHENTICATED;
+
+	return MGMT_LTK_UNAUTHENTICATED;
+}
+
 void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
 {
 	struct mgmt_ev_new_long_term_key ev;
@@ -5247,7 +5260,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
 
 	bacpy(&ev.key.addr.bdaddr, &key->bdaddr);
 	ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type);
-	ev.key.type = key->authenticated;
+	ev.key.type = mgmt_ltk_type(key);
 	ev.key.enc_size = key->enc_size;
 	ev.key.ediv = key->ediv;
 	ev.key.rand = key->rand;
-- 
cgit v1.2.3-71-gd317


From ed616689a3d95eb6c9bdbb1ef74b0f50cbdf276a Mon Sep 17 00:00:00 2001
From: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Date: Thu, 22 May 2014 09:59:05 -0400
Subject: net-next:v4: Add support to configure SR-IOV VF minimum and maximum
 Tx rate through ip tool.

o min_tx_rate puts lower limit on the VF bandwidth. VF is guaranteed
  to have a bandwidth of at least this value.
  max_tx_rate puts cap on the VF bandwidth. VF can have a bandwidth
  of up to this value.

o A new handler set_vf_rate for attr IFLA_VF_RATE has been introduced
  which takes 4 arguments:
  netdev, VF number, min_tx_rate, max_tx_rate

o ndo_set_vf_rate replaces ndo_set_vf_tx_rate handler.

o Drivers that currently implement ndo_set_vf_tx_rate should now call
  ndo_set_vf_rate instead and reject attempt to set a minimum bandwidth
  greater than 0 for IFLA_VF_TX_RATE when IFLA_VF_RATE is not yet
  implemented by driver.

o If user enters only one of either min_tx_rate or max_tx_rate, then,
  userland should read back the other value from driver and set both
  for IFLA_VF_RATE.
  Drivers that have not yet implemented IFLA_VF_RATE should always
  return min_tx_rate as 0 when read from ip tool.

o If both IFLA_VF_TX_RATE and IFLA_VF_RATE options are specified, then
  IFLA_VF_RATE should override.

o Idea is to have consistent display of rate values to user.

o Usage example: -

  ./ip link set p4p1 vf 0 rate 900

  ./ip link show p4p1
  32: p4p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode
  DEFAULT qlen 1000
    link/ether 00:0e:1e:08:b0:f0 brd ff:ff:ff:ff:ff:ff
    vf 0 MAC 3e:a0:ca:bd:ae:5a, tx rate 900 (Mbps), max_tx_rate 900Mbps
    vf 1 MAC f6:c6:7c:3f:3d:6c
    vf 2 MAC 56:32:43:98:d7:71
    vf 3 MAC d6:be:c3:b5:85:ff
    vf 4 MAC ee:a9:9a:1e:19:14
    vf 5 MAC 4a:d0:4c:07:52:18
    vf 6 MAC 3a:76:44:93:62:f9
    vf 7 MAC 82:e9:e7:e3:15:1a

  ./ip link set p4p1 vf 0 max_tx_rate 300 min_tx_rate 200

  ./ip link show p4p1
  32: p4p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode
  DEFAULT qlen 1000
    link/ether 00:0e:1e:08:b0:f0 brd ff:ff:ff:ff:ff:ff
    vf 0 MAC 3e:a0:ca:bd:ae:5a, tx rate 300 (Mbps), max_tx_rate 300Mbps,
    min_tx_rate 200Mbps
    vf 1 MAC f6:c6:7c:3f:3d:6c
    vf 2 MAC 56:32:43:98:d7:71
    vf 3 MAC d6:be:c3:b5:85:ff
    vf 4 MAC ee:a9:9a:1e:19:14
    vf 5 MAC 4a:d0:4c:07:52:18
    vf 6 MAC 3a:76:44:93:62:f9
    vf 7 MAC 82:e9:e7:e3:15:1a

  ./ip link set p4p1 vf 0 max_tx_rate 600 rate 300

  ./ip link show p4p1
  32: p4p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode
  DEFAULT qlen 1000
    link/ether 00:0e:1e:08:b0:f brd ff:ff:ff:ff:ff:ff
    vf 0 MAC 3e:a0:ca:bd:ae:5, tx rate 600 (Mbps), max_tx_rate 600Mbps,
    min_tx_rate 200Mbps
    vf 1 MAC f6:c6:7c:3f:3d:6c
    vf 2 MAC 56:32:43:98:d7:71
    vf 3 MAC d6:be:c3:b5:85:ff
    vf 4 MAC ee:a9:9a:1e:19:14
    vf 5 MAC 4a:d0:4c:07:52:18
    vf 6 MAC 3a:76:44:93:62:f9
    vf 7 MAC 82:e9:e7:e3:15:1a

Signed-off-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c  |  3 +-
 drivers/net/ethernet/emulex/benet/be_main.c        | 21 +++++----
 drivers/net/ethernet/intel/i40e/i40e_main.c        |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 26 +++++++----
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |  3 +-
 drivers/net/ethernet/intel/igb/igb_main.c          | 20 ++++++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c      |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c     | 13 ++++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h     |  3 +-
 drivers/net/ethernet/mellanox/mlx4/cmd.c           | 11 ++---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h        |  1 +
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   |  2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h  |  2 +-
 .../ethernet/qlogic/qlcnic/qlcnic_sriov_common.c   |  1 +
 .../net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c   | 52 +++++++++++++++-------
 drivers/net/ethernet/sfc/siena_sriov.c             |  3 +-
 include/linux/if_link.h                            |  3 +-
 include/linux/netdevice.h                          |  8 ++--
 include/uapi/linux/if_link.h                       |  9 +++-
 net/core/rtnetlink.c                               | 38 +++++++++++++---
 20 files changed, 156 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 81cc2d9831c2..8d0479d5be8e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -2576,7 +2576,8 @@ int bnx2x_get_vf_config(struct net_device *dev, int vfidx,
 
 	ivi->vf = vfidx;
 	ivi->qos = 0;
-	ivi->tx_rate = 10000; /* always 10G. TBA take from link struct */
+	ivi->max_tx_rate = 10000; /* always 10G. TBA take from link struct */
+	ivi->min_tx_rate = 0;
 	ivi->spoofchk = 1; /*always enabled */
 	if (vf->state == VF_ENABLED) {
 		/* mac and vlan are in vlan_mac objects */
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index dcc5e5c69743..4693d004a223 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1302,7 +1302,8 @@ static int be_get_vf_config(struct net_device *netdev, int vf,
 		return -EINVAL;
 
 	vi->vf = vf;
-	vi->tx_rate = vf_cfg->tx_rate;
+	vi->max_tx_rate = vf_cfg->tx_rate;
+	vi->min_tx_rate = 0;
 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
@@ -1342,7 +1343,8 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
 	return status;
 }
 
-static int be_set_vf_tx_rate(struct net_device *netdev, int vf, int rate)
+static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
+			     int min_tx_rate, int max_tx_rate)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	int status = 0;
@@ -1353,18 +1355,21 @@ static int be_set_vf_tx_rate(struct net_device *netdev, int vf, int rate)
 	if (vf >= adapter->num_vfs)
 		return -EINVAL;
 
-	if (rate < 100 || rate > 10000) {
+	if (min_tx_rate)
+		return -EINVAL;
+
+	if (max_tx_rate < 100 || max_tx_rate > 10000) {
 		dev_err(&adapter->pdev->dev,
-			"tx rate must be between 100 and 10000 Mbps\n");
+			"max tx rate must be between 100 and 10000 Mbps\n");
 		return -EINVAL;
 	}
 
-	status = be_cmd_config_qos(adapter, rate / 10, vf + 1);
+	status = be_cmd_config_qos(adapter, max_tx_rate / 10, vf + 1);
 	if (status)
 		dev_err(&adapter->pdev->dev,
-			"tx rate %d on VF %d failed\n", rate, vf);
+			"max tx rate %d on VF %d failed\n", max_tx_rate, vf);
 	else
-		adapter->vf_cfg[vf].tx_rate = rate;
+		adapter->vf_cfg[vf].tx_rate = max_tx_rate;
 	return status;
 }
 static int be_set_vf_link_state(struct net_device *netdev, int vf,
@@ -4257,7 +4262,7 @@ static const struct net_device_ops be_netdev_ops = {
 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
 	.ndo_set_vf_mac		= be_set_vf_mac,
 	.ndo_set_vf_vlan	= be_set_vf_vlan,
-	.ndo_set_vf_tx_rate	= be_set_vf_tx_rate,
+	.ndo_set_vf_rate	= be_set_vf_tx_rate,
 	.ndo_get_vf_config	= be_get_vf_config,
 	.ndo_set_vf_link_state  = be_set_vf_link_state,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index e0e5c6a867b1..96f7fabd8758 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6724,7 +6724,7 @@ static const struct net_device_ops i40e_netdev_ops = {
 	.ndo_set_features	= i40e_set_features,
 	.ndo_set_vf_mac		= i40e_ndo_set_vf_mac,
 	.ndo_set_vf_vlan	= i40e_ndo_set_vf_port_vlan,
-	.ndo_set_vf_tx_rate	= i40e_ndo_set_vf_bw,
+	.ndo_set_vf_rate	= i40e_ndo_set_vf_bw,
 	.ndo_get_vf_config	= i40e_ndo_get_vf_config,
 	.ndo_set_vf_link_state	= i40e_ndo_set_vf_link_state,
 #ifdef CONFIG_I40E_VXLAN
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 4d219566a04d..8564b0939dc4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2205,7 +2205,8 @@ error_pvid:
  *
  * configure vf tx rate
  **/
-int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate)
+int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+		       int max_tx_rate)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
@@ -2221,6 +2222,12 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate)
 		goto error;
 	}
 
+	if (min_tx_rate) {
+		dev_err(&pf->pdev->dev, "Invalid min tx rate (%d) (greater than 0) specified for vf %d.\n",
+			min_tx_rate, vf_id);
+		return -EINVAL;
+	}
+
 	vf = &(pf->vf[vf_id]);
 	vsi = pf->vsi[vf->lan_vsi_index];
 	if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) {
@@ -2243,23 +2250,23 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate)
 		break;
 	}
 
-	if (tx_rate > speed) {
-		dev_err(&pf->pdev->dev, "Invalid tx rate %d specified for vf %d.",
-			tx_rate, vf->vf_id);
+	if (max_tx_rate > speed) {
+		dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for vf %d.",
+			max_tx_rate, vf->vf_id);
 		ret = -EINVAL;
 		goto error;
 	}
 
 	/* Tx rate credits are in values of 50Mbps, 0 is disabled*/
-	ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, tx_rate / 50, 0,
-					  NULL);
+	ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, max_tx_rate / 50,
+					  0, NULL);
 	if (ret) {
-		dev_err(&pf->pdev->dev, "Unable to set tx rate, error code %d.\n",
+		dev_err(&pf->pdev->dev, "Unable to set max tx rate, error code %d.\n",
 			ret);
 		ret = -EIO;
 		goto error;
 	}
-	vf->tx_rate = tx_rate;
+	vf->tx_rate = max_tx_rate;
 error:
 	return ret;
 }
@@ -2301,7 +2308,8 @@ int i40e_ndo_get_vf_config(struct net_device *netdev,
 
 	memcpy(&ivi->mac, vf->default_lan_addr.addr, ETH_ALEN);
 
-	ivi->tx_rate = vf->tx_rate;
+	ivi->max_tx_rate = vf->tx_rate;
+	ivi->min_tx_rate = 0;
 	ivi->vlan = le16_to_cpu(vsi->info.pvid) & I40E_VLAN_MASK;
 	ivi->qos = (le16_to_cpu(vsi->info.pvid) & I40E_PRIORITY_MASK) >>
 		   I40E_VLAN_PRIORITY_SHIFT;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index ba3d1f8414be..5a559be4ba2c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -116,7 +116,8 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
 int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
 int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
 			      int vf_id, u16 vlan_id, u8 qos);
-int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate);
+int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+		       int max_tx_rate);
 int i40e_ndo_get_vf_config(struct net_device *netdev,
 			   int vf_id, struct ifla_vf_info *ivi);
 int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index bfcda8a455f4..1075b3f8c415 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -169,7 +169,7 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
 			       int vf, u16 vlan, u8 qos);
-static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
+static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
 static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
 				   bool setting);
 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
@@ -2084,7 +2084,7 @@ static const struct net_device_ops igb_netdev_ops = {
 	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
 	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
 	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
-	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
+	.ndo_set_vf_rate	= igb_ndo_set_vf_bw,
 	.ndo_set_vf_spoofchk	= igb_ndo_set_vf_spoofchk,
 	.ndo_get_vf_config	= igb_ndo_get_vf_config,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -7879,7 +7879,8 @@ static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
 	}
 }
 
-static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
+static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf,
+			     int min_tx_rate, int max_tx_rate)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -7888,15 +7889,19 @@ static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
 	if (hw->mac.type != e1000_82576)
 		return -EOPNOTSUPP;
 
+	if (min_tx_rate)
+		return -EINVAL;
+
 	actual_link_speed = igb_link_mbps(adapter->link_speed);
 	if ((vf >= adapter->vfs_allocated_count) ||
 	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
-	    (tx_rate < 0) || (tx_rate > actual_link_speed))
+	    (max_tx_rate < 0) ||
+	    (max_tx_rate > actual_link_speed))
 		return -EINVAL;
 
 	adapter->vf_rate_link_speed = actual_link_speed;
-	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
-	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
+	adapter->vf_data[vf].tx_rate = (u16)max_tx_rate;
+	igb_set_vf_rate_limit(hw, vf, max_tx_rate, actual_link_speed);
 
 	return 0;
 }
@@ -7936,7 +7941,8 @@ static int igb_ndo_get_vf_config(struct net_device *netdev,
 		return -EINVAL;
 	ivi->vf = vf;
 	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
-	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
+	ivi->max_tx_rate = adapter->vf_data[vf].tx_rate;
+	ivi->min_tx_rate = 0;
 	ivi->vlan = adapter->vf_data[vf].pf_vlan;
 	ivi->qos = adapter->vf_data[vf].pf_qos;
 	ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 8089ea9f2fba..a5332389620a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7926,7 +7926,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_do_ioctl		= ixgbe_ioctl,
 	.ndo_set_vf_mac		= ixgbe_ndo_set_vf_mac,
 	.ndo_set_vf_vlan	= ixgbe_ndo_set_vf_vlan,
-	.ndo_set_vf_tx_rate	= ixgbe_ndo_set_vf_bw,
+	.ndo_set_vf_rate	= ixgbe_ndo_set_vf_bw,
 	.ndo_set_vf_spoofchk	= ixgbe_ndo_set_vf_spoofchk,
 	.ndo_get_vf_config	= ixgbe_ndo_get_vf_config,
 	.ndo_get_stats64	= ixgbe_get_stats64,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index a01417c06620..3248e208c9dc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -1222,7 +1222,8 @@ void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter)
 	}
 }
 
-int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
+int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
+			int max_tx_rate)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	int link_speed;
@@ -1240,13 +1241,16 @@ int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
 	if (link_speed != 10000)
 		return -EINVAL;
 
+	if (min_tx_rate)
+		return -EINVAL;
+
 	/* rate limit cannot be less than 10Mbs or greater than link speed */
-	if (tx_rate && ((tx_rate <= 10) || (tx_rate > link_speed)))
+	if (max_tx_rate && ((max_tx_rate <= 10) || (max_tx_rate > link_speed)))
 		return -EINVAL;
 
 	/* store values */
 	adapter->vf_rate_link_speed = link_speed;
-	adapter->vfinfo[vf].tx_rate = tx_rate;
+	adapter->vfinfo[vf].tx_rate = max_tx_rate;
 
 	/* update hardware configuration */
 	ixgbe_set_vf_rate_limit(adapter, vf);
@@ -1288,7 +1292,8 @@ int ixgbe_ndo_get_vf_config(struct net_device *netdev,
 		return -EINVAL;
 	ivi->vf = vf;
 	memcpy(&ivi->mac, adapter->vfinfo[vf].vf_mac_addresses, ETH_ALEN);
-	ivi->tx_rate = adapter->vfinfo[vf].tx_rate;
+	ivi->max_tx_rate = adapter->vfinfo[vf].tx_rate;
+	ivi->min_tx_rate = 0;
 	ivi->vlan = adapter->vfinfo[vf].pf_vlan;
 	ivi->qos = adapter->vfinfo[vf].pf_qos;
 	ivi->spoofchk = adapter->vfinfo[vf].spoofchk_enabled;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
index cea640147604..32c26d586c01 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
@@ -44,7 +44,8 @@ void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
 int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
 int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
 			   u8 qos);
-int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
+int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
+			int max_tx_rate);
 int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
 int ixgbe_ndo_get_vf_config(struct net_device *netdev,
 			    int vf, struct ifla_vf_info *ivi);
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 59c7fd406805..ca8e7cb5a8e4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2486,11 +2486,12 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in
 	ivf->mac[4] = ((s_info->mac >> (1*8)) & 0xff);
 	ivf->mac[5] = ((s_info->mac)  & 0xff);
 
-	ivf->vlan	= s_info->default_vlan;
-	ivf->qos	= s_info->default_qos;
-	ivf->tx_rate	= s_info->tx_rate;
-	ivf->spoofchk	= s_info->spoofchk;
-	ivf->linkstate	= s_info->link_state;
+	ivf->vlan		= s_info->default_vlan;
+	ivf->qos		= s_info->default_qos;
+	ivf->max_tx_rate	= s_info->tx_rate;
+	ivf->min_tx_rate	= 0;
+	ivf->spoofchk		= s_info->spoofchk;
+	ivf->linkstate		= s_info->link_state;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 6e7527e2b595..41abe6070466 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1319,6 +1319,7 @@ struct qlcnic_eswitch {
 #define QL_STATUS_INVALID_PARAM	-1
 
 #define MAX_BW			100	/* % of link speed */
+#define MIN_BW			1	/* % of link speed */
 #define MAX_VLAN_ID		4095
 #define MIN_VLAN_ID		2
 #define DEFAULT_MAC_LEARN	1
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index f0a285359e66..f06ba90b4282 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -525,7 +525,7 @@ static const struct net_device_ops qlcnic_netdev_ops = {
 #endif
 #ifdef CONFIG_QLCNIC_SRIOV
 	.ndo_set_vf_mac		= qlcnic_sriov_set_vf_mac,
-	.ndo_set_vf_tx_rate	= qlcnic_sriov_set_vf_tx_rate,
+	.ndo_set_vf_rate	= qlcnic_sriov_set_vf_tx_rate,
 	.ndo_get_vf_config	= qlcnic_sriov_get_vf_config,
 	.ndo_set_vf_vlan	= qlcnic_sriov_set_vf_vlan,
 	.ndo_set_vf_spoofchk	= qlcnic_sriov_set_vf_spoofchk,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
index 335b50f7bd3e..4677b2edccca 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
@@ -233,7 +233,7 @@ bool qlcnic_sriov_soft_flr_check(struct qlcnic_adapter *,
 void qlcnic_sriov_pf_reset(struct qlcnic_adapter *);
 int qlcnic_sriov_pf_reinit(struct qlcnic_adapter *);
 int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *);
-int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int);
+int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int);
 int qlcnic_sriov_get_vf_config(struct net_device *, int ,
 			       struct ifla_vf_info *);
 int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 498fa6350c8d..2bdd9deffb38 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -201,6 +201,7 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs)
 			sriov->vf_info[i].vp = vp;
 			vp->vlan_mode = QLC_GUEST_VLAN_MODE;
 			vp->max_tx_bw = MAX_BW;
+			vp->min_tx_bw = MIN_BW;
 			vp->spoofchk = false;
 			random_ether_addr(vp->mac);
 			dev_info(&adapter->pdev->dev,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index 6d2f72f114f2..a29538b86edf 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
@@ -1848,7 +1848,8 @@ int qlcnic_sriov_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 	return 0;
 }
 
-int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, int tx_rate)
+int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf,
+				int min_tx_rate, int max_tx_rate)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	struct qlcnic_sriov *sriov = adapter->ahw->sriov;
@@ -1863,35 +1864,52 @@ int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, int tx_rate)
 	if (vf >= sriov->num_vfs)
 		return -EINVAL;
 
-	if (tx_rate >= 10000 || tx_rate < 100) {
+	vf_info = &sriov->vf_info[vf];
+	vp = vf_info->vp;
+	vpid = vp->handle;
+
+	if (!min_tx_rate)
+		min_tx_rate = QLC_VF_MIN_TX_RATE;
+
+	if (max_tx_rate &&
+	    (max_tx_rate >= 10000 || max_tx_rate < min_tx_rate)) {
 		netdev_err(netdev,
-			   "Invalid Tx rate, allowed range is [%d - %d]",
-			   QLC_VF_MIN_TX_RATE, QLC_VF_MAX_TX_RATE);
+			   "Invalid max Tx rate, allowed range is [%d - %d]",
+			   min_tx_rate, QLC_VF_MAX_TX_RATE);
 		return -EINVAL;
 	}
 
-	if (tx_rate == 0)
-		tx_rate = 10000;
+	if (!max_tx_rate)
+		max_tx_rate = 10000;
 
-	vf_info = &sriov->vf_info[vf];
-	vp = vf_info->vp;
-	vpid = vp->handle;
+	if (min_tx_rate &&
+	    (min_tx_rate > max_tx_rate || min_tx_rate < QLC_VF_MIN_TX_RATE)) {
+		netdev_err(netdev,
+			   "Invalid min Tx rate, allowed range is [%d - %d]",
+			   QLC_VF_MIN_TX_RATE, max_tx_rate);
+		return -EINVAL;
+	}
 
 	if (test_bit(QLC_BC_VF_STATE, &vf_info->state)) {
 		if (qlcnic_sriov_get_vf_vport_info(adapter, &nic_info, vpid))
 			return -EIO;
 
-		nic_info.max_tx_bw = tx_rate / 100;
+		nic_info.max_tx_bw = max_tx_rate / 100;
+		nic_info.min_tx_bw = min_tx_rate / 100;
 		nic_info.bit_offsets = BIT_0;
 
 		if (qlcnic_sriov_pf_set_vport_info(adapter, &nic_info, vpid))
 			return -EIO;
 	}
 
-	vp->max_tx_bw = tx_rate / 100;
+	vp->max_tx_bw = max_tx_rate / 100;
+	netdev_info(netdev,
+		    "Setting Max Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n",
+		    max_tx_rate, vp->max_tx_bw, vf);
+	vp->min_tx_bw = min_tx_rate / 100;
 	netdev_info(netdev,
-		    "Setting Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n",
-		    tx_rate, vp->max_tx_bw, vf);
+		    "Setting Min Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n",
+		    min_tx_rate, vp->min_tx_bw, vf);
 	return 0;
 }
 
@@ -1990,9 +2008,13 @@ int qlcnic_sriov_get_vf_config(struct net_device *netdev,
 	ivi->qos = vp->qos;
 	ivi->spoofchk = vp->spoofchk;
 	if (vp->max_tx_bw == MAX_BW)
-		ivi->tx_rate = 0;
+		ivi->max_tx_rate = 0;
+	else
+		ivi->max_tx_rate = vp->max_tx_bw * 100;
+	if (vp->min_tx_bw == MIN_BW)
+		ivi->min_tx_rate = 0;
 	else
-		ivi->tx_rate = vp->max_tx_bw * 100;
+		ivi->min_tx_rate = vp->min_tx_bw * 100;
 
 	ivi->vf = vf;
 	return 0;
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
index 9a9205e77896..43d2e64546ed 100644
--- a/drivers/net/ethernet/sfc/siena_sriov.c
+++ b/drivers/net/ethernet/sfc/siena_sriov.c
@@ -1633,7 +1633,8 @@ int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i,
 
 	ivi->vf = vf_i;
 	ether_addr_copy(ivi->mac, vf->addr.mac_addr);
-	ivi->tx_rate = 0;
+	ivi->max_tx_rate = 0;
+	ivi->min_tx_rate = 0;
 	tci = ntohs(vf->addr.tci);
 	ivi->vlan = tci & VLAN_VID_MASK;
 	ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7;
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index a86784dec3d3..119130e9298b 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -10,8 +10,9 @@ struct ifla_vf_info {
 	__u8 mac[32];
 	__u32 vlan;
 	__u32 qos;
-	__u32 tx_rate;
 	__u32 spoofchk;
 	__u32 linkstate;
+	__u32 min_tx_rate;
+	__u32 max_tx_rate;
 };
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f4ad247fd324..9ec3a945caf2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -850,7 +850,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *	SR-IOV management functions.
  * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
  * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
- * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
+ * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
+ *			  int max_tx_rate);
  * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
@@ -1044,8 +1045,9 @@ struct net_device_ops {
 						  int queue, u8 *mac);
 	int			(*ndo_set_vf_vlan)(struct net_device *dev,
 						   int queue, u16 vlan, u8 qos);
-	int			(*ndo_set_vf_tx_rate)(struct net_device *dev,
-						      int vf, int rate);
+	int			(*ndo_set_vf_rate)(struct net_device *dev,
+						   int vf, int min_tx_rate,
+						   int max_tx_rate);
 	int			(*ndo_set_vf_spoofchk)(struct net_device *dev,
 						       int vf, bool setting);
 	int			(*ndo_get_vf_config)(struct net_device *dev,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 9a7f7ace6649..622e7910b8cc 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -399,9 +399,10 @@ enum {
 	IFLA_VF_UNSPEC,
 	IFLA_VF_MAC,		/* Hardware queue specific attributes */
 	IFLA_VF_VLAN,
-	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
+	IFLA_VF_TX_RATE,	/* Max TX Bandwidth Allocation */
 	IFLA_VF_SPOOFCHK,	/* Spoof Checking on/off switch */
 	IFLA_VF_LINK_STATE,	/* link state enable/disable/auto switch */
+	IFLA_VF_RATE,		/* Min and Max TX Bandwidth Allocation */
 	__IFLA_VF_MAX,
 };
 
@@ -423,6 +424,12 @@ struct ifla_vf_tx_rate {
 	__u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
 };
 
+struct ifla_vf_rate {
+	__u32 vf;
+	__u32 min_tx_rate; /* Min Bandwidth in Mbps */
+	__u32 max_tx_rate; /* Max Bandwidth in Mbps */
+};
+
 struct ifla_vf_spoofchk {
 	__u32 vf;
 	__u32 setting;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9837bebf93ce..d6417464dc66 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -767,8 +767,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 		size += num_vfs *
 			(nla_total_size(sizeof(struct ifla_vf_mac)) +
 			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
-			 nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
-			 nla_total_size(sizeof(struct ifla_vf_spoofchk)));
+			 nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
+			 nla_total_size(sizeof(struct ifla_vf_rate)));
 		return size;
 	} else
 		return 0;
@@ -1034,6 +1034,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			struct ifla_vf_info ivi;
 			struct ifla_vf_mac vf_mac;
 			struct ifla_vf_vlan vf_vlan;
+			struct ifla_vf_rate vf_rate;
 			struct ifla_vf_tx_rate vf_tx_rate;
 			struct ifla_vf_spoofchk vf_spoofchk;
 			struct ifla_vf_link_state vf_linkstate;
@@ -1054,6 +1055,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 				break;
 			vf_mac.vf =
 				vf_vlan.vf =
+				vf_rate.vf =
 				vf_tx_rate.vf =
 				vf_spoofchk.vf =
 				vf_linkstate.vf = ivi.vf;
@@ -1061,7 +1063,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
 			vf_vlan.vlan = ivi.vlan;
 			vf_vlan.qos = ivi.qos;
-			vf_tx_rate.rate = ivi.tx_rate;
+			vf_tx_rate.rate = ivi.max_tx_rate;
+			vf_rate.min_tx_rate = ivi.min_tx_rate;
+			vf_rate.max_tx_rate = ivi.max_tx_rate;
 			vf_spoofchk.setting = ivi.spoofchk;
 			vf_linkstate.link_state = ivi.linkstate;
 			vf = nla_nest_start(skb, IFLA_VF_INFO);
@@ -1071,6 +1075,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			}
 			if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
 			    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+			    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+				    &vf_rate) ||
 			    nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
 				    &vf_tx_rate) ||
 			    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
@@ -1177,6 +1183,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 				    .len = sizeof(struct ifla_vf_tx_rate) },
 	[IFLA_VF_SPOOFCHK]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct ifla_vf_spoofchk) },
+	[IFLA_VF_RATE]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_rate) },
 };
 
 static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1336,11 +1344,29 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
 		}
 		case IFLA_VF_TX_RATE: {
 			struct ifla_vf_tx_rate *ivt;
+			struct ifla_vf_info ivf;
 			ivt = nla_data(vf);
 			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_tx_rate)
-				err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
-							      ivt->rate);
+			if (ops->ndo_get_vf_config)
+				err = ops->ndo_get_vf_config(dev, ivt->vf,
+							     &ivf);
+			if (err)
+				break;
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_rate)
+				err = ops->ndo_set_vf_rate(dev, ivt->vf,
+							   ivf.min_tx_rate,
+							   ivt->rate);
+			break;
+		}
+		case IFLA_VF_RATE: {
+			struct ifla_vf_rate *ivt;
+			ivt = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_rate)
+				err = ops->ndo_set_vf_rate(dev, ivt->vf,
+							   ivt->min_tx_rate,
+							   ivt->max_tx_rate);
 			break;
 		}
 		case IFLA_VF_SPOOFCHK: {
-- 
cgit v1.2.3-71-gd317


From b26ba202e0500eb852e89499ece1b2deaa64c3a7 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 23 May 2014 08:47:09 -0700
Subject: net: Eliminate no_check from protosw

It doesn't seem like an protocols are setting anything other
than the default, and allowing to arbitrarily disable checksums
for a whole protocol seems dangerous. This can be done on a per
socket basis.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/protocol.h | 1 -
 net/dccp/ipv4.c        | 1 -
 net/ipv4/af_inet.c     | 7 -------
 net/ipv4/udplite.c     | 1 -
 net/ipv6/af_inet6.c    | 3 ---
 net/ipv6/ping.c        | 1 -
 net/ipv6/raw.c         | 1 -
 net/ipv6/tcp_ipv6.c    | 1 -
 net/ipv6/udp.c         | 1 -
 net/ipv6/udplite.c     | 1 -
 net/l2tp/l2tp_ip.c     | 1 -
 net/l2tp/l2tp_ip6.c    | 1 -
 net/sctp/ipv6.c        | 2 --
 net/sctp/protocol.c    | 2 --
 14 files changed, 24 deletions(-)

(limited to 'include')

diff --git a/include/net/protocol.h b/include/net/protocol.h
index a7e986b08147..d6fcc1fcdb5b 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -86,7 +86,6 @@ struct inet_protosw {
 	struct proto	 *prot;
 	const struct proto_ops *ops;
   
-	char             no_check;   /* checksum on rcv/xmit/none? */
 	unsigned char	 flags;      /* See INET_PROTOSW_* below.  */
 };
 #define INET_PROTOSW_REUSE 0x01	     /* Are ports automatically reusable? */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 22b5d818b200..6ca645c4b48e 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1024,7 +1024,6 @@ static struct inet_protosw dccp_v4_protosw = {
 	.protocol	= IPPROTO_DCCP,
 	.prot		= &dccp_v4_prot,
 	.ops		= &inet_dccp_ops,
-	.no_check	= 0,
 	.flags		= INET_PROTOSW_ICSK,
 };
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 279132bcadd9..0e9bb08a91e4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -254,7 +254,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
 	struct inet_sock *inet;
 	struct proto *answer_prot;
 	unsigned char answer_flags;
-	char answer_no_check;
 	int try_loading_module = 0;
 	int err;
 
@@ -312,7 +311,6 @@ lookup_protocol:
 
 	sock->ops = answer->ops;
 	answer_prot = answer->prot;
-	answer_no_check = answer->no_check;
 	answer_flags = answer->flags;
 	rcu_read_unlock();
 
@@ -324,7 +322,6 @@ lookup_protocol:
 		goto out;
 
 	err = 0;
-	sk->sk_no_check = answer_no_check;
 	if (INET_PROTOSW_REUSE & answer_flags)
 		sk->sk_reuse = SK_CAN_REUSE;
 
@@ -1002,7 +999,6 @@ static struct inet_protosw inetsw_array[] =
 		.protocol =   IPPROTO_TCP,
 		.prot =       &tcp_prot,
 		.ops =        &inet_stream_ops,
-		.no_check =   0,
 		.flags =      INET_PROTOSW_PERMANENT |
 			      INET_PROTOSW_ICSK,
 	},
@@ -1012,7 +1008,6 @@ static struct inet_protosw inetsw_array[] =
 		.protocol =   IPPROTO_UDP,
 		.prot =       &udp_prot,
 		.ops =        &inet_dgram_ops,
-		.no_check =   UDP_CSUM_DEFAULT,
 		.flags =      INET_PROTOSW_PERMANENT,
        },
 
@@ -1021,7 +1016,6 @@ static struct inet_protosw inetsw_array[] =
 		.protocol =   IPPROTO_ICMP,
 		.prot =       &ping_prot,
 		.ops =        &inet_dgram_ops,
-		.no_check =   UDP_CSUM_DEFAULT,
 		.flags =      INET_PROTOSW_REUSE,
        },
 
@@ -1030,7 +1024,6 @@ static struct inet_protosw inetsw_array[] =
 	       .protocol =   IPPROTO_IP,	/* wild card */
 	       .prot =       &raw_prot,
 	       .ops =        &inet_sockraw_ops,
-	       .no_check =   UDP_CSUM_DEFAULT,
 	       .flags =      INET_PROTOSW_REUSE,
        }
 };
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 2c46acd4cc36..3b3efbda48e1 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -70,7 +70,6 @@ static struct inet_protosw udplite4_protosw = {
 	.protocol	=  IPPROTO_UDPLITE,
 	.prot		=  &udplite_prot,
 	.ops		=  &inet_dgram_ops,
-	.no_check	=  0,		/* must checksum (RFC 3828) */
 	.flags		=  INET_PROTOSW_PERMANENT,
 };
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index dc47cc757b80..7cb4392690dd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -106,7 +106,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
 	struct inet_protosw *answer;
 	struct proto *answer_prot;
 	unsigned char answer_flags;
-	char answer_no_check;
 	int try_loading_module = 0;
 	int err;
 
@@ -162,7 +161,6 @@ lookup_protocol:
 
 	sock->ops = answer->ops;
 	answer_prot = answer->prot;
-	answer_no_check = answer->no_check;
 	answer_flags = answer->flags;
 	rcu_read_unlock();
 
@@ -176,7 +174,6 @@ lookup_protocol:
 	sock_init_data(sock, sk);
 
 	err = 0;
-	sk->sk_no_check = answer_no_check;
 	if (INET_PROTOSW_REUSE & answer_flags)
 		sk->sk_reuse = SK_CAN_REUSE;
 
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index a2a1d80dfe0c..5b7a1ed2aba9 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -51,7 +51,6 @@ static struct inet_protosw pingv6_protosw = {
 	.protocol =  IPPROTO_ICMPV6,
 	.prot =      &pingv6_prot,
 	.ops =       &inet6_dgram_ops,
-	.no_check =  UDP_CSUM_DEFAULT,
 	.flags =     INET_PROTOSW_REUSE,
 };
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dddfb5fa2b7a..b2dc60b0c764 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1322,7 +1322,6 @@ static struct inet_protosw rawv6_protosw = {
 	.protocol	= IPPROTO_IP,	/* wild card */
 	.prot		= &rawv6_prot,
 	.ops		= &inet6_sockraw_ops,
-	.no_check	= UDP_CSUM_DEFAULT,
 	.flags		= INET_PROTOSW_REUSE,
 };
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f07b2abba359..229239ad96b1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1992,7 +1992,6 @@ static struct inet_protosw tcpv6_protosw = {
 	.protocol	=	IPPROTO_TCP,
 	.prot		=	&tcpv6_prot,
 	.ops		=	&inet6_stream_ops,
-	.no_check	=	0,
 	.flags		=	INET_PROTOSW_PERMANENT |
 				INET_PROTOSW_ICSK,
 };
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7edf096867c4..c7ed47bde54b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1507,7 +1507,6 @@ static struct inet_protosw udpv6_protosw = {
 	.protocol =  IPPROTO_UDP,
 	.prot =      &udpv6_prot,
 	.ops =       &inet6_dgram_ops,
-	.no_check =  UDP_CSUM_DEFAULT,
 	.flags =     INET_PROTOSW_PERMANENT,
 };
 
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index dfcc4be46898..9cf097e206e9 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -64,7 +64,6 @@ static struct inet_protosw udplite6_protosw = {
 	.protocol	= IPPROTO_UDPLITE,
 	.prot		= &udplitev6_prot,
 	.ops		= &inet6_dgram_ops,
-	.no_check	= 0,
 	.flags		= INET_PROTOSW_PERMANENT,
 };
 
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 3397fe6897c0..369a9822488c 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -606,7 +606,6 @@ static struct inet_protosw l2tp_ip_protosw = {
 	.protocol	= IPPROTO_L2TP,
 	.prot		= &l2tp_ip_prot,
 	.ops		= &l2tp_ip_ops,
-	.no_check	= 0,
 };
 
 static struct net_protocol l2tp_ip_protocol __read_mostly = {
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index e472d44a3b91..f3f98a156cee 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -755,7 +755,6 @@ static struct inet_protosw l2tp_ip6_protosw = {
 	.protocol	= IPPROTO_L2TP,
 	.prot		= &l2tp_ip6_prot,
 	.ops		= &l2tp_ip6_ops,
-	.no_check	= 0,
 };
 
 static struct inet6_protocol l2tp_ip6_protocol __read_mostly = {
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 4dc5d9e08311..1999592ba88c 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -943,7 +943,6 @@ static struct inet_protosw sctpv6_seqpacket_protosw = {
 	.protocol      = IPPROTO_SCTP,
 	.prot 	       = &sctpv6_prot,
 	.ops           = &inet6_seqpacket_ops,
-	.no_check      = 0,
 	.flags         = SCTP_PROTOSW_FLAG
 };
 static struct inet_protosw sctpv6_stream_protosw = {
@@ -951,7 +950,6 @@ static struct inet_protosw sctpv6_stream_protosw = {
 	.protocol      = IPPROTO_SCTP,
 	.prot 	       = &sctpv6_prot,
 	.ops           = &inet6_seqpacket_ops,
-	.no_check      = 0,
 	.flags         = SCTP_PROTOSW_FLAG,
 };
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index af5afca4b85a..6789d785e698 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1017,7 +1017,6 @@ static struct inet_protosw sctp_seqpacket_protosw = {
 	.protocol   = IPPROTO_SCTP,
 	.prot       = &sctp_prot,
 	.ops        = &inet_seqpacket_ops,
-	.no_check   = 0,
 	.flags      = SCTP_PROTOSW_FLAG
 };
 static struct inet_protosw sctp_stream_protosw = {
@@ -1025,7 +1024,6 @@ static struct inet_protosw sctp_stream_protosw = {
 	.protocol   = IPPROTO_SCTP,
 	.prot       = &sctp_prot,
 	.ops        = &inet_seqpacket_ops,
-	.no_check   = 0,
 	.flags      = SCTP_PROTOSW_FLAG
 };
 
-- 
cgit v1.2.3-71-gd317


From 28448b80456feafe07e2d05b6363b00f61f6171e Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 23 May 2014 08:47:19 -0700
Subject: net: Split sk_no_check into sk_no_check_{rx,tx}

Define separate fields in the sock structure for configuring disabling
checksums in both TX and RX-- sk_no_check_tx and sk_no_check_rx.
The SO_NO_CHECK socket option only affects sk_no_check_tx. Also,
removed UDP_CSUM_* defines since they are no longer necessary.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/scsi/iscsi_tcp.c | 2 +-
 include/net/sock.h       | 6 ++++--
 include/net/udp.h        | 9 ---------
 net/appletalk/ddp.c      | 2 +-
 net/core/sock.c          | 4 ++--
 net/decnet/af_decnet.c   | 2 +-
 net/ipv4/udp.c           | 2 +-
 net/ipv6/udp.c           | 6 +++---
 net/ipx/af_ipx.c         | 2 +-
 net/ipx/ipx_route.c      | 3 ++-
 net/l2tp/l2tp_core.c     | 4 ++--
 net/l2tp/l2tp_netlink.c  | 3 +--
 net/sctp/socket.c        | 3 ++-
 13 files changed, 21 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 11854845393b..a669f2d11c31 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -244,7 +244,7 @@ iscsi_sw_tcp_conn_restore_callbacks(struct iscsi_conn *conn)
 	sk->sk_data_ready   = tcp_sw_conn->old_data_ready;
 	sk->sk_state_change = tcp_sw_conn->old_state_change;
 	sk->sk_write_space  = tcp_sw_conn->old_write_space;
-	sk->sk_no_check	 = 0;
+	sk->sk_no_check_tx = 0;
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 21569cf456ed..07b7fcd60d80 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -243,7 +243,8 @@ struct cg_proto;
   *	@sk_sndbuf: size of send buffer in bytes
   *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
   *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
-  *	@sk_no_check: %SO_NO_CHECK setting, whether or not checkup packets
+  *	@sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
+  *	@sk_no_check_rx: allow zero checksum in RX packets
   *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
   *	@sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
   *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
@@ -371,7 +372,8 @@ struct sock {
 	struct sk_buff_head	sk_write_queue;
 	kmemcheck_bitfield_begin(flags);
 	unsigned int		sk_shutdown  : 2,
-				sk_no_check  : 2,
+				sk_no_check_tx : 1,
+				sk_no_check_rx : 1,
 				sk_userlocks : 4,
 				sk_protocol  : 8,
 				sk_type      : 16;
diff --git a/include/net/udp.h b/include/net/udp.h
index a24f0f3e107f..5eb86874bcd6 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -95,15 +95,6 @@ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table,
 	return &table->hash2[hash & table->mask];
 }
 
-/* Note: this must match 'valbool' in sock_setsockopt */
-#define UDP_CSUM_NOXMIT		1
-
-/* Used by SunRPC/xprt layer. */
-#define UDP_CSUM_NORCV		2
-
-/* Default, as per the RFC, is to always do csums. */
-#define UDP_CSUM_DEFAULT	0
-
 extern struct proto udp_prot;
 
 extern atomic_long_t udp_memory_allocated;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 786ee2f83d5f..01a1082e02b3 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1669,7 +1669,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 		goto out;
 	}
 
-	if (sk->sk_no_check == 1)
+	if (sk->sk_no_check_tx)
 		ddp->deh_sum = 0;
 	else
 		ddp->deh_sum = atalk_checksum(skb, len + sizeof(*ddp));
diff --git a/net/core/sock.c b/net/core/sock.c
index 664ee4295b6f..026e01f70274 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -784,7 +784,7 @@ set_rcvbuf:
 		break;
 
 	case SO_NO_CHECK:
-		sk->sk_no_check = valbool;
+		sk->sk_no_check_tx = valbool;
 		break;
 
 	case SO_PRIORITY:
@@ -1064,7 +1064,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_NO_CHECK:
-		v.val = sk->sk_no_check;
+		v.val = sk->sk_no_check_tx;
 		break;
 
 	case SO_PRIORITY:
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 4c04848953bd..ae011b46c071 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -481,7 +481,7 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
 
 	sk->sk_backlog_rcv = dn_nsp_backlog_rcv;
 	sk->sk_destruct    = dn_destruct;
-	sk->sk_no_check    = 1;
+	sk->sk_no_check_tx = 1;
 	sk->sk_family      = PF_DECnet;
 	sk->sk_protocol    = 0;
 	sk->sk_allocation  = gfp;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 590532a7bd2d..12c6175b29cd 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -785,7 +785,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
 	if (is_udplite)  				 /*     UDP-Lite      */
 		csum = udplite_csum(skb);
 
-	else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */
+	else if (sk->sk_no_check_tx) {   /* UDP csum disabled */
 
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c7ed47bde54b..b8db453133aa 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -797,7 +797,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 		/* If zero checksum and sk_no_check is not on for
 		 * the socket then skip it.
 		 */
-		if (uh->check || sk->sk_no_check)
+		if (uh->check || sk->sk_no_check_rx)
 			stack[count++] = sk;
 
 		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
@@ -887,7 +887,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (sk != NULL) {
 		int ret;
 
-		if (!uh->check && !sk->sk_no_check) {
+		if (!uh->check && !sk->sk_no_check_rx) {
 			sock_put(sk);
 			udp6_csum_zero_error(skb);
 			goto csum_error;
@@ -1037,7 +1037,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
 
 	if (is_udplite)
 		csum = udplite_csum_outgoing(sk, skb);
-	else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */
+	else if (sk->sk_no_check_tx) {   /* UDP csum disabled */
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 41e4e93cb3aa..91729b807c7d 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1353,7 +1353,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol,
 
 	sk_refcnt_debug_inc(sk);
 	sock_init_data(sock, sk);
-	sk->sk_no_check = 1;		/* Checksum off by default */
+	sk->sk_no_check_tx = 1;		/* Checksum off by default */
 	sock->ops = &ipx_dgram_ops;
 	rc = 0;
 out:
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index c1f03185c5e1..67e7ad3d46b1 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -236,7 +236,8 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
 	}
 
 	/* Apply checksum. Not allowed on 802.3 links. */
-	if (sk->sk_no_check || intrfc->if_dlink_type == htons(IPX_FRAME_8023))
+	if (sk->sk_no_check_tx ||
+	    intrfc->if_dlink_type == htons(IPX_FRAME_8023))
 		ipx->ipx_checksum = htons(0xFFFF);
 	else
 		ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr));
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index ed0716a075ba..a1186105f537 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1188,7 +1188,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 			l2tp_xmit_ipv6_csum(sk, skb, udp_len);
 		else
 #endif
-		if (sk->sk_no_check == UDP_CSUM_NOXMIT)
+		if (sk->sk_no_check_tx)
 			skb->ip_summed = CHECKSUM_NONE;
 		else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
 			 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
@@ -1463,7 +1463,7 @@ static int l2tp_tunnel_sock_create(struct net *net,
 		}
 
 		if (!cfg->use_udp_checksums)
-			sock->sk->sk_no_check = UDP_CSUM_NOXMIT;
+			sock->sk->sk_no_check_tx = 1;
 
 		break;
 
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index bd7387adea9e..f3d331bdd706 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -297,8 +297,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 	case L2TP_ENCAPTYPE_UDP:
 		if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
 		    nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) ||
-		    nla_put_u8(skb, L2TP_ATTR_UDP_CSUM,
-			       (sk->sk_no_check != UDP_CSUM_NOXMIT)))
+		    nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
 			goto nla_put_failure;
 		/* NOBREAK */
 	case L2TP_ENCAPTYPE_IP:
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 2af76eaba8f7..429899689408 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6946,7 +6946,8 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 	newsk->sk_type = sk->sk_type;
 	newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
 	newsk->sk_flags = sk->sk_flags;
-	newsk->sk_no_check = sk->sk_no_check;
+	newsk->sk_no_check_tx = sk->sk_no_check_tx;
+	newsk->sk_no_check_rx = sk->sk_no_check_rx;
 	newsk->sk_reuse = sk->sk_reuse;
 
 	newsk->sk_shutdown = sk->sk_shutdown;
-- 
cgit v1.2.3-71-gd317


From 1c19448c9ba6545b80ded18488a64a7f3d8e6998 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 23 May 2014 08:47:32 -0700
Subject: net: Make enabling of zero UDP6 csums more restrictive

RFC 6935 permits zero checksums to be used in IPv6 however this is
recommended only for certain tunnel protocols, it does not make
checksums completely optional like they are in IPv4.

This patch restricts the use of IPv6 zero checksums that was previously
intoduced. no_check6_tx and no_check6_rx have been added to control
the use of checksums in UDP6 RX and TX path. The normal
sk_no_check_{rx,tx} settings are not used (this avoids ambiguity when
dealing with a dual stack socket).

A helper function has been added (udp_set_no_check6) which can be
called by tunnel impelmentations to all zero checksums (send on the
socket, and accept them as valid).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h      | 24 +++++++++++++++++++++++-
 include/uapi/linux/udp.h |  2 ++
 net/ipv4/udp.c           | 20 +++++++++++++++++++-
 net/ipv6/udp.c           |  8 ++++----
 4 files changed, 48 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 42278bbf7a88..247cfdcc4b08 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -47,7 +47,9 @@ struct udp_sock {
 #define udp_portaddr_node	inet.sk.__sk_common.skc_portaddr_node
 	int		 pending;	/* Any pending frames ? */
 	unsigned int	 corkflag;	/* Cork is required */
-  	__u16		 encap_type;	/* Is this an Encapsulation socket? */
+	__u8		 encap_type;	/* Is this an Encapsulation socket? */
+	unsigned char	 no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
+			 no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */
 	/*
 	 * Following member retains the information to create a UDP header
 	 * when the socket is uncorked.
@@ -76,6 +78,26 @@ static inline struct udp_sock *udp_sk(const struct sock *sk)
 	return (struct udp_sock *)sk;
 }
 
+static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
+{
+	udp_sk(sk)->no_check6_tx = val;
+}
+
+static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
+{
+	udp_sk(sk)->no_check6_rx = val;
+}
+
+static inline bool udp_get_no_check6_tx(struct sock *sk)
+{
+	return udp_sk(sk)->no_check6_tx;
+}
+
+static inline bool udp_get_no_check6_rx(struct sock *sk)
+{
+	return udp_sk(sk)->no_check6_rx;
+}
+
 #define udp_portaddr_for_each_entry(__sk, node, list) \
 	hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
 
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index e2bcfd75a30d..16574ea18f0c 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -29,6 +29,8 @@ struct udphdr {
 /* UDP socket options */
 #define UDP_CORK	1	/* Never send partially complete segments */
 #define UDP_ENCAP	100	/* Set the socket to accept encapsulated packets */
+#define UDP_NO_CHECK6_TX 101	/* Disable sending checksum for UDP6X */
+#define UDP_NO_CHECK6_RX 102	/* Disable accpeting checksum for UDP6 */
 
 /* UDP encapsulation types */
 #define UDP_ENCAP_ESPINUDP_NON_IKE	1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 12c6175b29cd..e07d52b8617a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1968,7 +1968,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		       int (*push_pending_frames)(struct sock *))
 {
 	struct udp_sock *up = udp_sk(sk);
-	int val;
+	int val, valbool;
 	int err = 0;
 	int is_udplite = IS_UDPLITE(sk);
 
@@ -1978,6 +1978,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
 
+	valbool = val ? 1 : 0;
+
 	switch (optname) {
 	case UDP_CORK:
 		if (val != 0) {
@@ -2007,6 +2009,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		}
 		break;
 
+	case UDP_NO_CHECK6_TX:
+		up->no_check6_tx = valbool;
+		break;
+
+	case UDP_NO_CHECK6_RX:
+		up->no_check6_rx = valbool;
+		break;
+
 	/*
 	 * 	UDP-Lite's partial checksum coverage (RFC 3828).
 	 */
@@ -2089,6 +2099,14 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 		val = up->encap_type;
 		break;
 
+	case UDP_NO_CHECK6_TX:
+		val = up->no_check6_tx;
+		break;
+
+	case UDP_NO_CHECK6_RX:
+		val = up->no_check6_rx;
+		break;
+
 	/* The following two cannot be changed on UDP sockets, the return is
 	 * always 0 (which corresponds to the full checksum coverage of UDP). */
 	case UDPLITE_SEND_CSCOV:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b8db453133aa..60325236446a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -794,10 +794,10 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	dif = inet6_iif(skb);
 	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
 	while (sk) {
-		/* If zero checksum and sk_no_check is not on for
+		/* If zero checksum and no_check is not on for
 		 * the socket then skip it.
 		 */
-		if (uh->check || sk->sk_no_check_rx)
+		if (uh->check || udp_sk(sk)->no_check6_rx)
 			stack[count++] = sk;
 
 		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
@@ -887,7 +887,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (sk != NULL) {
 		int ret;
 
-		if (!uh->check && !sk->sk_no_check_rx) {
+		if (!uh->check && !udp_sk(sk)->no_check6_rx) {
 			sock_put(sk);
 			udp6_csum_zero_error(skb);
 			goto csum_error;
@@ -1037,7 +1037,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
 
 	if (is_udplite)
 		csum = udplite_csum_outgoing(sk, skb);
-	else if (sk->sk_no_check_tx) {   /* UDP csum disabled */
+	else if (up->no_check6_tx) {   /* UDP csum disabled */
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
-- 
cgit v1.2.3-71-gd317


From 6b649feafe10b293f4bd5a74aca95faf625ae525 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 23 May 2014 08:47:40 -0700
Subject: l2tp: Add support for zero IPv6 checksums

Added new L2TP configuration options to allow TX and RX of
zero checksums in IPv6. Default is not to use them.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/l2tp.h | 2 ++
 net/l2tp/l2tp_core.c      | 9 ++++++++-
 net/l2tp/l2tp_core.h      | 4 +++-
 net/l2tp/l2tp_netlink.c   | 7 +++++++
 4 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 8adb68160327..21caa2631c20 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -124,6 +124,8 @@ enum {
 	L2TP_ATTR_STATS,		/* nested */
 	L2TP_ATTR_IP6_SADDR,		/* struct in6_addr */
 	L2TP_ATTR_IP6_DADDR,		/* struct in6_addr */
+	L2TP_ATTR_UDP_ZERO_CSUM6_TX,	/* u8 */
+	L2TP_ATTR_UDP_ZERO_CSUM6_RX,	/* u8 */
 	__L2TP_ATTR_MAX,
 };
 
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index a1186105f537..379558014b60 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1102,7 +1102,9 @@ static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct udphdr *uh = udp_hdr(skb);
 
-	if (!skb_dst(skb) || !skb_dst(skb)->dev ||
+	if (udp_get_no_check6_tx(sk))
+		skb->ip_summed = CHECKSUM_NONE;
+	else if (!skb_dst(skb) || !skb_dst(skb)->dev ||
 	    !(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
 		__wsum csum = skb_checksum(skb, 0, udp_len, 0);
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -1435,6 +1437,11 @@ static int l2tp_tunnel_sock_create(struct net *net,
 					     sizeof(udp6_addr), 0);
 			if (err < 0)
 				goto out;
+
+			if (cfg->udp6_zero_tx_checksums)
+				udp_set_no_check6_tx(sock->sk, true);
+			if (cfg->udp6_zero_rx_checksums)
+				udp_set_no_check6_rx(sock->sk, true);
 		} else
 #endif
 		{
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 3f93ccd6ba97..68aa9ffd4ae4 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -162,7 +162,9 @@ struct l2tp_tunnel_cfg {
 #endif
 	u16			local_udp_port;
 	u16			peer_udp_port;
-	unsigned int		use_udp_checksums:1;
+	unsigned int		use_udp_checksums:1,
+				udp6_zero_tx_checksums:1,
+				udp6_zero_rx_checksums:1;
 };
 
 struct l2tp_tunnel {
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index f3d331bdd706..0ac907adb2f4 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -161,6 +161,13 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
 			cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
 		if (info->attrs[L2TP_ATTR_UDP_CSUM])
 			cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]);
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX])
+			cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
+		if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX])
+			cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
+#endif
 	}
 
 	if (info->attrs[L2TP_ATTR_DEBUG])
-- 
cgit v1.2.3-71-gd317


From 8556ce79d5986a87fee4c29300b4efee07c0f15e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Fri, 23 May 2014 18:43:57 +0200
Subject: net: filter: remove DL macro

Lets get rid of this macro. After commit 5bcfedf06f7f ("net: filter:
simplify label names from jump-table"), labels have become more
readable due to omission of BPF_ prefix but at the same time more
generic, so that things like `git grep -n` would not find them. As
a middle path, lets get rid of the DL macro as it's not strictly
needed and would otherwise just hide the full name.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |   3 -
 net/core/filter.c      | 193 +++++++++++++++++++++++++------------------------
 2 files changed, 99 insertions(+), 97 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7977b3958e25..2b0056afd1f7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -37,9 +37,6 @@
 #define BPF_CALL	0x80	/* function call */
 #define BPF_EXIT	0x90	/* function return */
 
-/* Placeholder/dummy for 0 */
-#define BPF_0		0
-
 /* Register numbers */
 enum {
 	BPF_REG_0 = 0,
diff --git a/net/core/filter.c b/net/core/filter.c
index 7067cb240d3e..b3b0889fe089 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -160,95 +160,100 @@ static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *ins
 	static const void *jumptable[256] = {
 		[0 ... 255] = &&default_label,
 		/* Now overwrite non-defaults ... */
-#define DL(A, B, C)	[BPF_##A|BPF_##B|BPF_##C] = &&A##_##B##_##C
-		DL(ALU, ADD, X),
-		DL(ALU, ADD, K),
-		DL(ALU, SUB, X),
-		DL(ALU, SUB, K),
-		DL(ALU, AND, X),
-		DL(ALU, AND, K),
-		DL(ALU, OR, X),
-		DL(ALU, OR, K),
-		DL(ALU, LSH, X),
-		DL(ALU, LSH, K),
-		DL(ALU, RSH, X),
-		DL(ALU, RSH, K),
-		DL(ALU, XOR, X),
-		DL(ALU, XOR, K),
-		DL(ALU, MUL, X),
-		DL(ALU, MUL, K),
-		DL(ALU, MOV, X),
-		DL(ALU, MOV, K),
-		DL(ALU, DIV, X),
-		DL(ALU, DIV, K),
-		DL(ALU, MOD, X),
-		DL(ALU, MOD, K),
-		DL(ALU, NEG, 0),
-		DL(ALU, END, TO_BE),
-		DL(ALU, END, TO_LE),
-		DL(ALU64, ADD, X),
-		DL(ALU64, ADD, K),
-		DL(ALU64, SUB, X),
-		DL(ALU64, SUB, K),
-		DL(ALU64, AND, X),
-		DL(ALU64, AND, K),
-		DL(ALU64, OR, X),
-		DL(ALU64, OR, K),
-		DL(ALU64, LSH, X),
-		DL(ALU64, LSH, K),
-		DL(ALU64, RSH, X),
-		DL(ALU64, RSH, K),
-		DL(ALU64, XOR, X),
-		DL(ALU64, XOR, K),
-		DL(ALU64, MUL, X),
-		DL(ALU64, MUL, K),
-		DL(ALU64, MOV, X),
-		DL(ALU64, MOV, K),
-		DL(ALU64, ARSH, X),
-		DL(ALU64, ARSH, K),
-		DL(ALU64, DIV, X),
-		DL(ALU64, DIV, K),
-		DL(ALU64, MOD, X),
-		DL(ALU64, MOD, K),
-		DL(ALU64, NEG, 0),
-		DL(JMP, CALL, 0),
-		DL(JMP, JA, 0),
-		DL(JMP, JEQ, X),
-		DL(JMP, JEQ, K),
-		DL(JMP, JNE, X),
-		DL(JMP, JNE, K),
-		DL(JMP, JGT, X),
-		DL(JMP, JGT, K),
-		DL(JMP, JGE, X),
-		DL(JMP, JGE, K),
-		DL(JMP, JSGT, X),
-		DL(JMP, JSGT, K),
-		DL(JMP, JSGE, X),
-		DL(JMP, JSGE, K),
-		DL(JMP, JSET, X),
-		DL(JMP, JSET, K),
-		DL(JMP, EXIT, 0),
-		DL(STX, MEM, B),
-		DL(STX, MEM, H),
-		DL(STX, MEM, W),
-		DL(STX, MEM, DW),
-		DL(STX, XADD, W),
-		DL(STX, XADD, DW),
-		DL(ST, MEM, B),
-		DL(ST, MEM, H),
-		DL(ST, MEM, W),
-		DL(ST, MEM, DW),
-		DL(LDX, MEM, B),
-		DL(LDX, MEM, H),
-		DL(LDX, MEM, W),
-		DL(LDX, MEM, DW),
-		DL(LD, ABS, W),
-		DL(LD, ABS, H),
-		DL(LD, ABS, B),
-		DL(LD, IND, W),
-		DL(LD, IND, H),
-		DL(LD, IND, B),
-#undef DL
+		/* 32 bit ALU operations */
+		[BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
+		[BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
+		[BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
+		[BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
+		[BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
+		[BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
+		[BPF_ALU | BPF_OR | BPF_X]  = &&ALU_OR_X,
+		[BPF_ALU | BPF_OR | BPF_K]  = &&ALU_OR_K,
+		[BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
+		[BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
+		[BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
+		[BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
+		[BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
+		[BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
+		[BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
+		[BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
+		[BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
+		[BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
+		[BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
+		[BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
+		[BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
+		[BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
+		[BPF_ALU | BPF_NEG] = &&ALU_NEG,
+		[BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
+		[BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
+		/* 64 bit ALU operations */
+		[BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
+		[BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
+		[BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
+		[BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
+		[BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
+		[BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
+		[BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
+		[BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
+		[BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
+		[BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
+		[BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
+		[BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
+		[BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
+		[BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
+		[BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
+		[BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
+		[BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
+		[BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
+		[BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
+		[BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
+		[BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
+		[BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
+		[BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
+		[BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
+		[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
+		/* Call instruction */
+		[BPF_JMP | BPF_CALL] = &&JMP_CALL,
+		/* Jumps */
+		[BPF_JMP | BPF_JA] = &&JMP_JA,
+		[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
+		[BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
+		[BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
+		[BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
+		[BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
+		[BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
+		[BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
+		[BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
+		[BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
+		[BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
+		[BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
+		[BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
+		[BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
+		[BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
+		/* Program return */
+		[BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
+		/* Store instructions */
+		[BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
+		[BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
+		[BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
+		[BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
+		[BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
+		[BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
+		[BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
+		[BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
+		[BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
+		[BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
+		/* Load instructions */
+		[BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
+		[BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
+		[BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
+		[BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
+		[BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
+		[BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
+		[BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
+		[BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
+		[BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
+		[BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
 	};
 	void *ptr;
 	int off;
@@ -290,10 +295,10 @@ select_insn:
 	ALU(XOR,  ^)
 	ALU(MUL,  *)
 #undef ALU
-	ALU_NEG_0:
+	ALU_NEG:
 		A = (u32) -A;
 		CONT;
-	ALU64_NEG_0:
+	ALU64_NEG:
 		A = -A;
 		CONT;
 	ALU_MOV_X:
@@ -382,7 +387,7 @@ select_insn:
 		CONT;
 
 	/* CALL */
-	JMP_CALL_0:
+	JMP_CALL:
 		/* Function call scratches BPF_R1-BPF_R5 registers,
 		 * preserves BPF_R6-BPF_R9, and stores return value
 		 * into BPF_R0.
@@ -392,7 +397,7 @@ select_insn:
 		CONT;
 
 	/* JMP */
-	JMP_JA_0:
+	JMP_JA:
 		insn += insn->off;
 		CONT;
 	JMP_JEQ_X:
@@ -479,7 +484,7 @@ select_insn:
 			CONT_JMP;
 		}
 		CONT;
-	JMP_EXIT_0:
+	JMP_EXIT:
 		return BPF_R0;
 
 	/* STX and ST and LDX*/
-- 
cgit v1.2.3-71-gd317


From b1fcd35cf53553a0a3ef949b05106d921446abc3 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Fri, 23 May 2014 18:43:58 +0200
Subject: net: filter: let unattached filters use sock_fprog_kern

The sk_unattached_filter_create() API is used by BPF filters that
are not directly attached or related to sockets, and are used in
team, ptp, xt_bpf, cls_bpf, etc. As such all users do their own
internal managment of obtaining filter blocks and thus already
have them in kernel memory and set up before calling into
sk_unattached_filter_create(). As a result, due to __user annotation
in sock_fprog, sparse triggers false positives (incorrect type in
assignment [different address space]) when filters are set up before
passing them to sk_unattached_filter_create(). Therefore, let
sk_unattached_filter_create() API use sock_fprog_kern to overcome
this issue.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_ppp.c              |  4 ++--
 drivers/net/ppp/ppp_generic.c            |  4 ++--
 drivers/net/team/team_mode_loadbalance.c | 10 +++++-----
 include/linux/filter.h                   |  2 +-
 lib/test_bpf.c                           |  2 +-
 net/core/filter.c                        |  2 +-
 net/core/ptp_classifier.c                |  2 +-
 net/netfilter/xt_bpf.c                   |  5 +++--
 net/sched/cls_bpf.c                      |  4 ++--
 9 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index a5da511e3c9a..61ac63237446 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -634,7 +634,7 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg)
 #ifdef CONFIG_IPPP_FILTER
 	case PPPIOCSPASS:
 	{
-		struct sock_fprog fprog;
+		struct sock_fprog_kern fprog;
 		struct sock_filter *code;
 		int err, len = get_filter(argp, &code);
 
@@ -653,7 +653,7 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg)
 	}
 	case PPPIOCSACTIVE:
 	{
-		struct sock_fprog fprog;
+		struct sock_fprog_kern fprog;
 		struct sock_filter *code;
 		int err, len = get_filter(argp, &code);
 
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index e3923ebb693f..91d6c1272fcf 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -757,7 +757,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 		err = get_filter(argp, &code);
 		if (err >= 0) {
-			struct sock_fprog fprog = {
+			struct sock_fprog_kern fprog = {
 				.len = err,
 				.filter = code,
 			};
@@ -778,7 +778,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 		err = get_filter(argp, &code);
 		if (err >= 0) {
-			struct sock_fprog fprog = {
+			struct sock_fprog_kern fprog = {
 				.len = err,
 				.filter = code,
 			};
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index dbde3412ee5e..0a6ee07bf0af 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -49,7 +49,7 @@ struct lb_port_mapping {
 struct lb_priv_ex {
 	struct team *team;
 	struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE];
-	struct sock_fprog *orig_fprog;
+	struct sock_fprog_kern *orig_fprog;
 	struct {
 		unsigned int refresh_interval; /* in tenths of second */
 		struct delayed_work refresh_dw;
@@ -241,10 +241,10 @@ static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx)
 	return 0;
 }
 
-static int __fprog_create(struct sock_fprog **pfprog, u32 data_len,
+static int __fprog_create(struct sock_fprog_kern **pfprog, u32 data_len,
 			  const void *data)
 {
-	struct sock_fprog *fprog;
+	struct sock_fprog_kern *fprog;
 	struct sock_filter *filter = (struct sock_filter *) data;
 
 	if (data_len % sizeof(struct sock_filter))
@@ -262,7 +262,7 @@ static int __fprog_create(struct sock_fprog **pfprog, u32 data_len,
 	return 0;
 }
 
-static void __fprog_destroy(struct sock_fprog *fprog)
+static void __fprog_destroy(struct sock_fprog_kern *fprog)
 {
 	kfree(fprog->filter);
 	kfree(fprog);
@@ -273,7 +273,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
 	struct lb_priv *lb_priv = get_lb_priv(team);
 	struct sk_filter *fp = NULL;
 	struct sk_filter *orig_fp;
-	struct sock_fprog *fprog = NULL;
+	struct sock_fprog_kern *fprog = NULL;
 	int err;
 
 	if (ctx->data.bin_val.len) {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2b0056afd1f7..625f4de9bdf2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -188,7 +188,7 @@ int sk_convert_filter(struct sock_filter *prog, int len,
 		      struct sock_filter_int *new_prog, int *new_len);
 
 int sk_unattached_filter_create(struct sk_filter **pfp,
-				struct sock_fprog *fprog);
+				struct sock_fprog_kern *fprog);
 void sk_unattached_filter_destroy(struct sk_filter *fp);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 3d80adbdb559..e03991ea8cc2 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1472,7 +1472,7 @@ static int run_one(struct sk_filter *fp, struct bpf_test *t)
 static __init int test_bpf(void)
 {
 	struct sk_filter *fp, *fp_ext = NULL;
-	struct sock_fprog fprog;
+	struct sock_fprog_kern fprog;
 	int err, i, err_cnt = 0;
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
diff --git a/net/core/filter.c b/net/core/filter.c
index b3b0889fe089..2c2d35d9d101 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1585,7 +1585,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
  * a negative errno code is returned. On success the return is zero.
  */
 int sk_unattached_filter_create(struct sk_filter **pfp,
-				struct sock_fprog *fprog)
+				struct sock_fprog_kern *fprog)
 {
 	unsigned int fsize = sk_filter_proglen(fprog);
 	struct sk_filter *fp;
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index 37d86157b76e..d3027a73fd4b 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -133,7 +133,7 @@ void __init ptp_classifier_init(void)
 		{ 0x16,  0,  0, 0x00000000 },
 		{ 0x06,  0,  0, 0x00000000 },
 	};
-	struct sock_fprog ptp_prog = {
+	struct sock_fprog_kern ptp_prog = {
 		.len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
 	};
 
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 12d4da8e6c77..bbffdbdaf603 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -23,10 +23,11 @@ MODULE_ALIAS("ip6t_bpf");
 static int bpf_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_bpf_info *info = par->matchinfo;
-	struct sock_fprog program;
+	struct sock_fprog_kern program;
 
 	program.len = info->bpf_program_num_elem;
-	program.filter = (struct sock_filter __user *) info->bpf_program;
+	program.filter = info->bpf_program;
+
 	if (sk_unattached_filter_create(&info->filter, &program)) {
 		pr_info("bpf: check failed: parse error\n");
 		return -EINVAL;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 16186965af97..13f64df2c710 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 {
 	struct sock_filter *bpf_ops, *bpf_old;
 	struct tcf_exts exts;
-	struct sock_fprog tmp;
+	struct sock_fprog_kern tmp;
 	struct sk_filter *fp, *fp_old;
 	u16 bpf_size, bpf_len;
 	u32 classid;
@@ -191,7 +191,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 	memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
 
 	tmp.len = bpf_len;
-	tmp.filter = (struct sock_filter __user *) bpf_ops;
+	tmp.filter = bpf_ops;
 
 	ret = sk_unattached_filter_create(&fp, &tmp);
 	if (ret)
-- 
cgit v1.2.3-71-gd317


From 34ea4d4417bb726245fdaeb2f8951eaa0c18fc4c Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Sun, 9 Mar 2014 21:42:52 -0300
Subject: [media] v4l: vb2: Add a function to discard all DONE buffers

When suspending a device while a video stream is active all buffers
marked as done but not dequeued yet will be kept across suspend and
given back to userspace after resume. This will result in outdated
buffers being dequeued.

Introduce a new vb2 function to mark all done buffers as erroneous
instead, to be used by drivers at resume time.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Sakari Ailus <sakari.ailus@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 drivers/media/v4l2-core/videobuf2-core.c | 24 ++++++++++++++++++++++++
 include/media/videobuf2-core.h           |  1 +
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 349e659d75fb..7c4489c42365 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -1199,6 +1199,30 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
 }
 EXPORT_SYMBOL_GPL(vb2_buffer_done);
 
+/**
+ * vb2_discard_done() - discard all buffers marked as DONE
+ * @q:		videobuf2 queue
+ *
+ * This function is intended to be used with suspend/resume operations. It
+ * discards all 'done' buffers as they would be too old to be requested after
+ * resume.
+ *
+ * Drivers must stop the hardware and synchronize with interrupt handlers and/or
+ * delayed works before calling this function to make sure no buffer will be
+ * touched by the driver and/or hardware.
+ */
+void vb2_discard_done(struct vb2_queue *q)
+{
+	struct vb2_buffer *vb;
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->done_lock, flags);
+	list_for_each_entry(vb, &q->done_list, done_entry)
+		vb->state = VB2_BUF_STATE_ERROR;
+	spin_unlock_irqrestore(&q->done_lock, flags);
+}
+EXPORT_SYMBOL_GPL(vb2_discard_done);
+
 /**
  * __fill_vb2_buffer() - fill a vb2_buffer with information provided in a
  * v4l2_buffer by the userspace. The caller has already verified that struct
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index bca25dc53f9d..8fab6fa0dbfb 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -432,6 +432,7 @@ void *vb2_plane_vaddr(struct vb2_buffer *vb, unsigned int plane_no);
 void *vb2_plane_cookie(struct vb2_buffer *vb, unsigned int plane_no);
 
 void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state);
+void vb2_discard_done(struct vb2_queue *q);
 int vb2_wait_for_all_buffers(struct vb2_queue *q);
 
 int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b);
-- 
cgit v1.2.3-71-gd317


From 59201052dff17d0cc8bc60bc9e89ad4924906ee8 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Fri, 23 May 2014 12:58:06 +0900
Subject: drm/ttm: fix kerneldoc of ttm_bo_create

The kerneldoc header of ttm_bo_create() was referring to another
(nonexisting) function and had a few obsolete or incorrect arguments.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/ttm/ttm_bo_api.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index ee127ec33c60..7526c5bf5610 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -485,13 +485,12 @@ extern int ttm_bo_init(struct ttm_bo_device *bdev,
 			void (*destroy) (struct ttm_buffer_object *));
 
 /**
- * ttm_bo_synccpu_object_init
+ * ttm_bo_create
  *
  * @bdev: Pointer to a ttm_bo_device struct.
- * @bo: Pointer to a ttm_buffer_object to be initialized.
  * @size: Requested size of buffer object.
  * @type: Requested type of buffer object.
- * @flags: Initial placement flags.
+ * @placement: Initial placement.
  * @page_alignment: Data alignment in pages.
  * @interruptible: If needing to sleep while waiting for GPU resources,
  * sleep interruptible.
-- 
cgit v1.2.3-71-gd317


From 1a5f0c13d1a8808c2bdd00630818ed491e1719f5 Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Fri, 23 May 2014 14:33:12 +0300
Subject: mac80211: add a single-transaction driver op to switch contexts

In some cases, when the driver is already using all the channel
contexts it can handle at once, we have to do an in-place switch
(ie. we cannot afford using an extra context temporarily for the
transaction).  But some drivers may not support switching the channel
context assigned to a vif on the fly (ie. without unassigning and
assigning it) while others may only work if the context is changed on
the fly, without unassigning it first.

To allow these different scenarios, add a new driver operation that
let's the driver decide how to handle an in-place switch.

Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h    | 46 +++++++++++++++++++++++++
 net/mac80211/driver-ops.h | 53 +++++++++++++++++++++++++++++
 net/mac80211/trace.h      | 85 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 184 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2c78997bc48d..421b6ecb4b2c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -188,6 +188,43 @@ struct ieee80211_chanctx_conf {
 	u8 drv_priv[0] __aligned(sizeof(void *));
 };
 
+/**
+ * enum ieee80211_chanctx_switch_mode - channel context switch mode
+ * @CHANCTX_SWMODE_REASSIGN_VIF: Both old and new contexts already
+ *	exist (and will continue to exist), but the virtual interface
+ *	needs to be switched from one to the other.
+ * @CHANCTX_SWMODE_SWAP_CONTEXTS: The old context exists but will stop
+ *      to exist with this call, the new context doesn't exist but
+ *      will be active after this call, the virtual interface switches
+ *      from the old to the new (note that the driver may of course
+ *      implement this as an on-the-fly chandef switch of the existing
+ *      hardware context, but the mac80211 pointer for the old context
+ *      will cease to exist and only the new one will later be used
+ *      for changes/removal.)
+ */
+enum ieee80211_chanctx_switch_mode {
+	CHANCTX_SWMODE_REASSIGN_VIF,
+	CHANCTX_SWMODE_SWAP_CONTEXTS,
+};
+
+/**
+ * struct ieee80211_vif_chanctx_switch - vif chanctx switch information
+ *
+ * This is structure is used to pass information about a vif that
+ * needs to switch from one chanctx to another.  The
+ * &ieee80211_chanctx_switch_mode defines how the switch should be
+ * done.
+ *
+ * @vif: the vif that should be switched from old_ctx to new_ctx
+ * @old_ctx: the old context to which the vif was assigned
+ * @new_ctx: the new context to which the vif must be assigned
+ */
+struct ieee80211_vif_chanctx_switch {
+	struct ieee80211_vif *vif;
+	struct ieee80211_chanctx_conf *old_ctx;
+	struct ieee80211_chanctx_conf *new_ctx;
+};
+
 /**
  * enum ieee80211_bss_change - BSS change notification flags
  *
@@ -2736,6 +2773,11 @@ enum ieee80211_roc_type {
  *	to vif. Possible use is for hw queue remapping.
  * @unassign_vif_chanctx: Notifies device driver about channel context being
  *	unbound from vif.
+ * @switch_vif_chanctx: switch a number of vifs from one chanctx to
+ *	another, as specified in the list of
+ *	@ieee80211_vif_chanctx_switch passed to the driver, according
+ *	to the mode defined in &ieee80211_chanctx_switch_mode.
+ *
  * @start_ap: Start operation on the AP interface, this is called after all the
  *	information in bss_conf is set and beacon can be retrieved. A channel
  *	context is bound before this is called. Note that if the driver uses
@@ -2952,6 +2994,10 @@ struct ieee80211_ops {
 	void (*unassign_vif_chanctx)(struct ieee80211_hw *hw,
 				     struct ieee80211_vif *vif,
 				     struct ieee80211_chanctx_conf *ctx);
+	int (*switch_vif_chanctx)(struct ieee80211_hw *hw,
+				  struct ieee80211_vif_chanctx_switch *vifs,
+				  int n_vifs,
+				  enum ieee80211_chanctx_switch_mode mode);
 
 	void (*restart_complete)(struct ieee80211_hw *hw);
 
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 696ef78b1fb7..bd782dcffcc7 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1048,6 +1048,59 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
 	trace_drv_return_void(local);
 }
 
+static inline int
+drv_switch_vif_chanctx(struct ieee80211_local *local,
+		       struct ieee80211_vif_chanctx_switch *vifs,
+		       int n_vifs,
+		       enum ieee80211_chanctx_switch_mode mode)
+{
+	int ret = 0;
+	int i;
+
+	if (!local->ops->switch_vif_chanctx)
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < n_vifs; i++) {
+		struct ieee80211_chanctx *new_ctx =
+			container_of(vifs[i].new_ctx,
+				     struct ieee80211_chanctx,
+				     conf);
+		struct ieee80211_chanctx *old_ctx =
+			container_of(vifs[i].old_ctx,
+				     struct ieee80211_chanctx,
+				     conf);
+
+		WARN_ON_ONCE(!old_ctx->driver_present);
+		WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS &&
+			      new_ctx->driver_present) ||
+			     (mode == CHANCTX_SWMODE_REASSIGN_VIF &&
+			      !new_ctx->driver_present));
+	}
+
+	trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode);
+	ret = local->ops->switch_vif_chanctx(&local->hw,
+					     vifs, n_vifs, mode);
+	trace_drv_return_int(local, ret);
+
+	if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) {
+		for (i = 0; i < n_vifs; i++) {
+			struct ieee80211_chanctx *new_ctx =
+				container_of(vifs[i].new_ctx,
+					     struct ieee80211_chanctx,
+					     conf);
+			struct ieee80211_chanctx *old_ctx =
+				container_of(vifs[i].old_ctx,
+					     struct ieee80211_chanctx,
+					     conf);
+
+			new_ctx->driver_present = true;
+			old_ctx->driver_present = false;
+		}
+	}
+
+	return ret;
+}
+
 static inline int drv_start_ap(struct ieee80211_local *local,
 			       struct ieee80211_sub_if_data *sdata)
 {
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 942f64b8ce0e..b22d6969cde9 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1389,6 +1389,91 @@ TRACE_EVENT(drv_change_chanctx,
 	)
 );
 
+#if !defined(__TRACE_VIF_ENTRY)
+#define __TRACE_VIF_ENTRY
+struct trace_vif_entry {
+	enum nl80211_iftype vif_type;
+	bool p2p;
+	char vif_name[IFNAMSIZ];
+} __packed;
+
+struct trace_chandef_entry {
+	u32 control_freq;
+	u32 chan_width;
+	u32 center_freq1;
+	u32 center_freq2;
+} __packed;
+
+struct trace_switch_entry {
+	struct trace_vif_entry vif;
+	struct trace_chandef_entry old_chandef;
+	struct trace_chandef_entry new_chandef;
+} __packed;
+
+#define SWITCH_ENTRY_ASSIGN(to, from) local_vifs[i].to = vifs[i].from
+#endif
+
+TRACE_EVENT(drv_switch_vif_chanctx,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_vif_chanctx_switch *vifs,
+		 int n_vifs, enum ieee80211_chanctx_switch_mode mode),
+	    TP_ARGS(local, vifs, n_vifs, mode),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(int, n_vifs)
+		__field(u32, mode)
+		__dynamic_array(u8, vifs,
+				sizeof(struct trace_switch_entry) * n_vifs)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->n_vifs = n_vifs;
+		__entry->mode = mode;
+		{
+			struct trace_switch_entry *local_vifs =
+				__get_dynamic_array(vifs);
+			int i;
+
+			for (i = 0; i < n_vifs; i++) {
+				struct ieee80211_sub_if_data *sdata;
+
+				sdata = container_of(vifs[i].vif,
+						struct ieee80211_sub_if_data,
+						vif);
+
+				SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type);
+				SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p);
+				strncpy(local_vifs[i].vif.vif_name,
+					sdata->name,
+					sizeof(local_vifs[i].vif.vif_name));
+				SWITCH_ENTRY_ASSIGN(old_chandef.control_freq,
+						old_ctx->def.chan->center_freq);
+				SWITCH_ENTRY_ASSIGN(old_chandef.chan_width,
+						    old_ctx->def.width);
+				SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1,
+						    old_ctx->def.center_freq1);
+				SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2,
+						    old_ctx->def.center_freq2);
+				SWITCH_ENTRY_ASSIGN(new_chandef.control_freq,
+						new_ctx->def.chan->center_freq);
+				SWITCH_ENTRY_ASSIGN(new_chandef.chan_width,
+						    new_ctx->def.width);
+				SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1,
+						    new_ctx->def.center_freq1);
+				SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2,
+						    new_ctx->def.center_freq2);
+			}
+		}
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " n_vifs:%d mode:%d",
+		LOCAL_PR_ARG, __entry->n_vifs, __entry->mode
+	)
+);
+
 DECLARE_EVENT_CLASS(local_sdata_chanctx,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
-- 
cgit v1.2.3-71-gd317


From 61dc1abae64854c7cef543598b9e6f04886c4ebd Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick.dyer@itdev.co.uk>
Date: Sun, 18 May 2014 23:16:49 -0700
Subject: Input: atmel_mxt_ts - read screen config from chip

By reading the touchscreen configuration from the settings that the
maXTouch chip is actually using, we can remove some platform data.

The matrix size is not used for anything, and results in some rather
confusing code to re-read it because it may change when configuration
is downloaded, so don't print it out.

Signed-off-by: Nick Dyer <nick.dyer@itdev.co.uk>
Acked-by: Benson Leung <bleung@chromium.org>
Acked-by: Yufeng Shen <miletus@chromium.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 arch/arm/mach-s5pv210/mach-goni.c         |   3 -
 drivers/input/touchscreen/atmel_mxt_ts.c  | 136 ++++++++++++++----------------
 drivers/platform/chrome/chromeos_laptop.c |   6 --
 include/linux/i2c/atmel_mxt_ts.h          |  15 ----
 4 files changed, 65 insertions(+), 95 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c
index e549ecf0e5dc..bb9354f45e27 100644
--- a/arch/arm/mach-s5pv210/mach-goni.c
+++ b/arch/arm/mach-s5pv210/mach-goni.c
@@ -239,9 +239,6 @@ static void __init goni_radio_init(void)
 
 /* TSP */
 static struct mxt_platform_data qt602240_platform_data = {
-	.x_size		= 800,
-	.y_size		= 480,
-	.orient		= MXT_DIAGONAL,
 	.irqflags	= IRQF_TRIGGER_FALLING,
 };
 
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 7a9197a19f67..75493ca8e784 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -100,33 +100,16 @@
 
 /* MXT_TOUCH_MULTI_T9 field */
 #define MXT_TOUCH_CTRL		0
-#define MXT_TOUCH_XORIGIN	1
-#define MXT_TOUCH_YORIGIN	2
-#define MXT_TOUCH_XSIZE		3
-#define MXT_TOUCH_YSIZE		4
-#define MXT_TOUCH_BLEN		6
-#define MXT_TOUCH_TCHTHR	7
-#define MXT_TOUCH_TCHDI		8
-#define MXT_TOUCH_ORIENT	9
-#define MXT_TOUCH_MOVHYSTI	11
-#define MXT_TOUCH_MOVHYSTN	12
-#define MXT_TOUCH_NUMTOUCH	14
-#define MXT_TOUCH_MRGHYST	15
-#define MXT_TOUCH_MRGTHR	16
-#define MXT_TOUCH_AMPHYST	17
-#define MXT_TOUCH_XRANGE_LSB	18
-#define MXT_TOUCH_XRANGE_MSB	19
-#define MXT_TOUCH_YRANGE_LSB	20
-#define MXT_TOUCH_YRANGE_MSB	21
-#define MXT_TOUCH_XLOCLIP	22
-#define MXT_TOUCH_XHICLIP	23
-#define MXT_TOUCH_YLOCLIP	24
-#define MXT_TOUCH_YHICLIP	25
-#define MXT_TOUCH_XEDGECTRL	26
-#define MXT_TOUCH_XEDGEDIST	27
-#define MXT_TOUCH_YEDGECTRL	28
-#define MXT_TOUCH_YEDGEDIST	29
-#define MXT_TOUCH_JUMPLIMIT	30
+#define MXT_T9_ORIENT		9
+#define MXT_T9_RANGE		18
+
+struct t9_range {
+	u16 x;
+	u16 y;
+} __packed;
+
+/* Touch orient bits */
+#define MXT_XY_SWITCH		(1 << 0)
 
 /* MXT_PROCI_GRIPFACE_T20 field */
 #define MXT_GRIPFACE_CTRL	0
@@ -211,11 +194,6 @@
 #define MXT_PRESS		(1 << 6)
 #define MXT_DETECT		(1 << 7)
 
-/* Touch orient bits */
-#define MXT_XY_SWITCH		(1 << 0)
-#define MXT_X_INVERT		(1 << 1)
-#define MXT_Y_INVERT		(1 << 2)
-
 /* Touchscreen absolute values */
 #define MXT_MAX_AREA		0xff
 
@@ -580,11 +558,6 @@ static int __mxt_read_reg(struct i2c_client *client,
 	return ret;
 }
 
-static int mxt_read_reg(struct i2c_client *client, u16 reg, u8 *val)
-{
-	return __mxt_read_reg(client, reg, 1, val);
-}
-
 static int __mxt_write_reg(struct i2c_client *client, u16 reg, u16 len,
 			   const void *val)
 {
@@ -1029,12 +1002,59 @@ static void mxt_free_object_table(struct mxt_data *data)
 	data->T19_reportid = 0;
 }
 
+static int mxt_read_t9_resolution(struct mxt_data *data)
+{
+	struct i2c_client *client = data->client;
+	int error;
+	struct t9_range range;
+	unsigned char orient;
+	struct mxt_object *object;
+
+	object = mxt_get_object(data, MXT_TOUCH_MULTI_T9);
+	if (!object)
+		return -EINVAL;
+
+	error = __mxt_read_reg(client,
+			       object->start_address + MXT_T9_RANGE,
+			       sizeof(range), &range);
+	if (error)
+		return error;
+
+	le16_to_cpus(&range.x);
+	le16_to_cpus(&range.y);
+
+	error =  __mxt_read_reg(client,
+				object->start_address + MXT_T9_ORIENT,
+				1, &orient);
+	if (error)
+		return error;
+
+	/* Handle default values */
+	if (range.x == 0)
+		range.x = 1023;
+
+	if (range.y == 0)
+		range.y = 1023;
+
+	if (orient & MXT_XY_SWITCH) {
+		data->max_x = range.y;
+		data->max_y = range.x;
+	} else {
+		data->max_x = range.x;
+		data->max_y = range.y;
+	}
+
+	dev_dbg(&client->dev,
+		"Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+
+	return 0;
+}
+
 static int mxt_initialize(struct mxt_data *data)
 {
 	struct i2c_client *client = data->client;
 	struct mxt_info *info = &data->info;
 	int error;
-	u8 val;
 
 	error = mxt_get_info(data);
 	if (error)
@@ -1063,26 +1083,16 @@ static int mxt_initialize(struct mxt_data *data)
 		goto err_free_object_table;
 	}
 
-	/* Update matrix size at info struct */
-	error = mxt_read_reg(client, MXT_MATRIX_X_SIZE, &val);
-	if (error)
-		goto err_free_object_table;
-	info->matrix_xsize = val;
-
-	error = mxt_read_reg(client, MXT_MATRIX_Y_SIZE, &val);
-	if (error)
+	error = mxt_read_t9_resolution(data);
+	if (error) {
+		dev_err(&client->dev, "Failed to initialize T9 resolution\n");
 		goto err_free_object_table;
-	info->matrix_ysize = val;
-
-	dev_info(&client->dev,
-			"Family: %u Variant: %u Firmware V%u.%u.%02X\n",
-			info->family_id, info->variant_id, info->version >> 4,
-			info->version & 0xf, info->build);
+	}
 
 	dev_info(&client->dev,
-			"Matrix X Size: %u Matrix Y Size: %u Objects: %u\n",
-			info->matrix_xsize, info->matrix_ysize,
-			info->object_num);
+		 "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n",
+		 info->family_id, info->variant_id, info->version >> 4,
+		 info->version & 0xf, info->build, info->object_num);
 
 	return 0;
 
@@ -1091,20 +1101,6 @@ err_free_object_table:
 	return error;
 }
 
-static void mxt_calc_resolution(struct mxt_data *data)
-{
-	unsigned int max_x = data->pdata->x_size - 1;
-	unsigned int max_y = data->pdata->y_size - 1;
-
-	if (data->pdata->orient & MXT_XY_SWITCH) {
-		data->max_x = max_y;
-		data->max_y = max_x;
-	} else {
-		data->max_x = max_x;
-		data->max_y = max_y;
-	}
-}
-
 /* Firmware Version is returned as Major.Minor.Build */
 static ssize_t mxt_fw_version_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
@@ -1430,8 +1426,6 @@ static int mxt_probe(struct i2c_client *client,
 	init_completion(&data->reset_completion);
 	init_completion(&data->crc_completion);
 
-	mxt_calc_resolution(data);
-
 	error = mxt_initialize(data);
 	if (error)
 		goto err_free_mem;
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index 8b7523ab62e5..7f1a2e2711bd 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -94,9 +94,6 @@ static int mxt_t19_keys[] = {
 };
 
 static struct mxt_platform_data atmel_224s_tp_platform_data = {
-	.x_size			= 102*20,
-	.y_size			= 68*20,
-	.orient			= MXT_VERTICAL_FLIP,
 	.irqflags		= IRQF_TRIGGER_FALLING,
 	.t19_num_keys		= ARRAY_SIZE(mxt_t19_keys),
 	.t19_keymap		= mxt_t19_keys,
@@ -111,9 +108,6 @@ static struct i2c_board_info atmel_224s_tp_device = {
 };
 
 static struct mxt_platform_data atmel_1664s_platform_data = {
-	.x_size			= 1700,
-	.y_size			= 2560,
-	.orient			= MXT_ROTATED_90_COUNTER,
 	.irqflags		= IRQF_TRIGGER_FALLING,
 	.config			= NULL,
 	.config_length		= 0,
diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h
index 9f92135b6620..3891dc1de21c 100644
--- a/include/linux/i2c/atmel_mxt_ts.h
+++ b/include/linux/i2c/atmel_mxt_ts.h
@@ -15,26 +15,11 @@
 
 #include <linux/types.h>
 
-/* Orient */
-#define MXT_NORMAL		0x0
-#define MXT_DIAGONAL		0x1
-#define MXT_HORIZONTAL_FLIP	0x2
-#define MXT_ROTATED_90_COUNTER	0x3
-#define MXT_VERTICAL_FLIP	0x4
-#define MXT_ROTATED_90		0x5
-#define MXT_ROTATED_180		0x6
-#define MXT_DIAGONAL_COUNTER	0x7
-
 /* The platform data for the Atmel maXTouch touchscreen driver */
 struct mxt_platform_data {
 	const u8 *config;
 	size_t config_length;
 	u32 config_crc;
-
-	unsigned int x_size;
-	unsigned int y_size;
-	unsigned char orient;
-
 	unsigned long irqflags;
 	u8 t19_num_keys;
 	const unsigned int *t19_keymap;
-- 
cgit v1.2.3-71-gd317


From b8380580836bf81d032da6099879da9f6b515325 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 23 Apr 2014 15:49:04 +0200
Subject: drm/dp: Add missing kernel-doc

Commit 9dc4056026e0 (drm/dp: let drivers specify the name of the I2C-
over-AUX adapter) introduced a new field but didn't add the proper
kernel-doc for it.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_dp_helper.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index c8857e6159a5..efcde2c38cc1 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -509,6 +509,7 @@ struct drm_dp_aux_msg {
 
 /**
  * struct drm_dp_aux - DisplayPort AUX channel
+ * @name: user-visible name of this AUX channel and the I2C-over-AUX adapter
  * @ddc: I2C adapter that can be used for I2C-over-AUX communication
  * @dev: pointer to struct device that is the parent for this AUX channel
  * @transfer: transfers a message representing a single AUX transaction
-- 
cgit v1.2.3-71-gd317


From b475598aec63f2efbc78f0ff1895d917d2370846 Mon Sep 17 00:00:00 2001
From: Haggai Eran <haggaie@mellanox.com>
Date: Thu, 22 May 2014 14:50:10 +0300
Subject: mlx5_core: Store MR attributes in mlx5_mr_core during creation and
 after UMR

The patch stores iova, pd and size during mr creation and after UMRs
that modify them.  It removes the unused access flags field.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx5/mr.c              | 4 ++++
 drivers/net/ethernet/mellanox/mlx5/core/mr.c | 4 ++++
 include/linux/mlx5/driver.h                  | 1 -
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9d932a2aa9f4..f472ab246d94 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -794,6 +794,10 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 		err = -EFAULT;
 	}
 
+	mr->mmr.iova = virt_addr;
+	mr->mmr.size = len;
+	mr->mmr.pd = to_mpd(pd)->pdn;
+
 unmap_dma:
 	up(&umrc->sem);
 	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 4cc927649404..ac52a0fe2d3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -82,7 +82,11 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
 		return mlx5_cmd_status_to_err(&lout.hdr);
 	}
 
+	mr->iova = be64_to_cpu(in->seg.start_addr);
+	mr->size = be64_to_cpu(in->seg.len);
 	mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
+	mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff;
+
 	mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
 		      be32_to_cpu(lout.mkey), key, mr->key);
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 93cef6313e72..2bce4aad2570 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -427,7 +427,6 @@ struct mlx5_core_mr {
 	u64			size;
 	u32			key;
 	u32			pd;
-	u32			access;
 };
 
 struct mlx5_core_srq {
-- 
cgit v1.2.3-71-gd317


From 29fe5732bef2ba3c785fcb4aa5ba7160ad8faba5 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Thu, 6 Mar 2014 09:44:18 -0800
Subject: mtd: pfow: remove unused variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes warning:

In file included from drivers/mtd/lpddr/qinfo_probe.c:31:0:
include/linux/mtd/pfow.h: In function ‘send_pfow_command’:
include/linux/mtd/pfow.h:104:6: warning: variable ‘chipnum’ set but not used [-Wunused-but-set-variable]
  int chipnum;
      ^

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/pfow.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h
index b730d4f84655..42ff7ff09bf5 100644
--- a/include/linux/mtd/pfow.h
+++ b/include/linux/mtd/pfow.h
@@ -101,9 +101,6 @@ static inline void send_pfow_command(struct map_info *map,
 				unsigned long len, map_word *datum)
 {
 	int bits_per_chip = map_bankwidth(map) * 8;
-	int chipnum;
-	struct lpddr_private *lpddr = map->fldrv_priv;
-	chipnum = adr >> lpddr->chipshift;
 
 	map_write(map, CMD(cmd_code), map->pfow_base + PFOW_COMMAND_CODE);
 	map_write(map, CMD(adr & ((1<<bits_per_chip) - 1)),
-- 
cgit v1.2.3-71-gd317


From e7cd1d1eb16fcdf53001b926187a82f1f3e1a7e6 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 20 May 2014 11:17:54 -0700
Subject: mfd: twl4030-power: Add generic reset configuration

The twl4030 PMIC needs to be configured properly for things like
warm reset and deeper idle states so the PMIC manages the regulators
properly based on the hardware triggers from the SoC. Earlier
we have configured twl4030 using platform data, but we want to
do it for device tree based booting also.

In some cases configuring twl4030 is needed for things to work.
For example, when rebooting an OMAP3530 at 125 MHz, it hangs.
With this patch, TWL4030 will be reset when a warm reset occures,
and OMAP3530 does not hang on reboot.

Let's add device tree support and configure things for warm reset
as the default when compatible = "ti,twl4030-power". More
complicated configurations can be added to the driver based on
other compatible flags.

Note we now also make the pdata const like it should be.
This allows use it for match->data with the device tree
related functions.

Based on earlier patch by Matthias Brugger <matthias.bgg@gmail.com>
and Lesly A M <leslyam@ti.com>.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 .../devicetree/bindings/mfd/twl4030-power.txt      |   7 +-
 drivers/mfd/twl4030-power.c                        | 109 ++++++++++++++++++---
 include/linux/i2c/twl.h                            |   3 +
 3 files changed, 105 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/mfd/twl4030-power.txt b/Documentation/devicetree/bindings/mfd/twl4030-power.txt
index 8e15ec35ac99..b90611650cd9 100644
--- a/Documentation/devicetree/bindings/mfd/twl4030-power.txt
+++ b/Documentation/devicetree/bindings/mfd/twl4030-power.txt
@@ -5,7 +5,12 @@ to control the power resources, including power scripts. For now, the
 binding only supports the complete shutdown of the system after poweroff.
 
 Required properties:
-- compatible : must be "ti,twl4030-power"
+- compatible : must be one of the following
+	"ti,twl4030-power"
+	"ti,twl4030-power-reset"
+
+The use of ti,twl4030-power-reset is recommended at least on
+3530 that needs a special configuration for warm reset to work.
 
 Optional properties:
 - ti,use_poweroff: With this flag, the chip will initiates an ACTIVE-to-OFF or
diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 0b037dca46a8..cb5b0cb8f933 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -29,6 +29,7 @@
 #include <linux/i2c/twl.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 
 #include <asm/mach-types.h>
 
@@ -128,6 +129,40 @@ static u8 res_config_addrs[] = {
 	[RES_MAIN_REF]	= 0x94,
 };
 
+/*
+ * Usable values for .remap_sleep and .remap_off
+ * Based on table "5.3.3 Resource Operating modes"
+ */
+enum {
+	TWL_REMAP_OFF = 0,
+	TWL_REMAP_SLEEP = 8,
+	TWL_REMAP_ACTIVE = 9,
+};
+
+/*
+ * Macros to configure the PM register states for various resources.
+ * Note that we can make MSG_SINGULAR etc private to this driver once
+ * omap3 has been made DT only.
+ */
+#define TWL_DFLT_DELAY		2	/* typically 2 32 KiHz cycles */
+#define TWL_RESOURCE_SET(res, state)					\
+	{ MSG_SINGULAR(DEV_GRP_NULL, (res), (state)), TWL_DFLT_DELAY }
+#define TWL_RESOURCE_ON(res)	TWL_RESOURCE_SET(res, RES_STATE_ACTIVE)
+#define TWL_RESOURCE_OFF(res)	TWL_RESOURCE_SET(res, RES_STATE_OFF)
+#define TWL_RESOURCE_RESET(res)	TWL_RESOURCE_SET(res, RES_STATE_WRST)
+/*
+ * It seems that type1 and type2 is just the resource init order
+ * number for the type1 and type2 group.
+ */
+#define TWL_RESOURCE_GROUP_RESET(group, type1, type2)			\
+	{ MSG_BROADCAST(DEV_GRP_NULL, (group), (type1), (type2),	\
+		RES_STATE_WRST), TWL_DFLT_DELAY }
+#define TWL_REMAP_SLEEP(res, devgrp, typ, typ2)				\
+	{ .resource = (res), .devgroup = (devgrp),			\
+	  .type = (typ), .type2 = (typ2),				\
+	  .remap_off = TWL_REMAP_OFF,					\
+	  .remap_sleep = TWL_REMAP_SLEEP, }
+
 static int twl4030_write_script_byte(u8 address, u8 byte)
 {
 	int err;
@@ -502,7 +537,8 @@ int twl4030_remove_script(u8 flags)
 	return err;
 }
 
-static int twl4030_power_configure_scripts(struct twl4030_power_data *pdata)
+static int
+twl4030_power_configure_scripts(const struct twl4030_power_data *pdata)
 {
 	int err;
 	int i;
@@ -518,7 +554,8 @@ static int twl4030_power_configure_scripts(struct twl4030_power_data *pdata)
 	return 0;
 }
 
-static int twl4030_power_configure_resources(struct twl4030_power_data *pdata)
+static int
+twl4030_power_configure_resources(const struct twl4030_power_data *pdata)
 {
 	struct twl4030_resconfig *resconfig = pdata->resource_config;
 	int err;
@@ -550,7 +587,7 @@ void twl4030_power_off(void)
 		pr_err("TWL4030 Unable to power off\n");
 }
 
-static bool twl4030_power_use_poweroff(struct twl4030_power_data *pdata,
+static bool twl4030_power_use_poweroff(const struct twl4030_power_data *pdata,
 					struct device_node *node)
 {
 	if (pdata && pdata->use_poweroff)
@@ -562,10 +599,60 @@ static bool twl4030_power_use_poweroff(struct twl4030_power_data *pdata,
 	return false;
 }
 
+#ifdef CONFIG_OF
+
+/* Generic warm reset configuration for omap3 */
+
+static struct twl4030_ins omap3_wrst_seq[] = {
+	TWL_RESOURCE_OFF(RES_NRES_PWRON),
+	TWL_RESOURCE_OFF(RES_RESET),
+	TWL_RESOURCE_RESET(RES_MAIN_REF),
+	TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R2),
+	TWL_RESOURCE_RESET(RES_VUSB_3V1),
+	TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R1),
+	TWL_RESOURCE_GROUP_RESET(RES_GRP_RC, RES_TYPE_ALL, RES_TYPE2_R0),
+	TWL_RESOURCE_ON(RES_RESET),
+	TWL_RESOURCE_ON(RES_NRES_PWRON),
+};
+
+static struct twl4030_script omap3_wrst_script = {
+	.script	= omap3_wrst_seq,
+	.size	= ARRAY_SIZE(omap3_wrst_seq),
+	.flags	= TWL4030_WRST_SCRIPT,
+};
+
+static struct twl4030_script *omap3_reset_scripts[] = {
+	&omap3_wrst_script,
+};
+
+static struct twl4030_resconfig omap3_rconfig[] = {
+	TWL_REMAP_SLEEP(RES_HFCLKOUT, DEV_GRP_P3, -1, -1),
+	TWL_REMAP_SLEEP(RES_VDD1, DEV_GRP_P1, -1, -1),
+	TWL_REMAP_SLEEP(RES_VDD2, DEV_GRP_P1, -1, -1),
+	{ 0, 0 },
+};
+
+static struct twl4030_power_data omap3_reset = {
+	.scripts		= omap3_reset_scripts,
+	.num			= ARRAY_SIZE(omap3_reset_scripts),
+	.resource_config	= omap3_rconfig,
+};
+
+static struct of_device_id twl4030_power_of_match[] = {
+	{
+		.compatible = "ti,twl4030-power-reset",
+		.data = &omap3_reset,
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, twl4030_power_of_match);
+#endif	/* CONFIG_OF */
+
 static int twl4030_power_probe(struct platform_device *pdev)
 {
-	struct twl4030_power_data *pdata = dev_get_platdata(&pdev->dev);
+	const struct twl4030_power_data *pdata = dev_get_platdata(&pdev->dev);
 	struct device_node *node = pdev->dev.of_node;
+	const struct of_device_id *match;
 	int err = 0;
 	int err2 = 0;
 	u8 val;
@@ -586,8 +673,12 @@ static int twl4030_power_probe(struct platform_device *pdev)
 		return err;
 	}
 
+	match = of_match_device(of_match_ptr(twl4030_power_of_match),
+				&pdev->dev);
+	if (match && match->data)
+		pdata = match->data;
+
 	if (pdata) {
-		/* TODO: convert to device tree */
 		err = twl4030_power_configure_scripts(pdata);
 		if (err) {
 			pr_err("TWL4030 failed to load scripts\n");
@@ -637,14 +728,6 @@ static int twl4030_power_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_OF
-static const struct of_device_id twl4030_power_of_match[] = {
-	{.compatible = "ti,twl4030-power", },
-	{ },
-};
-MODULE_DEVICE_TABLE(of, twl4030_power_of_match);
-#endif
-
 static struct platform_driver twl4030_power_driver = {
 	.driver = {
 		.name	= "twl4030_power",
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index ade1c06d4ceb..5fe031375ed4 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -486,7 +486,10 @@ static inline int twl6030_mmc_card_detect(struct device *dev, int slot)
 #define RES_GRP_ALL		0x7	/* All resource groups */
 
 #define RES_TYPE2_R0		0x0
+#define RES_TYPE2_R1		0x1
+#define RES_TYPE2_R2		0x2
 
+#define RES_TYPE_R0		0x0
 #define RES_TYPE_ALL		0x7
 
 /* Resource states */
-- 
cgit v1.2.3-71-gd317


From 482e7db160df713a2d1d4c7ee9fffad92008283f Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 20 May 2014 11:17:54 -0700
Subject: mfd: twl4030-power: Add support for board specific configuration

With the recommended twl4030 configuration added, we can now add
board specific changes as modifications to the recommended
configuration.

Note that the data is private to this driver, and the data must
always have a NULL resource in the sentinel.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/twl4030-power.c | 21 +++++++++++++++++++++
 include/linux/i2c/twl.h     |  1 +
 2 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 2bfbb40ca9d2..4846c7b48ebb 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -567,13 +567,34 @@ twl4030_power_configure_scripts(const struct twl4030_power_data *pdata)
 	return 0;
 }
 
+static void twl4030_patch_rconfig(struct twl4030_resconfig *common,
+				  struct twl4030_resconfig *board)
+{
+	while (common->resource) {
+		struct twl4030_resconfig *b = board;
+
+		while (b->resource) {
+			if (b->resource == common->resource) {
+				*common = *b;
+				break;
+			}
+			b++;
+		}
+		common++;
+	}
+}
+
 static int
 twl4030_power_configure_resources(const struct twl4030_power_data *pdata)
 {
 	struct twl4030_resconfig *resconfig = pdata->resource_config;
+	struct twl4030_resconfig *boardconf = pdata->board_config;
 	int err;
 
 	if (resconfig) {
+		if (boardconf)
+			twl4030_patch_rconfig(resconfig, boardconf);
+
 		while (resconfig->resource) {
 			err = twl4030_configure_resource(resconfig);
 			if (err)
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 5fe031375ed4..57fe782bf031 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -662,6 +662,7 @@ struct twl4030_power_data {
 	struct twl4030_script **scripts;
 	unsigned num;
 	struct twl4030_resconfig *resource_config;
+	struct twl4030_resconfig *board_config;
 #define TWL4030_RESCONFIG_UNDEF	((u8)-1)
 	bool use_poweroff;	/* Board is wired for TWL poweroff */
 };
-- 
cgit v1.2.3-71-gd317


From aa76fcf473f6bfa839f37f77b6fdb71f0fb88d8f Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Fri, 21 Feb 2014 17:36:21 +0200
Subject: CLK: TI: DPLL: add support for omap2 core dpll

OMAP2 has slightly different DPLL compared to later OMAP generations.
This patch adds support for the ti,omap2-dpll-core-clock and also adds
the bindings documentation.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 .../devicetree/bindings/clock/ti/dpll.txt          |  9 +++
 arch/arm/mach-omap2/clock.h                        |  1 -
 arch/arm/mach-omap2/clock2xxx.h                    |  4 --
 drivers/clk/ti/dpll.c                              | 78 +++++++++++++++++++---
 include/linux/clk/ti.h                             |  6 ++
 5 files changed, 82 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt
index 30bfdb7c9f18..50a1a427608f 100644
--- a/Documentation/devicetree/bindings/clock/ti/dpll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt
@@ -30,6 +30,7 @@ Required properties:
 		"ti,am3-dpll-clock",
 		"ti,am3-dpll-core-clock",
 		"ti,am3-dpll-x2-clock",
+		"ti,omap2-dpll-core-clock",
 
 - #clock-cells : from common clock binding; shall be set to 0.
 - clocks : link phandles of parent clocks, first entry lists reference clock
@@ -41,6 +42,7 @@ Required properties:
 	"mult-div1" - contains the multiplier / divider register base address
 	"autoidle" - contains the autoidle register base address (optional)
   ti,am3-* dpll types do not have autoidle register
+  ti,omap2-* dpll type does not support idlest / autoidle registers
 
 Optional properties:
 - DPLL mode setting - defining any one or more of the following overrides
@@ -73,3 +75,10 @@ Examples:
 		clocks = <&sys_clkin_ck>, <&sys_clkin_ck>;
 		reg = <0x90>, <0x5c>, <0x68>;
 	};
+
+	dpll_ck: dpll_ck {
+		#clock-cells = <0>;
+		compatible = "ti,omap2-dpll-core-clock";
+		clocks = <&sys_ck>, <&sys_ck>;
+		reg = <0x0500>, <0x0540>;
+	};
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index bda767a9dea8..f6e9904d7a75 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -279,7 +279,6 @@ extern const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait;
 extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait;
 extern const struct clk_hw_omap_ops clkhwops_apll54;
 extern const struct clk_hw_omap_ops clkhwops_apll96;
-extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll;
 extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait;
 
 /* clksel_rate blocks shared between OMAP44xx and AM33xx */
diff --git a/arch/arm/mach-omap2/clock2xxx.h b/arch/arm/mach-omap2/clock2xxx.h
index 539dc08afbba..45f41a411603 100644
--- a/arch/arm/mach-omap2/clock2xxx.h
+++ b/arch/arm/mach-omap2/clock2xxx.h
@@ -21,10 +21,6 @@ unsigned long omap2xxx_sys_clk_recalc(struct clk_hw *clk,
 				      unsigned long parent_rate);
 unsigned long omap2_osc_clk_recalc(struct clk_hw *clk,
 				   unsigned long parent_rate);
-unsigned long omap2_dpllcore_recalc(struct clk_hw *hw,
-				    unsigned long parent_rate);
-int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate,
-			     unsigned long parent_rate);
 void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw);
 unsigned long omap2_clk_apll54_recalc(struct clk_hw *hw,
 				      unsigned long parent_rate);
diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index dda262db42ea..34e233990212 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c
@@ -35,21 +35,18 @@ static const struct clk_ops dpll_m4xen_ck_ops = {
 	.set_rate	= &omap3_noncore_dpll_set_rate,
 	.get_parent	= &omap2_init_dpll_parent,
 };
+#else
+static const struct clk_ops dpll_m4xen_ck_ops = {};
 #endif
 
+#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4) || \
+	defined(CONFIG_SOC_OMAP5) || defined(CONFIG_SOC_DRA7XX) || \
+	defined(CONFIG_SOC_AM33XX) || defined(CONFIG_SOC_AM43XX)
 static const struct clk_ops dpll_core_ck_ops = {
 	.recalc_rate	= &omap3_dpll_recalc,
 	.get_parent	= &omap2_init_dpll_parent,
 };
 
-#ifdef CONFIG_ARCH_OMAP3
-static const struct clk_ops omap3_dpll_core_ck_ops = {
-	.get_parent	= &omap2_init_dpll_parent,
-	.recalc_rate	= &omap3_dpll_recalc,
-	.round_rate	= &omap2_dpll_round_rate,
-};
-#endif
-
 static const struct clk_ops dpll_ck_ops = {
 	.enable		= &omap3_noncore_dpll_enable,
 	.disable	= &omap3_noncore_dpll_disable,
@@ -65,6 +62,33 @@ static const struct clk_ops dpll_no_gate_ck_ops = {
 	.round_rate	= &omap2_dpll_round_rate,
 	.set_rate	= &omap3_noncore_dpll_set_rate,
 };
+#else
+static const struct clk_ops dpll_core_ck_ops = {};
+static const struct clk_ops dpll_ck_ops = {};
+static const struct clk_ops dpll_no_gate_ck_ops = {};
+const struct clk_hw_omap_ops clkhwops_omap3_dpll = {};
+#endif
+
+#ifdef CONFIG_ARCH_OMAP2
+static const struct clk_ops omap2_dpll_core_ck_ops = {
+	.get_parent	= &omap2_init_dpll_parent,
+	.recalc_rate	= &omap2_dpllcore_recalc,
+	.round_rate	= &omap2_dpll_round_rate,
+	.set_rate	= &omap2_reprogram_dpllcore,
+};
+#else
+static const struct clk_ops omap2_dpll_core_ck_ops = {};
+#endif
+
+#ifdef CONFIG_ARCH_OMAP3
+static const struct clk_ops omap3_dpll_core_ck_ops = {
+	.get_parent	= &omap2_init_dpll_parent,
+	.recalc_rate	= &omap3_dpll_recalc,
+	.round_rate	= &omap2_dpll_round_rate,
+};
+#else
+static const struct clk_ops omap3_dpll_core_ck_ops = {};
+#endif
 
 #ifdef CONFIG_ARCH_OMAP3
 static const struct clk_ops omap3_dpll_ck_ops = {
@@ -237,10 +261,27 @@ static void __init of_ti_dpll_setup(struct device_node *node,
 	init->parent_names = parent_names;
 
 	dd->control_reg = ti_clk_get_reg_addr(node, 0);
-	dd->idlest_reg = ti_clk_get_reg_addr(node, 1);
-	dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2);
 
-	if (!dd->control_reg || !dd->idlest_reg || !dd->mult_div1_reg)
+	/*
+	 * Special case for OMAP2 DPLL, register order is different due to
+	 * missing idlest_reg, also clkhwops is different. Detected from
+	 * missing idlest_mask.
+	 */
+	if (!dd->idlest_mask) {
+		dd->mult_div1_reg = ti_clk_get_reg_addr(node, 1);
+#ifdef CONFIG_ARCH_OMAP2
+		clk_hw->ops = &clkhwops_omap2xxx_dpll;
+		omap2xxx_clkt_dpllcore_init(&clk_hw->hw);
+#endif
+	} else {
+		dd->idlest_reg = ti_clk_get_reg_addr(node, 1);
+		if (!dd->idlest_reg)
+			goto cleanup;
+
+		dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2);
+	}
+
+	if (!dd->control_reg || !dd->mult_div1_reg)
 		goto cleanup;
 
 	if (dd->autoidle_mask) {
@@ -547,3 +588,18 @@ static void __init of_ti_am3_core_dpll_setup(struct device_node *node)
 }
 CLK_OF_DECLARE(ti_am3_core_dpll_clock, "ti,am3-dpll-core-clock",
 	       of_ti_am3_core_dpll_setup);
+
+static void __init of_ti_omap2_core_dpll_setup(struct device_node *node)
+{
+	const struct dpll_data dd = {
+		.enable_mask = 0x3,
+		.mult_mask = 0x3ff << 12,
+		.div1_mask = 0xf << 8,
+		.max_divider = 16,
+		.min_divider = 1,
+	};
+
+	of_ti_dpll_setup(node, &omap2_dpll_core_ck_ops, &dd);
+}
+CLK_OF_DECLARE(ti_omap2_core_dpll_clock, "ti,omap2-dpll-core-clock",
+	       of_ti_omap2_core_dpll_setup);
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 4a21a872dbbd..753878c6fa52 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -259,6 +259,11 @@ int omap2_dflt_clk_enable(struct clk_hw *hw);
 void omap2_dflt_clk_disable(struct clk_hw *hw);
 int omap2_dflt_clk_is_enabled(struct clk_hw *hw);
 void omap3_clk_lock_dpll5(void);
+unsigned long omap2_dpllcore_recalc(struct clk_hw *hw,
+				    unsigned long parent_rate);
+int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate,
+			     unsigned long parent_rate);
+void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw);
 
 void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index);
 void ti_dt_clocks_register(struct ti_dt_clk *oclks);
@@ -287,6 +292,7 @@ static inline void of_ti_clk_allow_autoidle_all(void) { }
 static inline void of_ti_clk_deny_autoidle_all(void) { }
 #endif
 
+extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll;
 extern const struct clk_hw_omap_ops clkhwops_omap3_dpll;
 extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx;
 extern const struct clk_hw_omap_ops clkhwops_wait;
-- 
cgit v1.2.3-71-gd317


From 4d008589e271e28eae728eef7f5fb1f658f12b9f Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Mon, 24 Feb 2014 16:06:34 +0200
Subject: CLK: TI: APLL: add support for omap2 aplls

This patch adds support for omap2 type aplls, which have gating and
autoidle functionality.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 .../devicetree/bindings/clock/ti/apll.txt          |  24 ++-
 arch/arm/mach-omap2/clock.h                        |  11 --
 drivers/clk/ti/apll.c                              | 181 +++++++++++++++++++++
 include/linux/clk/ti.h                             |  21 ++-
 4 files changed, 220 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt
index 7faf5a68b3be..ade4dd4c30f0 100644
--- a/Documentation/devicetree/bindings/clock/ti/apll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/apll.txt
@@ -14,18 +14,32 @@ a subtype of a DPLL [2], although a simplified one at that.
 [2] Documentation/devicetree/bindings/clock/ti/dpll.txt
 
 Required properties:
-- compatible : shall be "ti,dra7-apll-clock"
+- compatible : shall be "ti,dra7-apll-clock" or "ti,omap2-apll-clock"
 - #clock-cells : from common clock binding; shall be set to 0.
 - clocks : link phandles of parent clocks (clk-ref and clk-bypass)
 - reg : address and length of the register set for controlling the APLL.
   It contains the information of registers in the following order:
-	"control" - contains the control register base address
-	"idlest" - contains the idlest register base address
+	"control" - contains the control register offset
+	"idlest" - contains the idlest register offset
+	"autoidle" - contains the autoidle register offset (OMAP2 only)
+- ti,clock-frequency : static clock frequency for the clock (OMAP2 only)
+- ti,idlest-shift : bit-shift for the idlest field (OMAP2 only)
+- ti,bit-shift : bit-shift for enable and autoidle fields (OMAP2 only)
 
 Examples:
-	apll_pcie_ck: apll_pcie_ck@4a008200 {
+	apll_pcie_ck: apll_pcie_ck {
 		#clock-cells = <0>;
 		clocks = <&apll_pcie_in_clk_mux>, <&dpll_pcie_ref_ck>;
-		reg = <0x4a00821c 0x4>, <0x4a008220 0x4>;
+		reg = <0x021c>, <0x0220>;
 		compatible = "ti,dra7-apll-clock";
 	};
+
+	apll96_ck: apll96_ck {
+		#clock-cells = <0>;
+		compatible = "ti,omap2-apll-clock";
+		clocks = <&sys_ck>;
+		ti,bit-shift = <2>;
+		ti,idlest-shift = <8>;
+		ti,clock-frequency = <96000000>;
+		reg = <0x0500>, <0x0530>, <0x0520>;
+	};
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index f6e9904d7a75..eb441d137843 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -178,17 +178,6 @@ struct clksel {
 	const struct clksel_rate *rates;
 };
 
-struct clk_hw_omap_ops {
-	void			(*find_idlest)(struct clk_hw_omap *oclk,
-					void __iomem **idlest_reg,
-					u8 *idlest_bit, u8 *idlest_val);
-	void			(*find_companion)(struct clk_hw_omap *oclk,
-					void __iomem **other_reg,
-					u8 *other_bit);
-	void			(*allow_idle)(struct clk_hw_omap *oclk);
-	void			(*deny_idle)(struct clk_hw_omap *oclk);
-};
-
 unsigned long omap_fixed_divisor_recalc(struct clk_hw *hw,
 					unsigned long parent_rate);
 
diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c
index b986f61f5a77..5428c9c547cd 100644
--- a/drivers/clk/ti/apll.c
+++ b/drivers/clk/ti/apll.c
@@ -221,3 +221,184 @@ cleanup:
 	kfree(init);
 }
 CLK_OF_DECLARE(dra7_apll_clock, "ti,dra7-apll-clock", of_dra7_apll_setup);
+
+#define OMAP2_EN_APLL_LOCKED	0x3
+#define OMAP2_EN_APLL_STOPPED	0x0
+
+static int omap2_apll_is_enabled(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+
+	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v &= ad->enable_mask;
+
+	v >>= __ffs(ad->enable_mask);
+
+	return v == OMAP2_EN_APLL_LOCKED ? 1 : 0;
+}
+
+static unsigned long omap2_apll_recalc(struct clk_hw *hw,
+				       unsigned long parent_rate)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+
+	if (omap2_apll_is_enabled(hw))
+		return clk->fixed_rate;
+
+	return 0;
+}
+
+static int omap2_apll_enable(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+	int i = 0;
+
+	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v &= ~ad->enable_mask;
+	v |= OMAP2_EN_APLL_LOCKED << __ffs(ad->enable_mask);
+	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+
+	while (1) {
+		v = ti_clk_ll_ops->clk_readl(ad->idlest_reg);
+		if (v & ad->idlest_mask)
+			break;
+		if (i > MAX_APLL_WAIT_TRIES)
+			break;
+		i++;
+		udelay(1);
+	}
+
+	if (i == MAX_APLL_WAIT_TRIES) {
+		pr_warn("%s failed to transition to locked\n",
+			__clk_get_name(clk->hw.clk));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static void omap2_apll_disable(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+
+	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v &= ~ad->enable_mask;
+	v |= OMAP2_EN_APLL_STOPPED << __ffs(ad->enable_mask);
+	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+}
+
+static struct clk_ops omap2_apll_ops = {
+	.enable		= &omap2_apll_enable,
+	.disable	= &omap2_apll_disable,
+	.is_enabled	= &omap2_apll_is_enabled,
+	.recalc_rate	= &omap2_apll_recalc,
+};
+
+static void omap2_apll_set_autoidle(struct clk_hw_omap *clk, u32 val)
+{
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+
+	v = ti_clk_ll_ops->clk_readl(ad->autoidle_reg);
+	v &= ~ad->autoidle_mask;
+	v |= val << __ffs(ad->autoidle_mask);
+	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+}
+
+#define OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP	0x3
+#define OMAP2_APLL_AUTOIDLE_DISABLE		0x0
+
+static void omap2_apll_allow_idle(struct clk_hw_omap *clk)
+{
+	omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP);
+}
+
+static void omap2_apll_deny_idle(struct clk_hw_omap *clk)
+{
+	omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_DISABLE);
+}
+
+static struct clk_hw_omap_ops omap2_apll_hwops = {
+	.allow_idle	= &omap2_apll_allow_idle,
+	.deny_idle	= &omap2_apll_deny_idle,
+};
+
+static void __init of_omap2_apll_setup(struct device_node *node)
+{
+	struct dpll_data *ad = NULL;
+	struct clk_hw_omap *clk_hw = NULL;
+	struct clk_init_data *init = NULL;
+	struct clk *clk;
+	const char *parent_name;
+	u32 val;
+
+	ad = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
+	clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
+	init = kzalloc(sizeof(*init), GFP_KERNEL);
+
+	if (!ad || !clk_hw || !init)
+		goto cleanup;
+
+	clk_hw->dpll_data = ad;
+	clk_hw->hw.init = init;
+	init->ops = &omap2_apll_ops;
+	init->name = node->name;
+	clk_hw->ops = &omap2_apll_hwops;
+
+	init->num_parents = of_clk_get_parent_count(node);
+	if (init->num_parents != 1) {
+		pr_err("%s must have one parent\n", node->name);
+		goto cleanup;
+	}
+
+	parent_name = of_clk_get_parent_name(node, 0);
+	init->parent_names = &parent_name;
+
+	if (of_property_read_u32(node, "ti,clock-frequency", &val)) {
+		pr_err("%s missing clock-frequency\n", node->name);
+		goto cleanup;
+	}
+	clk_hw->fixed_rate = val;
+
+	if (of_property_read_u32(node, "ti,bit-shift", &val)) {
+		pr_err("%s missing bit-shift\n", node->name);
+		goto cleanup;
+	}
+
+	clk_hw->enable_bit = val;
+	ad->enable_mask = 0x3 << val;
+	ad->autoidle_mask = 0x3 << val;
+
+	if (of_property_read_u32(node, "ti,idlest-shift", &val)) {
+		pr_err("%s missing idlest-shift\n", node->name);
+		goto cleanup;
+	}
+
+	ad->idlest_mask = 1 << val;
+
+	ad->control_reg = ti_clk_get_reg_addr(node, 0);
+	ad->autoidle_reg = ti_clk_get_reg_addr(node, 1);
+	ad->idlest_reg = ti_clk_get_reg_addr(node, 2);
+
+	if (!ad->control_reg || !ad->autoidle_reg || !ad->idlest_reg)
+		goto cleanup;
+
+	clk = clk_register(NULL, &clk_hw->hw);
+	if (!IS_ERR(clk)) {
+		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+		kfree(init);
+		return;
+	}
+cleanup:
+	kfree(ad);
+	kfree(clk_hw);
+	kfree(init);
+}
+CLK_OF_DECLARE(omap2_apll_clock, "ti,omap2-apll-clock",
+	       of_omap2_apll_setup);
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 753878c6fa52..44bf84002a34 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -94,7 +94,26 @@ struct dpll_data {
 	u8			flags;
 };
 
-struct clk_hw_omap_ops;
+struct clk_hw_omap;
+
+/**
+ * struct clk_hw_omap_ops - OMAP clk ops
+ * @find_idlest: find idlest register information for a clock
+ * @find_companion: find companion clock register information for a clock,
+ *		    basically converts CM_ICLKEN* <-> CM_FCLKEN*
+ * @allow_idle: enables autoidle hardware functionality for a clock
+ * @deny_idle: prevent autoidle hardware functionality for a clock
+ */
+struct clk_hw_omap_ops {
+	void	(*find_idlest)(struct clk_hw_omap *oclk,
+			       void __iomem **idlest_reg,
+			       u8 *idlest_bit, u8 *idlest_val);
+	void	(*find_companion)(struct clk_hw_omap *oclk,
+				  void __iomem **other_reg,
+				  u8 *other_bit);
+	void	(*allow_idle)(struct clk_hw_omap *oclk);
+	void	(*deny_idle)(struct clk_hw_omap *oclk);
+};
 
 /**
  * struct clk_hw_omap - OMAP struct clk
-- 
cgit v1.2.3-71-gd317


From de742570745e12b53c70130ace958f2a60044000 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Tue, 25 Feb 2014 19:16:07 +0200
Subject: CLK: TI: interface: add support for omap2430 specific interface clock

OMAP2430 I2CHS modules require specific hardware ops to be used, so added
a new compatible string for this.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 Documentation/devicetree/bindings/clock/ti/interface.txt |  2 ++
 arch/arm/mach-omap2/clock.h                              |  1 -
 drivers/clk/ti/interface.c                               | 11 +++++++++++
 include/linux/clk/ti.h                                   |  1 +
 4 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt
index 064e8caccac3..3111a409fea6 100644
--- a/Documentation/devicetree/bindings/clock/ti/interface.txt
+++ b/Documentation/devicetree/bindings/clock/ti/interface.txt
@@ -21,6 +21,8 @@ Required properties:
   "ti,omap3-dss-interface-clock" - interface clock with DSS specific HW handling
   "ti,omap3-ssi-interface-clock" - interface clock with SSI specific HW handling
   "ti,am35xx-interface-clock" - interface clock with AM35xx specific HW handling
+  "ti,omap2430-interface-clock" - interface clock with OMAP2430 specific HW
+				  handling
 - #clock-cells : from common clock binding; shall be set to 0
 - clocks : link to phandle of parent clock
 - reg : base address for the control register
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index eb441d137843..12f54d428d7c 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -268,7 +268,6 @@ extern const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait;
 extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait;
 extern const struct clk_hw_omap_ops clkhwops_apll54;
 extern const struct clk_hw_omap_ops clkhwops_apll96;
-extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait;
 
 /* clksel_rate blocks shared between OMAP44xx and AM33xx */
 extern const struct clksel_rate div_1_0_rates[];
diff --git a/drivers/clk/ti/interface.c b/drivers/clk/ti/interface.c
index 320a2b168bb2..9c3e8c4aaa40 100644
--- a/drivers/clk/ti/interface.c
+++ b/drivers/clk/ti/interface.c
@@ -94,6 +94,7 @@ static void __init of_ti_no_wait_interface_clk_setup(struct device_node *node)
 CLK_OF_DECLARE(ti_no_wait_interface_clk, "ti,omap3-no-wait-interface-clock",
 	       of_ti_no_wait_interface_clk_setup);
 
+#ifdef CONFIG_ARCH_OMAP3
 static void __init of_ti_hsotgusb_interface_clk_setup(struct device_node *node)
 {
 	_of_ti_interface_clk_setup(node,
@@ -123,3 +124,13 @@ static void __init of_ti_am35xx_interface_clk_setup(struct device_node *node)
 }
 CLK_OF_DECLARE(ti_am35xx_interface_clk, "ti,am35xx-interface-clock",
 	       of_ti_am35xx_interface_clk_setup);
+#endif
+
+#ifdef CONFIG_SOC_OMAP2430
+static void __init of_ti_omap2430_interface_clk_setup(struct device_node *node)
+{
+	_of_ti_interface_clk_setup(node, &clkhwops_omap2430_i2chs_wait);
+}
+CLK_OF_DECLARE(ti_omap2430_interface_clk, "ti,omap2430-interface-clock",
+	       of_ti_omap2430_interface_clk_setup);
+#endif
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 44bf84002a34..a8390d478528 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -312,6 +312,7 @@ static inline void of_ti_clk_deny_autoidle_all(void) { }
 #endif
 
 extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll;
+extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait;
 extern const struct clk_hw_omap_ops clkhwops_omap3_dpll;
 extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx;
 extern const struct clk_hw_omap_ops clkhwops_wait;
-- 
cgit v1.2.3-71-gd317


From be67c3bf382c591d8267e0ef12d80041854731d9 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Mon, 24 Feb 2014 17:52:57 +0200
Subject: CLK: TI: OMAP2: add clock init support

Adds support for registering the alias clocks, boot time clock-enable list
and disabling autoidle of clocks.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 drivers/clk/ti/Makefile   |   1 +
 drivers/clk/ti/clk-2xxx.c | 254 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/clk/ti.h    |   2 +
 3 files changed, 257 insertions(+)
 create mode 100644 drivers/clk/ti/clk-2xxx.c

(limited to 'include')

diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile
index 4319d4031aa3..4afeaed9e9ba 100644
--- a/drivers/clk/ti/Makefile
+++ b/drivers/clk/ti/Makefile
@@ -3,6 +3,7 @@ obj-y					+= clk.o autoidle.o clockdomain.o
 clk-common				= dpll.o composite.o divider.o gate.o \
 					  fixed-factor.o mux.o apll.o
 obj-$(CONFIG_SOC_AM33XX)		+= $(clk-common) clk-33xx.o
+obj-$(CONFIG_ARCH_OMAP2)		+= $(clk-common) interface.o clk-2xxx.o
 obj-$(CONFIG_ARCH_OMAP3)		+= $(clk-common) interface.o clk-3xxx.o
 obj-$(CONFIG_ARCH_OMAP4)		+= $(clk-common) clk-44xx.o
 obj-$(CONFIG_SOC_OMAP5)			+= $(clk-common) clk-54xx.o
diff --git a/drivers/clk/ti/clk-2xxx.c b/drivers/clk/ti/clk-2xxx.c
new file mode 100644
index 000000000000..f6400fb5ee3e
--- /dev/null
+++ b/drivers/clk/ti/clk-2xxx.c
@@ -0,0 +1,254 @@
+/*
+ * OMAP2 Clock init
+ *
+ * Copyright (C) 2013 Texas Instruments, Inc
+ *     Tero Kristo (t-kristo@ti.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/clk-provider.h>
+#include <linux/clk/ti.h>
+
+static struct ti_dt_clk omap2xxx_clks[] = {
+	DT_CLK(NULL, "func_32k_ck", "func_32k_ck"),
+	DT_CLK(NULL, "secure_32k_ck", "secure_32k_ck"),
+	DT_CLK(NULL, "virt_12m_ck", "virt_12m_ck"),
+	DT_CLK(NULL, "virt_13m_ck", "virt_13m_ck"),
+	DT_CLK(NULL, "virt_19200000_ck", "virt_19200000_ck"),
+	DT_CLK(NULL, "virt_26m_ck", "virt_26m_ck"),
+	DT_CLK(NULL, "aplls_clkin_ck", "aplls_clkin_ck"),
+	DT_CLK(NULL, "aplls_clkin_x2_ck", "aplls_clkin_x2_ck"),
+	DT_CLK(NULL, "osc_ck", "osc_ck"),
+	DT_CLK(NULL, "sys_ck", "sys_ck"),
+	DT_CLK(NULL, "alt_ck", "alt_ck"),
+	DT_CLK(NULL, "mcbsp_clks", "mcbsp_clks"),
+	DT_CLK(NULL, "dpll_ck", "dpll_ck"),
+	DT_CLK(NULL, "apll96_ck", "apll96_ck"),
+	DT_CLK(NULL, "apll54_ck", "apll54_ck"),
+	DT_CLK(NULL, "func_54m_ck", "func_54m_ck"),
+	DT_CLK(NULL, "core_ck", "core_ck"),
+	DT_CLK(NULL, "func_96m_ck", "func_96m_ck"),
+	DT_CLK(NULL, "func_48m_ck", "func_48m_ck"),
+	DT_CLK(NULL, "func_12m_ck", "func_12m_ck"),
+	DT_CLK(NULL, "sys_clkout_src", "sys_clkout_src"),
+	DT_CLK(NULL, "sys_clkout", "sys_clkout"),
+	DT_CLK(NULL, "emul_ck", "emul_ck"),
+	DT_CLK(NULL, "mpu_ck", "mpu_ck"),
+	DT_CLK(NULL, "dsp_fck", "dsp_fck"),
+	DT_CLK(NULL, "gfx_3d_fck", "gfx_3d_fck"),
+	DT_CLK(NULL, "gfx_2d_fck", "gfx_2d_fck"),
+	DT_CLK(NULL, "gfx_ick", "gfx_ick"),
+	DT_CLK("omapdss_dss", "ick", "dss_ick"),
+	DT_CLK(NULL, "dss_ick", "dss_ick"),
+	DT_CLK(NULL, "dss1_fck", "dss1_fck"),
+	DT_CLK(NULL, "dss2_fck", "dss2_fck"),
+	DT_CLK(NULL, "dss_54m_fck", "dss_54m_fck"),
+	DT_CLK(NULL, "core_l3_ck", "core_l3_ck"),
+	DT_CLK(NULL, "ssi_fck", "ssi_ssr_sst_fck"),
+	DT_CLK(NULL, "usb_l4_ick", "usb_l4_ick"),
+	DT_CLK(NULL, "l4_ck", "l4_ck"),
+	DT_CLK(NULL, "ssi_l4_ick", "ssi_l4_ick"),
+	DT_CLK(NULL, "gpt1_ick", "gpt1_ick"),
+	DT_CLK(NULL, "gpt1_fck", "gpt1_fck"),
+	DT_CLK(NULL, "gpt2_ick", "gpt2_ick"),
+	DT_CLK(NULL, "gpt2_fck", "gpt2_fck"),
+	DT_CLK(NULL, "gpt3_ick", "gpt3_ick"),
+	DT_CLK(NULL, "gpt3_fck", "gpt3_fck"),
+	DT_CLK(NULL, "gpt4_ick", "gpt4_ick"),
+	DT_CLK(NULL, "gpt4_fck", "gpt4_fck"),
+	DT_CLK(NULL, "gpt5_ick", "gpt5_ick"),
+	DT_CLK(NULL, "gpt5_fck", "gpt5_fck"),
+	DT_CLK(NULL, "gpt6_ick", "gpt6_ick"),
+	DT_CLK(NULL, "gpt6_fck", "gpt6_fck"),
+	DT_CLK(NULL, "gpt7_ick", "gpt7_ick"),
+	DT_CLK(NULL, "gpt7_fck", "gpt7_fck"),
+	DT_CLK(NULL, "gpt8_ick", "gpt8_ick"),
+	DT_CLK(NULL, "gpt8_fck", "gpt8_fck"),
+	DT_CLK(NULL, "gpt9_ick", "gpt9_ick"),
+	DT_CLK(NULL, "gpt9_fck", "gpt9_fck"),
+	DT_CLK(NULL, "gpt10_ick", "gpt10_ick"),
+	DT_CLK(NULL, "gpt10_fck", "gpt10_fck"),
+	DT_CLK(NULL, "gpt11_ick", "gpt11_ick"),
+	DT_CLK(NULL, "gpt11_fck", "gpt11_fck"),
+	DT_CLK(NULL, "gpt12_ick", "gpt12_ick"),
+	DT_CLK(NULL, "gpt12_fck", "gpt12_fck"),
+	DT_CLK("omap-mcbsp.1", "ick", "mcbsp1_ick"),
+	DT_CLK(NULL, "mcbsp1_ick", "mcbsp1_ick"),
+	DT_CLK(NULL, "mcbsp1_fck", "mcbsp1_fck"),
+	DT_CLK("omap-mcbsp.2", "ick", "mcbsp2_ick"),
+	DT_CLK(NULL, "mcbsp2_ick", "mcbsp2_ick"),
+	DT_CLK(NULL, "mcbsp2_fck", "mcbsp2_fck"),
+	DT_CLK("omap2_mcspi.1", "ick", "mcspi1_ick"),
+	DT_CLK(NULL, "mcspi1_ick", "mcspi1_ick"),
+	DT_CLK(NULL, "mcspi1_fck", "mcspi1_fck"),
+	DT_CLK("omap2_mcspi.2", "ick", "mcspi2_ick"),
+	DT_CLK(NULL, "mcspi2_ick", "mcspi2_ick"),
+	DT_CLK(NULL, "mcspi2_fck", "mcspi2_fck"),
+	DT_CLK(NULL, "uart1_ick", "uart1_ick"),
+	DT_CLK(NULL, "uart1_fck", "uart1_fck"),
+	DT_CLK(NULL, "uart2_ick", "uart2_ick"),
+	DT_CLK(NULL, "uart2_fck", "uart2_fck"),
+	DT_CLK(NULL, "uart3_ick", "uart3_ick"),
+	DT_CLK(NULL, "uart3_fck", "uart3_fck"),
+	DT_CLK(NULL, "gpios_ick", "gpios_ick"),
+	DT_CLK(NULL, "gpios_fck", "gpios_fck"),
+	DT_CLK("omap_wdt", "ick", "mpu_wdt_ick"),
+	DT_CLK(NULL, "mpu_wdt_ick", "mpu_wdt_ick"),
+	DT_CLK(NULL, "mpu_wdt_fck", "mpu_wdt_fck"),
+	DT_CLK(NULL, "sync_32k_ick", "sync_32k_ick"),
+	DT_CLK(NULL, "wdt1_ick", "wdt1_ick"),
+	DT_CLK(NULL, "omapctrl_ick", "omapctrl_ick"),
+	DT_CLK("omap24xxcam", "fck", "cam_fck"),
+	DT_CLK(NULL, "cam_fck", "cam_fck"),
+	DT_CLK("omap24xxcam", "ick", "cam_ick"),
+	DT_CLK(NULL, "cam_ick", "cam_ick"),
+	DT_CLK(NULL, "mailboxes_ick", "mailboxes_ick"),
+	DT_CLK(NULL, "wdt4_ick", "wdt4_ick"),
+	DT_CLK(NULL, "wdt4_fck", "wdt4_fck"),
+	DT_CLK(NULL, "mspro_ick", "mspro_ick"),
+	DT_CLK(NULL, "mspro_fck", "mspro_fck"),
+	DT_CLK(NULL, "fac_ick", "fac_ick"),
+	DT_CLK(NULL, "fac_fck", "fac_fck"),
+	DT_CLK("omap_hdq.0", "ick", "hdq_ick"),
+	DT_CLK(NULL, "hdq_ick", "hdq_ick"),
+	DT_CLK("omap_hdq.0", "fck", "hdq_fck"),
+	DT_CLK(NULL, "hdq_fck", "hdq_fck"),
+	DT_CLK("omap_i2c.1", "ick", "i2c1_ick"),
+	DT_CLK(NULL, "i2c1_ick", "i2c1_ick"),
+	DT_CLK("omap_i2c.2", "ick", "i2c2_ick"),
+	DT_CLK(NULL, "i2c2_ick", "i2c2_ick"),
+	DT_CLK(NULL, "gpmc_fck", "gpmc_fck"),
+	DT_CLK(NULL, "sdma_fck", "sdma_fck"),
+	DT_CLK(NULL, "sdma_ick", "sdma_ick"),
+	DT_CLK(NULL, "sdrc_ick", "sdrc_ick"),
+	DT_CLK(NULL, "des_ick", "des_ick"),
+	DT_CLK("omap-sham", "ick", "sha_ick"),
+	DT_CLK(NULL, "sha_ick", "sha_ick"),
+	DT_CLK("omap_rng", "ick", "rng_ick"),
+	DT_CLK(NULL, "rng_ick", "rng_ick"),
+	DT_CLK("omap-aes", "ick", "aes_ick"),
+	DT_CLK(NULL, "aes_ick", "aes_ick"),
+	DT_CLK(NULL, "pka_ick", "pka_ick"),
+	DT_CLK(NULL, "usb_fck", "usb_fck"),
+	DT_CLK(NULL, "timer_32k_ck", "func_32k_ck"),
+	DT_CLK(NULL, "timer_sys_ck", "sys_ck"),
+	DT_CLK(NULL, "timer_ext_ck", "alt_ck"),
+	{ .node_name = NULL },
+};
+
+static struct ti_dt_clk omap2420_clks[] = {
+	DT_CLK(NULL, "sys_clkout2_src", "sys_clkout2_src"),
+	DT_CLK(NULL, "sys_clkout2", "sys_clkout2"),
+	DT_CLK(NULL, "dsp_ick", "dsp_ick"),
+	DT_CLK(NULL, "iva1_ifck", "iva1_ifck"),
+	DT_CLK(NULL, "iva1_mpu_int_ifck", "iva1_mpu_int_ifck"),
+	DT_CLK(NULL, "wdt3_ick", "wdt3_ick"),
+	DT_CLK(NULL, "wdt3_fck", "wdt3_fck"),
+	DT_CLK("mmci-omap.0", "ick", "mmc_ick"),
+	DT_CLK(NULL, "mmc_ick", "mmc_ick"),
+	DT_CLK("mmci-omap.0", "fck", "mmc_fck"),
+	DT_CLK(NULL, "mmc_fck", "mmc_fck"),
+	DT_CLK(NULL, "eac_ick", "eac_ick"),
+	DT_CLK(NULL, "eac_fck", "eac_fck"),
+	DT_CLK(NULL, "i2c1_fck", "i2c1_fck"),
+	DT_CLK(NULL, "i2c2_fck", "i2c2_fck"),
+	DT_CLK(NULL, "vlynq_ick", "vlynq_ick"),
+	DT_CLK(NULL, "vlynq_fck", "vlynq_fck"),
+	DT_CLK("musb-hdrc", "fck", "osc_ck"),
+	{ .node_name = NULL },
+};
+
+static struct ti_dt_clk omap2430_clks[] = {
+	DT_CLK("twl", "fck", "osc_ck"),
+	DT_CLK(NULL, "iva2_1_ick", "iva2_1_ick"),
+	DT_CLK(NULL, "mdm_ick", "mdm_ick"),
+	DT_CLK(NULL, "mdm_osc_ck", "mdm_osc_ck"),
+	DT_CLK("omap-mcbsp.3", "ick", "mcbsp3_ick"),
+	DT_CLK(NULL, "mcbsp3_ick", "mcbsp3_ick"),
+	DT_CLK(NULL, "mcbsp3_fck", "mcbsp3_fck"),
+	DT_CLK("omap-mcbsp.4", "ick", "mcbsp4_ick"),
+	DT_CLK(NULL, "mcbsp4_ick", "mcbsp4_ick"),
+	DT_CLK(NULL, "mcbsp4_fck", "mcbsp4_fck"),
+	DT_CLK("omap-mcbsp.5", "ick", "mcbsp5_ick"),
+	DT_CLK(NULL, "mcbsp5_ick", "mcbsp5_ick"),
+	DT_CLK(NULL, "mcbsp5_fck", "mcbsp5_fck"),
+	DT_CLK("omap2_mcspi.3", "ick", "mcspi3_ick"),
+	DT_CLK(NULL, "mcspi3_ick", "mcspi3_ick"),
+	DT_CLK(NULL, "mcspi3_fck", "mcspi3_fck"),
+	DT_CLK(NULL, "icr_ick", "icr_ick"),
+	DT_CLK(NULL, "i2chs1_fck", "i2chs1_fck"),
+	DT_CLK(NULL, "i2chs2_fck", "i2chs2_fck"),
+	DT_CLK("musb-omap2430", "ick", "usbhs_ick"),
+	DT_CLK(NULL, "usbhs_ick", "usbhs_ick"),
+	DT_CLK("omap_hsmmc.0", "ick", "mmchs1_ick"),
+	DT_CLK(NULL, "mmchs1_ick", "mmchs1_ick"),
+	DT_CLK(NULL, "mmchs1_fck", "mmchs1_fck"),
+	DT_CLK("omap_hsmmc.1", "ick", "mmchs2_ick"),
+	DT_CLK(NULL, "mmchs2_ick", "mmchs2_ick"),
+	DT_CLK(NULL, "mmchs2_fck", "mmchs2_fck"),
+	DT_CLK(NULL, "gpio5_ick", "gpio5_ick"),
+	DT_CLK(NULL, "gpio5_fck", "gpio5_fck"),
+	DT_CLK(NULL, "mdm_intc_ick", "mdm_intc_ick"),
+	DT_CLK("omap_hsmmc.0", "mmchsdb_fck", "mmchsdb1_fck"),
+	DT_CLK(NULL, "mmchsdb1_fck", "mmchsdb1_fck"),
+	DT_CLK("omap_hsmmc.1", "mmchsdb_fck", "mmchsdb2_fck"),
+	DT_CLK(NULL, "mmchsdb2_fck", "mmchsdb2_fck"),
+	{ .node_name = NULL },
+};
+
+static const char *enable_init_clks[] = {
+	"apll96_ck",
+	"apll54_ck",
+	"sync_32k_ick",
+	"omapctrl_ick",
+	"gpmc_fck",
+	"sdrc_ick",
+};
+
+enum {
+	OMAP2_SOC_OMAP2420,
+	OMAP2_SOC_OMAP2430,
+};
+
+static int __init omap2xxx_dt_clk_init(int soc_type)
+{
+	ti_dt_clocks_register(omap2xxx_clks);
+
+	if (soc_type == OMAP2_SOC_OMAP2420)
+		ti_dt_clocks_register(omap2420_clks);
+	else
+		ti_dt_clocks_register(omap2430_clks);
+
+	omap2_clk_disable_autoidle_all();
+
+	omap2_clk_enable_init_clocks(enable_init_clks,
+				     ARRAY_SIZE(enable_init_clks));
+
+	pr_info("Clocking rate (Crystal/DPLL/MPU): %ld.%01ld/%ld/%ld MHz\n",
+		(clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 1000000),
+		(clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 100000) % 10,
+		(clk_get_rate(clk_get_sys(NULL, "dpll_ck")) / 1000000),
+		(clk_get_rate(clk_get_sys(NULL, "mpu_ck")) / 1000000));
+
+	return 0;
+}
+
+int __init omap2420_dt_clk_init(void)
+{
+	return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2420);
+}
+
+int __init omap2430_dt_clk_init(void)
+{
+	return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2430);
+}
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index a8390d478528..188f0cbb26c2 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -302,6 +302,8 @@ int omap5xxx_dt_clk_init(void);
 int dra7xx_dt_clk_init(void);
 int am33xx_dt_clk_init(void);
 int am43xx_dt_clk_init(void);
+int omap2420_dt_clk_init(void);
+int omap2430_dt_clk_init(void);
 
 #ifdef CONFIG_OF
 void of_ti_clk_allow_autoidle_all(void);
-- 
cgit v1.2.3-71-gd317


From 61f25ca76ccc7b63371a7a6b0b8b9a8a46745b79 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Mon, 24 Feb 2014 18:49:35 +0200
Subject: ARM: OMAP2: clock: add DT boot support for cpufreq_ck

The clock and clkdev for this are added manually.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c | 53 ++++++++++++++++++++++++++++
 drivers/clk/ti/clk-2xxx.c                    |  2 ++
 include/linux/clk/ti.h                       |  1 +
 3 files changed, 56 insertions(+)

(limited to 'include')

diff --git a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
index b935ed2922d8..85e0b0c06718 100644
--- a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
+++ b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
@@ -208,3 +208,56 @@ void omap2xxx_clkt_vps_late_init(void)
 		clk_put(c);
 	}
 }
+
+#ifdef CONFIG_OF
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+
+static const struct clk_ops virt_prcm_set_ops = {
+	.recalc_rate	= &omap2_table_mpu_recalc,
+	.set_rate	= &omap2_select_table_rate,
+	.round_rate	= &omap2_round_to_table_rate,
+};
+
+/**
+ * omap2xxx_clkt_vps_init - initialize virt_prcm_set clock
+ *
+ * Does a manual init for the virtual prcm DVFS clock for OMAP2. This
+ * function is called only from omap2 DT clock init, as the virtual
+ * node is not modelled in the DT clock data.
+ */
+void omap2xxx_clkt_vps_init(void)
+{
+	struct clk_init_data init = { NULL };
+	struct clk_hw_omap *hw = NULL;
+	struct clk *clk;
+	const char *parent_name = "mpu_ck";
+	struct clk_lookup *lookup = NULL;
+
+	omap2xxx_clkt_vps_late_init();
+	omap2xxx_clkt_vps_check_bootloader_rates();
+
+	hw = kzalloc(sizeof(*hw), GFP_KERNEL);
+	lookup = kzalloc(sizeof(*lookup), GFP_KERNEL);
+	if (!hw || !lookup)
+		goto cleanup;
+	init.name = "virt_prcm_set";
+	init.ops = &virt_prcm_set_ops;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+
+	hw->hw.init = &init;
+
+	clk = clk_register(NULL, &hw->hw);
+
+	lookup->dev_id = NULL;
+	lookup->con_id = "cpufreq_ck";
+	lookup->clk = clk;
+
+	clkdev_add(lookup);
+	return;
+cleanup:
+	kfree(hw);
+	kfree(lookup);
+}
+#endif
diff --git a/drivers/clk/ti/clk-2xxx.c b/drivers/clk/ti/clk-2xxx.c
index f6400fb5ee3e..c808ab3d2bb2 100644
--- a/drivers/clk/ti/clk-2xxx.c
+++ b/drivers/clk/ti/clk-2xxx.c
@@ -229,6 +229,8 @@ static int __init omap2xxx_dt_clk_init(int soc_type)
 	else
 		ti_dt_clocks_register(omap2430_clks);
 
+	omap2xxx_clkt_vps_init();
+
 	omap2_clk_disable_autoidle_all();
 
 	omap2_clk_enable_init_clocks(enable_init_clks,
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 188f0cbb26c2..4231c41bed51 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -283,6 +283,7 @@ unsigned long omap2_dpllcore_recalc(struct clk_hw *hw,
 int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate,
 			     unsigned long parent_rate);
 void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw);
+void omap2xxx_clkt_vps_init(void);
 
 void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index);
 void ti_dt_clocks_register(struct ti_dt_clk *oclks);
-- 
cgit v1.2.3-71-gd317


From 2d5e447914722f3c79103ad54baa1170661ac553 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 7 May 2014 13:20:46 +0300
Subject: dt:/bindings: DRA7 ATL (Audio Tracking Logic) clock bindings

Audio Tracking Logic is designed to be used by HD Radio applications to
synchronize the audio output clocks to the baseband clock. ATL can be also
used to track errors between two reference clocks (BWS, AWS) and generate a modulated
clock output which averages to some desired frequency.
In essence ATL is generating a clock to be used by an audio codec and also
to be used by the SoC as MCLK.

To be able to integrate the ATL provided clocks to the clock tree we need
two types of DT binding:
- DT clock nodes to represent the ATL clocks towards the CCF
- binding for the ATL IP itself which is going to handle the hw
  configuration

The reason for this type of setup is that ATL itself is a separate device
in the SoC, it has it's own address space and clock domain. Other IPs can
use the ATL generated clock as their functional clock (McASPs for example)
and external components like audio codecs can also use the very same clock
as their MCLK.

The ATL IP in DRA7 contains 4 ATL instences.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 .../devicetree/bindings/clock/ti/dra7-atl.txt      | 96 ++++++++++++++++++++++
 include/dt-bindings/clk/ti-dra7-atl.h              | 40 +++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/ti/dra7-atl.txt
 create mode 100644 include/dt-bindings/clk/ti-dra7-atl.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt
new file mode 100644
index 000000000000..585e8c191f50
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt
@@ -0,0 +1,96 @@
+Device Tree Clock bindings for ATL (Audio Tracking Logic) of DRA7 SoC.
+
+The ATL IP is used to generate clock to be used to synchronize baseband and
+audio codec. A single ATL IP provides four ATL clock instances sharing the same
+functional clock but can be configured to provide different clocks.
+ATL can maintain a clock averages to some desired frequency based on the bws/aws
+signals - can compensate the drift between the two ws signal.
+
+In order to provide the support for ATL and it's output clocks (which can be used
+internally within the SoC or external components) two sets of bindings is needed:
+
+Clock tree binding:
+This binding uses the common clock binding[1].
+To be able to integrate the ATL clocks with DT clock tree.
+Provides ccf level representation of the ATL clocks to be used by drivers.
+Since the clock instances are part of a single IP this binding is used as a node
+for the DT clock tree, the IP driver is needed to handle the actual configuration
+of the IP.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "ti,dra7-atl-clock"
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : link phandles to functional clock of ATL
+
+Binding for the IP driver:
+This binding is used to configure the IP driver which is going to handle the
+configuration of the IP for the ATL clock instances.
+
+Required properties:
+- compatible : shall be "ti,dra7-atl"
+- reg : base address for the ATL IP
+- ti,provided-clocks : List of phandles to the clocks associated with the ATL
+- clocks : link phandles to functional clock of ATL
+- clock-names : Shall be set to "fck"
+- ti,hwmods : Shall be set to "atl"
+
+Optional properties:
+Configuration of ATL instances:
+- atl{0/1/2/3} {
+	- bws : Baseband word select signal selection
+	- aws : Audio word select signal selection
+};
+
+For valid word select signals, see the dt-bindings/clk/ti-dra7-atl.h include
+file.
+
+Examples:
+/* clock bindings for atl provided clocks */
+atl_clkin0_ck: atl_clkin0_ck {
+	#clock-cells = <0>;
+	compatible = "ti,dra7-atl-clock";
+	clocks = <&atl_gfclk_mux>;
+};
+
+atl_clkin1_ck: atl_clkin1_ck {
+	#clock-cells = <0>;
+	compatible = "ti,dra7-atl-clock";
+	clocks = <&atl_gfclk_mux>;
+};
+
+atl_clkin2_ck: atl_clkin2_ck {
+	#clock-cells = <0>;
+	compatible = "ti,dra7-atl-clock";
+	clocks = <&atl_gfclk_mux>;
+};
+
+atl_clkin3_ck: atl_clkin3_ck {
+	#clock-cells = <0>;
+	compatible = "ti,dra7-atl-clock";
+	clocks = <&atl_gfclk_mux>;
+};
+
+/* binding for the IP */
+atl: atl@4843c000 {
+	compatible = "ti,dra7-atl";
+	reg = <0x4843c000 0x3ff>;
+	ti,hwmods = "atl";
+	ti,provided-clocks = <&atl_clkin0_ck>, <&atl_clkin1_ck>,
+				<&atl_clkin2_ck>, <&atl_clkin3_ck>;
+	clocks = <&atl_gfclk_mux>;
+	clock-names = "fck";
+	status = "disabled";
+};
+
+#include <dt-bindings/clk/ti-dra7-atl.h>
+
+&atl {
+	status = "okay";
+
+	atl2 {
+		bws = <DRA7_ATL_WS_MCASP2_FSX>;
+		aws = <DRA7_ATL_WS_MCASP3_FSX>;
+	};
+};
diff --git a/include/dt-bindings/clk/ti-dra7-atl.h b/include/dt-bindings/clk/ti-dra7-atl.h
new file mode 100644
index 000000000000..42dd4164f6f4
--- /dev/null
+++ b/include/dt-bindings/clk/ti-dra7-atl.h
@@ -0,0 +1,40 @@
+/*
+ * This header provides constants for DRA7 ATL (Audio Tracking Logic)
+ *
+ * The constants defined in this header are used in dts files
+ *
+ * Copyright (C) 2013 Texas Instruments, Inc.
+ *
+ * Peter Ujfalusi <peter.ujfalusi@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_CLK_DRA7_ATL_H
+#define _DT_BINDINGS_CLK_DRA7_ATL_H
+
+#define DRA7_ATL_WS_MCASP1_FSR		0
+#define DRA7_ATL_WS_MCASP1_FSX		1
+#define DRA7_ATL_WS_MCASP2_FSR		2
+#define DRA7_ATL_WS_MCASP2_FSX		3
+#define DRA7_ATL_WS_MCASP3_FSX		4
+#define DRA7_ATL_WS_MCASP4_FSX		5
+#define DRA7_ATL_WS_MCASP5_FSX		6
+#define DRA7_ATL_WS_MCASP6_FSX		7
+#define DRA7_ATL_WS_MCASP7_FSX		8
+#define DRA7_ATL_WS_MCASP8_FSX		9
+#define DRA7_ATL_WS_MCASP8_AHCLKX	10
+#define DRA7_ATL_WS_XREF_CLK3		11
+#define DRA7_ATL_WS_XREF_CLK0		12
+#define DRA7_ATL_WS_XREF_CLK1		13
+#define DRA7_ATL_WS_XREF_CLK2		14
+#define DRA7_ATL_WS_OSC1_X1		15
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 494b6590043b4cd73ceb3f58e1c012a2c6c98d85 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <Jes.Sorensen@redhat.com>
Date: Mon, 26 May 2014 18:06:34 +0200
Subject: wireless: add missing WLAN_EID_BSS_INTOLERANT_CHL_REPORT

Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index f194ccb8539c..6bff13f74050 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1711,6 +1711,7 @@ enum ieee80211_eid {
 	WLAN_EID_RRM_ENABLED_CAPABILITIES = 70,
 	WLAN_EID_MULTIPLE_BSSID = 71,
 	WLAN_EID_BSS_COEX_2040 = 72,
+	WLAN_EID_BSS_INTOLERANT_CHL_REPORT = 73,
 	WLAN_EID_OVERLAP_BSS_SCAN_PARAM = 74,
 	WLAN_EID_RIC_DESCRIPTOR = 75,
 	WLAN_EID_MMIE = 76,
-- 
cgit v1.2.3-71-gd317


From f08dbf8a61462aa122b9b5077849a3f4bd84702a Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Wed, 8 Jan 2014 11:23:35 +0000
Subject: cpuidle: declare cpuidle_dev in cpuidle.h

Declaring this allows drivers which need to initialise each struct
cpuidle_device at initialisation time to make use of the structures
already defined in cpuidle.c, rather than having to wastefully define
their own.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
 include/linux/cpuidle.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index b0238cba440b..99cbd7a74e9f 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -84,6 +84,7 @@ struct cpuidle_device {
 };
 
 DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
 
 /**
  * cpuidle_get_last_residency - retrieves the last state's residency time
-- 
cgit v1.2.3-71-gd317


From c25dc82899e67a32fdcfb20dd72a37fc236fde2e Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 22 May 2014 17:07:30 -0600
Subject: PCI: Add DMA alias iterator

In a mixed PCI/PCI-X/PCIe topology, bridges can take ownership of
transactions, replacing the original requester ID with their own.
Sometimes we just want to know the resulting device or resulting alias;
other times we want each step in the chain.  This iterator allows either
usage.  When an endpoint is connected via an unbroken chain of PCIe
switches and root ports, it has no alias and its requester ID is visible to
the root bus.  When PCI/X get in the way, we pick up aliases for bridges.

The reason why we potentially care about each step in the path is because
of PCI-X.  PCI-X has the concept of a requester ID, but bridges may or may
not take ownership of various types of transactions.  We therefore leave it
to the consumer of this function to prune out what they don't care about
rather than attempt to flatten the alias ourselves.

Tested-by: George Spelvin <linux@horizon.com>
Tested-by: Pat Erley <pat-lkml@erley.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/search.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h  |  4 +++
 2 files changed, 74 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 4a1b972efe7f..5601cdb8bbb3 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -17,6 +17,76 @@
 DECLARE_RWSEM(pci_bus_sem);
 EXPORT_SYMBOL_GPL(pci_bus_sem);
 
+/*
+ * pci_for_each_dma_alias - Iterate over DMA aliases for a device
+ * @pdev: starting downstream device
+ * @fn: function to call for each alias
+ * @data: opaque data to pass to @fn
+ *
+ * Starting @pdev, walk up the bus calling @fn for each possible alias
+ * of @pdev at the root bus.
+ */
+int pci_for_each_dma_alias(struct pci_dev *pdev,
+			   int (*fn)(struct pci_dev *pdev,
+				     u16 alias, void *data), void *data)
+{
+	struct pci_bus *bus;
+	int ret;
+
+	ret = fn(pdev, PCI_DEVID(pdev->bus->number, pdev->devfn), data);
+	if (ret)
+		return ret;
+
+	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
+		struct pci_dev *tmp;
+
+		/* Skip virtual buses */
+		if (!bus->self)
+			continue;
+
+		tmp = bus->self;
+
+		/*
+		 * PCIe-to-PCI/X bridges alias transactions from downstream
+		 * devices using the subordinate bus number (PCI Express to
+		 * PCI/PCI-X Bridge Spec, rev 1.0, sec 2.3).  For all cases
+		 * where the upstream bus is PCI/X we alias to the bridge
+		 * (there are various conditions in the previous reference
+		 * where the bridge may take ownership of transactions, even
+		 * when the secondary interface is PCI-X).
+		 */
+		if (pci_is_pcie(tmp)) {
+			switch (pci_pcie_type(tmp)) {
+			case PCI_EXP_TYPE_ROOT_PORT:
+			case PCI_EXP_TYPE_UPSTREAM:
+			case PCI_EXP_TYPE_DOWNSTREAM:
+				continue;
+			case PCI_EXP_TYPE_PCI_BRIDGE:
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->subordinate->number,
+						   PCI_DEVFN(0, 0)), data);
+				if (ret)
+					return ret;
+				continue;
+			case PCI_EXP_TYPE_PCIE_BRIDGE:
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->bus->number,
+						   tmp->devfn), data);
+				if (ret)
+					return ret;
+				continue;
+			}
+		} else {
+			ret = fn(tmp, PCI_DEVID(tmp->bus->number, tmp->devfn),
+				 data);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
 /*
  * find the upstream PCIe-to-PCI bridge of a PCI device
  * if the device is PCIE, return NULL
diff --git a/include/linux/pci.h b/include/linux/pci.h
index aab57b4abe7f..14b074bbc841 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1795,6 +1795,10 @@ static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev)
 }
 #endif
 
+int pci_for_each_dma_alias(struct pci_dev *pdev,
+			   int (*fn)(struct pci_dev *pdev,
+				     u16 alias, void *data), void *data);
+
 /**
  * pci_find_upstream_pcie_bridge - find upstream PCIe-to-PCI bridge of a device
  * @pdev: the PCI device
-- 
cgit v1.2.3-71-gd317


From 6b121592f8a3fd2bd0de128637b76a0d0864d993 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 22 May 2014 17:07:36 -0600
Subject: PCI: Convert pci_dev_flags definitions to bit shifts

Convert the pci_dev_flags definitions from decimal constants to bit shifts.
We're only a few entries away from where using the decimal value becomes
cumbersome.  No functional change.

Tested-by: George Spelvin <linux@horizon.com>
Tested-by: Pat Erley <pat-lkml@erley.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 14b074bbc841..545903df00dc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -164,13 +164,13 @@ enum pci_dev_flags {
 	/* INTX_DISABLE in PCI_COMMAND register disables MSI
 	 * generation too.
 	 */
-	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1,
+	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) (1 << 0),
 	/* Device configuration is irrevocably lost if disabled into D3 */
-	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
+	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) (1 << 1),
 	/* Provide indication device is assigned by a Virtual Machine Manager */
-	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4,
+	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) (1 << 2),
 	/* Flag for quirk use to store if quirk-specific ACS is enabled */
-	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) 8,
+	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3),
 };
 
 enum pci_irq_reroute_variant {
-- 
cgit v1.2.3-71-gd317


From 31c2b8153c58f11ddb80dfd392c16f13c2d709c6 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 22 May 2014 17:07:43 -0600
Subject: PCI: Add support for DMA alias quirks

Some devices are broken and use a requester ID other than their physical
devfn.  Add a byte, using an existing gap in the pci_dev structure, to
store an alternate "alias" devfn.  A bit in the dev_flags tells us when
this is valid.  We then add the alias as one more step in the
pci_for_each_dma_alias() iterator.

Tested-by: George Spelvin <linux@horizon.com>
Tested-by: Pat Erley <pat-lkml@erley.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/search.c | 11 +++++++++++
 include/linux/pci.h  |  3 +++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 5601cdb8bbb3..2c19f3f40621 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -37,6 +37,17 @@ int pci_for_each_dma_alias(struct pci_dev *pdev,
 	if (ret)
 		return ret;
 
+	/*
+	 * If the device is broken and uses an alias requester ID for
+	 * DMA, iterate over that too.
+	 */
+	if (unlikely(pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)) {
+		ret = fn(pdev, PCI_DEVID(pdev->bus->number,
+					 pdev->dma_alias_devfn), data);
+		if (ret)
+			return ret;
+	}
+
 	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
 		struct pci_dev *tmp;
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 545903df00dc..9d4035c276f4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -171,6 +171,8 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) (1 << 2),
 	/* Flag for quirk use to store if quirk-specific ACS is enabled */
 	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3),
+	/* Flag to indicate the device uses dma_alias_devfn */
+	PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4),
 };
 
 enum pci_irq_reroute_variant {
@@ -268,6 +270,7 @@ struct pci_dev {
 	u8		rom_base_reg;	/* which config register controls the ROM */
 	u8		pin;		/* which interrupt pin this device uses */
 	u16		pcie_flags_reg;	/* cached PCIe Capabilities Register */
+	u8		dma_alias_devfn;/* devfn of DMA alias, if any */
 
 	struct pci_driver *driver;	/* which driver has allocated this device */
 	u64		dma_mask;	/* Mask of the bits of bus address this
-- 
cgit v1.2.3-71-gd317


From a20c93e3160e37ecccc738d8eef085c8507949ed Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Apr 2014 15:07:21 +0200
Subject: nfs: remove ->write_pageio_init from rpc ops

The write_pageio_init method is just a very convoluted way to grab the
right nfs_pageio_ops vector.  The vector to chose is not a choice of
protocol version, but just a pNFS vs MDS I/O choice that can simply be
done inside nfs_pageio_init_write based on the presence of a layout
driver, and a new force_mds flag to the special case of falling back
to MDS I/O on a pNFS-capable volume.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c         |  4 ++--
 fs/nfs/internal.h       |  2 +-
 fs/nfs/nfs3proc.c       |  1 -
 fs/nfs/nfs4proc.c       |  1 -
 fs/nfs/pnfs.c           | 16 +---------------
 fs/nfs/pnfs.h           |  8 --------
 fs/nfs/proc.c           |  1 -
 fs/nfs/write.c          | 21 +++++++++++++--------
 include/linux/nfs_xdr.h |  2 --
 9 files changed, 17 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b8797ae6831f..6a31102b0819 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -564,7 +564,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	dreq->count = 0;
 	get_dreq(dreq);
 
-	NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE,
+	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 
@@ -874,7 +874,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
-	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
+	nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 	get_dreq(dreq);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index dd8bfc2e2464..8431083de179 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -422,7 +422,7 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 
 /* write.c */
 extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-			struct inode *inode, int ioflags,
+			struct inode *inode, int ioflags, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
 extern struct nfs_write_header *nfs_writehdr_alloc(void);
 extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index db60149c4579..e98488053906 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -951,7 +951,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.read_rpc_prepare = nfs3_proc_read_rpc_prepare,
 	.read_done	= nfs3_read_done,
 	.write_setup	= nfs3_proc_write_setup,
-	.write_pageio_init = nfs_pageio_init_write,
 	.write_rpc_prepare = nfs3_proc_write_rpc_prepare,
 	.write_done	= nfs3_write_done,
 	.commit_setup	= nfs3_proc_commit_setup,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 397be39c6dc8..8da0c62966b5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8437,7 +8437,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.read_rpc_prepare = nfs4_proc_read_rpc_prepare,
 	.read_done	= nfs4_read_done,
 	.write_setup	= nfs4_proc_write_setup,
-	.write_pageio_init = pnfs_pageio_init_write,
 	.write_rpc_prepare = nfs4_proc_write_rpc_prepare,
 	.write_done	= nfs4_write_done,
 	.commit_setup	= nfs4_proc_commit_setup,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cb53d450ae32..9edac9f01c2a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1447,20 +1447,6 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
 		nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
 }
 
-void
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
-		       int ioflags,
-		       const struct nfs_pgio_completion_ops *compl_ops)
-{
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
-	if (ld == NULL)
-		nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
-	else
-		nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags);
-}
-
 bool
 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		     struct nfs_page *req)
@@ -1496,7 +1482,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
 	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops);
+	nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops);
 	pgio.pg_dreq = dreq;
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 023793909778..e9ac8fbaee3d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -182,8 +182,6 @@ void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
 
 void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
 			   const struct nfs_pgio_completion_ops *);
-void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
-			    int, const struct nfs_pgio_completion_ops *);
 
 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
 void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -467,12 +465,6 @@ static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, str
 	nfs_pageio_init_read(pgio, inode, compl_ops);
 }
 
-static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
-					  const struct nfs_pgio_completion_ops *compl_ops)
-{
-	nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
-}
-
 static inline int
 pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
 		 struct nfs_commit_info *cinfo)
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index e55ce9e8b034..f9cc29590a18 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -739,7 +739,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.read_rpc_prepare = nfs_proc_read_rpc_prepare,
 	.read_done	= nfs_read_done,
 	.write_setup	= nfs_proc_write_setup,
-	.write_pageio_init = nfs_pageio_init_write,
 	.write_rpc_prepare = nfs_proc_write_rpc_prepare,
 	.write_done	= nfs_write_done,
 	.commit_setup	= nfs_proc_commit_setup,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index cd7c651f9b84..ee6d46fde76c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -354,10 +354,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
 	struct nfs_pageio_descriptor pgio;
 	int err;
 
-	NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio,
-							  page->mapping->host,
-							  wb_priority(wbc),
-							  &nfs_async_write_completion_ops);
+	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+				false, &nfs_async_write_completion_ops);
 	err = nfs_do_writepage(page, wbc, &pgio);
 	nfs_pageio_complete(&pgio);
 	if (err < 0)
@@ -400,7 +398,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
-	NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops);
+	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
+				&nfs_async_write_completion_ops);
 	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
 
@@ -1282,11 +1281,17 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
 };
 
 void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-			       struct inode *inode, int ioflags,
+			       struct inode *inode, int ioflags, bool force_mds,
 			       const struct nfs_pgio_completion_ops *compl_ops)
 {
-	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
-				NFS_SERVER(inode)->wsize, ioflags);
+	struct nfs_server *server = NFS_SERVER(inode);
+	const struct nfs_pageio_ops *pg_ops = &nfs_pageio_write_ops;
+
+#ifdef CONFIG_NFS_V4_1
+	if (server->pnfs_curr_ld && !force_mds)
+		pg_ops = server->pnfs_curr_ld->pg_write_ops;
+#endif
+	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->wsize, ioflags);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6fb5b2335b59..78216f859527 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1471,8 +1471,6 @@ struct nfs_rpc_ops {
 	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
 	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
-	void	(*write_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, int,
-				     const struct nfs_pgio_completion_ops *);
 	int	(*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
 	int	(*write_done)  (struct rpc_task *, struct nfs_write_data *);
 	void	(*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
-- 
cgit v1.2.3-71-gd317


From fab5fc25d230edcc8ee72367e505955a2fae0cac Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Apr 2014 15:07:22 +0200
Subject: nfs: remove ->read_pageio_init from rpc ops

The read_pageio_init method is just a very convoluted way to grab the
right nfs_pageio_ops vector.  The vector to chose is not a choice of
protocol version, but just a pNFS vs MDS I/O choice that can simply be
done inside nfs_pageio_init_read based on the presence of a layout
driver, and a new force_mds flag to the special case of falling back
to MDS I/O on a pNFS-capable volume.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c         |  2 +-
 fs/nfs/internal.h       |  2 +-
 fs/nfs/nfs3proc.c       |  1 -
 fs/nfs/nfs4proc.c       |  1 -
 fs/nfs/pnfs.c           | 15 +--------------
 fs/nfs/pnfs.h           |  9 ---------
 fs/nfs/proc.c           |  1 -
 fs/nfs/read.c           | 19 ++++++++++++++-----
 include/linux/nfs_xdr.h |  2 --
 9 files changed, 17 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 6a31102b0819..bbe688e2cc89 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -424,7 +424,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
-	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
+	nfs_pageio_init_read(&desc, dreq->inode, false,
 			     &nfs_direct_read_completion_ops);
 	get_dreq(dreq);
 	desc.pg_dreq = dreq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8431083de179..98fe618db2aa 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -398,7 +398,7 @@ struct nfs_pgio_completion_ops;
 extern struct nfs_read_header *nfs_readhdr_alloc(void);
 extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-			struct inode *inode,
+			struct inode *inode, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
 			     struct nfs_read_data *data,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e98488053906..d873241a9b3a 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -947,7 +947,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.pathconf	= nfs3_proc_pathconf,
 	.decode_dirent	= nfs3_decode_dirent,
 	.read_setup	= nfs3_proc_read_setup,
-	.read_pageio_init = nfs_pageio_init_read,
 	.read_rpc_prepare = nfs3_proc_read_rpc_prepare,
 	.read_done	= nfs3_read_done,
 	.write_setup	= nfs3_proc_write_setup,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8da0c62966b5..21cd1f2ee35a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8433,7 +8433,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.set_capabilities = nfs4_server_capabilities,
 	.decode_dirent	= nfs4_decode_dirent,
 	.read_setup	= nfs4_proc_read_setup,
-	.read_pageio_init = pnfs_pageio_init_read,
 	.read_rpc_prepare = nfs4_proc_read_rpc_prepare,
 	.read_done	= nfs4_read_done,
 	.write_setup	= nfs4_proc_write_setup,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 9edac9f01c2a..3d5bc2baafd1 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1434,19 +1434,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
 
-void
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
-		      const struct nfs_pgio_completion_ops *compl_ops)
-{
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
-	if (ld == NULL)
-		nfs_pageio_init_read(pgio, inode, compl_ops);
-	else
-		nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
-}
-
 bool
 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		     struct nfs_page *req)
@@ -1641,7 +1628,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
 	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_read(&pgio, inode, compl_ops);
+	nfs_pageio_init_read(&pgio, inode, true, compl_ops);
 	pgio.pg_dreq = dreq;
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e9ac8fbaee3d..94a9a1834b3f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -180,9 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
 void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
 void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
 
-void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
-			   const struct nfs_pgio_completion_ops *);
-
 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
@@ -459,12 +456,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
 {
 }
 
-static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
-					 const struct nfs_pgio_completion_ops *compl_ops)
-{
-	nfs_pageio_init_read(pgio, inode, compl_ops);
-}
-
 static inline int
 pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
 		 struct nfs_commit_info *cinfo)
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f9cc29590a18..8cc227fcd4d2 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -735,7 +735,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.pathconf	= nfs_proc_pathconf,
 	.decode_dirent	= nfs2_decode_dirent,
 	.read_setup	= nfs_proc_read_setup,
-	.read_pageio_init = nfs_pageio_init_read,
 	.read_rpc_prepare = nfs_proc_read_rpc_prepare,
 	.read_done	= nfs_read_done,
 	.write_setup	= nfs_proc_write_setup,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 411aedda14bb..7f87461be3a9 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -24,6 +24,7 @@
 #include "internal.h"
 #include "iostat.h"
 #include "fscache.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
@@ -114,11 +115,17 @@ int nfs_return_empty_page(struct page *page)
 }
 
 void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-			      struct inode *inode,
+			      struct inode *inode, bool force_mds,
 			      const struct nfs_pgio_completion_ops *compl_ops)
 {
-	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
-			NFS_SERVER(inode)->rsize, 0);
+	struct nfs_server *server = NFS_SERVER(inode);
+	const struct nfs_pageio_ops *pg_ops = &nfs_pageio_read_ops;
+
+#ifdef CONFIG_NFS_V4_1
+	if (server->pnfs_curr_ld && !force_mds)
+		pg_ops = server->pnfs_curr_ld->pg_read_ops;
+#endif
+	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->rsize, 0);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
 
@@ -147,7 +154,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	if (len < PAGE_CACHE_SIZE)
 		zero_user_segment(page, len, PAGE_CACHE_SIZE);
 
-	NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+	nfs_pageio_init_read(&pgio, inode, false,
+			     &nfs_async_read_completion_ops);
 	nfs_pageio_add_request(&pgio, new);
 	nfs_pageio_complete(&pgio);
 	NFS_I(inode)->read_io += pgio.pg_bytes_written;
@@ -654,7 +662,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	if (ret == 0)
 		goto read_complete; /* all pages were read */
 
-	NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+	nfs_pageio_init_read(&pgio, inode, false,
+			     &nfs_async_read_completion_ops);
 
 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 78216f859527..3e8fc1fe585b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1466,8 +1466,6 @@ struct nfs_rpc_ops {
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
 	void	(*read_setup)   (struct nfs_read_data *, struct rpc_message *);
-	void	(*read_pageio_init)(struct nfs_pageio_descriptor *, struct inode *,
-				    const struct nfs_pgio_completion_ops *);
 	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
 	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
-- 
cgit v1.2.3-71-gd317


From c8fe16e3f96a9bb95a10cedb19d2be2d2d580940 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 28 May 2014 14:57:02 -0600
Subject: PCI: Add support for PCIe-to-PCI bridge DMA alias quirks

Several PCIe-to-PCI bridges fail to provide a PCIe capability, causing us
to handle them as conventional PCI devices when they really use the
requester ID of the secondary bus.  We need to differentiate these from
PCIe-to-PCI bridges that actually use the conventional PCI ID when a PCIe
capability is not present, such as those found on the root complex of may
Intel chipsets.  Add a dev_flag bit to identify devices to be handled as
standard PCIe-to-PCI bridges.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/search.c | 10 ++++++++--
 include/linux/pci.h  |  2 ++
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 2c19f3f40621..df38f73f091f 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -88,8 +88,14 @@ int pci_for_each_dma_alias(struct pci_dev *pdev,
 				continue;
 			}
 		} else {
-			ret = fn(tmp, PCI_DEVID(tmp->bus->number, tmp->devfn),
-				 data);
+			if (tmp->dev_flags & PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS)
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->subordinate->number,
+						   PCI_DEVFN(0, 0)), data);
+			else
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->bus->number,
+						   tmp->devfn), data);
 			if (ret)
 				return ret;
 		}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9d4035c276f4..85ab35e974a9 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -173,6 +173,8 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3),
 	/* Flag to indicate the device uses dma_alias_devfn */
 	PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4),
+	/* Use a PCIe-to-PCI bridge alias even if !pci_is_pcie */
+	PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 5),
 };
 
 enum pci_irq_reroute_variant {
-- 
cgit v1.2.3-71-gd317


From 3c6b899c49e5e9c2803b59ee553eddaf69cea7f6 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:24 -0400
Subject: NFS: Create a common argument structure for reads and writes

Reads and writes have very similar arguments.  This patch combines them
together and documents the few fields used only by write.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs2xdr.c        |  8 ++++----
 fs/nfs/nfs3xdr.c        |  8 ++++----
 fs/nfs/nfs4proc.c       |  4 ++--
 fs/nfs/nfs4xdr.c        | 10 ++++++----
 fs/nfs/read.c           |  2 +-
 fs/nfs/write.c          |  2 +-
 include/linux/nfs_xdr.h | 47 +++++++++++++++++++----------------------------
 7 files changed, 37 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 62db136339ea..461cd8bd9401 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -613,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
  *	};
  */
 static void encode_readargs(struct xdr_stream *xdr,
-			    const struct nfs_readargs *args)
+			    const struct nfs_pgio_args *args)
 {
 	u32 offset = args->offset;
 	u32 count = args->count;
@@ -629,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nfs_readargs *args)
+				  const struct nfs_pgio_args *args)
 {
 	encode_readargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -649,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
  *	};
  */
 static void encode_writeargs(struct xdr_stream *xdr,
-			     const struct nfs_writeargs *args)
+			     const struct nfs_pgio_args *args)
 {
 	u32 offset = args->offset;
 	u32 count = args->count;
@@ -669,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_writeargs *args)
+				   const struct nfs_pgio_args *args)
 {
 	encode_writeargs(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index fa6d72131c19..02f16c212007 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -953,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_read3args(struct xdr_stream *xdr,
-			     const struct nfs_readargs *args)
+			     const struct nfs_pgio_args *args)
 {
 	__be32 *p;
 
@@ -966,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_readargs *args)
+				   const struct nfs_pgio_args *args)
 {
 	encode_read3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -992,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_write3args(struct xdr_stream *xdr,
-			      const struct nfs_writeargs *args)
+			      const struct nfs_pgio_args *args)
 {
 	__be32 *p;
 
@@ -1008,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_writeargs *args)
+				    const struct nfs_pgio_args *args)
 {
 	encode_write3args(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 21cd1f2ee35a..4794ca693367 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4055,7 +4055,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
 }
 
 static bool nfs4_read_stateid_changed(struct rpc_task *task,
-		struct nfs_readargs *args)
+		struct nfs_pgio_args *args)
 {
 
 	if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4121,7 +4121,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
 }
 
 static bool nfs4_write_stateid_changed(struct rpc_task *task,
-		struct nfs_writeargs *args)
+		struct nfs_pgio_args *args)
 {
 
 	if (!nfs4_error_stateid_expired(task->tk_status) ||
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 73ce8d4fe2c8..032159c36a57 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1556,7 +1556,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 	encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
 }
 
-static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+			struct compound_hdr *hdr)
 {
 	__be32 *p;
 
@@ -1701,7 +1702,8 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4
 	encode_nfs4_verifier(xdr, &arg->confirm);
 }
 
-static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+			 struct compound_hdr *hdr)
 {
 	__be32 *p;
 
@@ -2451,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a READ request
  */
 static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_readargs *args)
+			      struct nfs_pgio_args *args)
 {
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2513,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a WRITE request
  */
 static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_writeargs *args)
+			       struct nfs_pgio_args *args)
 {
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7f87461be3a9..46d555206023 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -470,7 +470,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 
 static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_readargs *argp = &data->args;
+	struct nfs_pgio_args *argp = &data->args;
 	struct nfs_readres *resp = &data->res;
 
 	/* This is a short read! */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ee6d46fde76c..25ba3830ec8b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1388,7 +1388,7 @@ static int nfs_should_remove_suid(const struct inode *inode)
  */
 void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_writeargs	*argp = &data->args;
+	struct nfs_pgio_args	*argp = &data->args;
 	struct nfs_writeres	*resp = &data->res;
 	struct inode		*inode = data->header->inode;
 	int status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 3e8fc1fe585b..5875001928f9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -491,18 +491,6 @@ struct nfs4_delegreturnres {
 /*
  * Arguments to the read call.
  */
-struct nfs_readargs {
-	struct nfs4_sequence_args	seq_args;
-	struct nfs_fh *		fh;
-	struct nfs_open_context *context;
-	struct nfs_lock_context *lock_context;
-	nfs4_stateid		stateid;
-	__u64			offset;
-	__u32			count;
-	unsigned int		pgbase;
-	struct page **		pages;
-};
-
 struct nfs_readres {
 	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *	fattr;
@@ -513,20 +501,6 @@ struct nfs_readres {
 /*
  * Arguments to the write call.
  */
-struct nfs_writeargs {
-	struct nfs4_sequence_args	seq_args;
-	struct nfs_fh *		fh;
-	struct nfs_open_context *context;
-	struct nfs_lock_context *lock_context;
-	nfs4_stateid		stateid;
-	__u64			offset;
-	__u32			count;
-	enum nfs3_stable_how	stable;
-	unsigned int		pgbase;
-	struct page **		pages;
-	const u32 *		bitmask;
-};
-
 struct nfs_write_verifier {
 	char			data[8];
 };
@@ -544,6 +518,23 @@ struct nfs_writeres {
 	const struct nfs_server *server;
 };
 
+/*
+ * Arguments shared by the read and write call.
+ */
+struct nfs_pgio_args {
+	struct nfs4_sequence_args	seq_args;
+	struct nfs_fh *		fh;
+	struct nfs_open_context *context;
+	struct nfs_lock_context *lock_context;
+	nfs4_stateid		stateid;
+	__u64			offset;
+	__u32			count;
+	unsigned int		pgbase;
+	struct page **		pages;
+	const u32 *		bitmask;	/* used by write */
+	enum nfs3_stable_how	stable;		/* used by write */
+};
+
 /*
  * Arguments to the commit call.
  */
@@ -1269,7 +1260,7 @@ struct nfs_read_data {
 	struct list_head	list;
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;	/* fattr storage */
-	struct nfs_readargs args;
+	struct nfs_pgio_args	args;
 	struct nfs_readres  res;
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
@@ -1321,7 +1312,7 @@ struct nfs_write_data {
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;
 	struct nfs_writeverf	verf;
-	struct nfs_writeargs	args;		/* argument struct */
+	struct nfs_pgio_args	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
-- 
cgit v1.2.3-71-gd317


From 9137bdf3d241fc2cbeb2a8ced51d1546150aa6a1 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:25 -0400
Subject: NFS: Create a common results structure for reads and writes

Reads and writes have very similar results.  This patch combines the two
structs together with comments to show where the differing fields are
used.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs2xdr.c        |  6 +++---
 fs/nfs/nfs3xdr.c        |  8 ++++----
 fs/nfs/nfs4xdr.c        |  9 +++++----
 fs/nfs/read.c           |  2 +-
 fs/nfs/write.c          |  2 +-
 include/linux/nfs_xdr.h | 32 ++++++++++++--------------------
 6 files changed, 26 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 461cd8bd9401..5f61b83f4a1c 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
 /*
  *	typedef opaque	nfsdata<>;
  */
-static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result)
+static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
 {
 	u32 recvd, count;
 	__be32 *p;
@@ -857,7 +857,7 @@ out_default:
  *	};
  */
 static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_readres *result)
+				struct nfs_pgio_res *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -878,7 +878,7 @@ out_default:
 }
 
 static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_writeres *result)
+				 struct nfs_pgio_res *result)
 {
 	/* All NFSv2 writes are "file sync" writes */
 	result->verf->committed = NFS_FILE_SYNC;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 02f16c212007..8f4cbe7f4aa8 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1589,7 +1589,7 @@ out_default:
  *	};
  */
 static int decode_read3resok(struct xdr_stream *xdr,
-			     struct nfs_readres *result)
+			     struct nfs_pgio_res *result)
 {
 	u32 eof, count, ocount, recvd;
 	__be32 *p;
@@ -1625,7 +1625,7 @@ out_overflow:
 }
 
 static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_readres *result)
+				 struct nfs_pgio_res *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1673,7 +1673,7 @@ out_status:
  *	};
  */
 static int decode_write3resok(struct xdr_stream *xdr,
-			      struct nfs_writeres *result)
+			      struct nfs_pgio_res *result)
 {
 	__be32 *p;
 
@@ -1697,7 +1697,7 @@ out_eio:
 }
 
 static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs_writeres *result)
+				  struct nfs_pgio_res *result)
 {
 	enum nfs_stat status;
 	int error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 032159c36a57..939ae606cfa4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5087,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_PUTROOTFH);
 }
 
-static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res)
+static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
+		       struct nfs_pgio_res *res)
 {
 	__be32 *p;
 	uint32_t count, eof, recvd;
@@ -5341,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
 }
 
-static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res)
 {
 	__be32 *p;
 	int status;
@@ -6638,7 +6639,7 @@ out:
  * Decode Read response
  */
 static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_readres *res)
+			     struct nfs_pgio_res *res)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -6663,7 +6664,7 @@ out:
  * Decode WRITE response
  */
 static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_writeres *res)
+			      struct nfs_pgio_res *res)
 {
 	struct compound_hdr hdr;
 	int status;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 46d555206023..473bba35a2cb 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -471,7 +471,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
 {
 	struct nfs_pgio_args *argp = &data->args;
-	struct nfs_readres *resp = &data->res;
+	struct nfs_pgio_res  *resp = &data->res;
 
 	/* This is a short read! */
 	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 25ba3830ec8b..d392a70092fe 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1389,7 +1389,7 @@ static int nfs_should_remove_suid(const struct inode *inode)
 void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct nfs_pgio_args	*argp = &data->args;
-	struct nfs_writeres	*resp = &data->res;
+	struct nfs_pgio_res	*resp = &data->res;
 	struct inode		*inode = data->header->inode;
 	int status;
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 5875001928f9..381f832b03c6 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -488,16 +488,6 @@ struct nfs4_delegreturnres {
 	const struct nfs_server *server;
 };
 
-/*
- * Arguments to the read call.
- */
-struct nfs_readres {
-	struct nfs4_sequence_res	seq_res;
-	struct nfs_fattr *	fattr;
-	__u32			count;
-	int                     eof;
-};
-
 /*
  * Arguments to the write call.
  */
@@ -510,14 +500,6 @@ struct nfs_writeverf {
 	enum nfs3_stable_how	committed;
 };
 
-struct nfs_writeres {
-	struct nfs4_sequence_res	seq_res;
-	struct nfs_fattr *	fattr;
-	struct nfs_writeverf *	verf;
-	__u32			count;
-	const struct nfs_server *server;
-};
-
 /*
  * Arguments shared by the read and write call.
  */
@@ -535,6 +517,16 @@ struct nfs_pgio_args {
 	enum nfs3_stable_how	stable;		/* used by write */
 };
 
+struct nfs_pgio_res {
+	struct nfs4_sequence_res	seq_res;
+	struct nfs_fattr *	fattr;
+	__u32			count;
+	int			eof;		/* used by read */
+	struct nfs_writeverf *	verf;		/* used by write */
+	const struct nfs_server *server;	/* used by write */
+
+};
+
 /*
  * Arguments to the commit call.
  */
@@ -1261,7 +1253,7 @@ struct nfs_read_data {
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;	/* fattr storage */
 	struct nfs_pgio_args	args;
-	struct nfs_readres  res;
+	struct nfs_pgio_res	res;
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
 	__u64			mds_offset;
@@ -1313,7 +1305,7 @@ struct nfs_write_data {
 	struct nfs_fattr	fattr;
 	struct nfs_writeverf	verf;
 	struct nfs_pgio_args	args;		/* argument struct */
-	struct nfs_writeres	res;		/* result struct */
+	struct nfs_pgio_res	res;		/* result struct */
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 	__u64			mds_offset;	/* Filelayout dense stripe */
-- 
cgit v1.2.3-71-gd317


From 9c7e1b3d50b56b8d8f6237ed232350b7c6476cd5 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:26 -0400
Subject: NFS: Create a common read and write data struct

At this point, the only difference between nfs_read_data and
nfs_write_data is the write verifier.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/blocklayout/blocklayout.c | 22 ++++++++++----------
 fs/nfs/internal.h                | 10 ++++-----
 fs/nfs/nfs3proc.c                | 12 +++++------
 fs/nfs/nfs4_fs.h                 |  4 ++--
 fs/nfs/nfs4filelayout.c          | 34 +++++++++++++++----------------
 fs/nfs/nfs4proc.c                | 30 +++++++++++++--------------
 fs/nfs/nfs4trace.h               |  8 ++++----
 fs/nfs/objlayout/objio_osd.c     |  6 +++---
 fs/nfs/objlayout/objlayout.c     | 16 +++++++--------
 fs/nfs/objlayout/objlayout.h     |  8 ++++----
 fs/nfs/pnfs.c                    | 26 ++++++++++++------------
 fs/nfs/pnfs.h                    | 10 ++++-----
 fs/nfs/proc.c                    | 12 +++++------
 fs/nfs/read.c                    | 32 ++++++++++++++---------------
 fs/nfs/write.c                   | 36 ++++++++++++++++----------------
 include/linux/nfs_fs.h           |  4 ++--
 include/linux/nfs_xdr.h          | 44 ++++++++++++++--------------------------
 17 files changed, 150 insertions(+), 164 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 65d849bdf77a..206cc68c9694 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,7 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
 			SetPageUptodate(bvec->bv_page);
 
 	if (err) {
-		struct nfs_read_data *rdata = par->data;
+		struct nfs_pgio_data *rdata = par->data;
 		struct nfs_pgio_header *header = rdata->header;
 
 		if (!header->pnfs_error)
@@ -224,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err)
 static void bl_read_cleanup(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_read_data *rdata;
+	struct nfs_pgio_data *rdata;
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	rdata = container_of(task, struct nfs_read_data, task);
+	rdata = container_of(task, struct nfs_pgio_data, task);
 	pnfs_ld_read_done(rdata);
 }
 
 static void
 bl_end_par_io_read(void *data, int unused)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	rdata->task.tk_status = rdata->header->pnfs_error;
 	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
@@ -242,7 +242,7 @@ bl_end_par_io_read(void *data, int unused)
 }
 
 static enum pnfs_try_status
-bl_read_pagelist(struct nfs_read_data *rdata)
+bl_read_pagelist(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *header = rdata->header;
 	int i, hole;
@@ -390,7 +390,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 	}
 
 	if (unlikely(err)) {
-		struct nfs_write_data *data = par->data;
+		struct nfs_pgio_data *data = par->data;
 		struct nfs_pgio_header *header = data->header;
 
 		if (!header->pnfs_error)
@@ -405,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
 {
 	struct parallel_io *par = bio->bi_private;
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct nfs_write_data *data = par->data;
+	struct nfs_pgio_data *data = par->data;
 	struct nfs_pgio_header *header = data->header;
 
 	if (!uptodate) {
@@ -423,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err)
 static void bl_write_cleanup(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_write_data *wdata;
+	struct nfs_pgio_data *wdata;
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	wdata = container_of(task, struct nfs_write_data, task);
+	wdata = container_of(task, struct nfs_pgio_data, task);
 	if (likely(!wdata->header->pnfs_error)) {
 		/* Marks for LAYOUTCOMMIT */
 		mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
@@ -438,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work)
 /* Called when last of bios associated with a bl_write_pagelist call finishes */
 static void bl_end_par_io_write(void *data, int num_se)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (unlikely(wdata->header->pnfs_error)) {
 		bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
@@ -673,7 +673,7 @@ check_page:
 }
 
 static enum pnfs_try_status
-bl_write_pagelist(struct nfs_write_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
 {
 	struct nfs_pgio_header *header = wdata->header;
 	int i, ret, npg_zero, pg_index, last = 0;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 98fe618db2aa..af01b80412dd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -401,13 +401,13 @@ extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
-			     struct nfs_read_data *data,
+			     struct nfs_pgio_data *data,
 			     const struct rpc_call_ops *call_ops, int flags);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
 			      struct nfs_pgio_header *hdr);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_readdata_release(struct nfs_read_data *rdata);
+extern void nfs_readdata_release(struct nfs_pgio_data *rdata);
 
 /* super.c */
 void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
@@ -429,10 +429,10 @@ extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 			     struct nfs_pgio_header *hdr);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_writedata_release(struct nfs_write_data *wdata);
+extern void nfs_writedata_release(struct nfs_pgio_data *wdata);
 extern void nfs_commit_free(struct nfs_commit_data *p);
 extern int nfs_initiate_write(struct rpc_clnt *clnt,
-			      struct nfs_write_data *data,
+			      struct nfs_pgio_data *data,
 			      const struct rpc_call_ops *call_ops,
 			      int how, int flags);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
@@ -492,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
 extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
 
 /* nfs4proc.c */
-extern void __nfs4_read_done_cb(struct nfs_read_data *);
+extern void __nfs4_read_done_cb(struct nfs_pgio_data *);
 extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 			    const struct rpc_timeout *timeparms,
 			    const char *ip_addr);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d873241a9b3a..d235369c3dfb 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -795,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	return status;
 }
 
-static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -807,18 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
 	return 0;
 }
 
-static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
 }
 
-static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
 }
 
-static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -829,12 +829,12 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
-static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
 }
 
-static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e1d1badbe53c..f63cb87cd730 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -337,7 +337,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
  */
 static inline void
 nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
-			 struct rpc_message *msg, struct nfs_write_data *wdata)
+			 struct rpc_message *msg, struct nfs_pgio_data *wdata)
 {
 	if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
 	    !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
@@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
 
 static inline void
 nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
-			 struct rpc_message *msg, struct nfs_write_data *wdata)
+			 struct rpc_message *msg, struct nfs_pgio_data *wdata)
 {
 }
 #endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index b9a35c05b60f..e6936147ad95 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -84,7 +84,7 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
 	BUG();
 }
 
-static void filelayout_reset_write(struct nfs_write_data *data)
+static void filelayout_reset_write(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct rpc_task *task = &data->task;
@@ -105,7 +105,7 @@ static void filelayout_reset_write(struct nfs_write_data *data)
 	}
 }
 
-static void filelayout_reset_read(struct nfs_read_data *data)
+static void filelayout_reset_read(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct rpc_task *task = &data->task;
@@ -243,7 +243,7 @@ wait_on_recovery:
 /* NFS_PROTO call done callback routines */
 
 static int filelayout_read_done_cb(struct rpc_task *task,
-				struct nfs_read_data *data)
+				struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	int err;
@@ -270,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
  * rfc5661 is not clear about which credential should be used.
  */
 static void
-filelayout_set_layoutcommit(struct nfs_write_data *wdata)
+filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 
@@ -305,7 +305,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
  */
 static void filelayout_read_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
 		rpc_exit(task, -EIO);
@@ -317,7 +317,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 		rpc_exit(task, 0);
 		return;
 	}
-	rdata->read_done_cb = filelayout_read_done_cb;
+	rdata->pgio_done_cb = filelayout_read_done_cb;
 
 	if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
 			&rdata->args.seq_args,
@@ -331,7 +331,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_read_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
@@ -347,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
 
 static void filelayout_read_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_read_release(void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 	struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
 
 	filelayout_fenceme(lo->plh_inode, lo);
@@ -363,7 +363,7 @@ static void filelayout_read_release(void *data)
 }
 
 static int filelayout_write_done_cb(struct rpc_task *task,
-				struct nfs_write_data *data)
+				struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	int err;
@@ -419,7 +419,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
 
 static void filelayout_write_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
 		rpc_exit(task, -EIO);
@@ -443,7 +443,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_write_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
 	    task->tk_status == 0) {
@@ -457,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
 
 static void filelayout_write_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_write_release(void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 	struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
 
 	filelayout_fenceme(lo->plh_inode, lo);
@@ -529,7 +529,7 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
 };
 
 static enum pnfs_try_status
-filelayout_read_pagelist(struct nfs_read_data *data)
+filelayout_read_pagelist(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -575,7 +575,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 
 /* Perform async writes. */
 static enum pnfs_try_status
-filelayout_write_pagelist(struct nfs_write_data *data, int sync)
+filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -600,7 +600,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 		__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
 		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
 
-	data->write_done_cb = filelayout_write_done_cb;
+	data->pgio_done_cb = filelayout_write_done_cb;
 	atomic_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4794ca693367..e793aa91454a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4033,12 +4033,12 @@ static bool nfs4_error_stateid_expired(int err)
 	return false;
 }
 
-void __nfs4_read_done_cb(struct nfs_read_data *data)
+void __nfs4_read_done_cb(struct nfs_pgio_data *data)
 {
 	nfs_invalidate_atime(data->header->inode);
 }
 
-static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
@@ -4068,7 +4068,7 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
 	return true;
 }
 
-static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 
 	dprintk("--> %s\n", __func__);
@@ -4077,19 +4077,19 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
 		return -EAGAIN;
 	if (nfs4_read_stateid_changed(task, &data->args))
 		return -EAGAIN;
-	return data->read_done_cb ? data->read_done_cb(task, data) :
+	return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
 				    nfs4_read_done_cb(task, data);
 }
 
-static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	data->timestamp   = jiffies;
-	data->read_done_cb = nfs4_read_done_cb;
+	data->pgio_done_cb = nfs4_read_done_cb;
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
 	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
 }
 
-static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 			&data->args.seq_args,
@@ -4104,7 +4104,7 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
 	return 0;
 }
 
-static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 	
@@ -4134,18 +4134,18 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
 	return true;
 }
 
-static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	if (!nfs4_sequence_done(task, &data->res.seq_res))
 		return -EAGAIN;
 	if (nfs4_write_stateid_changed(task, &data->args))
 		return -EAGAIN;
-	return data->write_done_cb ? data->write_done_cb(task, data) :
+	return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
 		nfs4_write_done_cb(task, data);
 }
 
 static
-bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
+bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
 {
 	const struct nfs_pgio_header *hdr = data->header;
 
@@ -4158,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
 	return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
 }
 
-static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
@@ -4168,8 +4168,8 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 	} else
 		data->args.bitmask = server->cache_consistency_bitmask;
 
-	if (!data->write_done_cb)
-		data->write_done_cb = nfs4_write_done_cb;
+	if (!data->pgio_done_cb)
+		data->pgio_done_cb = nfs4_write_done_cb;
 	data->res.server = server;
 	data->timestamp   = jiffies;
 
@@ -4177,7 +4177,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 }
 
-static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 			&data->args.seq_args,
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 849cf146db30..0a744f3a86f6 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,7 +932,7 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
 
 DECLARE_EVENT_CLASS(nfs4_read_event,
 		TP_PROTO(
-			const struct nfs_read_data *data,
+			const struct nfs_pgio_data *data,
 			int error
 		),
 
@@ -972,7 +972,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
 #define DEFINE_NFS4_READ_EVENT(name) \
 	DEFINE_EVENT(nfs4_read_event, name, \
 			TP_PROTO( \
-				const struct nfs_read_data *data, \
+				const struct nfs_pgio_data *data, \
 				int error \
 			), \
 			TP_ARGS(data, error))
@@ -983,7 +983,7 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
 
 DECLARE_EVENT_CLASS(nfs4_write_event,
 		TP_PROTO(
-			const struct nfs_write_data *data,
+			const struct nfs_pgio_data *data,
 			int error
 		),
 
@@ -1024,7 +1024,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
 #define DEFINE_NFS4_WRITE_EVENT(name) \
 	DEFINE_EVENT(nfs4_write_event, name, \
 			TP_PROTO( \
-				const struct nfs_write_data *data, \
+				const struct nfs_pgio_data *data, \
 				int error \
 			), \
 			TP_ARGS(data, error))
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 5457745dd4f1..426b366b0b33 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private)
 	objlayout_read_done(&objios->oir, status, objios->sync);
 }
 
-int objio_read_pagelist(struct nfs_read_data *rdata)
+int objio_read_pagelist(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *hdr = rdata->header;
 	struct objio_state *objios;
@@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private)
 static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 {
 	struct objio_state *objios = priv;
-	struct nfs_write_data *wdata = objios->oir.rpcdata;
+	struct nfs_pgio_data *wdata = objios->oir.rpcdata;
 	struct address_space *mapping = wdata->header->inode->i_mapping;
 	pgoff_t index = offset / PAGE_SIZE;
 	struct page *page;
@@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = {
 	.put_page = &__r4w_put_page,
 };
 
-int objio_write_pagelist(struct nfs_write_data *wdata, int how)
+int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 	struct objio_state *objios;
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index e4f9cbfec67b..2f955f671003 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
 static void _rpc_read_complete(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_read_data *rdata;
+	struct nfs_pgio_data *rdata;
 
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	rdata = container_of(task, struct nfs_read_data, task);
+	rdata = container_of(task, struct nfs_pgio_data, task);
 
 	pnfs_ld_read_done(rdata);
 }
@@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work)
 void
 objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 {
-	struct nfs_read_data *rdata = oir->rpcdata;
+	struct nfs_pgio_data *rdata = oir->rpcdata;
 
 	oir->status = rdata->task.tk_status = status;
 	if (status >= 0)
@@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
  * Perform sync or async reads.
  */
 enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_read_data *rdata)
+objlayout_read_pagelist(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *hdr = rdata->header;
 	struct inode *inode = hdr->inode;
@@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
 static void _rpc_write_complete(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_write_data *wdata;
+	struct nfs_pgio_data *wdata;
 
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	wdata = container_of(task, struct nfs_write_data, task);
+	wdata = container_of(task, struct nfs_pgio_data, task);
 
 	pnfs_ld_write_done(wdata);
 }
@@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work)
 void
 objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 {
-	struct nfs_write_data *wdata = oir->rpcdata;
+	struct nfs_pgio_data *wdata = oir->rpcdata;
 
 	oir->status = wdata->task.tk_status = status;
 	if (status >= 0) {
@@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
  * Perform sync or async writes.
  */
 enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_write_data *wdata,
+objlayout_write_pagelist(struct nfs_pgio_data *wdata,
 			 int how)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 87aa1dec6120..01e041029a6c 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
  */
 extern void objio_free_result(struct objlayout_io_res *oir);
 
-extern int objio_read_pagelist(struct nfs_read_data *rdata);
-extern int objio_write_pagelist(struct nfs_write_data *wdata, int how);
+extern int objio_read_pagelist(struct nfs_pgio_data *rdata);
+extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how);
 
 /*
  * callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
 extern void objlayout_free_lseg(struct pnfs_layout_segment *);
 
 extern enum pnfs_try_status objlayout_read_pagelist(
-	struct nfs_read_data *);
+	struct nfs_pgio_data *);
 
 extern enum pnfs_try_status objlayout_write_pagelist(
-	struct nfs_write_data *,
+	struct nfs_pgio_data *,
 	int how);
 
 extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3d5bc2baafd1..e9cea3ab7cf9 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1492,7 +1492,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
 }
 EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
 
-static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
+static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1511,7 +1511,7 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
 /*
  * Called by non rpc-based layout drivers
  */
-void pnfs_ld_write_done(struct nfs_write_data *data)
+void pnfs_ld_write_done(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1527,7 +1527,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
 static void
 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
-		struct nfs_write_data *data)
+		struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1540,7 +1540,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
 }
 
 static enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *wdata,
+pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
 			const struct rpc_call_ops *call_ops,
 			struct pnfs_layout_segment *lseg,
 			int how)
@@ -1564,7 +1564,7 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
 static void
 pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
 {
-	struct nfs_write_data *data;
+	struct nfs_pgio_data *data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
 
@@ -1572,7 +1572,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
 	while (!list_empty(head)) {
 		enum pnfs_try_status trypnfs;
 
-		data = list_first_entry(head, struct nfs_write_data, list);
+		data = list_first_entry(head, struct nfs_pgio_data, list);
 		list_del_init(&data->list);
 
 		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1647,7 +1647,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
 }
 EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
 
-static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1666,7 +1666,7 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
 /*
  * Called by non rpc-based layout drivers
  */
-void pnfs_ld_read_done(struct nfs_read_data *data)
+void pnfs_ld_read_done(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1682,7 +1682,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
 
 static void
 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
-		struct nfs_read_data *data)
+		struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1698,7 +1698,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
  * Call the appropriate parallel I/O subsystem read function.
  */
 static enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *rdata,
+pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
 		       const struct rpc_call_ops *call_ops,
 		       struct pnfs_layout_segment *lseg)
 {
@@ -1722,7 +1722,7 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
 static void
 pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
 {
-	struct nfs_read_data *data;
+	struct nfs_pgio_data *data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
 
@@ -1730,7 +1730,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
 	while (!list_empty(head)) {
 		enum pnfs_try_status trypnfs;
 
-		data = list_first_entry(head, struct nfs_read_data, list);
+		data = list_first_entry(head, struct nfs_pgio_data, list);
 		list_del_init(&data->list);
 
 		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1821,7 +1821,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
 EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
 
 void
-pnfs_set_layoutcommit(struct nfs_write_data *wdata)
+pnfs_set_layoutcommit(struct nfs_pgio_data *wdata)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 	struct inode *inode = hdr->inode;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94a9a1834b3f..0031267d7492 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type {
 	 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
 	 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
 	 */
-	enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
-	enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
+	enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data);
+	enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how);
 
 	void (*free_deviceid_node) (struct nfs4_deviceid_node *);
 
@@ -212,13 +212,13 @@ bool pnfs_roc(struct inode *ino);
 void pnfs_roc_release(struct inode *ino);
 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
 bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
-void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
+void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
 int pnfs_commit_and_return_layout(struct inode *);
-void pnfs_ld_write_done(struct nfs_write_data *);
-void pnfs_ld_read_done(struct nfs_read_data *);
+void pnfs_ld_write_done(struct nfs_pgio_data *);
+void pnfs_ld_read_done(struct nfs_pgio_data *);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
 					       struct nfs_open_context *ctx,
 					       loff_t pos,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 8cc227fcd4d2..c54829eb2156 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -578,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	return 0;
 }
 
-static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -594,18 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 	return 0;
 }
 
-static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
 }
 
-static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
 }
 
-static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -614,14 +614,14 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
-static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
 	data->args.stable = NFS_FILE_SYNC;
 	msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
 }
 
-static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 473bba35a2cb..9e426cc179ed 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -51,10 +51,10 @@ struct nfs_read_header *nfs_readhdr_alloc(void)
 }
 EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
 
-static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+static struct nfs_pgio_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
 						unsigned int pagecount)
 {
-	struct nfs_read_data *data, *prealloc;
+	struct nfs_pgio_data *data, *prealloc;
 
 	prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
 	if (prealloc->header == NULL)
@@ -84,7 +84,7 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)
 }
 EXPORT_SYMBOL_GPL(nfs_readhdr_free);
 
-void nfs_readdata_release(struct nfs_read_data *rdata)
+void nfs_readdata_release(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *hdr = rdata->header;
 	struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
@@ -212,7 +212,7 @@ out:
 }
 
 int nfs_initiate_read(struct rpc_clnt *clnt,
-		      struct nfs_read_data *data,
+		      struct nfs_pgio_data *data,
 		      const struct rpc_call_ops *call_ops, int flags)
 {
 	struct inode *inode = data->header->inode;
@@ -255,7 +255,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
 /*
  * Set up the NFS read request struct
  */
-static void nfs_read_rpcsetup(struct nfs_read_data *data,
+static void nfs_read_rpcsetup(struct nfs_pgio_data *data,
 		unsigned int count, unsigned int offset)
 {
 	struct nfs_page *req = data->header->req;
@@ -274,7 +274,7 @@ static void nfs_read_rpcsetup(struct nfs_read_data *data,
 	nfs_fattr_init(&data->fattr);
 }
 
-static int nfs_do_read(struct nfs_read_data *data,
+static int nfs_do_read(struct nfs_pgio_data *data,
 		const struct rpc_call_ops *call_ops)
 {
 	struct inode *inode = data->header->inode;
@@ -286,13 +286,13 @@ static int
 nfs_do_multiple_reads(struct list_head *head,
 		const struct rpc_call_ops *call_ops)
 {
-	struct nfs_read_data *data;
+	struct nfs_pgio_data *data;
 	int ret = 0;
 
 	while (!list_empty(head)) {
 		int ret2;
 
-		data = list_first_entry(head, struct nfs_read_data, list);
+		data = list_first_entry(head, struct nfs_pgio_data, list);
 		list_del_init(&data->list);
 
 		ret2 = nfs_do_read(data, call_ops);
@@ -324,8 +324,8 @@ static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
 {
 	set_bit(NFS_IOHDR_REDO, &hdr->flags);
 	while (!list_empty(&hdr->rpc_list)) {
-		struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
-				struct nfs_read_data, list);
+		struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list,
+				struct nfs_pgio_data, list);
 		list_del(&data->list);
 		nfs_readdata_release(data);
 	}
@@ -350,7 +350,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_page *req = hdr->req;
 	struct page *page = req->wb_page;
-	struct nfs_read_data *data;
+	struct nfs_pgio_data *data;
 	size_t rsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
 
@@ -382,7 +382,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_page		*req;
 	struct page		**pages;
-	struct nfs_read_data    *data;
+	struct nfs_pgio_data	*data;
 	struct list_head *head = &desc->pg_list;
 
 	data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
@@ -447,7 +447,7 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 	int status;
@@ -468,7 +468,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 	return 0;
 }
 
-static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_args *argp = &data->args;
 	struct nfs_pgio_res  *resp = &data->res;
@@ -490,7 +490,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
 
 static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = calldata;
+	struct nfs_pgio_data *data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 
 	/* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
@@ -520,7 +520,7 @@ static void nfs_readpage_release_common(void *calldata)
 
 void nfs_read_prepare(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = calldata;
+	struct nfs_pgio_data *data = calldata;
 	int err;
 	err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
 	if (err)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d392a70092fe..3a2fc5c4c79a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -88,10 +88,10 @@ struct nfs_write_header *nfs_writehdr_alloc(void)
 }
 EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
 
-static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
+static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
 						  unsigned int pagecount)
 {
-	struct nfs_write_data *data, *prealloc;
+	struct nfs_pgio_data *data, *prealloc;
 
 	prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
 	if (prealloc->header == NULL)
@@ -120,7 +120,7 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 }
 EXPORT_SYMBOL_GPL(nfs_writehdr_free);
 
-void nfs_writedata_release(struct nfs_write_data *wdata)
+void nfs_writedata_release(struct nfs_pgio_data *wdata)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 	struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
@@ -582,7 +582,7 @@ nfs_clear_request_commit(struct nfs_page *req)
 }
 
 static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
 {
 	if (data->verf.committed == NFS_DATA_SYNC)
 		return data->header->lseg == NULL;
@@ -613,7 +613,7 @@ nfs_clear_request_commit(struct nfs_page *req)
 }
 
 static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
 {
 	return 0;
 }
@@ -990,7 +990,7 @@ static int flush_task_priority(int how)
 }
 
 int nfs_initiate_write(struct rpc_clnt *clnt,
-		       struct nfs_write_data *data,
+		       struct nfs_pgio_data *data,
 		       const struct rpc_call_ops *call_ops,
 		       int how, int flags)
 {
@@ -1047,7 +1047,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-static void nfs_write_rpcsetup(struct nfs_write_data *data,
+static void nfs_write_rpcsetup(struct nfs_pgio_data *data,
 		unsigned int count, unsigned int offset,
 		int how, struct nfs_commit_info *cinfo)
 {
@@ -1082,7 +1082,7 @@ static void nfs_write_rpcsetup(struct nfs_write_data *data,
 	nfs_fattr_init(&data->fattr);
 }
 
-static int nfs_do_write(struct nfs_write_data *data,
+static int nfs_do_write(struct nfs_pgio_data *data,
 		const struct rpc_call_ops *call_ops,
 		int how)
 {
@@ -1095,13 +1095,13 @@ static int nfs_do_multiple_writes(struct list_head *head,
 		const struct rpc_call_ops *call_ops,
 		int how)
 {
-	struct nfs_write_data *data;
+	struct nfs_pgio_data *data;
 	int ret = 0;
 
 	while (!list_empty(head)) {
 		int ret2;
 
-		data = list_first_entry(head, struct nfs_write_data, list);
+		data = list_first_entry(head, struct nfs_pgio_data, list);
 		list_del_init(&data->list);
 		
 		ret2 = nfs_do_write(data, call_ops, how);
@@ -1144,8 +1144,8 @@ static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
 {
 	set_bit(NFS_IOHDR_REDO, &hdr->flags);
 	while (!list_empty(&hdr->rpc_list)) {
-		struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
-				struct nfs_write_data, list);
+		struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list,
+				struct nfs_pgio_data, list);
 		list_del(&data->list);
 		nfs_writedata_release(data);
 	}
@@ -1161,7 +1161,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_page *req = hdr->req;
 	struct page *page = req->wb_page;
-	struct nfs_write_data *data;
+	struct nfs_pgio_data *data;
 	size_t wsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
 	int requests = 0;
@@ -1211,7 +1211,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_page		*req;
 	struct page		**pages;
-	struct nfs_write_data	*data;
+	struct nfs_pgio_data	*data;
 	struct list_head *head = &desc->pg_list;
 	struct nfs_commit_info cinfo;
 
@@ -1305,7 +1305,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
 
 void nfs_write_prepare(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data = calldata;
+	struct nfs_pgio_data *data = calldata;
 	int err;
 	err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
 	if (err)
@@ -1328,14 +1328,14 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
  */
 static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data	*data = calldata;
+	struct nfs_pgio_data	*data = calldata;
 
 	nfs_writeback_done(task, data);
 }
 
 static void nfs_writeback_release_common(void *calldata)
 {
-	struct nfs_write_data	*data = calldata;
+	struct nfs_pgio_data	*data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 	int status = data->task.tk_status;
 
@@ -1386,7 +1386,7 @@ static int nfs_should_remove_suid(const struct inode *inode)
 /*
  * This function is called when the WRITE call is complete.
  */
-void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
+void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_args	*argp = &data->args;
 	struct nfs_pgio_res	*resp = &data->res;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fa6918b0f829..7e0db561d829 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -520,7 +520,7 @@ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
 extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
 extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
-extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+extern void nfs_writeback_done(struct rpc_task *, struct nfs_pgio_data *);
 
 /*
  * Try to write back everything synchronously (but check the
@@ -553,7 +553,7 @@ nfs_have_writebacks(struct inode *inode)
 extern int  nfs_readpage(struct file *, struct page *);
 extern int  nfs_readpages(struct file *, struct address_space *,
 		struct list_head *, unsigned);
-extern int  nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
+extern int  nfs_readpage_result(struct rpc_task *, struct nfs_pgio_data *);
 extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
 			       struct page *);
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 381f832b03c6..64f6f1491c0d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1247,20 +1247,6 @@ struct nfs_page_array {
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
 };
 
-struct nfs_read_data {
-	struct nfs_pgio_header	*header;
-	struct list_head	list;
-	struct rpc_task		task;
-	struct nfs_fattr	fattr;	/* fattr storage */
-	struct nfs_pgio_args	args;
-	struct nfs_pgio_res	res;
-	unsigned long		timestamp;	/* For lease renewal */
-	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
-	__u64			mds_offset;
-	struct nfs_page_array	pages;
-	struct nfs_client	*ds_clp;	/* pNFS data server */
-};
-
 /* used as flag bits in nfs_pgio_header */
 enum {
 	NFS_IOHDR_ERROR = 0,
@@ -1293,29 +1279,29 @@ struct nfs_pgio_header {
 	unsigned long		flags;
 };
 
-struct nfs_read_header {
-	struct nfs_pgio_header	header;
-	struct nfs_read_data	rpc_data;
-};
-
-struct nfs_write_data {
+struct nfs_pgio_data {
 	struct nfs_pgio_header	*header;
 	struct list_head	list;
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;
-	struct nfs_writeverf	verf;
+	struct nfs_writeverf	verf;		/* Used for writes */
 	struct nfs_pgio_args	args;		/* argument struct */
 	struct nfs_pgio_res	res;		/* result struct */
 	unsigned long		timestamp;	/* For lease renewal */
-	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
+	int (*pgio_done_cb) (struct rpc_task *task, struct nfs_pgio_data *data);
 	__u64			mds_offset;	/* Filelayout dense stripe */
 	struct nfs_page_array	pages;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 };
 
+struct nfs_read_header {
+	struct nfs_pgio_header	header;
+	struct nfs_pgio_data	rpc_data;
+};
+
 struct nfs_write_header {
 	struct nfs_pgio_header	header;
-	struct nfs_write_data	rpc_data;
+	struct nfs_pgio_data	rpc_data;
 	struct nfs_writeverf	verf;
 };
 
@@ -1448,12 +1434,12 @@ struct nfs_rpc_ops {
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
-	void	(*read_setup)   (struct nfs_read_data *, struct rpc_message *);
-	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *);
-	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
-	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
-	int	(*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
-	int	(*write_done)  (struct rpc_task *, struct nfs_write_data *);
+	void	(*read_setup)   (struct nfs_pgio_data *, struct rpc_message *);
+	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
+	int	(*read_done)  (struct rpc_task *, struct nfs_pgio_data *);
+	void	(*write_setup)  (struct nfs_pgio_data *, struct rpc_message *);
+	int	(*write_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
+	int	(*write_done)  (struct rpc_task *, struct nfs_pgio_data *);
 	void	(*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
 	void	(*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
 	int	(*commit_done) (struct rpc_task *, struct nfs_commit_data *);
-- 
cgit v1.2.3-71-gd317


From c0752cdfbbb691cfe98812f7aed8ce1e766823c4 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:27 -0400
Subject: NFS: Create a common read and write header struct

The only difference is the write verifier field, but we can keep that
for a little bit longer.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h       |  4 ++--
 fs/nfs/pnfs.c           |  4 ++--
 fs/nfs/read.c           | 14 +++++++-------
 fs/nfs/write.c          | 14 +++++++-------
 include/linux/nfs_xdr.h |  7 +------
 5 files changed, 19 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index af01b80412dd..b0e7a41d14a8 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -395,7 +395,7 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool
 
 struct nfs_pgio_completion_ops;
 /* read.c */
-extern struct nfs_read_header *nfs_readhdr_alloc(void);
+extern struct nfs_rw_header *nfs_readhdr_alloc(void);
 extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, bool force_mds,
@@ -424,7 +424,7 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, int ioflags, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
-extern struct nfs_write_header *nfs_writehdr_alloc(void);
+extern struct nfs_rw_header *nfs_writehdr_alloc(void);
 extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 			     struct nfs_pgio_header *hdr);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e9cea3ab7cf9..43cfe11aa1a4 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1592,7 +1592,7 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
 int
 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_write_header *whdr;
+	struct nfs_rw_header *whdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
@@ -1750,7 +1750,7 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
 int
 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_read_header *rhdr;
+	struct nfs_rw_header *rhdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 9e426cc179ed..d29ca3673694 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -34,9 +34,9 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
-struct nfs_read_header *nfs_readhdr_alloc(void)
+struct nfs_rw_header *nfs_readhdr_alloc(void)
 {
-	struct nfs_read_header *rhdr;
+	struct nfs_rw_header *rhdr;
 
 	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
 	if (rhdr) {
@@ -56,7 +56,7 @@ static struct nfs_pgio_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
 {
 	struct nfs_pgio_data *data, *prealloc;
 
-	prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
+	prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data;
 	if (prealloc->header == NULL)
 		data = prealloc;
 	else
@@ -78,7 +78,7 @@ out:
 
 void nfs_readhdr_free(struct nfs_pgio_header *hdr)
 {
-	struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
+	struct nfs_rw_header *rhdr = container_of(hdr, struct nfs_rw_header, header);
 
 	kmem_cache_free(nfs_rdata_cachep, rhdr);
 }
@@ -87,7 +87,7 @@ EXPORT_SYMBOL_GPL(nfs_readhdr_free);
 void nfs_readdata_release(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *hdr = rdata->header;
-	struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
+	struct nfs_rw_header *read_header = container_of(hdr, struct nfs_rw_header, header);
 
 	put_nfs_open_context(rdata->args.context);
 	if (rdata->pages.pagevec != rdata->pages.page_array)
@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pagein);
 
 static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_read_header *rhdr;
+	struct nfs_rw_header *rhdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
@@ -680,7 +680,7 @@ out:
 int __init nfs_init_readpagecache(void)
 {
 	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
-					     sizeof(struct nfs_read_header),
+					     sizeof(struct nfs_rw_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3a2fc5c4c79a..37c4c988519c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -70,9 +70,9 @@ void nfs_commit_free(struct nfs_commit_data *p)
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
 
-struct nfs_write_header *nfs_writehdr_alloc(void)
+struct nfs_rw_header *nfs_writehdr_alloc(void)
 {
-	struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
+	struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
 
 	if (p) {
 		struct nfs_pgio_header *hdr = &p->header;
@@ -93,7 +93,7 @@ static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
 {
 	struct nfs_pgio_data *data, *prealloc;
 
-	prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
+	prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data;
 	if (prealloc->header == NULL)
 		data = prealloc;
 	else
@@ -115,7 +115,7 @@ out:
 
 void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 {
-	struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+	struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header);
 	mempool_free(whdr, nfs_wdata_mempool);
 }
 EXPORT_SYMBOL_GPL(nfs_writehdr_free);
@@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(nfs_writehdr_free);
 void nfs_writedata_release(struct nfs_pgio_data *wdata)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
-	struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
+	struct nfs_rw_header *write_header = container_of(hdr, struct nfs_rw_header, header);
 
 	put_nfs_open_context(wdata->args.context);
 	if (wdata->pages.pagevec != wdata->pages.page_array)
@@ -1253,7 +1253,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_flush);
 
 static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_write_header *whdr;
+	struct nfs_rw_header *whdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
@@ -1910,7 +1910,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 int __init nfs_init_writepagecache(void)
 {
 	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
-					     sizeof(struct nfs_write_header),
+					     sizeof(struct nfs_rw_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_wdata_cachep == NULL)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 64f6f1491c0d..2d34e0dc1870 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1294,12 +1294,7 @@ struct nfs_pgio_data {
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 };
 
-struct nfs_read_header {
-	struct nfs_pgio_header	header;
-	struct nfs_pgio_data	rpc_data;
-};
-
-struct nfs_write_header {
+struct nfs_rw_header {
 	struct nfs_pgio_header	header;
 	struct nfs_pgio_data	rpc_data;
 	struct nfs_writeverf	verf;
-- 
cgit v1.2.3-71-gd317


From f79d06f544a797d75cbf5256a5d06c4b3d2759cc Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:28 -0400
Subject: NFS: Move the write verifier into the nfs_pgio_header

The header had a pointer to the verifier that was set from the old write
data struct.  We don't need to keep the pointer around now that we have
shared structures.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c         | 4 ++--
 fs/nfs/write.c          | 7 +++----
 include/linux/nfs_xdr.h | 3 +--
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bbe688e2cc89..164b0167677b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -813,12 +813,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
 				bit = NFS_IOHDR_NEED_RESCHED;
 			else if (dreq->flags == 0) {
-				memcpy(&dreq->verf, hdr->verf,
+				memcpy(&dreq->verf, &hdr->verf,
 				       sizeof(dreq->verf));
 				bit = NFS_IOHDR_NEED_COMMIT;
 				dreq->flags = NFS_ODIRECT_DO_COMMIT;
 			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
-				if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
+				if (memcmp(&dreq->verf, &hdr->verf, sizeof(dreq->verf))) {
 					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 					bit = NFS_IOHDR_NEED_RESCHED;
 				} else
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 37c4c988519c..321a791c72bf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -82,7 +82,6 @@ struct nfs_rw_header *nfs_writehdr_alloc(void)
 		INIT_LIST_HEAD(&hdr->rpc_list);
 		spin_lock_init(&hdr->lock);
 		atomic_set(&hdr->refcnt, 0);
-		hdr->verf = &p->verf;
 	}
 	return p;
 }
@@ -644,7 +643,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 			goto next;
 		}
 		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
-			memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf));
+			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
 			goto next;
 		}
@@ -1344,8 +1343,8 @@ static void nfs_writeback_release_common(void *calldata)
 		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
 			; /* Do nothing */
 		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
-			memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
-		else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
+			memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
+		else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
 			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
 		spin_unlock(&hdr->lock);
 	}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2d34e0dc1870..965c2aa6b33f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1263,7 +1263,7 @@ struct nfs_pgio_header {
 	struct list_head	rpc_list;
 	atomic_t		refcnt;
 	struct nfs_page		*req;
-	struct nfs_writeverf	*verf;
+	struct nfs_writeverf	verf;		/* Used for writes */
 	struct pnfs_layout_segment *lseg;
 	loff_t			io_start;
 	const struct rpc_call_ops *mds_ops;
@@ -1297,7 +1297,6 @@ struct nfs_pgio_data {
 struct nfs_rw_header {
 	struct nfs_pgio_header	header;
 	struct nfs_pgio_data	rpc_data;
-	struct nfs_writeverf	verf;
 };
 
 struct nfs_mds_commit_info {
-- 
cgit v1.2.3-71-gd317


From 4a0de55c565a36cac8422b76a948c4634a90781e Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:30 -0400
Subject: NFS: Create a common rw_header_alloc and rw_header_free function

I create a new struct nfs_rw_ops to decide the differences between reads
and writes.  This struct will be set when initializing a new
nfs_pgio_descriptor, and then passed on to the nfs_rw_header when a new
header is allocated.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h        |  6 ++----
 fs/nfs/pagelist.c        | 33 +++++++++++++++++++++++++++++++++
 fs/nfs/pnfs.c            |  8 ++++----
 fs/nfs/read.c            | 34 +++++++++++++---------------------
 fs/nfs/write.c           | 28 ++++++++++++----------------
 include/linux/nfs_page.h |  7 +++++++
 include/linux/nfs_xdr.h  |  1 +
 7 files changed, 72 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5ddc142c5062..9d6a40eae11c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -237,6 +237,8 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
 int nfs_iocounter_wait(struct nfs_io_counter *c);
 
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
+void nfs_rw_header_free(struct nfs_pgio_header *);
 struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int);
 void nfs_pgio_data_release(struct nfs_pgio_data *);
 
@@ -397,8 +399,6 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool
 
 struct nfs_pgio_completion_ops;
 /* read.c */
-extern struct nfs_rw_header *nfs_readhdr_alloc(void);
-extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
@@ -425,8 +425,6 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, int ioflags, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
-extern struct nfs_rw_header *nfs_writehdr_alloc(void);
-extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 			     struct nfs_pgio_header *hdr);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a98ccf722d7b..ca356fe0836b 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -300,6 +300,37 @@ static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
 	return container_of(hdr, struct nfs_rw_header, header);
 }
 
+/**
+ * nfs_rw_header_alloc - Allocate a header for a read or write
+ * @ops: Read or write function vector
+ */
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
+{
+	struct nfs_rw_header *header = ops->rw_alloc_header();
+
+	if (header) {
+		struct nfs_pgio_header *hdr = &header->header;
+
+		INIT_LIST_HEAD(&hdr->pages);
+		INIT_LIST_HEAD(&hdr->rpc_list);
+		spin_lock_init(&hdr->lock);
+		atomic_set(&hdr->refcnt, 0);
+		hdr->rw_ops = ops;
+	}
+	return header;
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
+
+/*
+ * nfs_rw_header_free - Free a read or write header
+ * @hdr: The header to free
+ */
+void nfs_rw_header_free(struct nfs_pgio_header *hdr)
+{
+	hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_free);
+
 /**
  * nfs_pgio_data_alloc - Allocate pageio data
  * @hdr: The header making a request
@@ -367,6 +398,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 		     struct inode *inode,
 		     const struct nfs_pageio_ops *pg_ops,
 		     const struct nfs_pgio_completion_ops *compl_ops,
+		     const struct nfs_rw_ops *rw_ops,
 		     size_t bsize,
 		     int io_flags)
 {
@@ -380,6 +412,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_inode = inode;
 	desc->pg_ops = pg_ops;
 	desc->pg_completion_ops = compl_ops;
+	desc->pg_rw_ops = rw_ops;
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e192ba69a7d4..54c84c128b2b 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1585,7 +1585,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
 static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
 {
 	pnfs_put_lseg(hdr->lseg);
-	nfs_writehdr_free(hdr);
+	nfs_rw_header_free(hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
 
@@ -1596,7 +1596,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	whdr = nfs_writehdr_alloc();
+	whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
 	if (!whdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		pnfs_put_lseg(desc->pg_lseg);
@@ -1743,7 +1743,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
 static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
 {
 	pnfs_put_lseg(hdr->lseg);
-	nfs_readhdr_free(hdr);
+	nfs_rw_header_free(hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
 
@@ -1754,7 +1754,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	rhdr = nfs_readhdr_alloc();
+	rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
 	if (!rhdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		ret = -ENOMEM;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index ab4c1a5b5fbd..4cf3577bd54e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,33 +31,19 @@
 static const struct nfs_pageio_ops nfs_pageio_read_ops;
 static const struct rpc_call_ops nfs_read_common_ops;
 static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
+static const struct nfs_rw_ops nfs_rw_read_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
-struct nfs_rw_header *nfs_readhdr_alloc(void)
+static struct nfs_rw_header *nfs_readhdr_alloc(void)
 {
-	struct nfs_rw_header *rhdr;
-
-	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
-	if (rhdr) {
-		struct nfs_pgio_header *hdr = &rhdr->header;
-
-		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&hdr->rpc_list);
-		spin_lock_init(&hdr->lock);
-		atomic_set(&hdr->refcnt, 0);
-	}
-	return rhdr;
+	return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
 }
-EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
 
-void nfs_readhdr_free(struct nfs_pgio_header *hdr)
+static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
 {
-	struct nfs_rw_header *rhdr = container_of(hdr, struct nfs_rw_header, header);
-
 	kmem_cache_free(nfs_rdata_cachep, rhdr);
 }
-EXPORT_SYMBOL_GPL(nfs_readhdr_free);
 
 static
 int nfs_return_empty_page(struct page *page)
@@ -79,7 +65,8 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 	if (server->pnfs_curr_ld && !force_mds)
 		pg_ops = server->pnfs_curr_ld->pg_read_ops;
 #endif
-	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->rsize, 0);
+	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
+			server->rsize, 0);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
 
@@ -375,13 +362,13 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	rhdr = nfs_readhdr_alloc();
+	rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
 	if (!rhdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		return -ENOMEM;
 	}
 	hdr = &rhdr->header;
-	nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
+	nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
 	atomic_inc(&hdr->refcnt);
 	ret = nfs_generic_pagein(desc, hdr);
 	if (ret == 0)
@@ -647,3 +634,8 @@ void nfs_destroy_readpagecache(void)
 {
 	kmem_cache_destroy(nfs_rdata_cachep);
 }
+
+static const struct nfs_rw_ops nfs_rw_read_ops = {
+	.rw_alloc_header	= nfs_readhdr_alloc,
+	.rw_free_header		= nfs_readhdr_free,
+};
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0dc4d6a28bd0..9c5cde38da45 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_write_common_ops;
 static const struct rpc_call_ops nfs_commit_ops;
 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
 static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
+static const struct nfs_rw_ops nfs_rw_write_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
@@ -70,29 +71,19 @@ void nfs_commit_free(struct nfs_commit_data *p)
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
 
-struct nfs_rw_header *nfs_writehdr_alloc(void)
+static struct nfs_rw_header *nfs_writehdr_alloc(void)
 {
 	struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
 
-	if (p) {
-		struct nfs_pgio_header *hdr = &p->header;
-
+	if (p)
 		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&hdr->rpc_list);
-		spin_lock_init(&hdr->lock);
-		atomic_set(&hdr->refcnt, 0);
-	}
 	return p;
 }
-EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
 
-void nfs_writehdr_free(struct nfs_pgio_header *hdr)
+static void nfs_writehdr_free(struct nfs_rw_header *whdr)
 {
-	struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header);
 	mempool_free(whdr, nfs_wdata_mempool);
 }
-EXPORT_SYMBOL_GPL(nfs_writehdr_free);
 
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
 {
@@ -1210,13 +1201,13 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	whdr = nfs_writehdr_alloc();
+	whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
 	if (!whdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		return -ENOMEM;
 	}
 	hdr = &whdr->header;
-	nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
+	nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
 	atomic_inc(&hdr->refcnt);
 	ret = nfs_generic_flush(desc, hdr);
 	if (ret == 0)
@@ -1244,7 +1235,8 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 	if (server->pnfs_curr_ld && !force_mds)
 		pg_ops = server->pnfs_curr_ld->pg_write_ops;
 #endif
-	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->wsize, ioflags);
+	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
+			server->wsize, ioflags);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
 
@@ -1925,3 +1917,7 @@ void nfs_destroy_writepagecache(void)
 	kmem_cache_destroy(nfs_wdata_cachep);
 }
 
+static const struct nfs_rw_ops nfs_rw_write_ops = {
+	.rw_alloc_header	= nfs_writehdr_alloc,
+	.rw_free_header		= nfs_writehdr_free,
+};
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 92ce5783b707..594812546c25 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -52,6 +52,11 @@ struct nfs_pageio_ops {
 	int	(*pg_doio)(struct nfs_pageio_descriptor *);
 };
 
+struct nfs_rw_ops {
+	struct nfs_rw_header *(*rw_alloc_header)(void);
+	void (*rw_free_header)(struct nfs_rw_header *);
+};
+
 struct nfs_pageio_descriptor {
 	struct list_head	pg_list;
 	unsigned long		pg_bytes_written;
@@ -63,6 +68,7 @@ struct nfs_pageio_descriptor {
 
 	struct inode		*pg_inode;
 	const struct nfs_pageio_ops *pg_ops;
+	const struct nfs_rw_ops *pg_rw_ops;
 	int 			pg_ioflags;
 	int			pg_error;
 	const struct rpc_call_ops *pg_rpc_callops;
@@ -86,6 +92,7 @@ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 			     struct inode *inode,
 			     const struct nfs_pageio_ops *pg_ops,
 			     const struct nfs_pgio_completion_ops *compl_ops,
+			     const struct nfs_rw_ops *rw_ops,
 			     size_t bsize,
 			     int how);
 extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 965c2aa6b33f..a1b91b67145e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1269,6 +1269,7 @@ struct nfs_pgio_header {
 	const struct rpc_call_ops *mds_ops;
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
+	const struct nfs_rw_ops	*rw_ops;
 	struct nfs_direct_req	*dreq;
 	void			*layout_private;
 	spinlock_t		lock;
-- 
cgit v1.2.3-71-gd317


From a4cdda59111f92000297e0d3edb1e0e08ba3549b Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:31 -0400
Subject: NFS: Create a common pgio_rpc_prepare function

The read and write paths do exactly the same thing for the rpc_prepare
rpc_op.  This patch combines them together into a single function.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h        |  2 ++
 fs/nfs/nfs3proc.c        | 11 ++---------
 fs/nfs/nfs4proc.c        | 22 +++-------------------
 fs/nfs/pagelist.c        | 26 ++++++++++++++++++++++++++
 fs/nfs/proc.c            | 11 ++---------
 fs/nfs/read.c            | 19 +++----------------
 fs/nfs/write.c           | 19 +++++--------------
 include/linux/nfs_page.h |  2 ++
 include/linux/nfs_xdr.h  |  3 +--
 9 files changed, 46 insertions(+), 69 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9d6a40eae11c..1959260f8c57 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -241,6 +241,8 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
 void nfs_rw_header_free(struct nfs_pgio_header *);
 struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int);
 void nfs_pgio_data_release(struct nfs_pgio_data *);
+void nfs_pgio_prepare(struct rpc_task *, void *);
+void nfs_pgio_release(void *);
 
 static inline void nfs_iocounter_init(struct nfs_io_counter *c)
 {
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d235369c3dfb..e7daa42bbc86 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -812,7 +812,7 @@ static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
 }
 
-static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
@@ -834,12 +834,6 @@ static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
 }
 
-static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
-{
-	rpc_call_start(task);
-	return 0;
-}
-
 static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	rpc_call_start(task);
@@ -946,11 +940,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.fsinfo		= nfs3_proc_fsinfo,
 	.pathconf	= nfs3_proc_pathconf,
 	.decode_dirent	= nfs3_decode_dirent,
+	.pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare,
 	.read_setup	= nfs3_proc_read_setup,
-	.read_rpc_prepare = nfs3_proc_read_rpc_prepare,
 	.read_done	= nfs3_read_done,
 	.write_setup	= nfs3_proc_write_setup,
-	.write_rpc_prepare = nfs3_proc_write_rpc_prepare,
 	.write_done	= nfs3_write_done,
 	.commit_setup	= nfs3_proc_commit_setup,
 	.commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e793aa91454a..44fb93a66d26 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4089,7 +4089,7 @@ static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message
 	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
 }
 
-static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 			&data->args.seq_args,
@@ -4097,7 +4097,7 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_dat
 			task))
 		return 0;
 	if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
-				data->args.lock_context, FMODE_READ) == -EIO)
+				data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO)
 		return -EIO;
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
 		return -EIO;
@@ -4177,21 +4177,6 @@ static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message
 	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 }
 
-static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
-{
-	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
-			&data->args.seq_args,
-			&data->res.seq_res,
-			task))
-		return 0;
-	if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
-				data->args.lock_context, FMODE_WRITE) == -EIO)
-		return -EIO;
-	if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
-		return -EIO;
-	return 0;
-}
-
 static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	nfs4_setup_sequence(NFS_SERVER(data->inode),
@@ -8432,11 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.pathconf	= nfs4_proc_pathconf,
 	.set_capabilities = nfs4_server_capabilities,
 	.decode_dirent	= nfs4_decode_dirent,
+	.pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare,
 	.read_setup	= nfs4_proc_read_setup,
-	.read_rpc_prepare = nfs4_proc_read_rpc_prepare,
 	.read_done	= nfs4_read_done,
 	.write_setup	= nfs4_proc_write_setup,
-	.write_rpc_prepare = nfs4_proc_write_rpc_prepare,
 	.write_done	= nfs4_write_done,
 	.commit_setup	= nfs4_proc_commit_setup,
 	.commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ca356fe0836b..0fa211d35e40 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -386,6 +386,32 @@ void nfs_pgio_data_release(struct nfs_pgio_data *data)
 }
 EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
 
+/**
+ * nfs_pgio_prepare - Prepare pageio data to go over the wire
+ * @task: The current task
+ * @calldata: pageio data to prepare
+ */
+void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	int err;
+	err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
+	if (err)
+		rpc_exit(task, err);
+}
+
+/**
+ * nfs_pgio_release - Release pageio data
+ * @calldata: The pageio data to release
+ */
+void nfs_pgio_release(void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	if (data->header->rw_ops->rw_release)
+		data->header->rw_ops->rw_release(data);
+	nfs_pgio_data_release(data);
+}
+
 /**
  * nfs_pageio_init - initialise a page io descriptor
  * @desc: pointer to descriptor
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index c54829eb2156..c171ce1a8a30 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -599,7 +599,7 @@ static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *
 	msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
 }
 
-static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
@@ -621,12 +621,6 @@ static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message
 	msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
 }
 
-static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
-{
-	rpc_call_start(task);
-	return 0;
-}
-
 static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	BUG();
@@ -734,11 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.fsinfo		= nfs_proc_fsinfo,
 	.pathconf	= nfs_proc_pathconf,
 	.decode_dirent	= nfs2_decode_dirent,
+	.pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare,
 	.read_setup	= nfs_proc_read_setup,
-	.read_rpc_prepare = nfs_proc_read_rpc_prepare,
 	.read_done	= nfs_read_done,
 	.write_setup	= nfs_proc_write_setup,
-	.write_rpc_prepare = nfs_proc_write_rpc_prepare,
 	.write_done	= nfs_write_done,
 	.commit_setup	= nfs_proc_commit_setup,
 	.commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4cf3577bd54e..cfa15e828dd6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -454,24 +454,10 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
 		nfs_readpage_retry(task, data);
 }
 
-static void nfs_readpage_release_common(void *calldata)
-{
-	nfs_pgio_data_release(calldata);
-}
-
-void nfs_read_prepare(struct rpc_task *task, void *calldata)
-{
-	struct nfs_pgio_data *data = calldata;
-	int err;
-	err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
-	if (err)
-		rpc_exit(task, err);
-}
-
 static const struct rpc_call_ops nfs_read_common_ops = {
-	.rpc_call_prepare = nfs_read_prepare,
+	.rpc_call_prepare = nfs_pgio_prepare,
 	.rpc_call_done = nfs_readpage_result_common,
-	.rpc_release = nfs_readpage_release_common,
+	.rpc_release = nfs_pgio_release,
 };
 
 /*
@@ -636,6 +622,7 @@ void nfs_destroy_readpagecache(void)
 }
 
 static const struct nfs_rw_ops nfs_rw_read_ops = {
+	.rw_mode		= FMODE_READ,
 	.rw_alloc_header	= nfs_readhdr_alloc,
 	.rw_free_header		= nfs_readhdr_free,
 };
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9c5cde38da45..ae799c96ec2b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1248,15 +1248,6 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
 
 
-void nfs_write_prepare(struct rpc_task *task, void *calldata)
-{
-	struct nfs_pgio_data *data = calldata;
-	int err;
-	err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
-	if (err)
-		rpc_exit(task, err);
-}
-
 void nfs_commit_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_commit_data *data = calldata;
@@ -1278,9 +1269,8 @@ static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
 	nfs_writeback_done(task, data);
 }
 
-static void nfs_writeback_release_common(void *calldata)
+static void nfs_writeback_release_common(struct nfs_pgio_data *data)
 {
-	struct nfs_pgio_data	*data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 	int status = data->task.tk_status;
 
@@ -1294,13 +1284,12 @@ static void nfs_writeback_release_common(void *calldata)
 			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
 		spin_unlock(&hdr->lock);
 	}
-	nfs_pgio_data_release(data);
 }
 
 static const struct rpc_call_ops nfs_write_common_ops = {
-	.rpc_call_prepare = nfs_write_prepare,
+	.rpc_call_prepare = nfs_pgio_prepare,
 	.rpc_call_done = nfs_writeback_done_common,
-	.rpc_release = nfs_writeback_release_common,
+	.rpc_release = nfs_pgio_release,
 };
 
 /*
@@ -1918,6 +1907,8 @@ void nfs_destroy_writepagecache(void)
 }
 
 static const struct nfs_rw_ops nfs_rw_write_ops = {
+	.rw_mode		= FMODE_WRITE,
 	.rw_alloc_header	= nfs_writehdr_alloc,
 	.rw_free_header		= nfs_writehdr_free,
+	.rw_release		= nfs_writeback_release_common,
 };
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 594812546c25..da00a4d6f470 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -53,8 +53,10 @@ struct nfs_pageio_ops {
 };
 
 struct nfs_rw_ops {
+	const fmode_t rw_mode;
 	struct nfs_rw_header *(*rw_alloc_header)(void);
 	void (*rw_free_header)(struct nfs_rw_header *);
+	void (*rw_release)(struct nfs_pgio_data *);
 };
 
 struct nfs_pageio_descriptor {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a1b91b67145e..adef7bd2d06d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1429,11 +1429,10 @@ struct nfs_rpc_ops {
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
+	int	(*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
 	void	(*read_setup)   (struct nfs_pgio_data *, struct rpc_message *);
-	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_pgio_data *);
 	void	(*write_setup)  (struct nfs_pgio_data *, struct rpc_message *);
-	int	(*write_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
 	int	(*write_done)  (struct rpc_task *, struct nfs_pgio_data *);
 	void	(*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
 	void	(*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
-- 
cgit v1.2.3-71-gd317


From 0eecb2145c1ce18e36617008424a93836ad0a3bd Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:32 -0400
Subject: NFS: Create a common nfs_pgio_result_common function

Combining these functions will let me make a single nfs_rw_common_ops
struct (see the next patch).

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h        |  1 +
 fs/nfs/pagelist.c        | 23 ++++++++++++++++++++++
 fs/nfs/read.c            | 25 ++++++++----------------
 fs/nfs/write.c           | 51 +++++++++++++++++++-----------------------------
 include/linux/nfs_fs.h   |  2 --
 include/linux/nfs_page.h |  2 ++
 6 files changed, 54 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 1959260f8c57..7c0ae364bdad 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -243,6 +243,7 @@ struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int
 void nfs_pgio_data_release(struct nfs_pgio_data *);
 void nfs_pgio_prepare(struct rpc_task *, void *);
 void nfs_pgio_release(void *);
+void nfs_pgio_result(struct rpc_task *, void *);
 
 static inline void nfs_iocounter_init(struct nfs_io_counter *c)
 {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 0fa211d35e40..f74df87058b6 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -24,6 +24,8 @@
 #include "internal.h"
 #include "pnfs.h"
 
+#define NFSDBG_FACILITY		NFSDBG_PAGECACHE
+
 static struct kmem_cache *nfs_page_cachep;
 
 static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
@@ -447,6 +449,27 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init);
 
+/**
+ * nfs_pgio_result - Basic pageio error handling
+ * @task: The task that ran
+ * @calldata: Pageio data to check
+ */
+void nfs_pgio_result(struct rpc_task *task, void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	struct inode *inode = data->header->inode;
+
+	dprintk("NFS: %s: %5u, (status %d)\n", __func__,
+		task->tk_pid, task->tk_status);
+
+	if (data->header->rw_ops->rw_done(task, data, inode) != 0)
+		return;
+	if (task->tk_status < 0)
+		nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
+	else
+		data->header->rw_ops->rw_result(task, data);
+}
+
 static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
 		const struct nfs_open_context *ctx2)
 {
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index cfa15e828dd6..bc78bd248eb8 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -388,15 +388,10 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
+			     struct inode *inode)
 {
-	struct inode *inode = data->header->inode;
-	int status;
-
-	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
-			task->tk_status);
-
-	status = NFS_PROTO(inode)->read_done(task, data);
+	int status = NFS_PROTO(inode)->read_done(task, data);
 	if (status != 0)
 		return status;
 
@@ -429,17 +424,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data
 	rpc_restart_call_prepare(task);
 }
 
-static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
+static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
 {
-	struct nfs_pgio_data *data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 
-	/* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
-	if (nfs_readpage_result(task, data) != 0)
-		return;
-	if (task->tk_status < 0)
-		nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
-	else if (data->res.eof) {
+	if (data->res.eof) {
 		loff_t bound;
 
 		bound = data->args.offset + data->res.count;
@@ -456,7 +445,7 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
 
 static const struct rpc_call_ops nfs_read_common_ops = {
 	.rpc_call_prepare = nfs_pgio_prepare,
-	.rpc_call_done = nfs_readpage_result_common,
+	.rpc_call_done = nfs_pgio_result,
 	.rpc_release = nfs_pgio_release,
 };
 
@@ -625,4 +614,6 @@ static const struct nfs_rw_ops nfs_rw_read_ops = {
 	.rw_mode		= FMODE_READ,
 	.rw_alloc_header	= nfs_readhdr_alloc,
 	.rw_free_header		= nfs_readhdr_free,
+	.rw_done		= nfs_readpage_done,
+	.rw_result		= nfs_readpage_result,
 };
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ae799c96ec2b..1d3e1d75c8c5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1255,20 +1255,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
 }
 
-/*
- * Handle a write reply that flushes a whole page.
- *
- * FIXME: There is an inherent race with invalidate_inode_pages and
- *	  writebacks since the page->count is kept > 1 for as long
- *	  as the page has a write request pending.
- */
-static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
-{
-	struct nfs_pgio_data	*data = calldata;
-
-	nfs_writeback_done(task, data);
-}
-
 static void nfs_writeback_release_common(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
@@ -1288,7 +1274,7 @@ static void nfs_writeback_release_common(struct nfs_pgio_data *data)
 
 static const struct rpc_call_ops nfs_write_common_ops = {
 	.rpc_call_prepare = nfs_pgio_prepare,
-	.rpc_call_done = nfs_writeback_done_common,
+	.rpc_call_done = nfs_pgio_result,
 	.rpc_release = nfs_pgio_release,
 };
 
@@ -1320,16 +1306,11 @@ static int nfs_should_remove_suid(const struct inode *inode)
 /*
  * This function is called when the WRITE call is complete.
  */
-void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
+			      struct inode *inode)
 {
-	struct nfs_pgio_args	*argp = &data->args;
-	struct nfs_pgio_res	*resp = &data->res;
-	struct inode		*inode = data->header->inode;
 	int status;
 
-	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
-		task->tk_pid, task->tk_status);
-
 	/*
 	 * ->write_done will attempt to use post-op attributes to detect
 	 * conflicting writes by other clients.  A strict interpretation
@@ -1339,11 +1320,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data)
 	 */
 	status = NFS_PROTO(inode)->write_done(task, data);
 	if (status != 0)
-		return;
-	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+		return status;
+	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
 
 #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
-	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+	if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
 		/* We tried a write call, but the server did not
 		 * commit data to stable storage even though we
 		 * requested it.
@@ -1359,25 +1340,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data)
 			dprintk("NFS:       faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
 				NFS_SERVER(inode)->nfs_client->cl_hostname,
-				resp->verf->committed, argp->stable);
+				data->res.verf->committed, data->args.stable);
 			complain = jiffies + 300 * HZ;
 		}
 	}
 #endif
-	if (task->tk_status < 0) {
-		nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
-		return;
-	}
 
 	/* Deal with the suid/sgid bit corner case */
 	if (nfs_should_remove_suid(inode))
 		nfs_mark_for_revalidate(inode);
+	return 0;
+}
+
+/*
+ * This function is called when the WRITE call is complete.
+ */
+static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
+{
+	struct nfs_pgio_args	*argp = &data->args;
+	struct nfs_pgio_res	*resp = &data->res;
 
 	if (resp->count < argp->count) {
 		static unsigned long    complain;
 
 		/* This a short write! */
-		nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
+		nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
 
 		/* Has the server at least made some progress? */
 		if (resp->count == 0) {
@@ -1911,4 +1898,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = {
 	.rw_alloc_header	= nfs_writehdr_alloc,
 	.rw_free_header		= nfs_writehdr_free,
 	.rw_release		= nfs_writeback_release_common,
+	.rw_done		= nfs_writeback_done,
+	.rw_result		= nfs_writeback_result,
 };
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 7e0db561d829..919576b8e2cf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -520,7 +520,6 @@ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
 extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
 extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
-extern void nfs_writeback_done(struct rpc_task *, struct nfs_pgio_data *);
 
 /*
  * Try to write back everything synchronously (but check the
@@ -553,7 +552,6 @@ nfs_have_writebacks(struct inode *inode)
 extern int  nfs_readpage(struct file *, struct page *);
 extern int  nfs_readpages(struct file *, struct address_space *,
 		struct list_head *, unsigned);
-extern int  nfs_readpage_result(struct rpc_task *, struct nfs_pgio_data *);
 extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
 			       struct page *);
 
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index da00a4d6f470..01aa29c5ec42 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -57,6 +57,8 @@ struct nfs_rw_ops {
 	struct nfs_rw_header *(*rw_alloc_header)(void);
 	void (*rw_free_header)(struct nfs_rw_header *);
 	void (*rw_release)(struct nfs_pgio_data *);
+	int  (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *);
+	void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *);
 };
 
 struct nfs_pageio_descriptor {
-- 
cgit v1.2.3-71-gd317


From b98abe52fa8e2a3797d3cc2db3d0e109f4549c03 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@kernel.org>
Date: Wed, 28 May 2014 23:51:53 -0700
Subject: Input: add common DT binding for touchscreens

Add common DT binding documentation for touchscreen devices and
implement input_parse_touchscreen_of_params, which parses the common
properties and configures the input device accordingly.

The method currently does not interpret the axis inversion properties,
since there is no matching flag in the generic linux input device.

Reviewed-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 .../bindings/input/touchscreen/touchscreen.txt     | 27 +++++++++++++
 drivers/input/touchscreen/Kconfig                  |  4 ++
 drivers/input/touchscreen/Makefile                 |  1 +
 drivers/input/touchscreen/of_touchscreen.c         | 45 ++++++++++++++++++++++
 include/linux/input/touchscreen.h                  | 22 +++++++++++
 5 files changed, 99 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
 create mode 100644 drivers/input/touchscreen/of_touchscreen.c
 create mode 100644 include/linux/input/touchscreen.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
new file mode 100644
index 000000000000..d8e06163c54e
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
@@ -0,0 +1,27 @@
+General Touchscreen Properties:
+
+Optional properties for Touchscreens:
+ - touchscreen-size-x		: horizontal resolution of touchscreen
+				  (in pixels)
+ - touchscreen-size-y		: vertical resolution of touchscreen
+				  (in pixels)
+ - touchscreen-max-pressure	: maximum reported pressure (arbitrary range
+				  dependent on the controller)
+ - touchscreen-fuzz-x		: horizontal noise value of the absolute input
+				  device (in pixels)
+ - touchscreen-fuzz-y		: vertical noise value of the absolute input
+				  device (in pixels)
+ - touchscreen-fuzz-pressure	: pressure noise value of the absolute input
+				  device (arbitrary range dependent on the
+				  controller)
+ - touchscreen-inverted-x	: X axis is inverted (boolean)
+ - touchscreen-inverted-y	: Y axis is inverted (boolean)
+
+Deprecated properties for Touchscreens:
+ - x-size			: deprecated name for touchscreen-size-x
+ - y-size			: deprecated name for touchscreen-size-y
+ - moving-threshold		: deprecated name for a combination of
+				  touchscreen-fuzz-x and touchscreen-fuzz-y
+ - contact-threshold		: deprecated name for touchscreen-fuzz-pressure
+ - x-invert			: deprecated name for touchscreen-inverted-x
+ - y-invert			: deprecated name for touchscreen-inverted-y
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 43389c097b02..8e07fe8505fd 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -11,6 +11,10 @@ menuconfig INPUT_TOUCHSCREEN
 
 if INPUT_TOUCHSCREEN
 
+config OF_TOUCHSCREEN
+	def_tristate INPUT
+	depends on INPUT && OF
+
 config TOUCHSCREEN_88PM860X
 	tristate "Marvell 88PM860x touchscreen"
 	depends on MFD_88PM860X
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 71a97559ce68..4d479fb0a768 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -6,6 +6,7 @@
 
 wm97xx-ts-y := wm97xx-core.o
 
+obj-$(CONFIG_OF_TOUCHSCREEN)		+= of_touchscreen.o
 obj-$(CONFIG_TOUCHSCREEN_88PM860X)	+= 88pm860x-ts.o
 obj-$(CONFIG_TOUCHSCREEN_AD7877)	+= ad7877.o
 obj-$(CONFIG_TOUCHSCREEN_AD7879)	+= ad7879.o
diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c
new file mode 100644
index 000000000000..f8f9b84230b1
--- /dev/null
+++ b/drivers/input/touchscreen/of_touchscreen.c
@@ -0,0 +1,45 @@
+/*
+ *  Generic DT helper functions for touchscreen devices
+ *
+ *  Copyright (c) 2014 Sebastian Reichel <sre@kernel.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/of.h>
+#include <linux/input.h>
+#include <linux/input/touchscreen.h>
+
+/**
+ * touchscreen_parse_of_params - parse common touchscreen DT properties
+ * @dev: device that should be parsed
+ *
+ * This function parses common DT properties for touchscreens and setups the
+ * input device accordingly. The function keeps previously setuped default
+ * values if no value is specified via DT.
+ */
+void touchscreen_parse_of_params(struct input_dev *dev)
+{
+	struct device_node *np = dev->dev.parent->of_node;
+	struct input_absinfo *absinfo;
+
+	input_alloc_absinfo(dev);
+	if (!dev->absinfo)
+		return;
+
+	absinfo = &dev->absinfo[ABS_X];
+	of_property_read_u32(np, "touchscreen-size-x", &absinfo->maximum);
+	of_property_read_u32(np, "touchscreen-fuzz-x", &absinfo->fuzz);
+
+	absinfo = &dev->absinfo[ABS_Y];
+	of_property_read_u32(np, "touchscreen-size-y", &absinfo->maximum);
+	of_property_read_u32(np, "touchscreen-fuzz-y", &absinfo->fuzz);
+
+	absinfo = &dev->absinfo[ABS_PRESSURE];
+	of_property_read_u32(np, "touchscreen-max-pressure", &absinfo->maximum);
+	of_property_read_u32(np, "touchscreen-fuzz-pressure", &absinfo->fuzz);
+}
+EXPORT_SYMBOL(touchscreen_parse_of_params);
diff --git a/include/linux/input/touchscreen.h b/include/linux/input/touchscreen.h
new file mode 100644
index 000000000000..08a5ef6e8f25
--- /dev/null
+++ b/include/linux/input/touchscreen.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2014 Sebastian Reichel <sre@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef _TOUCHSCREEN_H
+#define _TOUCHSCREEN_H
+
+#include <linux/input.h>
+
+#ifdef CONFIG_OF
+void touchscreen_parse_of_params(struct input_dev *dev);
+#else
+static inline void touchscreen_parse_of_params(struct input_dev *dev)
+{
+}
+#endif
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 61721c88b8d85c9dc13bfeedf75dfc245f397c3c Mon Sep 17 00:00:00 2001
From: Joachim Eastwood <manabian@gmail.com>
Date: Thu, 29 May 2014 00:30:02 -0700
Subject: Input: omap-keypad - remove platform data support

This is unused since all users (OMAP4/5) are DT only.

Signed-off-by: Joachim Eastwood <manabian@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/Kconfig             |  2 +-
 drivers/input/keyboard/omap4-keypad.c      | 32 ++++++------------------------
 include/linux/platform_data/omap4-keypad.h | 13 ------------
 3 files changed, 7 insertions(+), 40 deletions(-)
 delete mode 100644 include/linux/platform_data/omap4-keypad.h

(limited to 'include')

diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 948a30304870..0f84f2346fe4 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -589,7 +589,7 @@ config KEYBOARD_OMAP
 
 config KEYBOARD_OMAP4
 	tristate "TI OMAP4+ keypad support"
-	depends on ARCH_OMAP2PLUS
+	depends on OF || ARCH_OMAP2PLUS
 	select INPUT_MATRIXKMAP
 	help
 	  Say Y here if you want to use the OMAP4+ keypad.
diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c
index 0400b3f2b4b9..024b7bdffe5b 100644
--- a/drivers/input/keyboard/omap4-keypad.c
+++ b/drivers/input/keyboard/omap4-keypad.c
@@ -28,11 +28,10 @@
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/input.h>
+#include <linux/input/matrix_keypad.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 
-#include <linux/platform_data/omap4-keypad.h>
-
 /* OMAP4 registers */
 #define OMAP4_KBD_REVISION		0x00
 #define OMAP4_KBD_SYSCONFIG		0x10
@@ -218,7 +217,6 @@ static void omap4_keypad_close(struct input_dev *input)
 	pm_runtime_put_sync(input->dev.parent);
 }
 
-#ifdef CONFIG_OF
 static int omap4_keypad_parse_dt(struct device *dev,
 				 struct omap4_keypad *keypad_data)
 {
@@ -235,20 +233,9 @@ static int omap4_keypad_parse_dt(struct device *dev,
 
 	return 0;
 }
-#else
-static inline int omap4_keypad_parse_dt(struct device *dev,
-					struct omap4_keypad *keypad_data)
-{
-	return -ENOSYS;
-}
-#endif
 
 static int omap4_keypad_probe(struct platform_device *pdev)
 {
-	const struct omap4_keypad_platform_data *pdata =
-				dev_get_platdata(&pdev->dev);
-	const struct matrix_keymap_data *keymap_data =
-				pdata ? pdata->keymap_data : NULL;
 	struct omap4_keypad *keypad_data;
 	struct input_dev *input_dev;
 	struct resource *res;
@@ -277,14 +264,9 @@ static int omap4_keypad_probe(struct platform_device *pdev)
 
 	keypad_data->irq = irq;
 
-	if (pdata) {
-		keypad_data->rows = pdata->rows;
-		keypad_data->cols = pdata->cols;
-	} else {
-		error = omap4_keypad_parse_dt(&pdev->dev, keypad_data);
-		if (error)
-			return error;
-	}
+	error = omap4_keypad_parse_dt(&pdev->dev, keypad_data);
+	if (error)
+		return error;
 
 	res = request_mem_region(res->start, resource_size(res), pdev->name);
 	if (!res) {
@@ -363,7 +345,7 @@ static int omap4_keypad_probe(struct platform_device *pdev)
 		goto err_free_input;
 	}
 
-	error = matrix_keypad_build_keymap(keymap_data, NULL,
+	error = matrix_keypad_build_keymap(NULL, NULL,
 					   keypad_data->rows, keypad_data->cols,
 					   keypad_data->keymap, input_dev);
 	if (error) {
@@ -434,13 +416,11 @@ static int omap4_keypad_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_OF
 static const struct of_device_id omap_keypad_dt_match[] = {
 	{ .compatible = "ti,omap4-keypad" },
 	{},
 };
 MODULE_DEVICE_TABLE(of, omap_keypad_dt_match);
-#endif
 
 #ifdef CONFIG_PM_SLEEP
 static int omap4_keypad_suspend(struct device *dev)
@@ -482,7 +462,7 @@ static struct platform_driver omap4_keypad_driver = {
 		.name	= "omap4-keypad",
 		.owner	= THIS_MODULE,
 		.pm	= &omap4_keypad_pm_ops,
-		.of_match_table = of_match_ptr(omap_keypad_dt_match),
+		.of_match_table = omap_keypad_dt_match,
 	},
 };
 module_platform_driver(omap4_keypad_driver);
diff --git a/include/linux/platform_data/omap4-keypad.h b/include/linux/platform_data/omap4-keypad.h
deleted file mode 100644
index 4eef5fb05a17..000000000000
--- a/include/linux/platform_data/omap4-keypad.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __LINUX_INPUT_OMAP4_KEYPAD_H
-#define __LINUX_INPUT_OMAP4_KEYPAD_H
-
-#include <linux/input/matrix_keypad.h>
-
-struct omap4_keypad_platform_data {
-	const struct matrix_keymap_data *keymap_data;
-
-	u8 rows;
-	u8 cols;
-};
-
-#endif /* __LINUX_INPUT_OMAP4_KEYPAD_H */
-- 
cgit v1.2.3-71-gd317


From 1ed26f33008e954a8e91d26f97d4380dea8145db Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:37 -0400
Subject: NFS: Create a common initiate_pgio() function

Most of this code is the same for both the read and write paths, so
combine everything and use the rw_ops when necessary.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h        |  9 ++------
 fs/nfs/nfs4filelayout.c  |  6 +++---
 fs/nfs/pagelist.c        | 46 ++++++++++++++++++++++++++++++++++++++++
 fs/nfs/read.c            | 42 ++++++------------------------------
 fs/nfs/write.c           | 55 ++++++------------------------------------------
 include/linux/nfs_page.h |  2 ++
 6 files changed, 66 insertions(+), 94 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 365cdb11d0de..be4f2a7e9178 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -241,6 +241,8 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
 void nfs_rw_header_free(struct nfs_pgio_header *);
 void nfs_pgio_data_release(struct nfs_pgio_data *);
 int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
+int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
+		      const struct rpc_call_ops *, int, int);
 
 static inline void nfs_iocounter_init(struct nfs_io_counter *c)
 {
@@ -402,9 +404,6 @@ struct nfs_pgio_completion_ops;
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
-extern int nfs_initiate_read(struct rpc_clnt *clnt,
-			     struct nfs_pgio_data *data,
-			     const struct rpc_call_ops *call_ops, int flags);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 
@@ -425,10 +424,6 @@ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 			const struct nfs_pgio_completion_ops *compl_ops);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_commit_free(struct nfs_commit_data *p);
-extern int nfs_initiate_write(struct rpc_clnt *clnt,
-			      struct nfs_pgio_data *data,
-			      const struct rpc_call_ops *call_ops,
-			      int how, int flags);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
 extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_initiate_commit(struct rpc_clnt *clnt,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e6936147ad95..7954e16a6d83 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -568,8 +568,8 @@ filelayout_read_pagelist(struct nfs_pgio_data *data)
 	data->mds_offset = offset;
 
 	/* Perform an asynchronous read to ds */
-	nfs_initiate_read(ds_clnt, data,
-				  &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
+	nfs_initiate_pgio(ds_clnt, data,
+			    &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
 }
 
@@ -613,7 +613,7 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
 	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
 
 	/* Perform an asynchronous write */
-	nfs_initiate_write(ds_clnt, data,
+	nfs_initiate_pgio(ds_clnt, data,
 				    &filelayout_write_call_ops, sync,
 				    RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d8d25a4deb88..ab5b1850ca4f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -447,6 +447,52 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
 		rpc_exit(task, err);
 }
 
+int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
+		      const struct rpc_call_ops *call_ops, int how, int flags)
+{
+	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->header->cred,
+	};
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = clnt,
+		.task = &data->task,
+		.rpc_message = &msg,
+		.callback_ops = call_ops,
+		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
+		.flags = RPC_TASK_ASYNC | flags,
+	};
+	int ret = 0;
+
+	data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
+
+	dprintk("NFS: %5u initiated pgio call "
+		"(req %s/%llu, %u bytes @ offset %llu)\n",
+		data->task.tk_pid,
+		data->header->inode->i_sb->s_id,
+		(unsigned long long)NFS_FILEID(data->header->inode),
+		data->args.count,
+		(unsigned long long)data->args.offset);
+
+	task = rpc_run_task(&task_setup_data);
+	if (IS_ERR(task)) {
+		ret = PTR_ERR(task);
+		goto out;
+	}
+	if (how & FLUSH_SYNC) {
+		ret = rpc_wait_for_completion_task(task);
+		if (ret == 0)
+			ret = task->tk_status;
+	}
+	rpc_put_task(task);
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
+
 /**
  * nfs_pgio_error - Clean up from a pageio error
  * @desc: IO descriptor
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4fcef82d78b4..0359b0d76ef6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -151,53 +151,22 @@ out:
 	hdr->release(hdr);
 }
 
-int nfs_initiate_read(struct rpc_clnt *clnt,
-		      struct nfs_pgio_data *data,
-		      const struct rpc_call_ops *call_ops, int flags)
+static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
+			      struct rpc_task_setup *task_setup_data, int how)
 {
 	struct inode *inode = data->header->inode;
 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->header->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.task = &data->task,
-		.rpc_client = clnt,
-		.rpc_message = &msg,
-		.callback_ops = call_ops,
-		.callback_data = data,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC | swap_flags | flags,
-	};
 
-	/* Set up the initial task struct. */
-	NFS_PROTO(inode)->read_setup(data, &msg);
-
-	dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ "
-			"offset %llu)\n",
-			data->task.tk_pid,
-			inode->i_sb->s_id,
-			(unsigned long long)NFS_FILEID(inode),
-			data->args.count,
-			(unsigned long long)data->args.offset);
-
-	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task))
-		return PTR_ERR(task);
-	rpc_put_task(task);
-	return 0;
+	task_setup_data->flags |= swap_flags;
+	NFS_PROTO(inode)->read_setup(data, msg);
 }
-EXPORT_SYMBOL_GPL(nfs_initiate_read);
 
 static int nfs_do_read(struct nfs_pgio_data *data,
 		const struct rpc_call_ops *call_ops)
 {
 	struct inode *inode = data->header->inode;
 
-	return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
+	return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, 0, 0);
 }
 
 static int
@@ -491,4 +460,5 @@ static const struct nfs_rw_ops nfs_rw_read_ops = {
 	.rw_free_header		= nfs_readhdr_free,
 	.rw_done		= nfs_readpage_done,
 	.rw_result		= nfs_readpage_result,
+	.rw_initiate		= nfs_initiate_read,
 };
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0e34c7024195..e46a1fc6c1fe 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -932,60 +932,18 @@ static int flush_task_priority(int how)
 	return RPC_PRIORITY_NORMAL;
 }
 
-int nfs_initiate_write(struct rpc_clnt *clnt,
-		       struct nfs_pgio_data *data,
-		       const struct rpc_call_ops *call_ops,
-		       int how, int flags)
+static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
+			       struct rpc_task_setup *task_setup_data, int how)
 {
 	struct inode *inode = data->header->inode;
 	int priority = flush_task_priority(how);
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->header->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.rpc_client = clnt,
-		.task = &data->task,
-		.rpc_message = &msg,
-		.callback_ops = call_ops,
-		.callback_data = data,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC | flags,
-		.priority = priority,
-	};
-	int ret = 0;
 
-	/* Set up the initial task struct.  */
-	NFS_PROTO(inode)->write_setup(data, &msg);
-
-	dprintk("NFS: %5u initiated write call "
-		"(req %s/%llu, %u bytes @ offset %llu)\n",
-		data->task.tk_pid,
-		inode->i_sb->s_id,
-		(unsigned long long)NFS_FILEID(inode),
-		data->args.count,
-		(unsigned long long)data->args.offset);
+	task_setup_data->priority = priority;
+	NFS_PROTO(inode)->write_setup(data, msg);
 
 	nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
-				 &task_setup_data.rpc_client, &msg, data);
-
-	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task)) {
-		ret = PTR_ERR(task);
-		goto out;
-	}
-	if (how & FLUSH_SYNC) {
-		ret = rpc_wait_for_completion_task(task);
-		if (ret == 0)
-			ret = task->tk_status;
-	}
-	rpc_put_task(task);
-out:
-	return ret;
+				 &task_setup_data->rpc_client, msg, data);
 }
-EXPORT_SYMBOL_GPL(nfs_initiate_write);
 
 static int nfs_do_write(struct nfs_pgio_data *data,
 		const struct rpc_call_ops *call_ops,
@@ -993,7 +951,7 @@ static int nfs_do_write(struct nfs_pgio_data *data,
 {
 	struct inode *inode = data->header->inode;
 
-	return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
+	return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, how, 0);
 }
 
 static int nfs_do_multiple_writes(struct list_head *head,
@@ -1743,4 +1701,5 @@ static const struct nfs_rw_ops nfs_rw_write_ops = {
 	.rw_release		= nfs_writeback_release_common,
 	.rw_done		= nfs_writeback_done,
 	.rw_result		= nfs_writeback_result,
+	.rw_initiate		= nfs_initiate_write,
 };
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 01aa29c5ec42..c6a587f7118f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -59,6 +59,8 @@ struct nfs_rw_ops {
 	void (*rw_release)(struct nfs_pgio_data *);
 	int  (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *);
 	void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *);
+	void (*rw_initiate)(struct nfs_pgio_data *, struct rpc_message *,
+			    struct rpc_task_setup *, int);
 };
 
 struct nfs_pageio_descriptor {
-- 
cgit v1.2.3-71-gd317


From 12c05792599ec57ebab33096b2c75b863dfe6ea4 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:41 -0400
Subject: nfs: clean up PG_* flags

Remove unused flags PG_NEED_COMMIT and PG_NEED_RESCHED.
Add comments describing how each flag is used.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/nfs_page.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index c6a587f7118f..eb2eb6396874 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -22,12 +22,10 @@
  * Valid flags for a dirty buffer
  */
 enum {
-	PG_BUSY = 0,
-	PG_MAPPED,
-	PG_CLEAN,
-	PG_NEED_COMMIT,
-	PG_NEED_RESCHED,
-	PG_COMMIT_TO_DS,
+	PG_BUSY = 0,		/* nfs_{un}lock_request */
+	PG_MAPPED,		/* page private set for buffered io */
+	PG_CLEAN,		/* write succeeded */
+	PG_COMMIT_TO_DS,	/* used by pnfs layouts */
 };
 
 struct nfs_inode;
-- 
cgit v1.2.3-71-gd317


From 8c8f1ac109726e4ed44a920f5c962c84610d4a17 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:42 -0400
Subject: nfs: remove unused arg from nfs_create_request

@inode is passed but not used.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c          | 6 ++----
 fs/nfs/pagelist.c        | 4 +---
 fs/nfs/read.c            | 5 ++---
 fs/nfs/write.c           | 2 +-
 include/linux/nfs_page.h | 1 -
 5 files changed, 6 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 164b0167677b..1dd8c622d719 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -380,8 +380,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 			/* XXX do we need to do the eof zeroing found in async_filler? */
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
+			req = nfs_create_request(dreq->ctx, pagevec[i],
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -750,8 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
+			req = nfs_create_request(dreq->ctx, pagevec[i],
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 29591094125a..4b4b212ec6b2 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -139,7 +139,6 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
 /**
  * nfs_create_request - Create an NFS read/write request.
  * @ctx: open context to use
- * @inode: inode to which the request is attached
  * @page: page to write
  * @offset: starting offset within the page for the write
  * @count: number of bytes to read/write
@@ -149,8 +148,7 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
  * User should ensure it is safe to sleep in this function.
  */
 struct nfs_page *
-nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
-		   struct page *page,
+nfs_create_request(struct nfs_open_context *ctx, struct page *page,
 		   unsigned int offset, unsigned int count)
 {
 	struct nfs_page		*req;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 3986668e4390..46d90448f69b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -85,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
-	new = nfs_create_request(ctx, inode, page, 0, len);
+	new = nfs_create_request(ctx, page, 0, len);
 	if (IS_ERR(new)) {
 		unlock_page(page);
 		return PTR_ERR(new);
@@ -303,7 +303,6 @@ static int
 readpage_async_filler(void *data, struct page *page)
 {
 	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
-	struct inode *inode = page_file_mapping(page)->host;
 	struct nfs_page *new;
 	unsigned int len;
 	int error;
@@ -312,7 +311,7 @@ readpage_async_filler(void *data, struct page *page)
 	if (len == 0)
 		return nfs_return_empty_page(page);
 
-	new = nfs_create_request(desc->ctx, inode, page, 0, len);
+	new = nfs_create_request(desc->ctx, page, 0, len);
 	if (IS_ERR(new))
 		goto out_error;
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2680f29f8a51..e773df207c05 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -761,7 +761,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
 	req = nfs_try_to_update_request(inode, page, offset, bytes);
 	if (req != NULL)
 		goto out;
-	req = nfs_create_request(ctx, inode, page, offset, bytes);
+	req = nfs_create_request(ctx, page, offset, bytes);
 	if (IS_ERR(req))
 		goto out;
 	nfs_inode_add_request(inode, req);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index eb2eb6396874..be0b0981e7a0 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -85,7 +85,6 @@ struct nfs_pageio_descriptor {
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
 
 extern	struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
-					    struct inode *inode,
 					    struct page *page,
 					    unsigned int offset,
 					    unsigned int count);
-- 
cgit v1.2.3-71-gd317


From b4fdac1a5150174df0847a45dc6612ce5ce3daeb Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:43 -0400
Subject: nfs: modify pg_test interface to return size_t

This is a step toward allowing pg_test to inform the the
coalescing code to reduce the size of requests so they may fit in
whatever scheme the pg_test callback wants to define.

For now, just return the size of the request if there is space, or 0
if there is not.  This shouldn't change any behavior as it acts
the same as when the pg_test functions returned bool.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/blocklayout/blocklayout.c | 16 ++++++++++++----
 fs/nfs/nfs4filelayout.c          | 12 +++++++-----
 fs/nfs/objlayout/objio_osd.c     | 15 ++++++++++-----
 fs/nfs/pagelist.c                | 22 +++++++++++++++++++---
 fs/nfs/pnfs.c                    | 12 +++++++++---
 fs/nfs/pnfs.h                    |  3 ++-
 include/linux/nfs_page.h         |  5 +++--
 7 files changed, 62 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 206cc68c9694..9b431f44fad9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -1189,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 		pnfs_generic_pg_init_read(pgio, req);
 }
 
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
 bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		struct nfs_page *req)
 {
 	if (pgio->pg_dreq != NULL &&
 	    !is_aligned_req(req, SECTOR_SIZE))
-		return false;
+		return 0;
 
 	return pnfs_generic_pg_test(pgio, prev, req);
 }
@@ -1241,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 	}
 }
 
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
 bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		 struct nfs_page *req)
 {
 	if (pgio->pg_dreq != NULL &&
 	    !is_aligned_req(req, PAGE_CACHE_SIZE))
-		return false;
+		return 0;
 
 	return pnfs_generic_pg_test(pgio, prev, req);
 }
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 9fd7cebbff04..ba9a9aadf6c8 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -915,10 +915,10 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 /*
  * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
  *
- * return true  : coalesce page
- * return false : don't coalesce page
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
  */
-static bool
+static size_t
 filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		   struct nfs_page *req)
 {
@@ -927,7 +927,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 
 	if (!pnfs_generic_pg_test(pgio, prev, req) ||
 	    !nfs_generic_pg_test(pgio, prev, req))
-		return false;
+		return 0;
 
 	p_stripe = (u64)req_offset(prev);
 	r_stripe = (u64)req_offset(req);
@@ -936,7 +936,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 	do_div(p_stripe, stripe_unit);
 	do_div(r_stripe, stripe_unit);
 
-	return (p_stripe == r_stripe);
+	if (p_stripe == r_stripe)
+		return req->wb_bytes;
+	return 0;
 }
 
 static void
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 426b366b0b33..71b9c69dbe9c 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -564,14 +564,19 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
 	return 0;
 }
 
-static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
 			  struct nfs_page *prev, struct nfs_page *req)
 {
-	if (!pnfs_generic_pg_test(pgio, prev, req))
-		return false;
+	if (!pnfs_generic_pg_test(pgio, prev, req) ||
+	    pgio->pg_count + req->wb_bytes >
+	    (unsigned long)pgio->pg_layout_private)
+		return 0;
 
-	return pgio->pg_count + req->wb_bytes <=
-			(unsigned long)pgio->pg_layout_private;
+	return req->wb_bytes;
 }
 
 static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 4b4b212ec6b2..82233431880d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -280,7 +280,17 @@ nfs_wait_on_request(struct nfs_page *req)
 			TASK_UNINTERRUPTIBLE);
 }
 
-bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+/*
+ * nfs_generic_pg_test - determine if requests can be coalesced
+ * @desc: pointer to descriptor
+ * @prev: previous request in desc, or NULL
+ * @req: this request
+ *
+ * Returns zero if @req can be coalesced into @desc, otherwise it returns
+ * the size of the request.
+ */
+size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+			   struct nfs_page *prev, struct nfs_page *req)
 {
 	/*
 	 * FIXME: ideally we should be able to coalesce all requests
@@ -292,7 +302,9 @@ bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *pr
 	if (desc->pg_bsize < PAGE_SIZE)
 		return 0;
 
-	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
+	if (desc->pg_count + req->wb_bytes <= desc->pg_bsize)
+		return req->wb_bytes;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 
@@ -747,6 +759,8 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
 				      struct nfs_page *req,
 				      struct nfs_pageio_descriptor *pgio)
 {
+	size_t size;
+
 	if (!nfs_match_open_context(req->wb_context, prev->wb_context))
 		return false;
 	if (req->wb_context->dentry->d_inode->i_flock != NULL &&
@@ -758,7 +772,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
 		return false;
 	if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
 		return false;
-	return pgio->pg_ops->pg_test(pgio, prev, req);
+	size = pgio->pg_ops->pg_test(pgio, prev, req);
+	WARN_ON_ONCE(size && size != req->wb_bytes);
+	return size > 0;
 }
 
 /**
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0fe670189fd1..de6eb16f94d1 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1434,7 +1434,11 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
 
-bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+size_t
 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		     struct nfs_page *req)
 {
@@ -1455,8 +1459,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 	 * first byte that lies outside the pnfs_layout_range. FIXME?
 	 *
 	 */
-	return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
-					 pgio->pg_lseg->pls_range.length);
+	if (req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
+					 pgio->pg_lseg->pls_range.length))
+		return req->wb_bytes;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
 
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 0031267d7492..dccf182ec4d8 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -187,7 +187,8 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
 void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 			        struct nfs_page *req, u64 wb_size);
 int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
-bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
+size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
+			    struct nfs_page *prev, struct nfs_page *req);
 void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
 struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index be0b0981e7a0..13d59af561f6 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -46,7 +46,8 @@ struct nfs_page {
 struct nfs_pageio_descriptor;
 struct nfs_pageio_ops {
 	void	(*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
-	bool	(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+	size_t	(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *,
+			   struct nfs_page *);
 	int	(*pg_doio)(struct nfs_pageio_descriptor *);
 };
 
@@ -102,7 +103,7 @@ extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
 				   struct nfs_page *);
 extern	void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
 extern	void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
-extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 				struct nfs_page *prev,
 				struct nfs_page *req);
 extern  int nfs_wait_on_request(struct nfs_page *);
-- 
cgit v1.2.3-71-gd317


From 2bfc6e566daa8386c9cffef2f7de17fc330d3835 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:45 -0400
Subject: nfs: add support for multiple nfs reqs per page

Add "page groups" - a circular list of nfs requests (struct nfs_page)
that all reference the same page. This gives nfs read and write paths
the ability to account for sub-page regions independently.  This
somewhat follows the design of struct buffer_head's sub-page
accounting.

Only "head" requests are ever added/removed from the inode list in
the buffered write path. "head" and "sub" requests are treated the
same through the read path and the rest of the write/commit path.
Requests are given an extra reference across the life of the list.

Page groups are never rejoined after being split. If the read/write
request fails and the client falls back to another path (ie revert
to MDS in PNFS case), the already split requests are pushed through
the recoalescing code again, which may split them further and then
coalesce them into properly sized requests on the wire. Fragmentation
shouldn't be a problem with the current design, because we flush all
requests in page group when a non-contiguous request is added, so
the only time resplitting should occur is on a resend of a read or
write.

This patch lays the groundwork for sub-page splitting, but does not
actually do any splitting. For now all page groups have one request
as pg_test functions don't yet split pages. There are several related
patches that are needed support multiple requests per page group.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c          |   7 +-
 fs/nfs/pagelist.c        | 220 ++++++++++++++++++++++++++++++++++++++++++++---
 fs/nfs/read.c            |   4 +-
 fs/nfs/write.c           |  13 ++-
 include/linux/nfs_page.h |  13 ++-
 5 files changed, 236 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1dd8c622d719..2c0e08f4cf71 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -380,7 +380,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 			/* XXX do we need to do the eof zeroing found in async_filler? */
-			req = nfs_create_request(dreq->ctx, pagevec[i],
+			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -749,7 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 
-			req = nfs_create_request(dreq->ctx, pagevec[i],
+			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -827,6 +827,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 	spin_unlock(&dreq->lock);
 
 	while (!list_empty(&hdr->pages)) {
+		bool do_destroy = true;
+
 		req = nfs_list_entry(hdr->pages.next);
 		nfs_list_remove_request(req);
 		switch (bit) {
@@ -834,6 +836,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 		case NFS_IOHDR_NEED_COMMIT:
 			kref_get(&req->wb_kref);
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+			do_destroy = false;
 		}
 		nfs_unlock_and_release_request(req);
 	}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index f343f49ff596..015fb7b48dfe 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,6 +29,8 @@
 static struct kmem_cache *nfs_page_cachep;
 static const struct rpc_call_ops nfs_pgio_common_ops;
 
+static void nfs_free_request(struct nfs_page *);
+
 static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
 {
 	p->npages = pagecount;
@@ -136,10 +138,151 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
 	return __nfs_iocounter_wait(c);
 }
 
+/*
+ * nfs_page_group_lock - lock the head of the page group
+ * @req - request in group that is to be locked
+ *
+ * this lock must be held if modifying the page group list
+ */
+void
+nfs_page_group_lock(struct nfs_page *req)
+{
+	struct nfs_page *head = req->wb_head;
+	int err = -EAGAIN;
+
+	WARN_ON_ONCE(head != head->wb_head);
+
+	while (err)
+		err = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+			nfs_wait_bit_killable, TASK_KILLABLE);
+}
+
+/*
+ * nfs_page_group_unlock - unlock the head of the page group
+ * @req - request in group that is to be unlocked
+ */
+void
+nfs_page_group_unlock(struct nfs_page *req)
+{
+	struct nfs_page *head = req->wb_head;
+
+	WARN_ON_ONCE(head != head->wb_head);
+
+	smp_mb__before_clear_bit();
+	clear_bit(PG_HEADLOCK, &head->wb_flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&head->wb_flags, PG_HEADLOCK);
+}
+
+/*
+ * nfs_page_group_sync_on_bit_locked
+ *
+ * must be called with page group lock held
+ */
+static bool
+nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+{
+	struct nfs_page *head = req->wb_head;
+	struct nfs_page *tmp;
+
+	WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags));
+	WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags));
+
+	tmp = req->wb_this_page;
+	while (tmp != req) {
+		if (!test_bit(bit, &tmp->wb_flags))
+			return false;
+		tmp = tmp->wb_this_page;
+	}
+
+	/* true! reset all bits */
+	tmp = req;
+	do {
+		clear_bit(bit, &tmp->wb_flags);
+		tmp = tmp->wb_this_page;
+	} while (tmp != req);
+
+	return true;
+}
+
+/*
+ * nfs_page_group_sync_on_bit - set bit on current request, but only
+ *   return true if the bit is set for all requests in page group
+ * @req - request in page group
+ * @bit - PG_* bit that is used to sync page group
+ */
+bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
+{
+	bool ret;
+
+	nfs_page_group_lock(req);
+	ret = nfs_page_group_sync_on_bit_locked(req, bit);
+	nfs_page_group_unlock(req);
+
+	return ret;
+}
+
+/*
+ * nfs_page_group_init - Initialize the page group linkage for @req
+ * @req - a new nfs request
+ * @prev - the previous request in page group, or NULL if @req is the first
+ *         or only request in the group (the head).
+ */
+static inline void
+nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
+{
+	WARN_ON_ONCE(prev == req);
+
+	if (!prev) {
+		req->wb_head = req;
+		req->wb_this_page = req;
+	} else {
+		WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
+		WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
+		req->wb_head = prev->wb_head;
+		req->wb_this_page = prev->wb_this_page;
+		prev->wb_this_page = req;
+
+		/* grab extra ref if head request has extra ref from
+		 * the write/commit path to handle handoff between write
+		 * and commit lists */
+		if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags))
+			kref_get(&req->wb_kref);
+	}
+}
+
+/*
+ * nfs_page_group_destroy - sync the destruction of page groups
+ * @req - request that no longer needs the page group
+ *
+ * releases the page group reference from each member once all
+ * members have called this function.
+ */
+static void
+nfs_page_group_destroy(struct kref *kref)
+{
+	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+	struct nfs_page *tmp, *next;
+
+	if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
+		return;
+
+	tmp = req;
+	do {
+		next = tmp->wb_this_page;
+		/* unlink and free */
+		tmp->wb_this_page = tmp;
+		tmp->wb_head = tmp;
+		nfs_free_request(tmp);
+		tmp = next;
+	} while (tmp != req);
+}
+
 /**
  * nfs_create_request - Create an NFS read/write request.
  * @ctx: open context to use
  * @page: page to write
+ * @last: last nfs request created for this page group or NULL if head
  * @offset: starting offset within the page for the write
  * @count: number of bytes to read/write
  *
@@ -149,7 +292,8 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
  */
 struct nfs_page *
 nfs_create_request(struct nfs_open_context *ctx, struct page *page,
-		   unsigned int offset, unsigned int count)
+		   struct nfs_page *last, unsigned int offset,
+		   unsigned int count)
 {
 	struct nfs_page		*req;
 	struct nfs_lock_context *l_ctx;
@@ -181,6 +325,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
 	req->wb_bytes   = count;
 	req->wb_context = get_nfs_open_context(ctx);
 	kref_init(&req->wb_kref);
+	nfs_page_group_init(req, last);
 	return req;
 }
 
@@ -238,16 +383,18 @@ static void nfs_clear_request(struct nfs_page *req)
 	}
 }
 
-
 /**
  * nfs_release_request - Release the count on an NFS read/write request
  * @req: request to release
  *
  * Note: Should never be called with the spinlock held!
  */
-static void nfs_free_request(struct kref *kref)
+static void nfs_free_request(struct nfs_page *req)
 {
-	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+	WARN_ON_ONCE(req->wb_this_page != req);
+
+	/* extra debug: make sure no sync bits are still set */
+	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
 
 	/* Release struct file and open context */
 	nfs_clear_request(req);
@@ -256,7 +403,7 @@ static void nfs_free_request(struct kref *kref)
 
 void nfs_release_request(struct nfs_page *req)
 {
-	kref_put(&req->wb_kref, nfs_free_request);
+	kref_put(&req->wb_kref, nfs_page_group_destroy);
 }
 
 static int nfs_wait_bit_uninterruptible(void *word)
@@ -832,21 +979,66 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
  * @desc: destination io descriptor
  * @req: request
  *
+ * This may split a request into subrequests which are all part of the
+ * same page group.
+ *
  * Returns true if the request 'req' was successfully coalesced into the
  * existing list of pages 'desc'.
  */
 static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 			   struct nfs_page *req)
 {
-	while (!nfs_pageio_do_add_request(desc, req)) {
-		desc->pg_moreio = 1;
-		nfs_pageio_doio(desc);
-		if (desc->pg_error < 0)
-			return 0;
-		desc->pg_moreio = 0;
-		if (desc->pg_recoalesce)
-			return 0;
-	}
+	struct nfs_page *subreq;
+	unsigned int bytes_left = 0;
+	unsigned int offset, pgbase;
+
+	nfs_page_group_lock(req);
+
+	subreq = req;
+	bytes_left = subreq->wb_bytes;
+	offset = subreq->wb_offset;
+	pgbase = subreq->wb_pgbase;
+
+	do {
+		if (!nfs_pageio_do_add_request(desc, subreq)) {
+			/* make sure pg_test call(s) did nothing */
+			WARN_ON_ONCE(subreq->wb_bytes != bytes_left);
+			WARN_ON_ONCE(subreq->wb_offset != offset);
+			WARN_ON_ONCE(subreq->wb_pgbase != pgbase);
+
+			nfs_page_group_unlock(req);
+			desc->pg_moreio = 1;
+			nfs_pageio_doio(desc);
+			if (desc->pg_error < 0)
+				return 0;
+			desc->pg_moreio = 0;
+			if (desc->pg_recoalesce)
+				return 0;
+			/* retry add_request for this subreq */
+			nfs_page_group_lock(req);
+			continue;
+		}
+
+		/* check for buggy pg_test call(s) */
+		WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE);
+		WARN_ON_ONCE(subreq->wb_bytes > bytes_left);
+		WARN_ON_ONCE(subreq->wb_bytes == 0);
+
+		bytes_left -= subreq->wb_bytes;
+		offset += subreq->wb_bytes;
+		pgbase += subreq->wb_bytes;
+
+		if (bytes_left) {
+			subreq = nfs_create_request(req->wb_context,
+					req->wb_page,
+					subreq, pgbase, bytes_left);
+			nfs_lock_request(subreq);
+			subreq->wb_offset  = offset;
+			subreq->wb_index = req->wb_index;
+		}
+	} while (bytes_left > 0);
+
+	nfs_page_group_unlock(req);
 	return 1;
 }
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 46d90448f69b..902ba2c63d05 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -85,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
-	new = nfs_create_request(ctx, page, 0, len);
+	new = nfs_create_request(ctx, page, NULL, 0, len);
 	if (IS_ERR(new)) {
 		unlock_page(page);
 		return PTR_ERR(new);
@@ -311,7 +311,7 @@ readpage_async_filler(void *data, struct page *page)
 	if (len == 0)
 		return nfs_return_empty_page(page);
 
-	new = nfs_create_request(desc->ctx, page, 0, len);
+	new = nfs_create_request(desc->ctx, page, NULL, 0, len);
 	if (IS_ERR(new))
 		goto out_error;
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e773df207c05..d0f30f12a8b3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -367,6 +367,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 
+	WARN_ON_ONCE(req->wb_this_page != req);
+
 	/* Lock the request! */
 	nfs_lock_request(req);
 
@@ -383,6 +385,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 		set_page_private(req->wb_page, (unsigned long)req);
 	}
 	nfsi->npages++;
+	set_bit(PG_INODE_REF, &req->wb_flags);
 	kref_get(&req->wb_kref);
 	spin_unlock(&inode->i_lock);
 }
@@ -567,6 +570,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_commit_info cinfo;
 	unsigned long bytes = 0;
+	bool do_destroy;
 
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
 		goto out;
@@ -596,6 +600,7 @@ remove_req:
 next:
 		nfs_unlock_request(req);
 		nfs_end_page_writeback(req->wb_page);
+		do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
 		nfs_release_request(req);
 	}
 out:
@@ -700,6 +705,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
 		if (req == NULL)
 			goto out_unlock;
 
+		/* should be handled by nfs_flush_incompatible */
+		WARN_ON_ONCE(req->wb_head != req);
+		WARN_ON_ONCE(req->wb_this_page != req);
+
 		rqend = req->wb_offset + req->wb_bytes;
 		/*
 		 * Tell the caller to flush out the request if
@@ -761,7 +770,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
 	req = nfs_try_to_update_request(inode, page, offset, bytes);
 	if (req != NULL)
 		goto out;
-	req = nfs_create_request(ctx, page, offset, bytes);
+	req = nfs_create_request(ctx, page, NULL, offset, bytes);
 	if (IS_ERR(req))
 		goto out;
 	nfs_inode_add_request(inode, req);
@@ -805,6 +814,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 			return 0;
 		l_ctx = req->wb_lock_context;
 		do_flush = req->wb_page != page || req->wb_context != ctx;
+		/* for now, flush if more than 1 request in page_group */
+		do_flush |= req->wb_this_page != req;
 		if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
 			do_flush |= l_ctx->lockowner.l_owner != current->files
 				|| l_ctx->lockowner.l_pid != current->tgid;
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 13d59af561f6..986c0c279d0e 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -26,6 +26,9 @@ enum {
 	PG_MAPPED,		/* page private set for buffered io */
 	PG_CLEAN,		/* write succeeded */
 	PG_COMMIT_TO_DS,	/* used by pnfs layouts */
+	PG_INODE_REF,		/* extra ref held by inode (head req only) */
+	PG_HEADLOCK,		/* page group lock of wb_head */
+	PG_TEARDOWN,		/* page group sync for destroy */
 };
 
 struct nfs_inode;
@@ -41,6 +44,8 @@ struct nfs_page {
 	struct kref		wb_kref;	/* reference count */
 	unsigned long		wb_flags;
 	struct nfs_write_verifier	wb_verf;	/* Commit cookie */
+	struct nfs_page		*wb_this_page;  /* list of reqs for this page */
+	struct nfs_page		*wb_head;       /* head pointer for req list */
 };
 
 struct nfs_pageio_descriptor;
@@ -87,9 +92,10 @@ struct nfs_pageio_descriptor {
 
 extern	struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
 					    struct page *page,
+					    struct nfs_page *last,
 					    unsigned int offset,
 					    unsigned int count);
-extern	void nfs_release_request(struct nfs_page *req);
+extern	void nfs_release_request(struct nfs_page *);
 
 
 extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
@@ -108,7 +114,10 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 				struct nfs_page *req);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
-extern	void nfs_unlock_and_release_request(struct nfs_page *req);
+extern	void nfs_unlock_and_release_request(struct nfs_page *);
+extern void nfs_page_group_lock(struct nfs_page *);
+extern void nfs_page_group_unlock(struct nfs_page *);
+extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
 
 /*
  * Lock the page of an asynchronous request
-- 
cgit v1.2.3-71-gd317


From 67d0338edd71db9a4f406d8778f7c525d31e9f7f Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:46 -0400
Subject: nfs: page group syncing in read path

Operations that modify state for a whole page must be syncronized across
all requests within a page group. In the read path, this is calling
unlock_page and SetPageUptodate. Both of these functions should not be
called until all requests in a page group have reached the point where
they would call them.

This patch should have no effect yet since all page groups currently
have one request, but will come into play when pg_test functions are
modified to split pages into sub-page regions.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c        |  2 ++
 fs/nfs/read.c            | 22 +++++++++++++++++-----
 include/linux/nfs_page.h |  2 ++
 3 files changed, 21 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 015fb7b48dfe..18ee4e99347e 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -395,6 +395,8 @@ static void nfs_free_request(struct nfs_page *req)
 
 	/* extra debug: make sure no sync bits are still set */
 	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags));
 
 	/* Release struct file and open context */
 	nfs_clear_request(req);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 902ba2c63d05..53d5b83611ce 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -105,10 +105,16 @@ static void nfs_readpage_release(struct nfs_page *req)
 {
 	struct inode *d_inode = req->wb_context->dentry->d_inode;
 
-	if (PageUptodate(req->wb_page))
-		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+	dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
+		(unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
+		(long long)req_offset(req));
 
-	unlock_page(req->wb_page);
+	if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
+		if (PageUptodate(req->wb_page))
+			nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+
+		unlock_page(req->wb_page);
+	}
 
 	dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
 			req->wb_context->dentry->d_inode->i_sb->s_id,
@@ -118,6 +124,12 @@ static void nfs_readpage_release(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
+static void nfs_page_group_set_uptodate(struct nfs_page *req)
+{
+	if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
+		SetPageUptodate(req->wb_page);
+}
+
 /* Note io was page aligned */
 static void nfs_read_completion(struct nfs_pgio_header *hdr)
 {
@@ -140,9 +152,9 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
 		bytes += req->wb_bytes;
 		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
 			if (bytes <= hdr->good_bytes)
-				SetPageUptodate(page);
+				nfs_page_group_set_uptodate(req);
 		} else
-			SetPageUptodate(page);
+			nfs_page_group_set_uptodate(req);
 		nfs_list_remove_request(req);
 		nfs_readpage_release(req);
 	}
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 986c0c279d0e..6385175a127b 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -29,6 +29,8 @@ enum {
 	PG_INODE_REF,		/* extra ref held by inode (head req only) */
 	PG_HEADLOCK,		/* page group lock of wb_head */
 	PG_TEARDOWN,		/* page group sync for destroy */
+	PG_UNLOCKPAGE,		/* page group sync bit in read path */
+	PG_UPTODATE,		/* page group sync bit in read path */
 };
 
 struct nfs_inode;
-- 
cgit v1.2.3-71-gd317


From 20633f042fd0907300069714b98aaf607a8b5bf8 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:47 -0400
Subject: nfs: page group syncing in write path

Operations that modify state for a whole page must be syncronized across
all requests within a page group. In the write path, this is calling
end_page_writeback and removing the head request from an inode.
Both of these operations should not be called until all requests
in a page group have reached the point where they would call them.

This patch should have no effect yet since all page groups currently
have one request, but will come into play when pg_test functions are
modified to split pages into sub-page regions.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c        |  2 ++
 fs/nfs/write.c           | 32 ++++++++++++++++++++------------
 include/linux/nfs_page.h |  2 ++
 3 files changed, 24 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 18ee4e99347e..ceb4424614aa 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -397,6 +397,8 @@ static void nfs_free_request(struct nfs_page *req)
 	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
 	WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags));
 	WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags));
 
 	/* Release struct file and open context */
 	nfs_clear_request(req);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d0f30f12a8b3..5d752766139d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -201,12 +201,15 @@ static void nfs_set_page_writeback(struct page *page)
 	}
 }
 
-static void nfs_end_page_writeback(struct page *page)
+static void nfs_end_page_writeback(struct nfs_page *req)
 {
-	struct inode *inode = page_file_mapping(page)->host;
+	struct inode *inode = page_file_mapping(req->wb_page)->host;
 	struct nfs_server *nfss = NFS_SERVER(inode);
 
-	end_page_writeback(page);
+	if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
+		return;
+
+	end_page_writeback(req->wb_page);
 	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
 		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 }
@@ -397,15 +400,20 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_page *head;
 
-	spin_lock(&inode->i_lock);
-	if (likely(!PageSwapCache(req->wb_page))) {
-		set_page_private(req->wb_page, 0);
-		ClearPagePrivate(req->wb_page);
-		clear_bit(PG_MAPPED, &req->wb_flags);
+	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
+		head = req->wb_head;
+
+		spin_lock(&inode->i_lock);
+		if (likely(!PageSwapCache(head->wb_page))) {
+			set_page_private(head->wb_page, 0);
+			ClearPagePrivate(head->wb_page);
+			clear_bit(PG_MAPPED, &head->wb_flags);
+		}
+		nfsi->npages--;
+		spin_unlock(&inode->i_lock);
 	}
-	nfsi->npages--;
-	spin_unlock(&inode->i_lock);
 	nfs_release_request(req);
 }
 
@@ -599,7 +607,7 @@ remove_req:
 		nfs_inode_remove_request(req);
 next:
 		nfs_unlock_request(req);
-		nfs_end_page_writeback(req->wb_page);
+		nfs_end_page_writeback(req);
 		do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
 		nfs_release_request(req);
 	}
@@ -964,7 +972,7 @@ static void nfs_redirty_request(struct nfs_page *req)
 {
 	nfs_mark_request_dirty(req);
 	nfs_unlock_request(req);
-	nfs_end_page_writeback(req->wb_page);
+	nfs_end_page_writeback(req);
 	nfs_release_request(req);
 }
 
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 6385175a127b..7d9096d95d4a 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -31,6 +31,8 @@ enum {
 	PG_TEARDOWN,		/* page group sync for destroy */
 	PG_UNLOCKPAGE,		/* page group sync bit in read path */
 	PG_UPTODATE,		/* page group sync bit in read path */
+	PG_WB_END,		/* page group sync bit in write path */
+	PG_REMOVE,		/* page group sync bit in write path */
 };
 
 struct nfs_inode;
-- 
cgit v1.2.3-71-gd317


From 7f714720fac03383d687dbe39494cc96b845bd46 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:53 -0400
Subject: nfs: remove data list from pgio header

Since the ability to split pages into subpage requests has been added,
nfs_pgio_header->rpc_list only ever has one pgio data.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c       | 39 ++++++---------------------------------
 fs/nfs/pnfs.c           | 41 +++++++++++++++--------------------------
 include/linux/nfs_xdr.h |  5 +++--
 3 files changed, 24 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ec4311df05d9..fab78d13ee14 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -470,7 +470,6 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
 		struct nfs_pgio_header *hdr = &header->header;
 
 		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&hdr->rpc_list);
 		spin_lock_init(&hdr->lock);
 		atomic_set(&hdr->refcnt, 0);
 		hdr->rw_ops = ops;
@@ -648,27 +647,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
 
-static int nfs_do_multiple_pgios(struct list_head *head,
-				 const struct rpc_call_ops *call_ops,
-				 int how)
-{
-	struct nfs_pgio_data *data;
-	int ret = 0;
-
-	while (!list_empty(head)) {
-		int ret2;
-
-		data = list_first_entry(head, struct nfs_pgio_data, list);
-		list_del_init(&data->list);
-
-		ret2 = nfs_initiate_pgio(NFS_CLIENT(data->header->inode),
-					 data, call_ops, how, 0);
-		if (ret == 0)
-			 ret = ret2;
-	}
-	return ret;
-}
-
 /**
  * nfs_pgio_error - Clean up from a pageio error
  * @desc: IO descriptor
@@ -677,14 +655,9 @@ static int nfs_do_multiple_pgios(struct list_head *head,
 static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
 			  struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_data *data;
-
 	set_bit(NFS_IOHDR_REDO, &hdr->flags);
-	while (!list_empty(&hdr->rpc_list)) {
-		data = list_first_entry(&hdr->rpc_list, struct nfs_pgio_data, list);
-		list_del(&data->list);
-		nfs_pgio_data_release(data);
-	}
+	nfs_pgio_data_release(hdr->data);
+	hdr->data = NULL;
 	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 	return -ENOMEM;
 }
@@ -794,7 +767,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 
 	/* Set up the argument struct */
 	nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
-	list_add(&data->list, &hdr->rpc_list);
+	hdr->data = data;
 	desc->pg_rpc_callops = &nfs_pgio_common_ops;
 	return 0;
 }
@@ -816,9 +789,9 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
 	atomic_inc(&hdr->refcnt);
 	ret = nfs_generic_pgio(desc, hdr);
 	if (ret == 0)
-		ret = nfs_do_multiple_pgios(&hdr->rpc_list,
-					    desc->pg_rpc_callops,
-					    desc->pg_ioflags);
+		ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
+					hdr->data, desc->pg_rpc_callops,
+					desc->pg_ioflags, 0);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
 	return ret;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 354c53cd4095..6ef108b1d85f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1573,23 +1573,18 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
 }
 
 static void
-pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+pnfs_do_write(struct nfs_pageio_descriptor *desc,
+	      struct nfs_pgio_header *hdr, int how)
 {
-	struct nfs_pgio_data *data;
+	struct nfs_pgio_data *data = hdr->data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
+	enum pnfs_try_status trypnfs;
 
 	desc->pg_lseg = NULL;
-	while (!list_empty(head)) {
-		enum pnfs_try_status trypnfs;
-
-		data = list_first_entry(head, struct nfs_pgio_data, list);
-		list_del_init(&data->list);
-
-		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
-		if (trypnfs == PNFS_NOT_ATTEMPTED)
-			pnfs_write_through_mds(desc, data);
-	}
+	trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+	if (trypnfs == PNFS_NOT_ATTEMPTED)
+		pnfs_write_through_mds(desc, data);
 	pnfs_put_lseg(lseg);
 }
 
@@ -1623,7 +1618,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 		pnfs_put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 	} else
-		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
+		pnfs_do_write(desc, hdr, desc->pg_ioflags);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
 	return ret;
@@ -1731,23 +1726,17 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
 }
 
 static void
-pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_data *data;
+	struct nfs_pgio_data *data = hdr->data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
+	enum pnfs_try_status trypnfs;
 
 	desc->pg_lseg = NULL;
-	while (!list_empty(head)) {
-		enum pnfs_try_status trypnfs;
-
-		data = list_first_entry(head, struct nfs_pgio_data, list);
-		list_del_init(&data->list);
-
-		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
-		if (trypnfs == PNFS_NOT_ATTEMPTED)
-			pnfs_read_through_mds(desc, data);
-	}
+	trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+	if (trypnfs == PNFS_NOT_ATTEMPTED)
+		pnfs_read_through_mds(desc, data);
 	pnfs_put_lseg(lseg);
 }
 
@@ -1782,7 +1771,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 		pnfs_put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 	} else
-		pnfs_do_multiple_reads(desc, &hdr->rpc_list);
+		pnfs_do_read(desc, hdr);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
 	return ret;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index adef7bd2d06d..ae636013fb1f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1256,11 +1256,13 @@ enum {
 	NFS_IOHDR_NEED_RESCHED,
 };
 
+struct nfs_pgio_data;
+
 struct nfs_pgio_header {
 	struct inode		*inode;
 	struct rpc_cred		*cred;
 	struct list_head	pages;
-	struct list_head	rpc_list;
+	struct nfs_pgio_data	*data;
 	atomic_t		refcnt;
 	struct nfs_page		*req;
 	struct nfs_writeverf	verf;		/* Used for writes */
@@ -1282,7 +1284,6 @@ struct nfs_pgio_header {
 
 struct nfs_pgio_data {
 	struct nfs_pgio_header	*header;
-	struct list_head	list;
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;
 	struct nfs_writeverf	verf;		/* Used for writes */
-- 
cgit v1.2.3-71-gd317


From 5002c58639d41b93e800c8a4b7eca49c40d57822 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:54 -0400
Subject: pnfs: support multiple verfs per direct req

Support direct requests that span multiple pnfs data servers by
comparing nfs_pgio_header->verf to a cached verf in pnfs_commit_bucket.
Continue to use dreq->verf if the MDS is used / non-pNFS.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c         | 102 +++++++++++++++++++++++++++++++++++++++++++++---
 fs/nfs/nfs4filelayout.c |   6 +++
 include/linux/nfs.h     |   5 ++-
 include/linux/nfs_xdr.h |   2 +
 4 files changed, 109 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2c0e08f4cf71..4ad7bc388679 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
 	return atomic_dec_and_test(&dreq->io_count);
 }
 
+/*
+ * nfs_direct_select_verf - select the right verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
+ * @ds_idx - index of data server in data server list, only valid if ds_clp set
+ *
+ * returns the correct verifier to use given the role of the server
+ */
+static struct nfs_writeverf *
+nfs_direct_select_verf(struct nfs_direct_req *dreq,
+		       struct nfs_client *ds_clp,
+		       int ds_idx)
+{
+	struct nfs_writeverf *verfp = &dreq->verf;
+
+#ifdef CONFIG_NFS_V4_1
+	if (ds_clp) {
+		/* pNFS is in use, use the DS verf */
+		if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
+			verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
+		else
+			WARN_ON_ONCE(1);
+	}
+#endif
+	return verfp;
+}
+
+
+/*
+ * nfs_direct_set_hdr_verf - set the write/commit verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verfs
+ *
+ * Set the server's (MDS or DS) "seen" verifier
+ */
+static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
+				    struct nfs_pgio_header *hdr)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+				      hdr->data->ds_idx);
+	WARN_ON_ONCE(verfp->committed >= 0);
+	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+	WARN_ON_ONCE(verfp->committed < 0);
+}
+
+/*
+ * nfs_direct_cmp_hdr_verf - compare verifier for pgio header
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verf
+ *
+ * set the server's "seen" verf if not initialized.
+ * returns result of comparison between @hdr->verf and the "seen"
+ * verf of the server used by @hdr (DS or MDS)
+ */
+static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
+					  struct nfs_pgio_header *hdr)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+					 hdr->data->ds_idx);
+	if (verfp->committed < 0) {
+		nfs_direct_set_hdr_verf(dreq, hdr);
+		return 0;
+	}
+	return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+}
+
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
+/*
+ * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
+ * @dreq - direct request possibly spanning multiple servers
+ * @data - commit data to validate against previously seen verf
+ *
+ * returns result of comparison between @data->verf and the verf of
+ * the server used by @data (DS or MDS)
+ */
+static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
+					   struct nfs_commit_data *data)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, data->ds_clp,
+					 data->ds_commit_index);
+	WARN_ON_ONCE(verfp->committed < 0);
+	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+}
+#endif
+
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -168,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 	kref_get(&dreq->kref);
 	init_completion(&dreq->completion);
 	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */
 	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
 	spin_lock_init(&dreq->lock);
 
@@ -602,7 +694,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 		dprintk("NFS: %5u commit failed with error %d.\n",
 			data->task.tk_pid, status);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-	} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+	} else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
 		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 	}
@@ -811,13 +903,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
 				bit = NFS_IOHDR_NEED_RESCHED;
 			else if (dreq->flags == 0) {
-				memcpy(&dreq->verf, &hdr->verf,
-				       sizeof(dreq->verf));
+				nfs_direct_set_hdr_verf(dreq, hdr);
 				bit = NFS_IOHDR_NEED_COMMIT;
 				dreq->flags = NFS_ODIRECT_DO_COMMIT;
 			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
-				if (memcmp(&dreq->verf, &hdr->verf, sizeof(dreq->verf))) {
-					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+				if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+					dreq->flags =
+						NFS_ODIRECT_RESCHED_WRITES;
 					bit = NFS_IOHDR_NEED_RESCHED;
 				} else
 					bit = NFS_IOHDR_NEED_COMMIT;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 7a665e0f35b7..0ebc521ea6fc 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -560,6 +560,7 @@ filelayout_read_pagelist(struct nfs_pgio_data *data)
 	/* No multipath support. Use first DS */
 	atomic_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
+	data->ds_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
 	if (fh)
 		data->args.fh = fh;
@@ -603,6 +604,7 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
 	data->pgio_done_cb = filelayout_write_done_cb;
 	atomic_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
+	data->ds_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
 	if (fh)
 		data->args.fh = fh;
@@ -875,6 +877,8 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 	for (i = 0; i < size; i++) {
 		INIT_LIST_HEAD(&buckets[i].written);
 		INIT_LIST_HEAD(&buckets[i].committing);
+		/* mark direct verifier as unset */
+		buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
 	}
 
 	spin_lock(cinfo->lock);
@@ -885,6 +889,8 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 			    &buckets[i].written);
 		list_splice(&cinfo->ds->buckets[i].committing,
 			    &buckets[i].committing);
+		buckets[i].direct_verf.committed =
+			cinfo->ds->buckets[i].direct_verf.committed;
 		buckets[i].wlseg = cinfo->ds->buckets[i].wlseg;
 		buckets[i].clseg = cinfo->ds->buckets[i].clseg;
 	}
diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index 3e794c12e90a..610af5155ef2 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h
@@ -46,6 +46,9 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc
 enum nfs3_stable_how {
 	NFS_UNSTABLE = 0,
 	NFS_DATA_SYNC = 1,
-	NFS_FILE_SYNC = 2
+	NFS_FILE_SYNC = 2,
+
+	/* used by direct.c to mark verf as invalid */
+	NFS_INVALID_STABLE_HOW = -1
 };
 #endif /* _LINUX_NFS_H */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ae636013fb1f..9a1396e70310 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1112,6 +1112,7 @@ struct pnfs_commit_bucket {
 	struct list_head committing;
 	struct pnfs_layout_segment *wlseg;
 	struct pnfs_layout_segment *clseg;
+	struct nfs_writeverf direct_verf;
 };
 
 struct pnfs_ds_commit_info {
@@ -1294,6 +1295,7 @@ struct nfs_pgio_data {
 	__u64			mds_offset;	/* Filelayout dense stripe */
 	struct nfs_page_array	pages;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
+	int			ds_idx;		/* ds index if ds_clp is set */
 };
 
 struct nfs_rw_header {
-- 
cgit v1.2.3-71-gd317


From 86f6cf41272de9d6ffa05ab46028b15d160a6f3e Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Sat, 24 May 2014 09:34:26 +0200
Subject: net: of_mdio: add of_mdiobus_link_phydev()

Add a function to walk the list of subnodes of a mdio bus and look for
a node that matches the phy's address with its 'reg' property. If found,
set the of_node pointer for the phy. This allows auto-probed pyh
devices to be augmented by information passed in via DT.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c |  6 ++++++
 drivers/of/of_mdio.c       | 33 +++++++++++++++++++++++++++++++++
 include/linux/of_mdio.h    |  8 ++++++++
 3 files changed, 47 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index a6284964b711..2e58aa54484c 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -300,6 +300,12 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr)
 	if (IS_ERR(phydev) || phydev == NULL)
 		return phydev;
 
+	/*
+	 * For DT, see if the auto-probed phy has a correspoding child
+	 * in the bus node, and set the of_node pointer in this case.
+	 */
+	of_mdiobus_link_phydev(bus, phydev);
+
 	err = phy_device_register(phydev);
 	if (err) {
 		phy_device_free(phydev);
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 731d3d9052d7..7c8c142e4eb8 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -183,6 +183,39 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 }
 EXPORT_SYMBOL(of_mdiobus_register);
 
+/**
+ * of_mdiobus_link_phydev - Find a device node for a phy
+ * @mdio: pointer to mii_bus structure
+ * @phydev: phydev for which the of_node pointer should be set
+ *
+ * Walk the list of subnodes of a mdio bus and look for a node that matches the
+ * phy's address with its 'reg' property. If found, set the of_node pointer for
+ * the phy. This allows auto-probed pyh devices to be supplied with information
+ * passed in via DT.
+ */
+void of_mdiobus_link_phydev(struct mii_bus *mdio,
+			    struct phy_device *phydev)
+{
+	struct device *dev = &phydev->dev;
+	struct device_node *child;
+
+	if (dev->of_node || !mdio->dev.of_node)
+		return;
+
+	for_each_available_child_of_node(mdio->dev.of_node, child) {
+		int addr;
+
+		addr = of_mdio_parse_addr(&mdio->dev, child);
+		if (addr < 0)
+			continue;
+
+		if (addr == phydev->addr) {
+			dev->of_node = child;
+			return;
+		}
+	}
+}
+
 /* Helper function for of_phy_find_device */
 static int of_phy_match(struct device *dev, void *phy_np)
 {
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index d449018d0726..a70c9493d55a 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -25,6 +25,9 @@ struct phy_device *of_phy_attach(struct net_device *dev,
 
 extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
 
+extern void of_mdiobus_link_phydev(struct mii_bus *mdio,
+				   struct phy_device *phydev);
+
 #else /* CONFIG_OF */
 static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 {
@@ -60,6 +63,11 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
 {
 	return NULL;
 }
+
+static inline void of_mdiobus_link_phydev(struct mii_bus *mdio,
+					  struct phy_device *phydev)
+{
+}
 #endif /* CONFIG_OF */
 
 #if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY)
-- 
cgit v1.2.3-71-gd317


From 2abdd3137e78adca69b0722307aa2ef89f2cf3b6 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 14 May 2014 16:58:19 +0300
Subject: drm: store connector name in connector struct (v2)

This makes drm_get_connector_name() thread safe.

[airlied: fix to build.]

Reference: http://lkml.kernel.org/r/645ee6e22cad47d38a2b35c21c8d5fe3@DC1-MBX-01.ptsecurity.ru
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc.c | 36 ++++++++++++++++++++----------------
 include/drm/drm_crtc.h     |  2 ++
 2 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 34f0bf18d80d..293eb4c24f35 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -277,21 +277,11 @@ EXPORT_SYMBOL(drm_get_encoder_name);
 
 /**
  * drm_get_connector_name - return a string for connector
- * @connector: connector to compute name of
- *
- * Note that the buffer used by this function is globally shared and owned by
- * the function itself.
- *
- * FIXME: This isn't really multithreading safe.
+ * @connector: the connector to get name for
  */
 const char *drm_get_connector_name(const struct drm_connector *connector)
 {
-	static char buf[32];
-
-	snprintf(buf, 32, "%s-%d",
-		 drm_connector_enum_list[connector->connector_type].name,
-		 connector->connector_type_id);
-	return buf;
+	return connector->name;
 }
 EXPORT_SYMBOL(drm_get_connector_name);
 
@@ -824,7 +814,7 @@ int drm_connector_init(struct drm_device *dev,
 
 	ret = drm_mode_object_get(dev, &connector->base, DRM_MODE_OBJECT_CONNECTOR);
 	if (ret)
-		goto out;
+		goto out_unlock;
 
 	connector->base.properties = &connector->properties;
 	connector->dev = dev;
@@ -834,9 +824,17 @@ int drm_connector_init(struct drm_device *dev,
 		ida_simple_get(connector_ida, 1, 0, GFP_KERNEL);
 	if (connector->connector_type_id < 0) {
 		ret = connector->connector_type_id;
-		drm_mode_object_put(dev, &connector->base);
-		goto out;
+		goto out_put;
 	}
+	connector->name =
+		kasprintf(GFP_KERNEL, "%s-%d",
+			  drm_connector_enum_list[connector_type].name,
+			  connector->connector_type_id);
+	if (!connector->name) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
 	INIT_LIST_HEAD(&connector->probed_modes);
 	INIT_LIST_HEAD(&connector->modes);
 	connector->edid_blob_ptr = NULL;
@@ -853,7 +851,11 @@ int drm_connector_init(struct drm_device *dev,
 	drm_object_attach_property(&connector->base,
 				      dev->mode_config.dpms_property, 0);
 
- out:
+out_put:
+	if (ret)
+		drm_mode_object_put(dev, &connector->base);
+
+out_unlock:
 	drm_modeset_unlock_all(dev);
 
 	return ret;
@@ -881,6 +883,8 @@ void drm_connector_cleanup(struct drm_connector *connector)
 		   connector->connector_type_id);
 
 	drm_mode_object_put(dev, &connector->base);
+	kfree(connector->name);
+	connector->name = NULL;
 	list_del(&connector->head);
 	dev->mode_config.num_connector--;
 }
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 698d54e27f39..b5c97d5b2654 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -444,6 +444,7 @@ struct drm_encoder {
  * @attr: sysfs attributes
  * @head: list management
  * @base: base KMS object
+ * @name: connector name
  * @connector_type: one of the %DRM_MODE_CONNECTOR_<foo> types from drm_mode.h
  * @connector_type_id: index into connector type enum
  * @interlace_allowed: can this connector handle interlaced modes?
@@ -482,6 +483,7 @@ struct drm_connector {
 
 	struct drm_mode_object base;
 
+	char *name;
 	int connector_type;
 	int connector_type_id;
 	bool interlace_allowed;
-- 
cgit v1.2.3-71-gd317


From e5748946e907f767e7bb73e5ed31584981f0ff65 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Wed, 14 May 2014 16:58:20 +0300
Subject: drm: store encoder name in encoder struct

This makes drm_get_encoder_name() thread safe.

Reference: http://lkml.kernel.org/r/645ee6e22cad47d38a2b35c21c8d5fe3@DC1-MBX-01\
.ptsecurity.ru
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc.c | 31 +++++++++++++++++--------------
 include/drm/drm_crtc.h     |  2 ++
 2 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 293eb4c24f35..37a3e0791ddf 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -257,21 +257,11 @@ void drm_connector_ida_destroy(void)
 
 /**
  * drm_get_encoder_name - return a string for encoder
- * @encoder: encoder to compute name of
- *
- * Note that the buffer used by this function is globally shared and owned by
- * the function itself.
- *
- * FIXME: This isn't really multithreading safe.
+ * @encoder: the encoder to get name for
  */
 const char *drm_get_encoder_name(const struct drm_encoder *encoder)
 {
-	static char buf[32];
-
-	snprintf(buf, 32, "%s-%d",
-		 drm_encoder_enum_list[encoder->encoder_type].name,
-		 encoder->base.id);
-	return buf;
+	return encoder->name;
 }
 EXPORT_SYMBOL(drm_get_encoder_name);
 
@@ -986,16 +976,27 @@ int drm_encoder_init(struct drm_device *dev,
 
 	ret = drm_mode_object_get(dev, &encoder->base, DRM_MODE_OBJECT_ENCODER);
 	if (ret)
-		goto out;
+		goto out_unlock;
 
 	encoder->dev = dev;
 	encoder->encoder_type = encoder_type;
 	encoder->funcs = funcs;
+	encoder->name = kasprintf(GFP_KERNEL, "%s-%d",
+				  drm_encoder_enum_list[encoder_type].name,
+				  encoder->base.id);
+	if (!encoder->name) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
 
 	list_add_tail(&encoder->head, &dev->mode_config.encoder_list);
 	dev->mode_config.num_encoder++;
 
- out:
+out_put:
+	if (ret)
+		drm_mode_object_put(dev, &encoder->base);
+
+out_unlock:
 	drm_modeset_unlock_all(dev);
 
 	return ret;
@@ -1013,6 +1014,8 @@ void drm_encoder_cleanup(struct drm_encoder *encoder)
 	struct drm_device *dev = encoder->dev;
 	drm_modeset_lock_all(dev);
 	drm_mode_object_put(dev, &encoder->base);
+	kfree(encoder->name);
+	encoder->name = NULL;
 	list_del(&encoder->head);
 	dev->mode_config.num_encoder--;
 	drm_modeset_unlock_all(dev);
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index b5c97d5b2654..5c1c31cc11cd 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -400,6 +400,7 @@ struct drm_encoder_funcs {
  * @dev: parent DRM device
  * @head: list management
  * @base: base KMS object
+ * @name: encoder name
  * @encoder_type: one of the %DRM_MODE_ENCODER_<foo> types in drm_mode.h
  * @possible_crtcs: bitmask of potential CRTC bindings
  * @possible_clones: bitmask of potential sibling encoders for cloning
@@ -416,6 +417,7 @@ struct drm_encoder {
 	struct list_head head;
 
 	struct drm_mode_object base;
+	char *name;
 	int encoder_type;
 	uint32_t possible_crtcs;
 	uint32_t possible_clones;
-- 
cgit v1.2.3-71-gd317


From 182407a6ed5333fc37dd980a8de91a8f826a94f6 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 2 May 2014 11:09:54 +1000
Subject: drm: add DP MST encoder type

This adds an encoder type for DP MST encoders.

Reviewed-by: Todd Previte <tprevite@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc.c  | 1 +
 include/uapi/drm/drm_mode.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 37a3e0791ddf..edee61bccd14 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -227,6 +227,7 @@ static const struct drm_prop_enum_list drm_encoder_enum_list[] =
 	{ DRM_MODE_ENCODER_TVDAC, "TV" },
 	{ DRM_MODE_ENCODER_VIRTUAL, "Virtual" },
 	{ DRM_MODE_ENCODER_DSI, "DSI" },
+	{ DRM_MODE_ENCODER_DPMST, "DP MST" },
 };
 
 static const struct drm_prop_enum_list drm_subpixel_enum_list[] =
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index f104c2603ebe..719add464f95 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -181,6 +181,7 @@ struct drm_mode_get_plane_res {
 #define DRM_MODE_ENCODER_TVDAC	4
 #define DRM_MODE_ENCODER_VIRTUAL 5
 #define DRM_MODE_ENCODER_DSI	6
+#define DRM_MODE_ENCODER_DPMST	7
 
 struct drm_mode_get_encoder {
 	__u32 encoder_id;
-- 
cgit v1.2.3-71-gd317


From 97982f5a91e91dab26dd0246083b9adf3ba8b2e3 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 29 May 2014 16:31:02 +0300
Subject: IB/mlx4: Preparation for VFs to issue/receive SMI (QP0)
 requests/responses

Currently, VFs in SRIOV VFs are denied QP0 access.  The main reason
for this decision is security, since Subnet Management Datagrams
(SMPs) are not restricted by network partitioning and may affect the
physical network topology.  Moreover, even the SM may be denied access
from portions of the network by setting management keys unknown to the
SM.

However, it is desirable to grant SMI access to certain privileged
VFs, so that certain network management activities may be conducted
within virtual machines instead of the hypervisor.

This commit does the following:

1. Create QP0 tunnel QPs for all VFs.

2. Discard SMI mads sent-from/received-for non-privileged VFs in the
   hypervisor MAD multiplex/demultiplex logic.  SMI mads from/for
   privileged VFs are allowed to pass.

3. MAD_IFC wrapper changes/fixes.  For non-privileged VFs, only
   host-view MAD_IFC commands are allowed, and only for SMI LID-Routed
   GET mads.  For privileged VFs, there are no restrictions.

This commit does not allow privileged VFs as yet.  To determine if a VF
is privileged, it calls function mlx4_vf_smi_enabled().  This function
returns 0 unconditionally for now.

The next two commits allow defining and activating privileged VFs.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/mad.c         | 40 ++++++++++++++++-----------
 drivers/infiniband/hw/mlx4/qp.c          | 16 +++++------
 drivers/net/ethernet/mellanox/mlx4/cmd.c | 47 ++++++++++++++++++++++----------
 include/linux/mlx4/device.h              |  1 +
 4 files changed, 66 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index fd36ec672632..287ad0564acd 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -478,10 +478,6 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
 		return -EAGAIN;
 
-	/* QP0 forwarding only for Dom0 */
-	if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
-		return -EINVAL;
-
 	if (!dest_qpt)
 		tun_qp = &tun_ctx->qp[0];
 	else
@@ -667,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
 	}
 	/* Class-specific handling */
 	switch (mad->mad_hdr.mgmt_class) {
+	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+		/* 255 indicates the dom0 */
+		if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
+			if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
+				return -EPERM;
+			/* for a VF. drop unsolicited MADs */
+			if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
+				mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
+					     slave, mad->mad_hdr.mgmt_class,
+					     mad->mad_hdr.method);
+				return -EINVAL;
+			}
+		}
+		break;
 	case IB_MGMT_CLASS_SUBN_ADM:
 		if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
 					     (struct ib_sa_mad *) mad))
@@ -1165,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 	if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
 		return -EAGAIN;
 
-	/* QP0 forwarding only for Dom0 */
-	if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
-		return -EINVAL;
-
 	if (dest_qpt == IB_QPT_SMI) {
 		src_qpnum = 0;
 		sqp = &sqp_ctx->qp[0];
@@ -1285,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
 			     "belongs to another slave\n", wc->src_qp);
 		return;
 	}
-	if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
-		mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
-			     "non-master trying to send QP0 packets\n", wc->src_qp);
-		return;
-	}
 
 	/* Map transaction ID */
 	ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
@@ -1317,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
 
 	/* Class-specific handling */
 	switch (tunnel->mad.mad_hdr.mgmt_class) {
+	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+		if (slave != mlx4_master_func_num(dev->dev) &&
+		    !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
+			return;
+		break;
 	case IB_MGMT_CLASS_SUBN_ADM:
 		if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
 			      (struct ib_sa_mad *) &tunnel->mad))
@@ -1749,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
 		return -EEXIST;
 
 	ctx->state = DEMUX_PV_STATE_STARTING;
-	/* have QP0 only on port owner, and only if link layer is IB */
-	if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
-	    rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
+	/* have QP0 only if link layer is IB */
+	if (rdma_port_get_link_layer(ibdev, ctx->port) ==
+	    IB_LINK_LAYER_INFINIBAND)
 		ctx->has_smi = 1;
 
 	if (ctx->has_smi) {
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 41308af4163c..2e8c58806e2f 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2370,7 +2370,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
 
 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 				    struct mlx4_wqe_datagram_seg *dseg,
-				    struct ib_send_wr *wr, enum ib_qp_type qpt)
+				    struct ib_send_wr *wr,
+				    enum mlx4_ib_qp_type qpt)
 {
 	union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
 	struct mlx4_av sqp_av = {0};
@@ -2383,8 +2384,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 			cpu_to_be32(0xf0000000);
 
 	memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
-	/* This function used only for sending on QP1 proxies */
-	dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+	if (qpt == MLX4_IB_QPT_PROXY_GSI)
+		dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+	else
+		dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
 	/* Use QKEY from the QP context, which is set by master */
 	dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 }
@@ -2700,16 +2703,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			size += seglen / 16;
 			break;
 		case MLX4_IB_QPT_PROXY_SMI:
-			/* don't allow QP0 sends on guests */
-			err = -ENOSYS;
-			*bad_wr = wr;
-			goto out;
 		case MLX4_IB_QPT_PROXY_GSI:
 			/* If we are tunneling special qps, this is a UD qp.
 			 * In this case we first add a UD segment targeting
 			 * the tunnel qp, and then add a header with address
 			 * information */
-			set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
+			set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+						qp->mlx4_ib_qp_type);
 			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
 			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 			build_tunnel_header(wr, wqe, &seglen);
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 78099eab7673..b0e48d426fce 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -705,20 +705,28 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 	struct ib_smp *smp = inbox->buf;
 	u32 index;
 	u8 port;
+	u8 opcode_modifier;
 	u16 *table;
 	int err;
 	int vidx, pidx;
+	int network_view;
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct ib_smp *outsmp = outbox->buf;
 	__be16 *outtab = (__be16 *)(outsmp->data);
 	__be32 slave_cap_mask;
 	__be64 slave_node_guid;
+
 	port = vhcr->in_modifier;
 
+	/* network-view bit is for driver use only, and should not be passed to FW */
+	opcode_modifier = vhcr->op_modifier & ~0x8; /* clear netw view bit */
+	network_view = !!(vhcr->op_modifier & 0x8);
+
 	if (smp->base_version == 1 &&
 	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
 	    smp->class_version == 1) {
-		if (smp->method	== IB_MGMT_METHOD_GET) {
+		/* host view is paravirtualized */
+		if (!network_view && smp->method == IB_MGMT_METHOD_GET) {
 			if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
 				index = be32_to_cpu(smp->attr_mod);
 				if (port < 1 || port > dev->caps.num_ports)
@@ -743,7 +751,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 				/*get the slave specific caps:*/
 				/*do the command */
 				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-					    vhcr->in_modifier, vhcr->op_modifier,
+					    vhcr->in_modifier, opcode_modifier,
 					    vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 				/* modify the response for slaves */
 				if (!err && slave != mlx4_master_func_num(dev)) {
@@ -760,7 +768,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 				smp->attr_mod = cpu_to_be32(slave / 8);
 				/* execute cmd */
 				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-					     vhcr->in_modifier, vhcr->op_modifier,
+					     vhcr->in_modifier, opcode_modifier,
 					     vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 				if (!err) {
 					/* if needed, move slave gid to index 0 */
@@ -774,7 +782,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 			}
 			if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
 				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-					     vhcr->in_modifier, vhcr->op_modifier,
+					     vhcr->in_modifier, opcode_modifier,
 					     vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 				if (!err) {
 					slave_node_guid =  mlx4_get_slave_node_guid(dev, slave);
@@ -784,19 +792,24 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 			}
 		}
 	}
+
+	/* Non-privileged VFs are only allowed "host" view LID-routed 'Get' MADs.
+	 * These are the MADs used by ib verbs (such as ib_query_gids).
+	 */
 	if (slave != mlx4_master_func_num(dev) &&
-	    ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) ||
-	     (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
-	      smp->method == IB_MGMT_METHOD_SET))) {
-		mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, "
-			 "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n",
-			 slave, smp->method, smp->mgmt_class,
-			 be16_to_cpu(smp->attr_id));
-		return -EPERM;
+	    !mlx4_vf_smi_enabled(dev, slave, port)) {
+		if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+		      smp->method == IB_MGMT_METHOD_GET) || network_view) {
+			mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n",
+				 slave, smp->method, smp->mgmt_class,
+				 network_view ? "Network" : "Host",
+				 be16_to_cpu(smp->attr_id));
+			return -EPERM;
+		}
 	}
-	/*default:*/
+
 	return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-				    vhcr->in_modifier, vhcr->op_modifier,
+				    vhcr->in_modifier, opcode_modifier,
 				    vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 }
 
@@ -2537,3 +2550,9 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state);
+
+int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ba87bd21295a..83612faa819c 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1234,4 +1234,5 @@ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port);
 int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
 
 int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
+int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
 #endif /* MLX4_DEVICE_H */
-- 
cgit v1.2.3-71-gd317


From 99ec41d0a48cb6d14af25765f9449762f9d101f6 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 29 May 2014 16:31:03 +0300
Subject: mlx4: Add infrastructure for selecting VFs to enable QP0 via MLX
 proxy QPs

This commit adds the infrastructure for enabling selected VFs to
operate SMI (QP0) MADs without restriction.

Additionally, for these enabled VFs, their QP0 proxy and tunnel QPs
are MLX QPs.  As such, they operate over VL15.  Therefore, they are
not affected by "credit" problems or changes in the VLArb table (which
may shut down VL0).

Non-enabled VFs may only create UD proxy QP0 qps (which are forced by
the hypervisor to send packets using the q-key it assigns and places
in the qp-context).  Thus, non-enabled VFs will not pose a security
risk.  The hypervisor discards any privileged MADs it receives from
these non-enabled VFs.

By default, all VFs are NOT enabled, and must explicitly be enabled
by the administrator.

The sysfs interface which operates the VF enablement infrastructure
is provided in the next commit.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/qp.c                    | 52 +++++++++++++++++-----
 drivers/net/ethernet/mellanox/mlx4/cmd.c           | 13 +++++-
 drivers/net/ethernet/mellanox/mlx4/fw.c            | 44 ++++++++++++------
 drivers/net/ethernet/mellanox/mlx4/fw.h            |  1 +
 drivers/net/ethernet/mellanox/mlx4/main.c          | 16 +++++--
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  8 ++++
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  | 26 ++++++++---
 include/linux/mlx4/device.h                        |  1 +
 8 files changed, 126 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 2e8c58806e2f..b25600997cb6 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -608,6 +608,16 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
 	return !attr->srq;
 }
 
+static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
+{
+	int i;
+	for (i = 0; i < dev->caps.num_ports; i++) {
+		if (qpn == dev->caps.qp0_proxy[i])
+			return !!dev->caps.qp0_qkey[i];
+	}
+	return 0;
+}
+
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			    struct ib_qp_init_attr *init_attr,
 			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
@@ -625,10 +635,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		     !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
 			if (init_attr->qp_type == IB_QPT_GSI)
 				qp_type = MLX4_IB_QPT_PROXY_GSI;
-			else if (mlx4_is_master(dev->dev))
-				qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
-			else
-				qp_type = MLX4_IB_QPT_PROXY_SMI;
+			else {
+				if (mlx4_is_master(dev->dev) ||
+				    qp0_enabled_vf(dev->dev, sqpn))
+					qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
+				else
+					qp_type = MLX4_IB_QPT_PROXY_SMI;
+			}
 		}
 		qpn = sqpn;
 		/* add extra sg entry for tunneling */
@@ -643,7 +656,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			return -EINVAL;
 		if (tnl_init->proxy_qp_type == IB_QPT_GSI)
 			qp_type = MLX4_IB_QPT_TUN_GSI;
-		else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
+		else if (tnl_init->slave == mlx4_master_func_num(dev->dev) ||
+			 mlx4_vf_smi_enabled(dev->dev, tnl_init->slave,
+					     tnl_init->port))
 			qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
 		else
 			qp_type = MLX4_IB_QPT_TUN_SMI;
@@ -1930,6 +1945,19 @@ out:
 	return err;
 }
 
+static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
+{
+	int i;
+	for (i = 0; i < dev->caps.num_ports; i++) {
+		if (qpn == dev->caps.qp0_proxy[i] ||
+		    qpn == dev->caps.qp0_tunnel[i]) {
+			*qkey = dev->caps.qp0_qkey[i];
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
 				  struct ib_send_wr *wr,
 				  void *wqe, unsigned *mlx_seg_len)
@@ -1987,8 +2015,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
 			cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
 
 	sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
-	if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
-		return -EINVAL;
+	if (mlx4_is_master(mdev->dev)) {
+		if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+			return -EINVAL;
+	} else {
+		if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+			return -EINVAL;
+	}
 	sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
 	sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
 
@@ -2682,11 +2715,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			break;
 
 		case MLX4_IB_QPT_PROXY_SMI_OWNER:
-			if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
-				err = -ENOSYS;
-				*bad_wr = wr;
-				goto out;
-			}
 			err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
 			if (unlikely(err)) {
 				*bad_wr = wr;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index b0e48d426fce..26c3ebaa49d1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1666,6 +1666,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave)
 	for (port = min_port; port <= max_port; port++) {
 		if (!test_bit(port - 1, actv_ports.ports))
 			continue;
+		priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+			priv->mfunc.master.vf_admin[slave].enable_smi[port];
 		vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 		vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
 		vp_oper->state = *vp_admin;
@@ -1717,6 +1719,8 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave
 	for (port = min_port; port <= max_port; port++) {
 		if (!test_bit(port - 1, actv_ports.ports))
 			continue;
+		priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+			MLX4_VF_SMI_DISABLED;
 		vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 		if (NO_INDX != vp_oper->vlan_idx) {
 			__mlx4_unregister_vlan(&priv->dev,
@@ -2553,6 +2557,13 @@ EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state);
 
 int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port)
 {
-	return 0;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	if (slave < 1 || slave >= dev->num_slaves ||
+	    port < 1 || port > MLX4_MAX_PORTS)
+		return 0;
+
+	return priv->mfunc.master.vf_oper[slave].smi_enabled[port] ==
+		MLX4_VF_SMI_ENABLED;
 }
 EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index ef242e19766f..01e6dd61ee3c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -178,8 +178,8 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 				struct mlx4_cmd_info *cmd)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	u8	field;
-	u32	size;
+	u8	field, port;
+	u32	size, proxy_qp, qkey;
 	int	err = 0;
 
 #define QUERY_FUNC_CAP_FLAGS_OFFSET		0x0
@@ -209,6 +209,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 
 /* when opcode modifier = 1 */
 #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET		0x3
+#define QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET	0x4
 #define QUERY_FUNC_CAP_FLAGS0_OFFSET		0x8
 #define QUERY_FUNC_CAP_FLAGS1_OFFSET		0xc
 
@@ -221,6 +222,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 #define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC		0x40
 #define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN	0x80
 #define QUERY_FUNC_CAP_FLAGS1_NIC_INFO			0x10
+#define QUERY_FUNC_CAP_VF_ENABLE_QP0		0x08
 
 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
 
@@ -234,28 +236,35 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 			return -EINVAL;
 
 		vhcr->in_modifier = converted_port;
-		/* Set nic_info bit to mark new fields support */
-		field  = QUERY_FUNC_CAP_FLAGS1_NIC_INFO;
-		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
-
 		/* phys-port = logical-port */
 		field = vhcr->in_modifier -
 			find_first_bit(actv_ports.ports, dev->caps.num_ports);
 		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
 
-		field = vhcr->in_modifier;
+		port = vhcr->in_modifier;
+		proxy_qp = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1;
+
+		/* Set nic_info bit to mark new fields support */
+		field  = QUERY_FUNC_CAP_FLAGS1_NIC_INFO;
+
+		if (mlx4_vf_smi_enabled(dev, slave, port) &&
+		    !mlx4_get_parav_qkey(dev, proxy_qp, &qkey)) {
+			field |= QUERY_FUNC_CAP_VF_ENABLE_QP0;
+			MLX4_PUT(outbox->buf, qkey,
+				 QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET);
+		}
+		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
+
 		/* size is now the QP number */
-		size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1;
+		size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL);
 
 		size += 2;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL);
 
-		size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1;
-		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY);
-
-		size += 2;
-		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY);
+		MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP0_PROXY);
+		proxy_qp += 2;
+		MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP1_PROXY);
 
 		MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier],
 			 QUERY_FUNC_CAP_PHYS_PORT_ID);
@@ -326,7 +335,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
 	struct mlx4_cmd_mailbox *mailbox;
 	u32			*outbox;
 	u8			field, op_modifier;
-	u32			size;
+	u32			size, qkey;
 	int			err = 0, quotas = 0;
 
 	op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
@@ -442,6 +451,13 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
 		goto out;
 	}
 
+	if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) {
+		MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET);
+		func_cap->qp0_qkey = qkey;
+	} else {
+		func_cap->qp0_qkey = 0;
+	}
+
 	MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL);
 	func_cap->qp0_tunnel_qpn = size & 0xFFFFFF;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 6811ee00ba7c..1fce03ebe5c4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -134,6 +134,7 @@ struct mlx4_func_cap {
 	int	max_eq;
 	int	reserved_eq;
 	int	mcg_quota;
+	u32	qp0_qkey;
 	u32	qp0_tunnel_qpn;
 	u32	qp0_proxy_qpn;
 	u32	qp1_tunnel_qpn;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 12a7ee2e6098..908326876ab5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -666,13 +666,15 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 		return -ENODEV;
 	}
 
+	dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
 	dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 	dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 	dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 	dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 
 	if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
-	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
+	    !dev->caps.qp0_qkey) {
 		err = -ENOMEM;
 		goto err_mem;
 	}
@@ -684,6 +686,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 				 " port %d, aborting (%d).\n", i, err);
 			goto err_mem;
 		}
+		dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
 		dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
 		dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
 		dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
@@ -729,12 +732,16 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 	return 0;
 
 err_mem:
+	kfree(dev->caps.qp0_qkey);
 	kfree(dev->caps.qp0_tunnel);
 	kfree(dev->caps.qp0_proxy);
 	kfree(dev->caps.qp1_tunnel);
 	kfree(dev->caps.qp1_proxy);
-	dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
-		dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+	dev->caps.qp0_qkey = NULL;
+	dev->caps.qp0_tunnel = NULL;
+	dev->caps.qp0_proxy = NULL;
+	dev->caps.qp1_tunnel = NULL;
+	dev->caps.qp1_proxy = NULL;
 
 	return err;
 }
@@ -1697,6 +1704,7 @@ unmap_bf:
 	unmap_bf_area(dev);
 
 	if (mlx4_is_slave(dev)) {
+		kfree(dev->caps.qp0_qkey);
 		kfree(dev->caps.qp0_tunnel);
 		kfree(dev->caps.qp0_proxy);
 		kfree(dev->caps.qp1_tunnel);
@@ -2573,6 +2581,7 @@ err_master_mfunc:
 		mlx4_multi_func_cleanup(dev);
 
 	if (mlx4_is_slave(dev)) {
+		kfree(dev->caps.qp0_qkey);
 		kfree(dev->caps.qp0_tunnel);
 		kfree(dev->caps.qp0_proxy);
 		kfree(dev->caps.qp1_tunnel);
@@ -2702,6 +2711,7 @@ static void __mlx4_remove_one(struct pci_dev *pdev)
 	if (!mlx4_is_slave(dev))
 		mlx4_free_ownership(dev);
 
+	kfree(dev->caps.qp0_qkey);
 	kfree(dev->caps.qp0_tunnel);
 	kfree(dev->caps.qp0_proxy);
 	kfree(dev->caps.qp1_tunnel);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index f9c465101963..0efd1738fcb3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -133,6 +133,11 @@ enum {
 	MLX4_COMM_CMD_FLR = 254
 };
 
+enum {
+	MLX4_VF_SMI_DISABLED,
+	MLX4_VF_SMI_ENABLED
+};
+
 /*The flag indicates that the slave should delay the RESET cmd*/
 #define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb
 /*indicates how many retries will be done if we are in the middle of FLR*/
@@ -488,6 +493,7 @@ struct mlx4_vport_state {
 
 struct mlx4_vf_admin_state {
 	struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1];
+	u8 enable_smi[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_vport_oper_state {
@@ -495,8 +501,10 @@ struct mlx4_vport_oper_state {
 	int mac_idx;
 	int vlan_idx;
 };
+
 struct mlx4_vf_oper_state {
 	struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1];
+	u8 smi_enabled[MLX4_MAX_PORTS + 1];
 };
 
 struct slave_list {
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 1c3fdd4a1f7d..ad98162a8d79 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2827,10 +2827,12 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start,
 }
 
 static int verify_qp_parameters(struct mlx4_dev *dev,
+				struct mlx4_vhcr *vhcr,
 				struct mlx4_cmd_mailbox *inbox,
 				enum qp_transition transition, u8 slave)
 {
 	u32			qp_type;
+	u32			qpn;
 	struct mlx4_qp_context	*qp_ctx;
 	enum mlx4_qp_optpar	optpar;
 	int port;
@@ -2870,6 +2872,20 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
 						return -EINVAL;
 				}
 			break;
+		case MLX4_QP_ST_MLX:
+			qpn = vhcr->in_modifier & 0x7fffff;
+			port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+			if (transition == QP_TRANS_INIT2RTR &&
+			    slave != mlx4_master_func_num(dev) &&
+			    mlx4_is_qp_reserved(dev, qpn) &&
+			    !mlx4_vf_smi_enabled(dev, slave, port)) {
+				/* only enabled VFs may create MLX proxy QPs */
+				mlx4_err(dev, "%s: unprivileged slave %d attempting to create an MLX proxy special QP on port %d\n",
+					 __func__, slave, port);
+				return -EPERM;
+			}
+			break;
+
 		default:
 			break;
 		}
@@ -3454,7 +3470,7 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
 	err = adjust_qp_sched_queue(dev, slave, qpc, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_INIT2RTR, slave);
 	if (err)
 		return err;
 
@@ -3508,7 +3524,7 @@ int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTR2RTS, slave);
 	if (err)
 		return err;
 
@@ -3530,7 +3546,7 @@ int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTS2RTS, slave);
 	if (err)
 		return err;
 
@@ -3567,7 +3583,7 @@ int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2SQD, slave);
 	if (err)
 		return err;
 
@@ -3589,7 +3605,7 @@ int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2RTS, slave);
 	if (err)
 		return err;
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 83612faa819c..e2fc7011314c 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -401,6 +401,7 @@ struct mlx4_caps {
 	int			max_rq_desc_sz;
 	int			max_qp_init_rdma;
 	int			max_qp_dest_rdma;
+	u32			*qp0_qkey;
 	u32			*qp0_proxy;
 	u32			*qp1_proxy;
 	u32			*qp0_tunnel;
-- 
cgit v1.2.3-71-gd317


From 65fed8a8c155271cf647651bd62eecb5928ae3a4 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 29 May 2014 16:31:04 +0300
Subject: IB/mlx4: Add interface for selecting VFs to enable QP0 via MLX proxy
 QPs

This commit adds the sysfs interface for enabling QP0 on VFs for
selected VF/port.

By default, no VFs are enabled for QP0 operation.

To enable QP0 operation on a VF/port, under
/sys/class/infiniband/mlx4_x/iov/<b:d:f>/ports/x there are two new entries:

- smi_enabled (read-only). Indicates whether smi is currently
  enabled for the indicated VF/port

- enable_smi_admin (rw). Used by the admin to request that smi
  capability be enabled or disabled for the indicated VF/port.
  0 = disable, 1 = enable.
  The requested enablement will occur at the next reset of the
  VF (e.g. driver restart on the VM which owns the VF).

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/sysfs.c       | 105 ++++++++++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/cmd.c |  34 ++++++++++
 include/linux/mlx4/device.h              |   3 +
 3 files changed, 141 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 5a38e43eca65..cb4c66e723b5 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -389,8 +389,10 @@ struct mlx4_port {
 	struct mlx4_ib_dev    *dev;
 	struct attribute_group pkey_group;
 	struct attribute_group gid_group;
-	u8                     port_num;
+	struct device_attribute	enable_smi_admin;
+	struct device_attribute	smi_enabled;
 	int		       slave;
+	u8                     port_num;
 };
 
 
@@ -558,6 +560,101 @@ err:
 	return NULL;
 }
 
+static ssize_t sysfs_show_smi_enabled(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct mlx4_port *p =
+		container_of(attr, struct mlx4_port, smi_enabled);
+	ssize_t len = 0;
+
+	if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num))
+		len = sprintf(buf, "%d\n", 1);
+	else
+		len = sprintf(buf, "%d\n", 0);
+
+	return len;
+}
+
+static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct mlx4_port *p =
+		container_of(attr, struct mlx4_port, enable_smi_admin);
+	ssize_t len = 0;
+
+	if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num))
+		len = sprintf(buf, "%d\n", 1);
+	else
+		len = sprintf(buf, "%d\n", 0);
+
+	return len;
+}
+
+static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct mlx4_port *p =
+		container_of(attr, struct mlx4_port, enable_smi_admin);
+	int enable;
+
+	if (sscanf(buf, "%i", &enable) != 1 ||
+	    enable < 0 || enable > 1)
+		return -EINVAL;
+
+	if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable))
+		return -EINVAL;
+	return count;
+}
+
+static int add_vf_smi_entries(struct mlx4_port *p)
+{
+	int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+			IB_LINK_LAYER_ETHERNET;
+	int ret;
+
+	/* do not display entries if eth transport, or if master */
+	if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+		return 0;
+
+	sysfs_attr_init(&p->smi_enabled.attr);
+	p->smi_enabled.show = sysfs_show_smi_enabled;
+	p->smi_enabled.store = NULL;
+	p->smi_enabled.attr.name = "smi_enabled";
+	p->smi_enabled.attr.mode = 0444;
+	ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr);
+	if (ret) {
+		pr_err("failed to create smi_enabled\n");
+		return ret;
+	}
+
+	sysfs_attr_init(&p->enable_smi_admin.attr);
+	p->enable_smi_admin.show = sysfs_show_enable_smi_admin;
+	p->enable_smi_admin.store = sysfs_store_enable_smi_admin;
+	p->enable_smi_admin.attr.name = "enable_smi_admin";
+	p->enable_smi_admin.attr.mode = 0644;
+	ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr);
+	if (ret) {
+		pr_err("failed to create enable_smi_admin\n");
+		sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+		return ret;
+	}
+	return 0;
+}
+
+static void remove_vf_smi_entries(struct mlx4_port *p)
+{
+	int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+			IB_LINK_LAYER_ETHERNET;
+
+	if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+		return;
+
+	sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+	sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr);
+}
+
 static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
 {
 	struct mlx4_port *p;
@@ -602,6 +699,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
 	if (ret)
 		goto err_free_gid;
 
+	ret = add_vf_smi_entries(p);
+	if (ret)
+		goto err_free_gid;
+
 	list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
 	return 0;
 
@@ -669,6 +770,7 @@ err_add:
 		mport = container_of(p, struct mlx4_port, kobj);
 		sysfs_remove_group(p, &mport->pkey_group);
 		sysfs_remove_group(p, &mport->gid_group);
+		remove_vf_smi_entries(mport);
 		kobject_put(p);
 	}
 	kobject_put(dev->dev_ports_parent[slave]);
@@ -713,6 +815,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
 			port = container_of(p, struct mlx4_port, kobj);
 			sysfs_remove_group(p, &port->pkey_group);
 			sysfs_remove_group(p, &port->gid_group);
+			remove_vf_smi_entries(port);
 			kobject_put(p);
 			kobject_put(device->dev_ports_parent[slave]);
 		}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 26c3ebaa49d1..3370ecb8c3d2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2567,3 +2567,37 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port)
 		MLX4_VF_SMI_ENABLED;
 }
 EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled);
+
+int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	if (slave == mlx4_master_func_num(dev))
+		return 1;
+
+	if (slave < 1 || slave >= dev->num_slaves ||
+	    port < 1 || port > MLX4_MAX_PORTS)
+		return 0;
+
+	return priv->mfunc.master.vf_admin[slave].enable_smi[port] ==
+		MLX4_VF_SMI_ENABLED;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_get_enable_smi_admin);
+
+int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
+				 int enabled)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	if (slave == mlx4_master_func_num(dev))
+		return 0;
+
+	if (slave < 1 || slave >= dev->num_slaves ||
+	    port < 1 || port > MLX4_MAX_PORTS ||
+	    enabled < 0 || enabled > 1)
+		return -EINVAL;
+
+	priv->mfunc.master.vf_admin[slave].enable_smi[port] = enabled;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_set_enable_smi_admin);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e2fc7011314c..dcabe11f37f7 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1236,4 +1236,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
 
 int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
 int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
+int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
+int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
+				 int enable);
 #endif /* MLX4_DEVICE_H */
-- 
cgit v1.2.3-71-gd317


From 90dfdc7ceb577c0d7b7635def3c62039a091e50d Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Wed, 28 May 2014 23:52:12 +0200
Subject: MIPS: Add functions for hypervisor call

Introduce kvm_hypercall[0-3].
Define three new hypercalls for MIPS: GET_CLOCK_FREQ, EXIT_VM, and
CONSOLE_OUTPUT.

[andreas.herrmann:
  * Properly define hypercalls and HC numbers for MIPS
    in kvm_para.h header files]

Signed-off-by: David Daney <david.daney@cavium.com>
Signed-off-by: Andreas Herrmann <andreas.herrmann@caviumnetworks.com>
Cc: linux-mips@linux-mips.org
Cc: James Hogan <james.hogan@imgtec.com>
Cc: kvm@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/7005/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/kvm_para.h      | 109 ++++++++++++++++++++++++++++++++++
 arch/mips/include/uapi/asm/kvm_para.h |   6 +-
 include/uapi/linux/kvm_para.h         |   3 +
 3 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 arch/mips/include/asm/kvm_para.h

(limited to 'include')

diff --git a/arch/mips/include/asm/kvm_para.h b/arch/mips/include/asm/kvm_para.h
new file mode 100644
index 000000000000..5a9aa918abe6
--- /dev/null
+++ b/arch/mips/include/asm/kvm_para.h
@@ -0,0 +1,109 @@
+#ifndef _ASM_MIPS_KVM_PARA_H
+#define _ASM_MIPS_KVM_PARA_H
+
+#include <uapi/asm/kvm_para.h>
+
+#define KVM_HYPERCALL ".word 0x42000028"
+
+/*
+ * Hypercalls for KVM.
+ *
+ * Hypercall number is passed in v0.
+ * Return value will be placed in v0.
+ * Up to 3 arguments are passed in a0, a1, and a2.
+ */
+static inline unsigned long kvm_hypercall0(unsigned long num)
+{
+	register unsigned long n asm("v0");
+	register unsigned long r asm("v0");
+
+	n = num;
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (r) : "r" (n) : "memory"
+		);
+
+	return r;
+}
+
+static inline unsigned long kvm_hypercall1(unsigned long num,
+					unsigned long arg0)
+{
+	register unsigned long n asm("v0");
+	register unsigned long r asm("v0");
+	register unsigned long a0 asm("a0");
+
+	n = num;
+	a0 = arg0;
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (r) : "r" (n), "r" (a0) : "memory"
+		);
+
+	return r;
+}
+
+static inline unsigned long kvm_hypercall2(unsigned long num,
+					unsigned long arg0, unsigned long arg1)
+{
+	register unsigned long n asm("v0");
+	register unsigned long r asm("v0");
+	register unsigned long a0 asm("a0");
+	register unsigned long a1 asm("a1");
+
+	n = num;
+	a0 = arg0;
+	a1 = arg1;
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (r) : "r" (n), "r" (a0), "r" (a1) : "memory"
+		);
+
+	return r;
+}
+
+static inline unsigned long kvm_hypercall3(unsigned long num,
+	unsigned long arg0, unsigned long arg1, unsigned long arg2)
+{
+	register unsigned long n asm("v0");
+	register unsigned long r asm("v0");
+	register unsigned long a0 asm("a0");
+	register unsigned long a1 asm("a1");
+	register unsigned long a2 asm("a2");
+
+	n = num;
+	a0 = arg0;
+	a1 = arg1;
+	a2 = arg2;
+	__asm__ __volatile__(
+		KVM_HYPERCALL
+		: "=r" (r) : "r" (n), "r" (a0), "r" (a1), "r" (a2) : "memory"
+		);
+
+	return r;
+}
+
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+	return false;
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+	return 0;
+}
+
+#ifdef CONFIG_MIPS_PARAVIRT
+static inline bool kvm_para_available(void)
+{
+	return true;
+}
+#else
+static inline bool kvm_para_available(void)
+{
+	return false;
+}
+#endif
+
+
+#endif /* _ASM_MIPS_KVM_PARA_H */
diff --git a/arch/mips/include/uapi/asm/kvm_para.h b/arch/mips/include/uapi/asm/kvm_para.h
index 14fab8f0b957..7e16d7c42e65 100644
--- a/arch/mips/include/uapi/asm/kvm_para.h
+++ b/arch/mips/include/uapi/asm/kvm_para.h
@@ -1 +1,5 @@
-#include <asm-generic/kvm_para.h>
+#ifndef _UAPI_ASM_MIPS_KVM_PARA_H
+#define _UAPI_ASM_MIPS_KVM_PARA_H
+
+
+#endif /* _UAPI_ASM_MIPS_KVM_PARA_H */
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 2841f86eae0b..bf6cd7d5cac2 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -20,6 +20,9 @@
 #define KVM_HC_FEATURES			3
 #define KVM_HC_PPC_MAP_MAGIC_PAGE	4
 #define KVM_HC_KICK_CPU			5
+#define KVM_HC_MIPS_GET_CLOCK_FREQ	6
+#define KVM_HC_MIPS_EXIT_VM		7
+#define KVM_HC_MIPS_CONSOLE_OUTPUT	8
 
 /*
  * hypercalls use architecture specific
-- 
cgit v1.2.3-71-gd317


From 3e19ce762b537dd9aeefdd0849ba5f2f01ff83cf Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 25 Feb 2014 17:44:21 -0500
Subject: rpc: xdr_truncate_encode

This will be used in the server side in a few cases:
	- when certain operations (read, readdir, readlink) fail after
	  encoding a partial response.
	- when we run out of space after encoding a partial response.
	- in readlink, where we initially reserve PAGE_SIZE bytes for
	  data, then truncate to the actual size.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/xdr.h |  1 +
 net/sunrpc/xdr.c           | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 15f9204ee70b..e7bb2e3bd0fb 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -215,6 +215,7 @@ typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
 extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index dd97ba3c4456..352f3b35bbe5 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -508,6 +508,72 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
 }
 EXPORT_SYMBOL_GPL(xdr_reserve_space);
 
+/**
+ * xdr_truncate_encode - truncate an encode buffer
+ * @xdr: pointer to xdr_stream
+ * @len: new length of buffer
+ *
+ * Truncates the xdr stream, so that xdr->buf->len == len,
+ * and xdr->p points at offset len from the start of the buffer, and
+ * head, tail, and page lengths are adjusted to correspond.
+ *
+ * If this means moving xdr->p to a different buffer, we assume that
+ * that the end pointer should be set to the end of the current page,
+ * except in the case of the head buffer when we assume the head
+ * buffer's current length represents the end of the available buffer.
+ *
+ * This is *not* safe to use on a buffer that already has inlined page
+ * cache pages (as in a zero-copy server read reply), except for the
+ * simple case of truncating from one position in the tail to another.
+ *
+ */
+void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
+{
+	struct xdr_buf *buf = xdr->buf;
+	struct kvec *head = buf->head;
+	struct kvec *tail = buf->tail;
+	int fraglen;
+	int new, old;
+
+	if (len > buf->len) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	fraglen = min_t(int, buf->len - len, tail->iov_len);
+	tail->iov_len -= fraglen;
+	buf->len -= fraglen;
+	if (tail->iov_len && buf->len == len) {
+		xdr->p = tail->iov_base + tail->iov_len;
+		/* xdr->end, xdr->iov should be set already */
+		return;
+	}
+	WARN_ON_ONCE(fraglen);
+	fraglen = min_t(int, buf->len - len, buf->page_len);
+	buf->page_len -= fraglen;
+	buf->len -= fraglen;
+
+	new = buf->page_base + buf->page_len;
+	old = new + fraglen;
+	xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT);
+
+	if (buf->page_len && buf->len == len) {
+		xdr->p = page_address(*xdr->page_ptr);
+		xdr->end = (void *)xdr->p + PAGE_SIZE;
+		xdr->p = (void *)xdr->p + (new % PAGE_SIZE);
+		/* xdr->iov should already be NULL */
+		return;
+	}
+	if (fraglen)
+		xdr->end = head->iov_base + head->iov_len;
+	/* (otherwise assume xdr->end is already set) */
+	head->iov_len = len;
+	buf->len = len;
+	xdr->p = head->iov_base + head->iov_len;
+	xdr->iov = buf->head;
+}
+EXPORT_SYMBOL(xdr_truncate_encode);
+
 /**
  * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
  * @xdr: pointer to xdr_stream
-- 
cgit v1.2.3-71-gd317


From 2825a7f90753012babe7ee292f4a1eadd3706f92 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 26 Aug 2013 16:04:46 -0400
Subject: nfsd4: allow encoding across page boundaries

After this we can handle for example getattr of very large ACLs.

Read, readdir, readlink are still special cases with their own limits.

Also we can't handle a new operation starting close to the end of a
page.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c         |  4 +++
 fs/nfsd/nfs4xdr.c          | 60 +++++++++++++++++++++++++----------
 include/linux/sunrpc/svc.h |  1 +
 include/linux/sunrpc/xdr.h |  1 +
 net/sunrpc/svc_xprt.c      |  1 +
 net/sunrpc/xdr.c           | 78 ++++++++++++++++++++++++++++++++++++++++++++--
 6 files changed, 126 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3ce431b9b577..5d8f9158701d 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1264,6 +1264,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
 	xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE;
 	/* Tail and page_len should be zero at this point: */
 	buf->len = buf->head[0].iov_len;
+	xdr->scratch.iov_len = 0;
+	xdr->page_ptr = buf->pages;
+	buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
+		- 2 * RPC_MAX_AUTH_SIZE;
 }
 
 /*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index bd529e523087..d3a576dbd99b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1624,6 +1624,7 @@ static int nfsd4_max_reply(u32 opnum)
 		 * the head and tail in another page:
 		 */
 		return 2 * PAGE_SIZE;
+	case OP_GETATTR:
 	case OP_READ:
 		return INT_MAX;
 	default:
@@ -2560,21 +2561,31 @@ out_resource:
 	goto out;
 }
 
+static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr,
+				struct xdr_buf *buf, __be32 *p, int bytes)
+{
+	xdr->scratch.iov_len = 0;
+	memset(buf, 0, sizeof(struct xdr_buf));
+	buf->head[0].iov_base = p;
+	buf->head[0].iov_len = 0;
+	buf->len = 0;
+	xdr->buf = buf;
+	xdr->iov = buf->head;
+	xdr->p = p;
+	xdr->end = (void *)p + bytes;
+	buf->buflen = bytes;
+}
+
 __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
 			struct svc_fh *fhp, struct svc_export *exp,
 			struct dentry *dentry, u32 *bmval,
 			struct svc_rqst *rqstp, int ignore_crossmnt)
 {
-	struct xdr_buf dummy = {
-			.head[0] = {
-				.iov_base = *p,
-			},
-			.buflen = words << 2,
-		};
+	struct xdr_buf dummy;
 	struct xdr_stream xdr;
 	__be32 ret;
 
-	xdr_init_encode(&xdr, &dummy, NULL);
+	svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2);
 	ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp,
 							ignore_crossmnt);
 	*p = xdr.p;
@@ -3064,8 +3075,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 	if (nfserr)
 		return nfserr;
-	if (resp->xdr.buf->page_len)
-		return nfserr_resource;
 
 	p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
 	if (!p)
@@ -3075,6 +3084,9 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (xdr->end - xdr->p < 1)
 		return nfserr_resource;
 
+	if (resp->xdr.buf->page_len)
+		return nfserr_resource;
+
 	maxcount = svc_max_payload(resp->rqstp);
 	if (maxcount > read->rd_length)
 		maxcount = read->rd_length;
@@ -3119,6 +3131,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 				- (char *)resp->xdr.buf->head[0].iov_base);
 	resp->xdr.buf->page_len = maxcount;
 	xdr->buf->len += maxcount;
+	xdr->page_ptr += v;
+	xdr->buf->buflen = maxcount + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE;
 	xdr->iov = xdr->buf->tail;
 
 	/* Use rest of head for padding and remaining ops: */
@@ -3145,6 +3159,11 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 
 	if (nfserr)
 		return nfserr;
+
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		return nfserr_resource;
+
 	if (resp->xdr.buf->page_len)
 		return nfserr_resource;
 	if (!*resp->rqstp->rq_next_page)
@@ -3154,10 +3173,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 
 	maxcount = PAGE_SIZE;
 
-	p = xdr_reserve_space(xdr, 4);
-	if (!p)
-		return nfserr_resource;
-
 	if (xdr->end - xdr->p < 1)
 		return nfserr_resource;
 
@@ -3180,6 +3195,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 				- (char *)resp->xdr.buf->head[0].iov_base;
 	resp->xdr.buf->page_len = maxcount;
 	xdr->buf->len += maxcount;
+	xdr->page_ptr += 1;
+	xdr->buf->buflen -= PAGE_SIZE;
 	xdr->iov = xdr->buf->tail;
 
 	/* Use rest of head for padding and remaining ops: */
@@ -3206,15 +3223,16 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 
 	if (nfserr)
 		return nfserr;
-	if (resp->xdr.buf->page_len)
-		return nfserr_resource;
-	if (!*resp->rqstp->rq_next_page)
-		return nfserr_resource;
 
 	p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
 	if (!p)
 		return nfserr_resource;
 
+	if (resp->xdr.buf->page_len)
+		return nfserr_resource;
+	if (!*resp->rqstp->rq_next_page)
+		return nfserr_resource;
+
 	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
 	WRITE32(0);
 	WRITE32(0);
@@ -3266,6 +3284,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 
 	xdr->iov = xdr->buf->tail;
 
+	xdr->page_ptr++;
+	xdr->buf->buflen -= PAGE_SIZE;
+	xdr->iov = xdr->buf->tail;
+
 	/* Use rest of head for padding and remaining ops: */
 	resp->xdr.buf->tail[0].iov_base = tailbase;
 	resp->xdr.buf->tail[0].iov_len = 0;
@@ -3800,6 +3822,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	       !nfsd4_enc_ops[op->opnum]);
 	encoder = nfsd4_enc_ops[op->opnum];
 	op->status = encoder(resp, op->status, &op->u);
+	xdr_commit_encode(xdr);
+
 	/* nfsd4_check_resp_size guarantees enough room for error status */
 	if (!op->status) {
 		int space_needed = 0;
@@ -3919,6 +3943,8 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 	WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
 				 buf->tail[0].iov_len);
 
+	rqstp->rq_next_page = resp->xdr.page_ptr + 1;
+
 	p = resp->tagp;
 	*p++ = htonl(resp->taglen);
 	memcpy(p, resp->tag, resp->taglen);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index a0dbbd1e00e9..85cb6472a423 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -244,6 +244,7 @@ struct svc_rqst {
 	struct page *		rq_pages[RPCSVC_MAXPAGES];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	struct page *		*rq_next_page; /* next reply page to use */
+	struct page *		*rq_page_end;  /* one past the last page */
 
 	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
 
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index e7bb2e3bd0fb..b23d69ffd5ec 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -215,6 +215,7 @@ typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_commit_encode(struct xdr_stream *xdr);
 extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 29772e01b179..b4737fbdec13 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 			}
 			rqstp->rq_pages[i] = p;
 		}
+	rqstp->rq_page_end = &rqstp->rq_pages[i];
 	rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
 
 	/* Make arg->head point to first page and arg->pages point to rest */
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 352f3b35bbe5..2b546e8ce43d 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 	struct kvec *iov = buf->head;
 	int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
 
+	xdr_set_scratch_buffer(xdr, NULL, 0);
 	BUG_ON(scratch_len < 0);
 	xdr->buf = buf;
 	xdr->iov = iov;
@@ -481,6 +482,74 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 }
 EXPORT_SYMBOL_GPL(xdr_init_encode);
 
+/**
+ * xdr_commit_encode - Ensure all data is written to buffer
+ * @xdr: pointer to xdr_stream
+ *
+ * We handle encoding across page boundaries by giving the caller a
+ * temporary location to write to, then later copying the data into
+ * place; xdr_commit_encode does that copying.
+ *
+ * Normally the caller doesn't need to call this directly, as the
+ * following xdr_reserve_space will do it.  But an explicit call may be
+ * required at the end of encoding, or any other time when the xdr_buf
+ * data might be read.
+ */
+void xdr_commit_encode(struct xdr_stream *xdr)
+{
+	int shift = xdr->scratch.iov_len;
+	void *page;
+
+	if (shift == 0)
+		return;
+	page = page_address(*xdr->page_ptr);
+	memcpy(xdr->scratch.iov_base, page, shift);
+	memmove(page, page + shift, (void *)xdr->p - page);
+	xdr->scratch.iov_len = 0;
+}
+EXPORT_SYMBOL_GPL(xdr_commit_encode);
+
+__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
+{
+	static __be32 *p;
+	int space_left;
+	int frag1bytes, frag2bytes;
+
+	if (nbytes > PAGE_SIZE)
+		return NULL; /* Bigger buffers require special handling */
+	if (xdr->buf->len + nbytes > xdr->buf->buflen)
+		return NULL; /* Sorry, we're totally out of space */
+	frag1bytes = (xdr->end - xdr->p) << 2;
+	frag2bytes = nbytes - frag1bytes;
+	if (xdr->iov)
+		xdr->iov->iov_len += frag1bytes;
+	else {
+		xdr->buf->page_len += frag1bytes;
+		xdr->page_ptr++;
+	}
+	xdr->iov = NULL;
+	/*
+	 * If the last encode didn't end exactly on a page boundary, the
+	 * next one will straddle boundaries.  Encode into the next
+	 * page, then copy it back later in xdr_commit_encode.  We use
+	 * the "scratch" iov to track any temporarily unused fragment of
+	 * space at the end of the previous buffer:
+	 */
+	xdr->scratch.iov_base = xdr->p;
+	xdr->scratch.iov_len = frag1bytes;
+	p = page_address(*xdr->page_ptr);
+	/*
+	 * Note this is where the next encode will start after we've
+	 * shifted this one back:
+	 */
+	xdr->p = (void *)p + frag2bytes;
+	space_left = xdr->buf->buflen - xdr->buf->len;
+	xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+	xdr->buf->page_len += frag2bytes;
+	xdr->buf->len += nbytes;
+	return p;
+}
+
 /**
  * xdr_reserve_space - Reserve buffer space for sending
  * @xdr: pointer to xdr_stream
@@ -495,14 +564,18 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
 	__be32 *p = xdr->p;
 	__be32 *q;
 
+	xdr_commit_encode(xdr);
 	/* align nbytes on the next 32-bit boundary */
 	nbytes += 3;
 	nbytes &= ~3;
 	q = p + (nbytes >> 2);
 	if (unlikely(q > xdr->end || q < p))
-		return NULL;
+		return xdr_get_next_encode_buffer(xdr, nbytes);
 	xdr->p = q;
-	xdr->iov->iov_len += nbytes;
+	if (xdr->iov)
+		xdr->iov->iov_len += nbytes;
+	else
+		xdr->buf->page_len += nbytes;
 	xdr->buf->len += nbytes;
 	return p;
 }
@@ -539,6 +612,7 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
 		WARN_ON_ONCE(1);
 		return;
 	}
+	xdr_commit_encode(xdr);
 
 	fraglen = min_t(int, buf->len - len, tail->iov_len);
 	tail->iov_len -= fraglen;
-- 
cgit v1.2.3-71-gd317


From db3f58a95beea6752d90fed03f9f198d282a3913 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 6 Mar 2014 13:22:18 -0500
Subject: rpc: define xdr_restrict_buflen

With this xdr_reserve_space can help us enforce various limits.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/xdr.h |  1 +
 net/sunrpc/xdr.c           | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index b23d69ffd5ec..70c6b92e15a7 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -217,6 +217,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_commit_encode(struct xdr_stream *xdr);
 extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
+extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
 extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2b546e8ce43d..39928444c7fb 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -648,6 +648,35 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
 }
 EXPORT_SYMBOL(xdr_truncate_encode);
 
+/**
+ * xdr_restrict_buflen - decrease available buffer space
+ * @xdr: pointer to xdr_stream
+ * @newbuflen: new maximum number of bytes available
+ *
+ * Adjust our idea of how much space is available in the buffer.
+ * If we've already used too much space in the buffer, returns -1.
+ * If the available space is already smaller than newbuflen, returns 0
+ * and does nothing.  Otherwise, adjusts xdr->buf->buflen to newbuflen
+ * and ensures xdr->end is set at most offset newbuflen from the start
+ * of the buffer.
+ */
+int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen)
+{
+	struct xdr_buf *buf = xdr->buf;
+	int left_in_this_buf = (void *)xdr->end - (void *)xdr->p;
+	int end_offset = buf->len + left_in_this_buf;
+
+	if (newbuflen < 0 || newbuflen < buf->len)
+		return -1;
+	if (newbuflen > buf->buflen)
+		return 0;
+	if (newbuflen < end_offset)
+		xdr->end = (void *)xdr->end + newbuflen - end_offset;
+	buf->buflen = newbuflen;
+	return 0;
+}
+EXPORT_SYMBOL(xdr_restrict_buflen);
+
 /**
  * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
  * @xdr: pointer to xdr_stream
-- 
cgit v1.2.3-71-gd317


From a5cddc885b99458df963a75abbe0b40cbef56c48 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 12 May 2014 18:10:58 -0400
Subject: nfsd4: better reservation of head space for krb5

RPC_MAX_AUTH_SIZE is scattered around several places.  Better to set it
once in the auth code, where this kind of estimate should be made.  And
while we're at it we can leave it zero when we're not using krb5i or
krb5p.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c                |  4 ++--
 fs/nfsd/nfs4state.c               |  2 +-
 fs/nfsd/nfs4xdr.c                 |  5 +++--
 include/linux/sunrpc/svc.h        | 11 +++++------
 net/sunrpc/auth_gss/svcauth_gss.c |  2 ++
 net/sunrpc/svcauth.c              |  2 ++
 6 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2d786b813c7e..16e71d033ea5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1261,13 +1261,13 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
 	xdr->buf = buf;
 	xdr->iov = head;
 	xdr->p   = head->iov_base + head->iov_len;
-	xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE;
+	xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
 	/* Tail and page_len should be zero at this point: */
 	buf->len = buf->head[0].iov_len;
 	xdr->scratch.iov_len = 0;
 	xdr->page_ptr = buf->pages;
 	buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
-		- 2 * RPC_MAX_AUTH_SIZE;
+		- rqstp->rq_auth_slack;
 }
 
 /*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 62b882dc48ec..d0a016a502be 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2288,7 +2288,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 			session->se_fchannel.maxresp_sz;
 	status = (seq->cachethis) ? nfserr_rep_too_big_to_cache :
 				    nfserr_rep_too_big;
-	if (xdr_restrict_buflen(xdr, buflen - 2 * RPC_MAX_AUTH_SIZE))
+	if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
 		goto out_put_session;
 	svc_reserve(rqstp, buflen);
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3e347a1caec4..470fe8998c9b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1611,7 +1611,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	DECODE_HEAD;
 	struct nfsd4_op *op;
 	bool cachethis = false;
-	int max_reply = 2 * RPC_MAX_AUTH_SIZE + 8; /* opcnt, status */
+	int auth_slack= argp->rqstp->rq_auth_slack;
+	int max_reply = auth_slack + 8; /* opcnt, status */
 	int readcount = 0;
 	int readbytes = 0;
 	int i;
@@ -1677,7 +1678,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	svc_reserve(argp->rqstp, max_reply + readbytes);
 	argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
 
-	if (readcount > 1 || max_reply > PAGE_SIZE - 2*RPC_MAX_AUTH_SIZE)
+	if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
 		argp->rqstp->rq_splice_ok = false;
 
 	DECODE_TAIL;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 85cb6472a423..1bc7cd05b22e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -260,7 +260,10 @@ struct svc_rqst {
 	void *			rq_argp;	/* decoded arguments */
 	void *			rq_resp;	/* xdr'd results */
 	void *			rq_auth_data;	/* flavor-specific data */
-
+	int			rq_auth_slack;	/* extra space xdr code
+						 * should leave in head
+						 * for krb5i, krb5p.
+						 */
 	int			rq_reserved;	/* space on socket outq
 						 * reserved for this request
 						 */
@@ -456,11 +459,7 @@ char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
  */
 static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
 {
-	int added_space = 0;
-
-	if (rqstp->rq_authop->flavour)
-		added_space = RPC_MAX_AUTH_SIZE;
-	svc_reserve(rqstp, space + added_space);
+	svc_reserve(rqstp, space + rqstp->rq_auth_slack);
 }
 
 #endif /* SUNRPC_SVC_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 0f73f4507746..4ce5eccec1f6 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1503,6 +1503,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 			if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
 					gc->gc_seq, rsci->mechctx))
 				goto garbage_args;
+			rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE;
 			break;
 		case RPC_GSS_SVC_PRIVACY:
 			/* placeholders for length and seq. number: */
@@ -1511,6 +1512,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 			if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
 					gc->gc_seq, rsci->mechctx))
 				goto garbage_args;
+			rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2;
 			break;
 		default:
 			goto auth_err;
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 2af7b0cba43a..79c0f3459b5c 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -54,6 +54,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
 	}
 	spin_unlock(&authtab_lock);
 
+	rqstp->rq_auth_slack = 0;
+
 	rqstp->rq_authop = aops;
 	return aops->accept(rqstp, authp);
 }
-- 
cgit v1.2.3-71-gd317


From 073ea2ae85629d4074596e2616588b13d4c665f7 Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Wed, 7 May 2014 20:44:51 +0900
Subject: drm/exynos: dp: Use DPCD defines of drm_dp_helper.h

Use DPCD defines of drm_dp_helper.h; thus, duplicated DPCD defines
of exynos_dp_core.h can be removed. Also, DP_TEST_EDID_CHECKSUM
define is added to drm_dp_helper.h. There is no functional change.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Reviewed-by: Sean Paul <seanpaul@chromium.org>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_dp_core.c | 98 ++++++++++++++++-----------------
 drivers/gpu/drm/exynos/exynos_dp_core.h | 59 ++------------------
 include/drm/drm_dp_helper.h             |  2 +
 3 files changed, 55 insertions(+), 104 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c
index 2b30c55ab050..ff63901e14c7 100644
--- a/drivers/gpu/drm/exynos/exynos_dp_core.c
+++ b/drivers/gpu/drm/exynos/exynos_dp_core.c
@@ -144,15 +144,15 @@ static int exynos_dp_read_edid(struct exynos_dp_device *dp)
 			return -EIO;
 		}
 
-		exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_TEST_REQUEST,
+		exynos_dp_read_byte_from_dpcd(dp, DP_TEST_REQUEST,
 					&test_vector);
-		if (test_vector & DPCD_TEST_EDID_READ) {
+		if (test_vector & DP_TEST_LINK_EDID_READ) {
 			exynos_dp_write_byte_to_dpcd(dp,
-				DPCD_ADDR_TEST_EDID_CHECKSUM,
+				DP_TEST_EDID_CHECKSUM,
 				edid[EDID_BLOCK_LENGTH + EDID_CHECKSUM]);
 			exynos_dp_write_byte_to_dpcd(dp,
-				DPCD_ADDR_TEST_RESPONSE,
-				DPCD_TEST_EDID_CHECKSUM_WRITE);
+				DP_TEST_RESPONSE,
+				DP_TEST_EDID_CHECKSUM_WRITE);
 		}
 	} else {
 		dev_info(dp->dev, "EDID data does not include any extensions.\n");
@@ -174,15 +174,15 @@ static int exynos_dp_read_edid(struct exynos_dp_device *dp)
 		}
 
 		exynos_dp_read_byte_from_dpcd(dp,
-			DPCD_ADDR_TEST_REQUEST,
+			DP_TEST_REQUEST,
 			&test_vector);
-		if (test_vector & DPCD_TEST_EDID_READ) {
+		if (test_vector & DP_TEST_LINK_EDID_READ) {
 			exynos_dp_write_byte_to_dpcd(dp,
-				DPCD_ADDR_TEST_EDID_CHECKSUM,
+				DP_TEST_EDID_CHECKSUM,
 				edid[EDID_CHECKSUM]);
 			exynos_dp_write_byte_to_dpcd(dp,
-				DPCD_ADDR_TEST_RESPONSE,
-				DPCD_TEST_EDID_CHECKSUM_WRITE);
+				DP_TEST_RESPONSE,
+				DP_TEST_EDID_CHECKSUM_WRITE);
 		}
 	}
 
@@ -196,8 +196,8 @@ static int exynos_dp_handle_edid(struct exynos_dp_device *dp)
 	int i;
 	int retval;
 
-	/* Read DPCD DPCD_ADDR_DPCD_REV~RECEIVE_PORT1_CAP_1 */
-	retval = exynos_dp_read_bytes_from_dpcd(dp, DPCD_ADDR_DPCD_REV,
+	/* Read DPCD DP_DPCD_REV~RECEIVE_PORT1_CAP_1 */
+	retval = exynos_dp_read_bytes_from_dpcd(dp, DP_DPCD_REV,
 				12, buf);
 	if (retval)
 		return retval;
@@ -217,14 +217,14 @@ static void exynos_dp_enable_rx_to_enhanced_mode(struct exynos_dp_device *dp,
 {
 	u8 data;
 
-	exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_LANE_COUNT_SET, &data);
+	exynos_dp_read_byte_from_dpcd(dp, DP_LANE_COUNT_SET, &data);
 
 	if (enable)
-		exynos_dp_write_byte_to_dpcd(dp, DPCD_ADDR_LANE_COUNT_SET,
-			DPCD_ENHANCED_FRAME_EN |
+		exynos_dp_write_byte_to_dpcd(dp, DP_LANE_COUNT_SET,
+			DP_LANE_COUNT_ENHANCED_FRAME_EN |
 			DPCD_LANE_COUNT_SET(data));
 	else
-		exynos_dp_write_byte_to_dpcd(dp, DPCD_ADDR_LANE_COUNT_SET,
+		exynos_dp_write_byte_to_dpcd(dp, DP_LANE_COUNT_SET,
 			DPCD_LANE_COUNT_SET(data));
 }
 
@@ -233,7 +233,7 @@ static int exynos_dp_is_enhanced_mode_available(struct exynos_dp_device *dp)
 	u8 data;
 	int retval;
 
-	exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_MAX_LANE_COUNT, &data);
+	exynos_dp_read_byte_from_dpcd(dp, DP_MAX_LANE_COUNT, &data);
 	retval = DPCD_ENHANCED_FRAME_CAP(data);
 
 	return retval;
@@ -253,8 +253,8 @@ static void exynos_dp_training_pattern_dis(struct exynos_dp_device *dp)
 	exynos_dp_set_training_pattern(dp, DP_NONE);
 
 	exynos_dp_write_byte_to_dpcd(dp,
-		DPCD_ADDR_TRAINING_PATTERN_SET,
-		DPCD_TRAINING_PATTERN_DISABLED);
+		DP_TRAINING_PATTERN_SET,
+		DP_TRAINING_PATTERN_DISABLE);
 }
 
 static void exynos_dp_set_lane_lane_pre_emphasis(struct exynos_dp_device *dp,
@@ -298,7 +298,7 @@ static int exynos_dp_link_start(struct exynos_dp_device *dp)
 	/* Setup RX configuration */
 	buf[0] = dp->link_train.link_rate;
 	buf[1] = dp->link_train.lane_count;
-	retval = exynos_dp_write_bytes_to_dpcd(dp, DPCD_ADDR_LINK_BW_SET,
+	retval = exynos_dp_write_bytes_to_dpcd(dp, DP_LINK_BW_SET,
 				2, buf);
 	if (retval)
 		return retval;
@@ -325,16 +325,16 @@ static int exynos_dp_link_start(struct exynos_dp_device *dp)
 
 	/* Set RX training pattern */
 	retval = exynos_dp_write_byte_to_dpcd(dp,
-			DPCD_ADDR_TRAINING_PATTERN_SET,
-			DPCD_SCRAMBLING_DISABLED | DPCD_TRAINING_PATTERN_1);
+			DP_TRAINING_PATTERN_SET,
+			DP_LINK_SCRAMBLING_DISABLE | DP_TRAINING_PATTERN_1);
 	if (retval)
 		return retval;
 
 	for (lane = 0; lane < lane_count; lane++)
-		buf[lane] = DPCD_PRE_EMPHASIS_PATTERN2_LEVEL0 |
-			    DPCD_VOLTAGE_SWING_PATTERN1_LEVEL0;
+		buf[lane] = DP_TRAIN_PRE_EMPHASIS_0 |
+			    DP_TRAIN_VOLTAGE_SWING_400;
 
-	retval = exynos_dp_write_bytes_to_dpcd(dp, DPCD_ADDR_TRAINING_LANE0_SET,
+	retval = exynos_dp_write_bytes_to_dpcd(dp, DP_TRAINING_LANE0_SET,
 			lane_count, buf);
 
 	return retval;
@@ -355,7 +355,7 @@ static int exynos_dp_clock_recovery_ok(u8 link_status[2], int lane_count)
 
 	for (lane = 0; lane < lane_count; lane++) {
 		lane_status = exynos_dp_get_lane_status(link_status, lane);
-		if ((lane_status & DPCD_LANE_CR_DONE) == 0)
+		if ((lane_status & DP_LANE_CR_DONE) == 0)
 			return -EINVAL;
 	}
 	return 0;
@@ -367,13 +367,13 @@ static int exynos_dp_channel_eq_ok(u8 link_status[2], u8 link_align,
 	int lane;
 	u8 lane_status;
 
-	if ((link_align & DPCD_INTERLANE_ALIGN_DONE) == 0)
+	if ((link_align & DP_INTERLANE_ALIGN_DONE) == 0)
 		return -EINVAL;
 
 	for (lane = 0; lane < lane_count; lane++) {
 		lane_status = exynos_dp_get_lane_status(link_status, lane);
-		lane_status &= DPCD_CHANNEL_EQ_BITS;
-		if (lane_status != DPCD_CHANNEL_EQ_BITS)
+		lane_status &= DP_CHANNEL_EQ_BITS;
+		if (lane_status != DP_CHANNEL_EQ_BITS)
 			return -EINVAL;
 	}
 
@@ -471,9 +471,9 @@ static void exynos_dp_get_adjust_training_lane(struct exynos_dp_device *dp,
 				DPCD_PRE_EMPHASIS_SET(pre_emphasis);
 
 		if (voltage_swing == VOLTAGE_LEVEL_3)
-			training_lane |= DPCD_MAX_SWING_REACHED;
+			training_lane |= DP_TRAIN_MAX_SWING_REACHED;
 		if (pre_emphasis == PRE_EMPHASIS_LEVEL_3)
-			training_lane |= DPCD_MAX_PRE_EMPHASIS_REACHED;
+			training_lane |= DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
 
 		dp->link_train.training_lane[lane] = training_lane;
 	}
@@ -490,12 +490,12 @@ static int exynos_dp_process_clock_recovery(struct exynos_dp_device *dp)
 	lane_count = dp->link_train.lane_count;
 
 	retval =  exynos_dp_read_bytes_from_dpcd(dp,
-			DPCD_ADDR_LANE0_1_STATUS, 2, link_status);
+			DP_LANE0_1_STATUS, 2, link_status);
 	if (retval)
 		return retval;
 
 	retval =  exynos_dp_read_bytes_from_dpcd(dp,
-			DPCD_ADDR_ADJUST_REQUEST_LANE0_1, 2, adjust_request);
+			DP_ADJUST_REQUEST_LANE0_1, 2, adjust_request);
 	if (retval)
 		return retval;
 
@@ -504,9 +504,9 @@ static int exynos_dp_process_clock_recovery(struct exynos_dp_device *dp)
 		exynos_dp_set_training_pattern(dp, TRAINING_PTN2);
 
 		retval = exynos_dp_write_byte_to_dpcd(dp,
-				DPCD_ADDR_TRAINING_PATTERN_SET,
-				DPCD_SCRAMBLING_DISABLED |
-				DPCD_TRAINING_PATTERN_2);
+				DP_TRAINING_PATTERN_SET,
+				DP_LINK_SCRAMBLING_DISABLE |
+				DP_TRAINING_PATTERN_2);
 		if (retval)
 			return retval;
 
@@ -546,7 +546,7 @@ static int exynos_dp_process_clock_recovery(struct exynos_dp_device *dp)
 			dp->link_train.training_lane[lane], lane);
 
 	retval = exynos_dp_write_bytes_to_dpcd(dp,
-			DPCD_ADDR_TRAINING_LANE0_SET, lane_count,
+			DP_TRAINING_LANE0_SET, lane_count,
 			dp->link_train.training_lane);
 	if (retval)
 		return retval;
@@ -565,7 +565,7 @@ static int exynos_dp_process_equalizer_training(struct exynos_dp_device *dp)
 	lane_count = dp->link_train.lane_count;
 
 	retval = exynos_dp_read_bytes_from_dpcd(dp,
-			DPCD_ADDR_LANE0_1_STATUS, 2, link_status);
+			DP_LANE0_1_STATUS, 2, link_status);
 	if (retval)
 		return retval;
 
@@ -575,12 +575,12 @@ static int exynos_dp_process_equalizer_training(struct exynos_dp_device *dp)
 	}
 
 	retval = exynos_dp_read_bytes_from_dpcd(dp,
-			DPCD_ADDR_ADJUST_REQUEST_LANE0_1, 2, adjust_request);
+			DP_ADJUST_REQUEST_LANE0_1, 2, adjust_request);
 	if (retval)
 		return retval;
 
 	retval = exynos_dp_read_byte_from_dpcd(dp,
-			DPCD_ADDR_LANE_ALIGN_STATUS_UPDATED, &link_align);
+			DP_LANE_ALIGN_STATUS_UPDATED, &link_align);
 	if (retval)
 		return retval;
 
@@ -622,7 +622,7 @@ static int exynos_dp_process_equalizer_training(struct exynos_dp_device *dp)
 		exynos_dp_set_lane_link_training(dp,
 			dp->link_train.training_lane[lane], lane);
 
-	retval = exynos_dp_write_bytes_to_dpcd(dp, DPCD_ADDR_TRAINING_LANE0_SET,
+	retval = exynos_dp_write_bytes_to_dpcd(dp, DP_TRAINING_LANE0_SET,
 			lane_count, dp->link_train.training_lane);
 
 	return retval;
@@ -637,7 +637,7 @@ static void exynos_dp_get_max_rx_bandwidth(struct exynos_dp_device *dp,
 	 * For DP rev.1.1, Maximum link rate of Main Link lanes
 	 * 0x06 = 1.62 Gbps, 0x0a = 2.7 Gbps
 	 */
-	exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_MAX_LINK_RATE, &data);
+	exynos_dp_read_byte_from_dpcd(dp, DP_MAX_LINK_RATE, &data);
 	*bandwidth = data;
 }
 
@@ -650,7 +650,7 @@ static void exynos_dp_get_max_rx_lane_count(struct exynos_dp_device *dp,
 	 * For DP rev.1.1, Maximum number of Main Link lanes
 	 * 0x01 = 1 lane, 0x02 = 2 lanes, 0x04 = 4 lanes
 	 */
-	exynos_dp_read_byte_from_dpcd(dp, DPCD_ADDR_MAX_LANE_COUNT, &data);
+	exynos_dp_read_byte_from_dpcd(dp, DP_MAX_LANE_COUNT, &data);
 	*lane_count = DPCD_MAX_LANE_COUNT(data);
 }
 
@@ -822,20 +822,20 @@ static void exynos_dp_enable_scramble(struct exynos_dp_device *dp, bool enable)
 		exynos_dp_enable_scrambling(dp);
 
 		exynos_dp_read_byte_from_dpcd(dp,
-			DPCD_ADDR_TRAINING_PATTERN_SET,
+			DP_TRAINING_PATTERN_SET,
 			&data);
 		exynos_dp_write_byte_to_dpcd(dp,
-			DPCD_ADDR_TRAINING_PATTERN_SET,
-			(u8)(data & ~DPCD_SCRAMBLING_DISABLED));
+			DP_TRAINING_PATTERN_SET,
+			(u8)(data & ~DP_LINK_SCRAMBLING_DISABLE));
 	} else {
 		exynos_dp_disable_scrambling(dp);
 
 		exynos_dp_read_byte_from_dpcd(dp,
-			DPCD_ADDR_TRAINING_PATTERN_SET,
+			DP_TRAINING_PATTERN_SET,
 			&data);
 		exynos_dp_write_byte_to_dpcd(dp,
-			DPCD_ADDR_TRAINING_PATTERN_SET,
-			(u8)(data | DPCD_SCRAMBLING_DISABLED));
+			DP_TRAINING_PATTERN_SET,
+			(u8)(data | DP_LINK_SCRAMBLING_DISABLE));
 	}
 }
 
diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.h b/drivers/gpu/drm/exynos/exynos_dp_core.h
index 56fa43eb7133..02cc4f9ab903 100644
--- a/drivers/gpu/drm/exynos/exynos_dp_core.h
+++ b/drivers/gpu/drm/exynos/exynos_dp_core.h
@@ -14,6 +14,7 @@
 #define _EXYNOS_DP_CORE_H
 
 #include <drm/drm_crtc.h>
+#include <drm/drm_dp_helper.h>
 #include <drm/exynos_drm.h>
 
 #define DP_TIMEOUT_LOOP_COUNT 100
@@ -262,69 +263,17 @@ void exynos_dp_disable_scrambling(struct exynos_dp_device *dp);
 #define EDID_EXTENSION_FLAG			0x7e
 #define EDID_CHECKSUM				0x7f
 
-/* Definition for DPCD Register */
-#define DPCD_ADDR_DPCD_REV			0x0000
-#define DPCD_ADDR_MAX_LINK_RATE			0x0001
-#define DPCD_ADDR_MAX_LANE_COUNT		0x0002
-#define DPCD_ADDR_LINK_BW_SET			0x0100
-#define DPCD_ADDR_LANE_COUNT_SET		0x0101
-#define DPCD_ADDR_TRAINING_PATTERN_SET		0x0102
-#define DPCD_ADDR_TRAINING_LANE0_SET		0x0103
-#define DPCD_ADDR_LANE0_1_STATUS		0x0202
-#define DPCD_ADDR_LANE_ALIGN_STATUS_UPDATED	0x0204
-#define DPCD_ADDR_ADJUST_REQUEST_LANE0_1	0x0206
-#define DPCD_ADDR_ADJUST_REQUEST_LANE2_3	0x0207
-#define DPCD_ADDR_TEST_REQUEST			0x0218
-#define DPCD_ADDR_TEST_RESPONSE			0x0260
-#define DPCD_ADDR_TEST_EDID_CHECKSUM		0x0261
-#define DPCD_ADDR_SINK_POWER_STATE		0x0600
-
-/* DPCD_ADDR_MAX_LANE_COUNT */
+/* DP_MAX_LANE_COUNT */
 #define DPCD_ENHANCED_FRAME_CAP(x)		(((x) >> 7) & 0x1)
 #define DPCD_MAX_LANE_COUNT(x)			((x) & 0x1f)
 
-/* DPCD_ADDR_LANE_COUNT_SET */
-#define DPCD_ENHANCED_FRAME_EN			(0x1 << 7)
+/* DP_LANE_COUNT_SET */
 #define DPCD_LANE_COUNT_SET(x)			((x) & 0x1f)
 
-/* DPCD_ADDR_TRAINING_PATTERN_SET */
-#define DPCD_SCRAMBLING_DISABLED		(0x1 << 5)
-#define DPCD_SCRAMBLING_ENABLED			(0x0 << 5)
-#define DPCD_TRAINING_PATTERN_2			(0x2 << 0)
-#define DPCD_TRAINING_PATTERN_1			(0x1 << 0)
-#define DPCD_TRAINING_PATTERN_DISABLED		(0x0 << 0)
-
-/* DPCD_ADDR_TRAINING_LANE0_SET */
-#define DPCD_MAX_PRE_EMPHASIS_REACHED		(0x1 << 5)
+/* DP_TRAINING_LANE0_SET */
 #define DPCD_PRE_EMPHASIS_SET(x)		(((x) & 0x3) << 3)
 #define DPCD_PRE_EMPHASIS_GET(x)		(((x) >> 3) & 0x3)
-#define DPCD_PRE_EMPHASIS_PATTERN2_LEVEL0	(0x0 << 3)
-#define DPCD_MAX_SWING_REACHED			(0x1 << 2)
 #define DPCD_VOLTAGE_SWING_SET(x)		(((x) & 0x3) << 0)
 #define DPCD_VOLTAGE_SWING_GET(x)		(((x) >> 0) & 0x3)
-#define DPCD_VOLTAGE_SWING_PATTERN1_LEVEL0	(0x0 << 0)
-
-/* DPCD_ADDR_LANE0_1_STATUS */
-#define DPCD_LANE_SYMBOL_LOCKED			(0x1 << 2)
-#define DPCD_LANE_CHANNEL_EQ_DONE		(0x1 << 1)
-#define DPCD_LANE_CR_DONE			(0x1 << 0)
-#define DPCD_CHANNEL_EQ_BITS			(DPCD_LANE_CR_DONE|	\
-						 DPCD_LANE_CHANNEL_EQ_DONE|\
-						 DPCD_LANE_SYMBOL_LOCKED)
-
-/* DPCD_ADDR_LANE_ALIGN__STATUS_UPDATED */
-#define DPCD_LINK_STATUS_UPDATED		(0x1 << 7)
-#define DPCD_DOWNSTREAM_PORT_STATUS_CHANGED	(0x1 << 6)
-#define DPCD_INTERLANE_ALIGN_DONE		(0x1 << 0)
-
-/* DPCD_ADDR_TEST_REQUEST */
-#define DPCD_TEST_EDID_READ			(0x1 << 2)
-
-/* DPCD_ADDR_TEST_RESPONSE */
-#define DPCD_TEST_EDID_CHECKSUM_WRITE		(0x1 << 2)
-
-/* DPCD_ADDR_SINK_POWER_STATE */
-#define DPCD_SET_POWER_STATE_D0			(0x1 << 0)
-#define DPCD_SET_POWER_STATE_D4			(0x2 << 0)
 
 #endif /* _EXYNOS_DP_CORE_H */
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index efcde2c38cc1..488b41635dec 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -310,6 +310,8 @@
 # define DP_TEST_NAK			    (1 << 1)
 # define DP_TEST_EDID_CHECKSUM_WRITE	    (1 << 2)
 
+#define DP_TEST_EDID_CHECKSUM		    0x261
+
 #define DP_TEST_SINK			    0x270
 #define DP_TEST_SINK_START	    (1 << 0)
 
-- 
cgit v1.2.3-71-gd317


From c8865b64b05b2f4eeefd369373e9c8aeb069e7a1 Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Sun, 25 May 2014 17:47:26 +0300
Subject: cpumask: Utility function to set n'th cpu - local cpu first

This function sets the n'th cpu - local cpu's first.
For example: in a 16 cores server with even cpu's local, will get the
following values:
cpumask_set_cpu_local_first(0, numa, cpumask) => cpu 0 is set
cpumask_set_cpu_local_first(1, numa, cpumask) => cpu 2 is set
...
cpumask_set_cpu_local_first(7, numa, cpumask) => cpu 14 is set
cpumask_set_cpu_local_first(8, numa, cpumask) => cpu 1 is set
cpumask_set_cpu_local_first(9, numa, cpumask) => cpu 3 is set
...
cpumask_set_cpu_local_first(15, numa, cpumask) => cpu 15 is set

Curently this function will be used by multi queue networking devices to
calculate the irq affinity mask, such that as many local cpu's as
possible will be utilized to handle the mq device irq's.

Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cpumask.h |  2 ++
 lib/cpumask.c           | 64 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d08e4d2a9b92..3551d667ef9f 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -257,6 +257,8 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
 	set_bit(cpumask_check(cpu), cpumask_bits(dstp));
 }
 
+int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);
+
 /**
  * cpumask_clear_cpu - clear a cpu in a cpumask
  * @cpu: cpu number (< nr_cpu_ids)
diff --git a/lib/cpumask.c b/lib/cpumask.c
index b810b753c607..14049a96f04a 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -163,4 +163,68 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
 	memblock_free_early(__pa(mask), cpumask_size());
 }
+
+/**
+ * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first
+ *
+ * @i: index number
+ * @numa_node: local numa_node
+ * @dstp: cpumask with the relevant cpu bit set according to the policy
+ *
+ * This function sets the cpumask according to a numa aware policy.
+ * cpumask could be used as an affinity hint for the IRQ related to a
+ * queue. When the policy is to spread queues across cores - local cores
+ * first.
+ *
+ * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
+ * the cpu bit and need to re-call the function.
+ */
+int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+{
+	cpumask_var_t mask;
+	int cpu;
+	int ret = 0;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	i %= num_online_cpus();
+
+	if (!cpumask_of_node(numa_node)) {
+		/* Use all online cpu's for non numa aware system */
+		cpumask_copy(mask, cpu_online_mask);
+	} else {
+		int n;
+
+		cpumask_and(mask,
+			    cpumask_of_node(numa_node), cpu_online_mask);
+
+		n = cpumask_weight(mask);
+		if (i >= n) {
+			i -= n;
+
+			/* If index > number of local cpu's, mask out local
+			 * cpu's
+			 */
+			cpumask_andnot(mask, cpu_online_mask, mask);
+		}
+	}
+
+	for_each_cpu(cpu, mask) {
+		if (--i < 0)
+			goto out;
+	}
+
+	ret = -EAGAIN;
+
+out:
+	free_cpumask_var(mask);
+
+	if (!ret)
+		cpumask_set_cpu(cpu, dstp);
+
+	return ret;
+}
+EXPORT_SYMBOL(cpumask_set_cpu_local_first);
+
 #endif
-- 
cgit v1.2.3-71-gd317


From 70a640d0dae3a9b1b222ce673eb5d92c263ddd61 Mon Sep 17 00:00:00 2001
From: Yuval Atias <yuvala@mellanox.com>
Date: Sun, 25 May 2014 17:47:27 +0300
Subject: net/mlx4_en: Use affinity hint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The “affinity hint” mechanism is used by the user space
daemon, irqbalancer, to indicate a preferred CPU mask for irqs.
Irqbalancer can use this hint to balance the irqs between the
cpus indicated by the mask.

We wish the HCA to preferentially map the IRQs it uses to numa cores
close to it.  To accomplish this, we use cpumask_set_cpu_local_first(), that
sets the affinity hint according the following policy:
First it maps IRQs to “close” numa cores.  If these are exhausted, the
remaining IRQs are mapped to “far” numa cores.

Signed-off-by: Yuval Atias <yuvala@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c              |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_cq.c     |  6 +++++-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 30 ++++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/eq.c        | 13 ++++++++++-
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  1 +
 include/linux/mlx4/device.h                    |  2 +-
 6 files changed, 50 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 199c7896f081..58b1f239ac2b 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1897,7 +1897,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 				 i, j, dev->pdev->bus->name);
 			/* Set IRQ for specific name (per ring) */
 			if (mlx4_assign_eq(dev, name, NULL,
-					   &ibdev->eq_table[eq])) {
+					   &ibdev->eq_table[eq], NULL)) {
 				/* Use legacy (same as mlx4_en driver) */
 				pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
 				ibdev->eq_table[eq] =
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 636963db598a..ea2cd72e5368 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -118,11 +118,15 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 	if (cq->is_tx == RX) {
 		if (mdev->dev->caps.comp_pool) {
 			if (!cq->vector) {
+				struct mlx4_en_rx_ring *ring =
+					priv->rx_ring[cq->ring];
+
 				sprintf(name, "%s-%d", priv->dev->name,
 					cq->ring);
 				/* Set IRQ for specific name (per ring) */
 				if (mlx4_assign_eq(mdev->dev, name, rmap,
-						   &cq->vector)) {
+						   &cq->vector,
+						   ring->affinity_mask)) {
 					cq->vector = (cq->ring + 1 + priv->port)
 					    % mdev->dev->caps.num_comp_vectors;
 					mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 58209bd0c94c..05d135572abc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1526,6 +1526,32 @@ static void mlx4_en_linkstate(struct work_struct *work)
 	mutex_unlock(&mdev->state_lock);
 }
 
+static void mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
+{
+	struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
+	int numa_node = priv->mdev->dev->numa_node;
+
+	if (numa_node == -1)
+		return;
+
+	if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) {
+		en_err(priv, "Failed to allocate core mask\n");
+		return;
+	}
+
+	if (cpumask_set_cpu_local_first(ring_idx, numa_node,
+					ring->affinity_mask)) {
+		en_err(priv, "Failed setting affinity hint\n");
+		free_cpumask_var(ring->affinity_mask);
+		ring->affinity_mask = NULL;
+	}
+}
+
+static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
+{
+	free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask);
+	priv->rx_ring[ring_idx]->affinity_mask = NULL;
+}
 
 int mlx4_en_start_port(struct net_device *dev)
 {
@@ -1567,6 +1593,8 @@ int mlx4_en_start_port(struct net_device *dev)
 
 		mlx4_en_cq_init_lock(cq);
 
+		mlx4_en_init_affinity_hint(priv, i);
+
 		err = mlx4_en_activate_cq(priv, cq, i);
 		if (err) {
 			en_err(priv, "Failed activating Rx CQ\n");
@@ -1847,6 +1875,8 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 			msleep(1);
 		mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
 		mlx4_en_deactivate_cq(priv, cq);
+
+		mlx4_en_free_affinity_hint(priv, i);
 	}
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index d954ec1eac17..f91659e5fa13 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1376,7 +1376,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev)
 EXPORT_SYMBOL(mlx4_test_interrupts);
 
 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
-		   int *vector)
+		   int *vector, cpumask_var_t cpu_hint_mask)
 {
 
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1411,6 +1411,15 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
 			}
 			mlx4_assign_irq_notifier(priv, dev,
 						 priv->eq_table.eq[vec].irq);
+			if (cpu_hint_mask) {
+				err = irq_set_affinity_hint(
+						priv->eq_table.eq[vec].irq,
+						cpu_hint_mask);
+				if (err) {
+					mlx4_warn(dev, "Failed setting affinity hint\n");
+					/*we dont want to break here*/
+				}
+			}
 
 			eq_set_ci(&priv->eq_table.eq[vec], 1);
 		}
@@ -1441,6 +1450,8 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec)
 			irq_set_affinity_notifier(
 				priv->eq_table.eq[vec].irq,
 				NULL);
+			irq_set_affinity_hint(priv->eq_table.eq[vec].irq,
+					      NULL);
 			free_irq(priv->eq_table.eq[vec].irq,
 				 &priv->eq_table.eq[vec]);
 			priv->msix_ctl.pool_bm &= ~(1ULL << i);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index b5db1bf361dc..0e15295bedd6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -313,6 +313,7 @@ struct mlx4_en_rx_ring {
 	unsigned long csum_ok;
 	unsigned long csum_none;
 	int hwtstamp_rx_filter;
+	cpumask_var_t affinity_mask;
 };
 
 struct mlx4_en_cq {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ca38871a585c..b9b70e00e3c1 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1161,7 +1161,7 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_SYNC_TPT(struct mlx4_dev *dev);
 int mlx4_test_interrupts(struct mlx4_dev *dev);
 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
-		   int *vector);
+		   int *vector, cpumask_t *cpu_hint_mask);
 void mlx4_release_eq(struct mlx4_dev *dev, int vec);
 
 int mlx4_get_phys_port_id(struct mlx4_dev *dev);
-- 
cgit v1.2.3-71-gd317


From ee39facbf82e73e468c504d2b40e83e2d223c28c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 1 Jun 2014 21:58:02 -0700
Subject: net: Revert mlx4 cpumask changes.

This reverts commit 70a640d0dae3a9b1b222ce673eb5d92c263ddd61
("net/mlx4_en: Use affinity hint") and commit
c8865b64b05b2f4eeefd369373e9c8aeb069e7a1 ("cpumask: Utility function
to set n'th cpu - local cpu first") because these changes break
the build when SMP is disabled amongst other things.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 50 +++++++++++++++-------------
 include/linux/cpumask.h              |  2 --
 lib/cpumask.c                        | 64 ------------------------------------
 3 files changed, 28 insertions(+), 88 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 66d4ab703f45..e72918970a58 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1441,30 +1441,32 @@ static int ks8851_probe(struct spi_device *spi)
 		}
 	}
 
-	ks->vdd_io = devm_regulator_get(&spi->dev, "vdd-io");
+	ks->vdd_io = devm_regulator_get_optional(&spi->dev, "vdd-io");
 	if (IS_ERR(ks->vdd_io)) {
 		ret = PTR_ERR(ks->vdd_io);
-		goto err_reg_io;
-	}
-
-	ret = regulator_enable(ks->vdd_io);
-	if (ret) {
-		dev_err(&spi->dev, "regulator vdd_io enable fail: %d\n",
-			ret);
-		goto err_reg_io;
+		if (ret == -EPROBE_DEFER)
+			goto err_reg_io;
+	} else {
+		ret = regulator_enable(ks->vdd_io);
+		if (ret) {
+			dev_err(&spi->dev, "regulator vdd_io enable fail: %d\n",
+				ret);
+			goto err_reg_io;
+		}
 	}
 
-	ks->vdd_reg = devm_regulator_get(&spi->dev, "vdd");
+	ks->vdd_reg = devm_regulator_get_optional(&spi->dev, "vdd");
 	if (IS_ERR(ks->vdd_reg)) {
 		ret = PTR_ERR(ks->vdd_reg);
-		goto err_reg;
-	}
-
-	ret = regulator_enable(ks->vdd_reg);
-	if (ret) {
-		dev_err(&spi->dev, "regulator vdd enable fail: %d\n",
-			ret);
-		goto err_reg;
+		if (ret == -EPROBE_DEFER)
+			goto err_reg;
+	} else {
+		ret = regulator_enable(ks->vdd_reg);
+		if (ret) {
+			dev_err(&spi->dev, "regulator vdd enable fail: %d\n",
+				ret);
+			goto err_reg;
+		}
 	}
 
 	if (gpio_is_valid(gpio)) {
@@ -1570,9 +1572,11 @@ err_irq:
 	if (gpio_is_valid(gpio))
 		gpio_set_value(gpio, 0);
 err_id:
-	regulator_disable(ks->vdd_reg);
+	if (!IS_ERR(ks->vdd_reg))
+		regulator_disable(ks->vdd_reg);
 err_reg:
-	regulator_disable(ks->vdd_io);
+	if (!IS_ERR(ks->vdd_io))
+		regulator_disable(ks->vdd_io);
 err_reg_io:
 err_gpio:
 	free_netdev(ndev);
@@ -1590,8 +1594,10 @@ static int ks8851_remove(struct spi_device *spi)
 	free_irq(spi->irq, priv);
 	if (gpio_is_valid(priv->gpio))
 		gpio_set_value(priv->gpio, 0);
-	regulator_disable(priv->vdd_reg);
-	regulator_disable(priv->vdd_io);
+	if (!IS_ERR(priv->vdd_reg))
+		regulator_disable(priv->vdd_reg);
+	if (!IS_ERR(priv->vdd_io))
+		regulator_disable(priv->vdd_io);
 	free_netdev(priv->netdev);
 
 	return 0;
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 3551d667ef9f..d08e4d2a9b92 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -257,8 +257,6 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
 	set_bit(cpumask_check(cpu), cpumask_bits(dstp));
 }
 
-int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);
-
 /**
  * cpumask_clear_cpu - clear a cpu in a cpumask
  * @cpu: cpu number (< nr_cpu_ids)
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 14049a96f04a..b810b753c607 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -163,68 +163,4 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
 	memblock_free_early(__pa(mask), cpumask_size());
 }
-
-/**
- * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first
- *
- * @i: index number
- * @numa_node: local numa_node
- * @dstp: cpumask with the relevant cpu bit set according to the policy
- *
- * This function sets the cpumask according to a numa aware policy.
- * cpumask could be used as an affinity hint for the IRQ related to a
- * queue. When the policy is to spread queues across cores - local cores
- * first.
- *
- * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
- * the cpu bit and need to re-call the function.
- */
-int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
-{
-	cpumask_var_t mask;
-	int cpu;
-	int ret = 0;
-
-	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
-		return -ENOMEM;
-
-	i %= num_online_cpus();
-
-	if (!cpumask_of_node(numa_node)) {
-		/* Use all online cpu's for non numa aware system */
-		cpumask_copy(mask, cpu_online_mask);
-	} else {
-		int n;
-
-		cpumask_and(mask,
-			    cpumask_of_node(numa_node), cpu_online_mask);
-
-		n = cpumask_weight(mask);
-		if (i >= n) {
-			i -= n;
-
-			/* If index > number of local cpu's, mask out local
-			 * cpu's
-			 */
-			cpumask_andnot(mask, cpu_online_mask, mask);
-		}
-	}
-
-	for_each_cpu(cpu, mask) {
-		if (--i < 0)
-			goto out;
-	}
-
-	ret = -EAGAIN;
-
-out:
-	free_cpumask_var(mask);
-
-	if (!ret)
-		cpumask_set_cpu(cpu, dstp);
-
-	return ret;
-}
-EXPORT_SYMBOL(cpumask_set_cpu_local_first);
-
 #endif
-- 
cgit v1.2.3-71-gd317


From 3480593131e0b781287dae0139bf7ccee7cba7ff Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 29 May 2014 10:22:50 +0200
Subject: net: filter: get rid of BPF_S_* enum

This patch finally allows us to get rid of the BPF_S_* enum.
Currently, the code performs unnecessary encode and decode
workarounds in seccomp and filter migration itself when a filter
is being attached in order to overcome BPF_S_* encoding which
is not used anymore by the new interpreter resp. JIT compilers.

Keeping it around would mean that also in future we would need
to extend and maintain this enum and related encoders/decoders.
We can get rid of all that and save us these operations during
filter attaching. Naturally, also JIT compilers need to be updated
by this.

Before JIT conversion is being done, each compiler checks if A
is being loaded at startup to obtain information if it needs to
emit instructions to clear A first. Since BPF extensions are a
subset of BPF_LD | BPF_{W,H,B} | BPF_ABS variants, case statements
for extensions can be removed at that point. To ease and minimalize
code changes in the classic JITs, we have introduced bpf_anc_helper().

Tested with test_bpf on x86_64 (JIT, int), s390x (JIT, int),
arm (JIT, int), i368 (int), ppc64 (JIT, int); for sparc we
unfortunately didn't have access, but changes are analogous to
the rest.

Joint work with Alexei Starovoitov.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mircea Gherzan <mgherzan@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Chema Gonzalez <chemag@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/net/bpf_jit_32.c       | 139 ++++++++--------
 arch/powerpc/net/bpf_jit_64.S   |   2 +-
 arch/powerpc/net/bpf_jit_comp.c | 157 +++++++++---------
 arch/s390/net/bpf_jit_comp.c    | 163 +++++++++----------
 arch/sparc/net/bpf_jit_comp.c   | 154 +++++++++---------
 include/linux/filter.h          | 108 +++++--------
 kernel/seccomp.c                |  83 +++++-----
 net/core/filter.c               | 341 +++++++++++++++-------------------------
 8 files changed, 498 insertions(+), 649 deletions(-)

(limited to 'include')

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6f879c319a9d..fb5503ce016f 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -136,7 +136,7 @@ static u16 saved_regs(struct jit_ctx *ctx)
 	u16 ret = 0;
 
 	if ((ctx->skf->len > 1) ||
-	    (ctx->skf->insns[0].code == BPF_S_RET_A))
+	    (ctx->skf->insns[0].code == (BPF_RET | BPF_A)))
 		ret |= 1 << r_A;
 
 #ifdef CONFIG_FRAME_POINTER
@@ -164,18 +164,10 @@ static inline int mem_words_used(struct jit_ctx *ctx)
 static inline bool is_load_to_a(u16 inst)
 {
 	switch (inst) {
-	case BPF_S_LD_W_LEN:
-	case BPF_S_LD_W_ABS:
-	case BPF_S_LD_H_ABS:
-	case BPF_S_LD_B_ABS:
-	case BPF_S_ANC_CPU:
-	case BPF_S_ANC_IFINDEX:
-	case BPF_S_ANC_MARK:
-	case BPF_S_ANC_PROTOCOL:
-	case BPF_S_ANC_RXHASH:
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
-	case BPF_S_ANC_QUEUE:
+	case BPF_LD | BPF_W | BPF_LEN:
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
 		return true;
 	default:
 		return false;
@@ -215,7 +207,7 @@ static void build_prologue(struct jit_ctx *ctx)
 		emit(ARM_MOV_I(r_X, 0), ctx);
 
 	/* do not leak kernel data to userspace */
-	if ((first_inst != BPF_S_RET_K) && !(is_load_to_a(first_inst)))
+	if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
 		emit(ARM_MOV_I(r_A, 0), ctx);
 
 	/* stack space for the BPF_MEM words */
@@ -480,36 +472,39 @@ static int build_body(struct jit_ctx *ctx)
 	u32 k;
 
 	for (i = 0; i < prog->len; i++) {
+		u16 code;
+
 		inst = &(prog->insns[i]);
 		/* K as an immediate value operand */
 		k = inst->k;
+		code = bpf_anc_helper(inst);
 
 		/* compute offsets only in the fake pass */
 		if (ctx->target == NULL)
 			ctx->offsets[i] = ctx->idx * 4;
 
-		switch (inst->code) {
-		case BPF_S_LD_IMM:
+		switch (code) {
+		case BPF_LD | BPF_IMM:
 			emit_mov_i(r_A, k, ctx);
 			break;
-		case BPF_S_LD_W_LEN:
+		case BPF_LD | BPF_W | BPF_LEN:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 			emit(ARM_LDR_I(r_A, r_skb,
 				       offsetof(struct sk_buff, len)), ctx);
 			break;
-		case BPF_S_LD_MEM:
+		case BPF_LD | BPF_MEM:
 			/* A = scratch[k] */
 			ctx->seen |= SEEN_MEM_WORD(k);
 			emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_LD_W_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
 			load_order = 2;
 			goto load;
-		case BPF_S_LD_H_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
 			load_order = 1;
 			goto load;
-		case BPF_S_LD_B_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			load_order = 0;
 load:
 			/* the interpreter will deal with the negative K */
@@ -552,31 +547,31 @@ load_common:
 			emit_err_ret(ARM_COND_NE, ctx);
 			emit(ARM_MOV_R(r_A, ARM_R0), ctx);
 			break;
-		case BPF_S_LD_W_IND:
+		case BPF_LD | BPF_W | BPF_IND:
 			load_order = 2;
 			goto load_ind;
-		case BPF_S_LD_H_IND:
+		case BPF_LD | BPF_H | BPF_IND:
 			load_order = 1;
 			goto load_ind;
-		case BPF_S_LD_B_IND:
+		case BPF_LD | BPF_B | BPF_IND:
 			load_order = 0;
 load_ind:
 			OP_IMM3(ARM_ADD, r_off, r_X, k, ctx);
 			goto load_common;
-		case BPF_S_LDX_IMM:
+		case BPF_LDX | BPF_IMM:
 			ctx->seen |= SEEN_X;
 			emit_mov_i(r_X, k, ctx);
 			break;
-		case BPF_S_LDX_W_LEN:
+		case BPF_LDX | BPF_W | BPF_LEN:
 			ctx->seen |= SEEN_X | SEEN_SKB;
 			emit(ARM_LDR_I(r_X, r_skb,
 				       offsetof(struct sk_buff, len)), ctx);
 			break;
-		case BPF_S_LDX_MEM:
+		case BPF_LDX | BPF_MEM:
 			ctx->seen |= SEEN_X | SEEN_MEM_WORD(k);
 			emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_LDX_B_MSH:
+		case BPF_LDX | BPF_B | BPF_MSH:
 			/* x = ((*(frame + k)) & 0xf) << 2; */
 			ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL;
 			/* the interpreter should deal with the negative K */
@@ -606,113 +601,113 @@ load_ind:
 			emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx);
 			emit(ARM_LSL_I(r_X, r_X, 2), ctx);
 			break;
-		case BPF_S_ST:
+		case BPF_ST:
 			ctx->seen |= SEEN_MEM_WORD(k);
 			emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_STX:
+		case BPF_STX:
 			update_on_xread(ctx);
 			ctx->seen |= SEEN_MEM_WORD(k);
 			emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_ALU_ADD_K:
+		case BPF_ALU | BPF_ADD | BPF_K:
 			/* A += K */
 			OP_IMM3(ARM_ADD, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_ADD_X:
+		case BPF_ALU | BPF_ADD | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_ADD_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_SUB_K:
+		case BPF_ALU | BPF_SUB | BPF_K:
 			/* A -= K */
 			OP_IMM3(ARM_SUB, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_SUB_X:
+		case BPF_ALU | BPF_SUB | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_SUB_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_MUL_K:
+		case BPF_ALU | BPF_MUL | BPF_K:
 			/* A *= K */
 			emit_mov_i(r_scratch, k, ctx);
 			emit(ARM_MUL(r_A, r_A, r_scratch), ctx);
 			break;
-		case BPF_S_ALU_MUL_X:
+		case BPF_ALU | BPF_MUL | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_MUL(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_DIV_K:
+		case BPF_ALU | BPF_DIV | BPF_K:
 			if (k == 1)
 				break;
 			emit_mov_i(r_scratch, k, ctx);
 			emit_udiv(r_A, r_A, r_scratch, ctx);
 			break;
-		case BPF_S_ALU_DIV_X:
+		case BPF_ALU | BPF_DIV | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_CMP_I(r_X, 0), ctx);
 			emit_err_ret(ARM_COND_EQ, ctx);
 			emit_udiv(r_A, r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_OR_K:
+		case BPF_ALU | BPF_OR | BPF_K:
 			/* A |= K */
 			OP_IMM3(ARM_ORR, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_OR_X:
+		case BPF_ALU | BPF_OR | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_ORR_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_XOR_K:
+		case BPF_ALU | BPF_XOR | BPF_K:
 			/* A ^= K; */
 			OP_IMM3(ARM_EOR, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ANC_ALU_XOR_X:
-		case BPF_S_ALU_XOR_X:
+		case BPF_ANC | SKF_AD_ALU_XOR_X:
+		case BPF_ALU | BPF_XOR | BPF_X:
 			/* A ^= X */
 			update_on_xread(ctx);
 			emit(ARM_EOR_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_AND_K:
+		case BPF_ALU | BPF_AND | BPF_K:
 			/* A &= K */
 			OP_IMM3(ARM_AND, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_AND_X:
+		case BPF_ALU | BPF_AND | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_AND_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_LSH_K:
+		case BPF_ALU | BPF_LSH | BPF_K:
 			if (unlikely(k > 31))
 				return -1;
 			emit(ARM_LSL_I(r_A, r_A, k), ctx);
 			break;
-		case BPF_S_ALU_LSH_X:
+		case BPF_ALU | BPF_LSH | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_LSL_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_RSH_K:
+		case BPF_ALU | BPF_RSH | BPF_K:
 			if (unlikely(k > 31))
 				return -1;
 			emit(ARM_LSR_I(r_A, r_A, k), ctx);
 			break;
-		case BPF_S_ALU_RSH_X:
+		case BPF_ALU | BPF_RSH | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_LSR_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_NEG:
+		case BPF_ALU | BPF_NEG:
 			/* A = -A */
 			emit(ARM_RSB_I(r_A, r_A, 0), ctx);
 			break;
-		case BPF_S_JMP_JA:
+		case BPF_JMP | BPF_JA:
 			/* pc += K */
 			emit(ARM_B(b_imm(i + k + 1, ctx)), ctx);
 			break;
-		case BPF_S_JMP_JEQ_K:
+		case BPF_JMP | BPF_JEQ | BPF_K:
 			/* pc += (A == K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_EQ;
 			goto cmp_imm;
-		case BPF_S_JMP_JGT_K:
+		case BPF_JMP | BPF_JGT | BPF_K:
 			/* pc += (A > K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_HI;
 			goto cmp_imm;
-		case BPF_S_JMP_JGE_K:
+		case BPF_JMP | BPF_JGE | BPF_K:
 			/* pc += (A >= K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_HS;
 cmp_imm:
@@ -731,22 +726,22 @@ cond_jump:
 				_emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1,
 							     ctx)), ctx);
 			break;
-		case BPF_S_JMP_JEQ_X:
+		case BPF_JMP | BPF_JEQ | BPF_X:
 			/* pc += (A == X) ? pc->jt : pc->jf */
 			condt   = ARM_COND_EQ;
 			goto cmp_x;
-		case BPF_S_JMP_JGT_X:
+		case BPF_JMP | BPF_JGT | BPF_X:
 			/* pc += (A > X) ? pc->jt : pc->jf */
 			condt   = ARM_COND_HI;
 			goto cmp_x;
-		case BPF_S_JMP_JGE_X:
+		case BPF_JMP | BPF_JGE | BPF_X:
 			/* pc += (A >= X) ? pc->jt : pc->jf */
 			condt   = ARM_COND_CS;
 cmp_x:
 			update_on_xread(ctx);
 			emit(ARM_CMP_R(r_A, r_X), ctx);
 			goto cond_jump;
-		case BPF_S_JMP_JSET_K:
+		case BPF_JMP | BPF_JSET | BPF_K:
 			/* pc += (A & K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_NE;
 			/* not set iff all zeroes iff Z==1 iff EQ */
@@ -759,16 +754,16 @@ cmp_x:
 				emit(ARM_TST_I(r_A, imm12), ctx);
 			}
 			goto cond_jump;
-		case BPF_S_JMP_JSET_X:
+		case BPF_JMP | BPF_JSET | BPF_X:
 			/* pc += (A & X) ? pc->jt : pc->jf */
 			update_on_xread(ctx);
 			condt  = ARM_COND_NE;
 			emit(ARM_TST_R(r_A, r_X), ctx);
 			goto cond_jump;
-		case BPF_S_RET_A:
+		case BPF_RET | BPF_A:
 			emit(ARM_MOV_R(ARM_R0, r_A), ctx);
 			goto b_epilogue;
-		case BPF_S_RET_K:
+		case BPF_RET | BPF_K:
 			if ((k == 0) && (ctx->ret0_fp_idx < 0))
 				ctx->ret0_fp_idx = i;
 			emit_mov_i(ARM_R0, k, ctx);
@@ -776,17 +771,17 @@ b_epilogue:
 			if (i != ctx->skf->len - 1)
 				emit(ARM_B(b_imm(prog->len, ctx)), ctx);
 			break;
-		case BPF_S_MISC_TAX:
+		case BPF_MISC | BPF_TAX:
 			/* X = A */
 			ctx->seen |= SEEN_X;
 			emit(ARM_MOV_R(r_X, r_A), ctx);
 			break;
-		case BPF_S_MISC_TXA:
+		case BPF_MISC | BPF_TXA:
 			/* A = X */
 			update_on_xread(ctx);
 			emit(ARM_MOV_R(r_A, r_X), ctx);
 			break;
-		case BPF_S_ANC_PROTOCOL:
+		case BPF_ANC | SKF_AD_PROTOCOL:
 			/* A = ntohs(skb->protocol) */
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
@@ -795,7 +790,7 @@ b_epilogue:
 			emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx);
 			emit_swap16(r_A, r_scratch, ctx);
 			break;
-		case BPF_S_ANC_CPU:
+		case BPF_ANC | SKF_AD_CPU:
 			/* r_scratch = current_thread_info() */
 			OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx);
 			/* A = current_thread_info()->cpu */
@@ -803,7 +798,7 @@ b_epilogue:
 			off = offsetof(struct thread_info, cpu);
 			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
 			break;
-		case BPF_S_ANC_IFINDEX:
+		case BPF_ANC | SKF_AD_IFINDEX:
 			/* A = skb->dev->ifindex */
 			ctx->seen |= SEEN_SKB;
 			off = offsetof(struct sk_buff, dev);
@@ -817,30 +812,30 @@ b_epilogue:
 			off = offsetof(struct net_device, ifindex);
 			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
 			break;
-		case BPF_S_ANC_MARK:
+		case BPF_ANC | SKF_AD_MARK:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 			off = offsetof(struct sk_buff, mark);
 			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
 			break;
-		case BPF_S_ANC_RXHASH:
+		case BPF_ANC | SKF_AD_RXHASH:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 			off = offsetof(struct sk_buff, hash);
 			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
 			break;
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
+		case BPF_ANC | SKF_AD_VLAN_TAG:
+		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 			off = offsetof(struct sk_buff, vlan_tci);
 			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
-			if (inst->code == BPF_S_ANC_VLAN_TAG)
+			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
 				OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx);
 			else
 				OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx);
 			break;
-		case BPF_S_ANC_QUEUE:
+		case BPF_ANC | SKF_AD_QUEUE:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  queue_mapping) != 2);
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index e76eba74d9da..8f87d9217122 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -78,7 +78,7 @@ sk_load_byte_positive_offset:
 	blr
 
 /*
- * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
+ * BPF_LDX | BPF_B | BPF_MSH: ldxb  4*([offset]&0xf)
  * r_addr is the offset value
  */
 	.globl sk_load_byte_msh
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 808ce1cae21a..6dcdadefd8d0 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -79,19 +79,11 @@ static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image,
 	}
 
 	switch (filter[0].code) {
-	case BPF_S_RET_K:
-	case BPF_S_LD_W_LEN:
-	case BPF_S_ANC_PROTOCOL:
-	case BPF_S_ANC_IFINDEX:
-	case BPF_S_ANC_MARK:
-	case BPF_S_ANC_RXHASH:
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
-	case BPF_S_ANC_CPU:
-	case BPF_S_ANC_QUEUE:
-	case BPF_S_LD_W_ABS:
-	case BPF_S_LD_H_ABS:
-	case BPF_S_LD_B_ABS:
+	case BPF_RET | BPF_K:
+	case BPF_LD | BPF_W | BPF_LEN:
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
 		/* first instruction sets A register (or is RET 'constant') */
 		break;
 	default:
@@ -144,6 +136,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 
 	for (i = 0; i < flen; i++) {
 		unsigned int K = filter[i].k;
+		u16 code = bpf_anc_helper(&filter[i]);
 
 		/*
 		 * addrs[] maps a BPF bytecode address into a real offset from
@@ -151,35 +144,35 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 		 */
 		addrs[i] = ctx->idx * 4;
 
-		switch (filter[i].code) {
+		switch (code) {
 			/*** ALU ops ***/
-		case BPF_S_ALU_ADD_X: /* A += X; */
+		case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_ADD(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_ADD_K: /* A += K; */
+		case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */
 			if (!K)
 				break;
 			PPC_ADDI(r_A, r_A, IMM_L(K));
 			if (K >= 32768)
 				PPC_ADDIS(r_A, r_A, IMM_HA(K));
 			break;
-		case BPF_S_ALU_SUB_X: /* A -= X; */
+		case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_SUB(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_SUB_K: /* A -= K */
+		case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
 			if (!K)
 				break;
 			PPC_ADDI(r_A, r_A, IMM_L(-K));
 			if (K >= 32768)
 				PPC_ADDIS(r_A, r_A, IMM_HA(-K));
 			break;
-		case BPF_S_ALU_MUL_X: /* A *= X; */
+		case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_MUL(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_MUL_K: /* A *= K */
+		case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
 			if (K < 32768)
 				PPC_MULI(r_A, r_A, K);
 			else {
@@ -187,7 +180,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 				PPC_MUL(r_A, r_A, r_scratch1);
 			}
 			break;
-		case BPF_S_ALU_MOD_X: /* A %= X; */
+		case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_CMPWI(r_X, 0);
 			if (ctx->pc_ret0 != -1) {
@@ -201,13 +194,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			PPC_MUL(r_scratch1, r_X, r_scratch1);
 			PPC_SUB(r_A, r_A, r_scratch1);
 			break;
-		case BPF_S_ALU_MOD_K: /* A %= K; */
+		case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */
 			PPC_LI32(r_scratch2, K);
 			PPC_DIVWU(r_scratch1, r_A, r_scratch2);
 			PPC_MUL(r_scratch1, r_scratch2, r_scratch1);
 			PPC_SUB(r_A, r_A, r_scratch1);
 			break;
-		case BPF_S_ALU_DIV_X: /* A /= X; */
+		case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_CMPWI(r_X, 0);
 			if (ctx->pc_ret0 != -1) {
@@ -223,17 +216,17 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			}
 			PPC_DIVWU(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_DIV_K: /* A /= K */
+		case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
 			if (K == 1)
 				break;
 			PPC_LI32(r_scratch1, K);
 			PPC_DIVWU(r_A, r_A, r_scratch1);
 			break;
-		case BPF_S_ALU_AND_X:
+		case BPF_ALU | BPF_AND | BPF_X:
 			ctx->seen |= SEEN_XREG;
 			PPC_AND(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_AND_K:
+		case BPF_ALU | BPF_AND | BPF_K:
 			if (!IMM_H(K))
 				PPC_ANDI(r_A, r_A, K);
 			else {
@@ -241,51 +234,51 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 				PPC_AND(r_A, r_A, r_scratch1);
 			}
 			break;
-		case BPF_S_ALU_OR_X:
+		case BPF_ALU | BPF_OR | BPF_X:
 			ctx->seen |= SEEN_XREG;
 			PPC_OR(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_OR_K:
+		case BPF_ALU | BPF_OR | BPF_K:
 			if (IMM_L(K))
 				PPC_ORI(r_A, r_A, IMM_L(K));
 			if (K >= 65536)
 				PPC_ORIS(r_A, r_A, IMM_H(K));
 			break;
-		case BPF_S_ANC_ALU_XOR_X:
-		case BPF_S_ALU_XOR_X: /* A ^= X */
+		case BPF_ANC | SKF_AD_ALU_XOR_X:
+		case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */
 			ctx->seen |= SEEN_XREG;
 			PPC_XOR(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_XOR_K: /* A ^= K */
+		case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
 			if (IMM_L(K))
 				PPC_XORI(r_A, r_A, IMM_L(K));
 			if (K >= 65536)
 				PPC_XORIS(r_A, r_A, IMM_H(K));
 			break;
-		case BPF_S_ALU_LSH_X: /* A <<= X; */
+		case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_SLW(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_LSH_K:
+		case BPF_ALU | BPF_LSH | BPF_K:
 			if (K == 0)
 				break;
 			else
 				PPC_SLWI(r_A, r_A, K);
 			break;
-		case BPF_S_ALU_RSH_X: /* A >>= X; */
+		case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_SRW(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_RSH_K: /* A >>= K; */
+		case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
 			if (K == 0)
 				break;
 			else
 				PPC_SRWI(r_A, r_A, K);
 			break;
-		case BPF_S_ALU_NEG:
+		case BPF_ALU | BPF_NEG:
 			PPC_NEG(r_A, r_A);
 			break;
-		case BPF_S_RET_K:
+		case BPF_RET | BPF_K:
 			PPC_LI32(r_ret, K);
 			if (!K) {
 				if (ctx->pc_ret0 == -1)
@@ -312,7 +305,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 					PPC_BLR();
 			}
 			break;
-		case BPF_S_RET_A:
+		case BPF_RET | BPF_A:
 			PPC_MR(r_ret, r_A);
 			if (i != flen - 1) {
 				if (ctx->seen)
@@ -321,53 +314,53 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 					PPC_BLR();
 			}
 			break;
-		case BPF_S_MISC_TAX: /* X = A */
+		case BPF_MISC | BPF_TAX: /* X = A */
 			PPC_MR(r_X, r_A);
 			break;
-		case BPF_S_MISC_TXA: /* A = X */
+		case BPF_MISC | BPF_TXA: /* A = X */
 			ctx->seen |= SEEN_XREG;
 			PPC_MR(r_A, r_X);
 			break;
 
 			/*** Constant loads/M[] access ***/
-		case BPF_S_LD_IMM: /* A = K */
+		case BPF_LD | BPF_IMM: /* A = K */
 			PPC_LI32(r_A, K);
 			break;
-		case BPF_S_LDX_IMM: /* X = K */
+		case BPF_LDX | BPF_IMM: /* X = K */
 			PPC_LI32(r_X, K);
 			break;
-		case BPF_S_LD_MEM: /* A = mem[K] */
+		case BPF_LD | BPF_MEM: /* A = mem[K] */
 			PPC_MR(r_A, r_M + (K & 0xf));
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_LDX_MEM: /* X = mem[K] */
+		case BPF_LDX | BPF_MEM: /* X = mem[K] */
 			PPC_MR(r_X, r_M + (K & 0xf));
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_ST: /* mem[K] = A */
+		case BPF_ST: /* mem[K] = A */
 			PPC_MR(r_M + (K & 0xf), r_A);
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_STX: /* mem[K] = X */
+		case BPF_STX: /* mem[K] = X */
 			PPC_MR(r_M + (K & 0xf), r_X);
 			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_LD_W_LEN: /*	A = skb->len; */
+		case BPF_LD | BPF_W | BPF_LEN: /*	A = skb->len; */
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
 			break;
-		case BPF_S_LDX_W_LEN: /* X = skb->len; */
+		case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
 			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
 			break;
 
 			/*** Ancillary info loads ***/
-		case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+		case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  protocol) != 2);
 			PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							    protocol));
 			break;
-		case BPF_S_ANC_IFINDEX:
+		case BPF_ANC | SKF_AD_IFINDEX:
 			PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
 								dev));
 			PPC_CMPDI(r_scratch1, 0);
@@ -384,33 +377,33 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			PPC_LWZ_OFFS(r_A, r_scratch1,
 				     offsetof(struct net_device, ifindex));
 			break;
-		case BPF_S_ANC_MARK:
+		case BPF_ANC | SKF_AD_MARK:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  mark));
 			break;
-		case BPF_S_ANC_RXHASH:
+		case BPF_ANC | SKF_AD_RXHASH:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  hash));
 			break;
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
+		case BPF_ANC | SKF_AD_VLAN_TAG:
+		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  vlan_tci));
-			if (filter[i].code == BPF_S_ANC_VLAN_TAG)
+			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
 				PPC_ANDI(r_A, r_A, VLAN_VID_MASK);
 			else
 				PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT);
 			break;
-		case BPF_S_ANC_QUEUE:
+		case BPF_ANC | SKF_AD_QUEUE:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  queue_mapping) != 2);
 			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  queue_mapping));
 			break;
-		case BPF_S_ANC_CPU:
+		case BPF_ANC | SKF_AD_CPU:
 #ifdef CONFIG_SMP
 			/*
 			 * PACA ptr is r13:
@@ -426,13 +419,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			break;
 
 			/*** Absolute loads from packet header/data ***/
-		case BPF_S_LD_W_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
 			goto common_load;
-		case BPF_S_LD_H_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
 			goto common_load;
-		case BPF_S_LD_B_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
 		common_load:
 			/* Load from [K]. */
@@ -449,13 +442,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			break;
 
 			/*** Indirect loads from packet header/data ***/
-		case BPF_S_LD_W_IND:
+		case BPF_LD | BPF_W | BPF_IND:
 			func = sk_load_word;
 			goto common_load_ind;
-		case BPF_S_LD_H_IND:
+		case BPF_LD | BPF_H | BPF_IND:
 			func = sk_load_half;
 			goto common_load_ind;
-		case BPF_S_LD_B_IND:
+		case BPF_LD | BPF_B | BPF_IND:
 			func = sk_load_byte;
 		common_load_ind:
 			/*
@@ -473,31 +466,31 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			PPC_BCC(COND_LT, exit_addr);
 			break;
 
-		case BPF_S_LDX_B_MSH:
+		case BPF_LDX | BPF_B | BPF_MSH:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
 			goto common_load;
 			break;
 
 			/*** Jump and branches ***/
-		case BPF_S_JMP_JA:
+		case BPF_JMP | BPF_JA:
 			if (K != 0)
 				PPC_JMP(addrs[i + 1 + K]);
 			break;
 
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
 			true_cond = COND_GT;
 			goto cond_branch;
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
 			true_cond = COND_GE;
 			goto cond_branch;
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
 			true_cond = COND_EQ;
 			goto cond_branch;
-		case BPF_S_JMP_JSET_K:
-		case BPF_S_JMP_JSET_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
 			true_cond = COND_NE;
 			/* Fall through */
 		cond_branch:
@@ -508,20 +501,20 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 				break;
 			}
 
-			switch (filter[i].code) {
-			case BPF_S_JMP_JGT_X:
-			case BPF_S_JMP_JGE_X:
-			case BPF_S_JMP_JEQ_X:
+			switch (code) {
+			case BPF_JMP | BPF_JGT | BPF_X:
+			case BPF_JMP | BPF_JGE | BPF_X:
+			case BPF_JMP | BPF_JEQ | BPF_X:
 				ctx->seen |= SEEN_XREG;
 				PPC_CMPLW(r_A, r_X);
 				break;
-			case BPF_S_JMP_JSET_X:
+			case BPF_JMP | BPF_JSET | BPF_X:
 				ctx->seen |= SEEN_XREG;
 				PPC_AND_DOT(r_scratch1, r_A, r_X);
 				break;
-			case BPF_S_JMP_JEQ_K:
-			case BPF_S_JMP_JGT_K:
-			case BPF_S_JMP_JGE_K:
+			case BPF_JMP | BPF_JEQ | BPF_K:
+			case BPF_JMP | BPF_JGT | BPF_K:
+			case BPF_JMP | BPF_JGE | BPF_K:
 				if (K < 32768)
 					PPC_CMPLWI(r_A, K);
 				else {
@@ -529,7 +522,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 					PPC_CMPLW(r_A, r_scratch1);
 				}
 				break;
-			case BPF_S_JMP_JSET_K:
+			case BPF_JMP | BPF_JSET | BPF_K:
 				if (K < 32768)
 					/* PPC_ANDI is /only/ dot-form */
 					PPC_ANDI(r_scratch1, r_A, K);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e9f8fa9337fe..a2cbd875543a 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -269,27 +269,17 @@ static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter)
 		EMIT4(0xa7c80000);
 	/* Clear A if the first register does not set it. */
 	switch (filter[0].code) {
-	case BPF_S_LD_W_ABS:
-	case BPF_S_LD_H_ABS:
-	case BPF_S_LD_B_ABS:
-	case BPF_S_LD_W_LEN:
-	case BPF_S_LD_W_IND:
-	case BPF_S_LD_H_IND:
-	case BPF_S_LD_B_IND:
-	case BPF_S_LD_IMM:
-	case BPF_S_LD_MEM:
-	case BPF_S_MISC_TXA:
-	case BPF_S_ANC_PROTOCOL:
-	case BPF_S_ANC_PKTTYPE:
-	case BPF_S_ANC_IFINDEX:
-	case BPF_S_ANC_MARK:
-	case BPF_S_ANC_QUEUE:
-	case BPF_S_ANC_HATYPE:
-	case BPF_S_ANC_RXHASH:
-	case BPF_S_ANC_CPU:
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
-	case BPF_S_RET_K:
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
+	case BPF_LD | BPF_W | BPF_LEN:
+	case BPF_LD | BPF_W | BPF_IND:
+	case BPF_LD | BPF_H | BPF_IND:
+	case BPF_LD | BPF_B | BPF_IND:
+	case BPF_LD | BPF_IMM:
+	case BPF_LD | BPF_MEM:
+	case BPF_MISC | BPF_TXA:
+	case BPF_RET | BPF_K:
 		/* first instruction sets A register */
 		break;
 	default: /* A = 0 */
@@ -304,15 +294,18 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 	unsigned int K;
 	int offset;
 	unsigned int mask;
+	u16 code;
 
 	K = filter->k;
-	switch (filter->code) {
-	case BPF_S_ALU_ADD_X: /* A += X */
+	code = bpf_anc_helper(filter);
+
+	switch (code) {
+	case BPF_ALU | BPF_ADD | BPF_X: /* A += X */
 		jit->seen |= SEEN_XREG;
 		/* ar %r5,%r12 */
 		EMIT2(0x1a5c);
 		break;
-	case BPF_S_ALU_ADD_K: /* A += K */
+	case BPF_ALU | BPF_ADD | BPF_K: /* A += K */
 		if (!K)
 			break;
 		if (K <= 16383)
@@ -325,12 +318,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 			/* a %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5a50d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_SUB_X: /* A -= X */
+	case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */
 		jit->seen |= SEEN_XREG;
 		/* sr %r5,%r12 */
 		EMIT2(0x1b5c);
 		break;
-	case BPF_S_ALU_SUB_K: /* A -= K */
+	case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
 		if (!K)
 			break;
 		if (K <= 16384)
@@ -343,12 +336,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 			/* s %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5b50d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_MUL_X: /* A *= X */
+	case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */
 		jit->seen |= SEEN_XREG;
 		/* msr %r5,%r12 */
 		EMIT4(0xb252005c);
 		break;
-	case BPF_S_ALU_MUL_K: /* A *= K */
+	case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
 		if (K <= 16383)
 			/* mhi %r5,K */
 			EMIT4_IMM(0xa75c0000, K);
@@ -359,7 +352,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 			/* ms %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x7150d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_DIV_X: /* A /= X */
+	case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */
 		jit->seen |= SEEN_XREG | SEEN_RET0;
 		/* ltr %r12,%r12 */
 		EMIT2(0x12cc);
@@ -370,7 +363,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		/* dlr %r4,%r12 */
 		EMIT4(0xb997004c);
 		break;
-	case BPF_S_ALU_DIV_K: /* A /= K */
+	case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
 		if (K == 1)
 			break;
 		/* lhi %r4,0 */
@@ -378,7 +371,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		/* dl %r4,<d(K)>(%r13) */
 		EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_MOD_X: /* A %= X */
+	case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */
 		jit->seen |= SEEN_XREG | SEEN_RET0;
 		/* ltr %r12,%r12 */
 		EMIT2(0x12cc);
@@ -391,7 +384,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		/* lr %r5,%r4 */
 		EMIT2(0x1854);
 		break;
-	case BPF_S_ALU_MOD_K: /* A %= K */
+	case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */
 		if (K == 1) {
 			/* lhi %r5,0 */
 			EMIT4(0xa7580000);
@@ -404,12 +397,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		/* lr %r5,%r4 */
 		EMIT2(0x1854);
 		break;
-	case BPF_S_ALU_AND_X: /* A &= X */
+	case BPF_ALU | BPF_AND | BPF_X: /* A &= X */
 		jit->seen |= SEEN_XREG;
 		/* nr %r5,%r12 */
 		EMIT2(0x145c);
 		break;
-	case BPF_S_ALU_AND_K: /* A &= K */
+	case BPF_ALU | BPF_AND | BPF_K: /* A &= K */
 		if (test_facility(21))
 			/* nilf %r5,<K> */
 			EMIT6_IMM(0xc05b0000, K);
@@ -417,12 +410,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 			/* n %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5450d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_OR_X: /* A |= X */
+	case BPF_ALU | BPF_OR | BPF_X: /* A |= X */
 		jit->seen |= SEEN_XREG;
 		/* or %r5,%r12 */
 		EMIT2(0x165c);
 		break;
-	case BPF_S_ALU_OR_K: /* A |= K */
+	case BPF_ALU | BPF_OR | BPF_K: /* A |= K */
 		if (test_facility(21))
 			/* oilf %r5,<K> */
 			EMIT6_IMM(0xc05d0000, K);
@@ -430,55 +423,55 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 			/* o %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5650d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
-	case BPF_S_ALU_XOR_X:
+	case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
+	case BPF_ALU | BPF_XOR | BPF_X:
 		jit->seen |= SEEN_XREG;
 		/* xr %r5,%r12 */
 		EMIT2(0x175c);
 		break;
-	case BPF_S_ALU_XOR_K: /* A ^= K */
+	case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
 		if (!K)
 			break;
 		/* x %r5,<d(K)>(%r13) */
 		EMIT4_DISP(0x5750d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_LSH_X: /* A <<= X; */
+	case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
 		jit->seen |= SEEN_XREG;
 		/* sll %r5,0(%r12) */
 		EMIT4(0x8950c000);
 		break;
-	case BPF_S_ALU_LSH_K: /* A <<= K */
+	case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */
 		if (K == 0)
 			break;
 		/* sll %r5,K */
 		EMIT4_DISP(0x89500000, K);
 		break;
-	case BPF_S_ALU_RSH_X: /* A >>= X; */
+	case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
 		jit->seen |= SEEN_XREG;
 		/* srl %r5,0(%r12) */
 		EMIT4(0x8850c000);
 		break;
-	case BPF_S_ALU_RSH_K: /* A >>= K; */
+	case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
 		if (K == 0)
 			break;
 		/* srl %r5,K */
 		EMIT4_DISP(0x88500000, K);
 		break;
-	case BPF_S_ALU_NEG: /* A = -A */
+	case BPF_ALU | BPF_NEG: /* A = -A */
 		/* lnr %r5,%r5 */
 		EMIT2(0x1155);
 		break;
-	case BPF_S_JMP_JA: /* ip += K */
+	case BPF_JMP | BPF_JA: /* ip += K */
 		offset = addrs[i + K] + jit->start - jit->prg;
 		EMIT4_PCREL(0xa7f40000, offset);
 		break;
-	case BPF_S_JMP_JGT_K: /* ip += (A > K) ? jt : jf */
+	case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */
 		mask = 0x200000; /* jh */
 		goto kbranch;
-	case BPF_S_JMP_JGE_K: /* ip += (A >= K) ? jt : jf */
+	case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */
 		mask = 0xa00000; /* jhe */
 		goto kbranch;
-	case BPF_S_JMP_JEQ_K: /* ip += (A == K) ? jt : jf */
+	case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */
 		mask = 0x800000; /* je */
 kbranch:	/* Emit compare if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -511,7 +504,7 @@ branch:		if (filter->jt == filter->jf) {
 			EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset);
 		}
 		break;
-	case BPF_S_JMP_JSET_K: /* ip += (A & K) ? jt : jf */
+	case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */
 		mask = 0x700000; /* jnz */
 		/* Emit test if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -525,13 +518,13 @@ branch:		if (filter->jt == filter->jf) {
 				EMIT4_IMM(0xa7510000, K);
 		}
 		goto branch;
-	case BPF_S_JMP_JGT_X: /* ip += (A > X) ? jt : jf */
+	case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */
 		mask = 0x200000; /* jh */
 		goto xbranch;
-	case BPF_S_JMP_JGE_X: /* ip += (A >= X) ? jt : jf */
+	case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */
 		mask = 0xa00000; /* jhe */
 		goto xbranch;
-	case BPF_S_JMP_JEQ_X: /* ip += (A == X) ? jt : jf */
+	case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */
 		mask = 0x800000; /* je */
 xbranch:	/* Emit compare if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -540,7 +533,7 @@ xbranch:	/* Emit compare if the branch targets are different */
 			EMIT2(0x195c);
 		}
 		goto branch;
-	case BPF_S_JMP_JSET_X: /* ip += (A & X) ? jt : jf */
+	case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */
 		mask = 0x700000; /* jnz */
 		/* Emit test if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -551,15 +544,15 @@ xbranch:	/* Emit compare if the branch targets are different */
 			EMIT2(0x144c);
 		}
 		goto branch;
-	case BPF_S_LD_W_ABS: /* A = *(u32 *) (skb->data+K) */
+	case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD;
 		offset = jit->off_load_word;
 		goto load_abs;
-	case BPF_S_LD_H_ABS: /* A = *(u16 *) (skb->data+K) */
+	case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF;
 		offset = jit->off_load_half;
 		goto load_abs;
-	case BPF_S_LD_B_ABS: /* A = *(u8 *) (skb->data+K) */
+	case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE;
 		offset = jit->off_load_byte;
 load_abs:	if ((int) K < 0)
@@ -573,19 +566,19 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		/* jnz <ret0> */
 		EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg));
 		break;
-	case BPF_S_LD_W_IND: /* A = *(u32 *) (skb->data+K+X) */
+	case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD;
 		offset = jit->off_load_iword;
 		goto call_fn;
-	case BPF_S_LD_H_IND: /* A = *(u16 *) (skb->data+K+X) */
+	case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF;
 		offset = jit->off_load_ihalf;
 		goto call_fn;
-	case BPF_S_LD_B_IND: /* A = *(u8 *) (skb->data+K+X) */
+	case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE;
 		offset = jit->off_load_ibyte;
 		goto call_fn;
-	case BPF_S_LDX_B_MSH:
+	case BPF_LDX | BPF_B | BPF_MSH:
 		/* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
 		jit->seen |= SEEN_RET0;
 		if ((int) K < 0) {
@@ -596,17 +589,17 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH;
 		offset = jit->off_load_bmsh;
 		goto call_fn;
-	case BPF_S_LD_W_LEN: /*	A = skb->len; */
+	case BPF_LD | BPF_W | BPF_LEN: /*	A = skb->len; */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 		/* l %r5,<d(len)>(%r2) */
 		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len));
 		break;
-	case BPF_S_LDX_W_LEN: /* X = skb->len; */
+	case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
 		jit->seen |= SEEN_XREG;
 		/* l %r12,<d(len)>(%r2) */
 		EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len));
 		break;
-	case BPF_S_LD_IMM: /* A = K */
+	case BPF_LD | BPF_IMM: /* A = K */
 		if (K <= 16383)
 			/* lhi %r5,K */
 			EMIT4_IMM(0xa7580000, K);
@@ -617,7 +610,7 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 			/* l %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5850d000, EMIT_CONST(K));
 		break;
-	case BPF_S_LDX_IMM: /* X = K */
+	case BPF_LDX | BPF_IMM: /* X = K */
 		jit->seen |= SEEN_XREG;
 		if (K <= 16383)
 			/* lhi %r12,<K> */
@@ -629,29 +622,29 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 			/* l %r12,<d(K)>(%r13) */
 			EMIT4_DISP(0x58c0d000, EMIT_CONST(K));
 		break;
-	case BPF_S_LD_MEM: /* A = mem[K] */
+	case BPF_LD | BPF_MEM: /* A = mem[K] */
 		jit->seen |= SEEN_MEM;
 		/* l %r5,<K>(%r15) */
 		EMIT4_DISP(0x5850f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_LDX_MEM: /* X = mem[K] */
+	case BPF_LDX | BPF_MEM: /* X = mem[K] */
 		jit->seen |= SEEN_XREG | SEEN_MEM;
 		/* l %r12,<K>(%r15) */
 		EMIT4_DISP(0x58c0f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_MISC_TAX: /* X = A */
+	case BPF_MISC | BPF_TAX: /* X = A */
 		jit->seen |= SEEN_XREG;
 		/* lr %r12,%r5 */
 		EMIT2(0x18c5);
 		break;
-	case BPF_S_MISC_TXA: /* A = X */
+	case BPF_MISC | BPF_TXA: /* A = X */
 		jit->seen |= SEEN_XREG;
 		/* lr %r5,%r12 */
 		EMIT2(0x185c);
 		break;
-	case BPF_S_RET_K:
+	case BPF_RET | BPF_K:
 		if (K == 0) {
 			jit->seen |= SEEN_RET0;
 			if (last)
@@ -671,33 +664,33 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 			EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
 		}
 		break;
-	case BPF_S_RET_A:
+	case BPF_RET | BPF_A:
 		/* llgfr %r2,%r5 */
 		EMIT4(0xb9160025);
 		/* j <exit> */
 		EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
 		break;
-	case BPF_S_ST: /* mem[K] = A */
+	case BPF_ST: /* mem[K] = A */
 		jit->seen |= SEEN_MEM;
 		/* st %r5,<K>(%r15) */
 		EMIT4_DISP(0x5050f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
+	case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
 		jit->seen |= SEEN_XREG | SEEN_MEM;
 		/* st %r12,<K>(%r15) */
 		EMIT4_DISP(0x50c0f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+	case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* icm	%r5,3,<d(protocol)>(%r2) */
 		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol));
 		break;
-	case BPF_S_ANC_IFINDEX:	/* if (!skb->dev) return 0;
-				 * A = skb->dev->ifindex */
+	case BPF_ANC | SKF_AD_IFINDEX:	/* if (!skb->dev) return 0;
+					 * A = skb->dev->ifindex */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
 		jit->seen |= SEEN_RET0;
 		/* lg %r1,<d(dev)>(%r2) */
@@ -709,20 +702,20 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		/* l %r5,<d(ifindex)>(%r1) */
 		EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex));
 		break;
-	case BPF_S_ANC_MARK: /* A = skb->mark */
+	case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 		/* l %r5,<d(mark)>(%r2) */
 		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark));
 		break;
-	case BPF_S_ANC_QUEUE: /* A = skb->queue_mapping */
+	case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* icm	%r5,3,<d(queue_mapping)>(%r2) */
 		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping));
 		break;
-	case BPF_S_ANC_HATYPE:	/* if (!skb->dev) return 0;
-				 * A = skb->dev->type */
+	case BPF_ANC | SKF_AD_HATYPE:	/* if (!skb->dev) return 0;
+					 * A = skb->dev->type */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
 		jit->seen |= SEEN_RET0;
 		/* lg %r1,<d(dev)>(%r2) */
@@ -736,20 +729,20 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		/* icm	%r5,3,<d(type)>(%r1) */
 		EMIT4_DISP(0xbf531000, offsetof(struct net_device, type));
 		break;
-	case BPF_S_ANC_RXHASH: /* A = skb->hash */
+	case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 		/* l %r5,<d(hash)>(%r2) */
 		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash));
 		break;
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
+	case BPF_ANC | SKF_AD_VLAN_TAG:
+	case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* icm	%r5,3,<d(vlan_tci)>(%r2) */
 		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci));
-		if (filter->code == BPF_S_ANC_VLAN_TAG) {
+		if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
 			/* nill %r5,0xefff */
 			EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT);
 		} else {
@@ -759,7 +752,7 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 			EMIT4_DISP(0x88500000, 12);
 		}
 		break;
-	case BPF_S_ANC_PKTTYPE:
+	case BPF_ANC | SKF_AD_PKTTYPE:
 		if (pkt_type_offset < 0)
 			goto out;
 		/* lhi %r5,0 */
@@ -769,7 +762,7 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		/* srl %r5,5 */
 		EMIT4_DISP(0x88500000, 5);
 		break;
-	case BPF_S_ANC_CPU: /* A = smp_processor_id() */
+	case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */
 #ifdef CONFIG_SMP
 		/* l %r5,<d(cpu_nr)> */
 		EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr));
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index a82c6b2a9780..c88cf147deed 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -415,20 +415,11 @@ void bpf_jit_compile(struct sk_filter *fp)
 		emit_reg_move(O7, r_saved_O7);
 
 		switch (filter[0].code) {
-		case BPF_S_RET_K:
-		case BPF_S_LD_W_LEN:
-		case BPF_S_ANC_PROTOCOL:
-		case BPF_S_ANC_PKTTYPE:
-		case BPF_S_ANC_IFINDEX:
-		case BPF_S_ANC_MARK:
-		case BPF_S_ANC_RXHASH:
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
-		case BPF_S_ANC_CPU:
-		case BPF_S_ANC_QUEUE:
-		case BPF_S_LD_W_ABS:
-		case BPF_S_LD_H_ABS:
-		case BPF_S_LD_B_ABS:
+		case BPF_RET | BPF_K:
+		case BPF_LD | BPF_W | BPF_LEN:
+		case BPF_LD | BPF_W | BPF_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			/* The first instruction sets the A register (or is
 			 * a "RET 'constant'")
 			 */
@@ -445,59 +436,60 @@ void bpf_jit_compile(struct sk_filter *fp)
 			unsigned int t_offset;
 			unsigned int f_offset;
 			u32 t_op, f_op;
+			u16 code = bpf_anc_helper(&filter[i]);
 			int ilen;
 
-			switch (filter[i].code) {
-			case BPF_S_ALU_ADD_X:	/* A += X; */
+			switch (code) {
+			case BPF_ALU | BPF_ADD | BPF_X:	/* A += X; */
 				emit_alu_X(ADD);
 				break;
-			case BPF_S_ALU_ADD_K:	/* A += K; */
+			case BPF_ALU | BPF_ADD | BPF_K:	/* A += K; */
 				emit_alu_K(ADD, K);
 				break;
-			case BPF_S_ALU_SUB_X:	/* A -= X; */
+			case BPF_ALU | BPF_SUB | BPF_X:	/* A -= X; */
 				emit_alu_X(SUB);
 				break;
-			case BPF_S_ALU_SUB_K:	/* A -= K */
+			case BPF_ALU | BPF_SUB | BPF_K:	/* A -= K */
 				emit_alu_K(SUB, K);
 				break;
-			case BPF_S_ALU_AND_X:	/* A &= X */
+			case BPF_ALU | BPF_AND | BPF_X:	/* A &= X */
 				emit_alu_X(AND);
 				break;
-			case BPF_S_ALU_AND_K:	/* A &= K */
+			case BPF_ALU | BPF_AND | BPF_K:	/* A &= K */
 				emit_alu_K(AND, K);
 				break;
-			case BPF_S_ALU_OR_X:	/* A |= X */
+			case BPF_ALU | BPF_OR | BPF_X:	/* A |= X */
 				emit_alu_X(OR);
 				break;
-			case BPF_S_ALU_OR_K:	/* A |= K */
+			case BPF_ALU | BPF_OR | BPF_K:	/* A |= K */
 				emit_alu_K(OR, K);
 				break;
-			case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
-			case BPF_S_ALU_XOR_X:
+			case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
+			case BPF_ALU | BPF_XOR | BPF_X:
 				emit_alu_X(XOR);
 				break;
-			case BPF_S_ALU_XOR_K:	/* A ^= K */
+			case BPF_ALU | BPF_XOR | BPF_K:	/* A ^= K */
 				emit_alu_K(XOR, K);
 				break;
-			case BPF_S_ALU_LSH_X:	/* A <<= X */
+			case BPF_ALU | BPF_LSH | BPF_X:	/* A <<= X */
 				emit_alu_X(SLL);
 				break;
-			case BPF_S_ALU_LSH_K:	/* A <<= K */
+			case BPF_ALU | BPF_LSH | BPF_K:	/* A <<= K */
 				emit_alu_K(SLL, K);
 				break;
-			case BPF_S_ALU_RSH_X:	/* A >>= X */
+			case BPF_ALU | BPF_RSH | BPF_X:	/* A >>= X */
 				emit_alu_X(SRL);
 				break;
-			case BPF_S_ALU_RSH_K:	/* A >>= K */
+			case BPF_ALU | BPF_RSH | BPF_K:	/* A >>= K */
 				emit_alu_K(SRL, K);
 				break;
-			case BPF_S_ALU_MUL_X:	/* A *= X; */
+			case BPF_ALU | BPF_MUL | BPF_X:	/* A *= X; */
 				emit_alu_X(MUL);
 				break;
-			case BPF_S_ALU_MUL_K:	/* A *= K */
+			case BPF_ALU | BPF_MUL | BPF_K:	/* A *= K */
 				emit_alu_K(MUL, K);
 				break;
-			case BPF_S_ALU_DIV_K:	/* A /= K with K != 0*/
+			case BPF_ALU | BPF_DIV | BPF_K:	/* A /= K with K != 0*/
 				if (K == 1)
 					break;
 				emit_write_y(G0);
@@ -512,7 +504,7 @@ void bpf_jit_compile(struct sk_filter *fp)
 #endif
 				emit_alu_K(DIV, K);
 				break;
-			case BPF_S_ALU_DIV_X:	/* A /= X; */
+			case BPF_ALU | BPF_DIV | BPF_X:	/* A /= X; */
 				emit_cmpi(r_X, 0);
 				if (pc_ret0 > 0) {
 					t_offset = addrs[pc_ret0 - 1];
@@ -544,10 +536,10 @@ void bpf_jit_compile(struct sk_filter *fp)
 #endif
 				emit_alu_X(DIV);
 				break;
-			case BPF_S_ALU_NEG:
+			case BPF_ALU | BPF_NEG:
 				emit_neg();
 				break;
-			case BPF_S_RET_K:
+			case BPF_RET | BPF_K:
 				if (!K) {
 					if (pc_ret0 == -1)
 						pc_ret0 = i;
@@ -556,7 +548,7 @@ void bpf_jit_compile(struct sk_filter *fp)
 					emit_loadimm(K, r_A);
 				}
 				/* Fallthrough */
-			case BPF_S_RET_A:
+			case BPF_RET | BPF_A:
 				if (seen_or_pass0) {
 					if (i != flen - 1) {
 						emit_jump(cleanup_addr);
@@ -573,18 +565,18 @@ void bpf_jit_compile(struct sk_filter *fp)
 				emit_jmpl(r_saved_O7, 8, G0);
 				emit_reg_move(r_A, O0); /* delay slot */
 				break;
-			case BPF_S_MISC_TAX:
+			case BPF_MISC | BPF_TAX:
 				seen |= SEEN_XREG;
 				emit_reg_move(r_A, r_X);
 				break;
-			case BPF_S_MISC_TXA:
+			case BPF_MISC | BPF_TXA:
 				seen |= SEEN_XREG;
 				emit_reg_move(r_X, r_A);
 				break;
-			case BPF_S_ANC_CPU:
+			case BPF_ANC | SKF_AD_CPU:
 				emit_load_cpu(r_A);
 				break;
-			case BPF_S_ANC_PROTOCOL:
+			case BPF_ANC | SKF_AD_PROTOCOL:
 				emit_skb_load16(protocol, r_A);
 				break;
 #if 0
@@ -592,38 +584,38 @@ void bpf_jit_compile(struct sk_filter *fp)
 				 * a bit field even though we very much
 				 * know what we are doing here.
 				 */
-			case BPF_S_ANC_PKTTYPE:
+			case BPF_ANC | SKF_AD_PKTTYPE:
 				__emit_skb_load8(pkt_type, r_A);
 				emit_alu_K(SRL, 5);
 				break;
 #endif
-			case BPF_S_ANC_IFINDEX:
+			case BPF_ANC | SKF_AD_IFINDEX:
 				emit_skb_loadptr(dev, r_A);
 				emit_cmpi(r_A, 0);
 				emit_branch(BNE_PTR, cleanup_addr + 4);
 				emit_nop();
 				emit_load32(r_A, struct net_device, ifindex, r_A);
 				break;
-			case BPF_S_ANC_MARK:
+			case BPF_ANC | SKF_AD_MARK:
 				emit_skb_load32(mark, r_A);
 				break;
-			case BPF_S_ANC_QUEUE:
+			case BPF_ANC | SKF_AD_QUEUE:
 				emit_skb_load16(queue_mapping, r_A);
 				break;
-			case BPF_S_ANC_HATYPE:
+			case BPF_ANC | SKF_AD_HATYPE:
 				emit_skb_loadptr(dev, r_A);
 				emit_cmpi(r_A, 0);
 				emit_branch(BNE_PTR, cleanup_addr + 4);
 				emit_nop();
 				emit_load16(r_A, struct net_device, type, r_A);
 				break;
-			case BPF_S_ANC_RXHASH:
+			case BPF_ANC | SKF_AD_RXHASH:
 				emit_skb_load32(hash, r_A);
 				break;
-			case BPF_S_ANC_VLAN_TAG:
-			case BPF_S_ANC_VLAN_TAG_PRESENT:
+			case BPF_ANC | SKF_AD_VLAN_TAG:
+			case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 				emit_skb_load16(vlan_tci, r_A);
-				if (filter[i].code == BPF_S_ANC_VLAN_TAG) {
+				if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
 					emit_andi(r_A, VLAN_VID_MASK, r_A);
 				} else {
 					emit_loadimm(VLAN_TAG_PRESENT, r_TMP);
@@ -631,44 +623,44 @@ void bpf_jit_compile(struct sk_filter *fp)
 				}
 				break;
 
-			case BPF_S_LD_IMM:
+			case BPF_LD | BPF_IMM:
 				emit_loadimm(K, r_A);
 				break;
-			case BPF_S_LDX_IMM:
+			case BPF_LDX | BPF_IMM:
 				emit_loadimm(K, r_X);
 				break;
-			case BPF_S_LD_MEM:
+			case BPF_LD | BPF_MEM:
 				emit_ldmem(K * 4, r_A);
 				break;
-			case BPF_S_LDX_MEM:
+			case BPF_LDX | BPF_MEM:
 				emit_ldmem(K * 4, r_X);
 				break;
-			case BPF_S_ST:
+			case BPF_ST:
 				emit_stmem(K * 4, r_A);
 				break;
-			case BPF_S_STX:
+			case BPF_STX:
 				emit_stmem(K * 4, r_X);
 				break;
 
 #define CHOOSE_LOAD_FUNC(K, func) \
 	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
-			case BPF_S_LD_W_ABS:
+			case BPF_LD | BPF_W | BPF_ABS:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word);
 common_load:			seen |= SEEN_DATAREF;
 				emit_loadimm(K, r_OFF);
 				emit_call(func);
 				break;
-			case BPF_S_LD_H_ABS:
+			case BPF_LD | BPF_H | BPF_ABS:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half);
 				goto common_load;
-			case BPF_S_LD_B_ABS:
+			case BPF_LD | BPF_B | BPF_ABS:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte);
 				goto common_load;
-			case BPF_S_LDX_B_MSH:
+			case BPF_LDX | BPF_B | BPF_MSH:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh);
 				goto common_load;
-			case BPF_S_LD_W_IND:
+			case BPF_LD | BPF_W | BPF_IND:
 				func = bpf_jit_load_word;
 common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
 				if (K) {
@@ -683,13 +675,13 @@ common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
 				}
 				emit_call(func);
 				break;
-			case BPF_S_LD_H_IND:
+			case BPF_LD | BPF_H | BPF_IND:
 				func = bpf_jit_load_half;
 				goto common_load_ind;
-			case BPF_S_LD_B_IND:
+			case BPF_LD | BPF_B | BPF_IND:
 				func = bpf_jit_load_byte;
 				goto common_load_ind;
-			case BPF_S_JMP_JA:
+			case BPF_JMP | BPF_JA:
 				emit_jump(addrs[i + K]);
 				emit_nop();
 				break;
@@ -700,14 +692,14 @@ common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
 		f_op = FOP;		\
 		goto cond_branch
 
-			COND_SEL(BPF_S_JMP_JGT_K, BGU, BLEU);
-			COND_SEL(BPF_S_JMP_JGE_K, BGEU, BLU);
-			COND_SEL(BPF_S_JMP_JEQ_K, BE, BNE);
-			COND_SEL(BPF_S_JMP_JSET_K, BNE, BE);
-			COND_SEL(BPF_S_JMP_JGT_X, BGU, BLEU);
-			COND_SEL(BPF_S_JMP_JGE_X, BGEU, BLU);
-			COND_SEL(BPF_S_JMP_JEQ_X, BE, BNE);
-			COND_SEL(BPF_S_JMP_JSET_X, BNE, BE);
+			COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU);
+			COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU);
+			COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE);
+			COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE);
+			COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU);
+			COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU);
+			COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE);
+			COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE);
 
 cond_branch:			f_offset = addrs[i + filter[i].jf];
 				t_offset = addrs[i + filter[i].jt];
@@ -719,20 +711,20 @@ cond_branch:			f_offset = addrs[i + filter[i].jf];
 					break;
 				}
 
-				switch (filter[i].code) {
-				case BPF_S_JMP_JGT_X:
-				case BPF_S_JMP_JGE_X:
-				case BPF_S_JMP_JEQ_X:
+				switch (code) {
+				case BPF_JMP | BPF_JGT | BPF_X:
+				case BPF_JMP | BPF_JGE | BPF_X:
+				case BPF_JMP | BPF_JEQ | BPF_X:
 					seen |= SEEN_XREG;
 					emit_cmp(r_A, r_X);
 					break;
-				case BPF_S_JMP_JSET_X:
+				case BPF_JMP | BPF_JSET | BPF_X:
 					seen |= SEEN_XREG;
 					emit_btst(r_A, r_X);
 					break;
-				case BPF_S_JMP_JEQ_K:
-				case BPF_S_JMP_JGT_K:
-				case BPF_S_JMP_JGE_K:
+				case BPF_JMP | BPF_JEQ | BPF_K:
+				case BPF_JMP | BPF_JGT | BPF_K:
+				case BPF_JMP | BPF_JGE | BPF_K:
 					if (is_simm13(K)) {
 						emit_cmpi(r_A, K);
 					} else {
@@ -740,7 +732,7 @@ cond_branch:			f_offset = addrs[i + filter[i].jf];
 						emit_cmp(r_A, r_TMP);
 					}
 					break;
-				case BPF_S_JMP_JSET_K:
+				case BPF_JMP | BPF_JSET | BPF_K:
 					if (is_simm13(K)) {
 						emit_btsti(r_A, K);
 					} else {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 625f4de9bdf2..49ef7a298c92 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -197,7 +197,6 @@ int sk_detach_filter(struct sock *sk);
 int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
 int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 		  unsigned int len);
-void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
 
 void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
@@ -205,6 +204,41 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 void bpf_int_jit_compile(struct sk_filter *fp);
 
+#define BPF_ANC		BIT(15)
+
+static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
+{
+	BUG_ON(ftest->code & BPF_ANC);
+
+	switch (ftest->code) {
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
+#define BPF_ANCILLARY(CODE)	case SKF_AD_OFF + SKF_AD_##CODE:	\
+				return BPF_ANC | SKF_AD_##CODE
+		switch (ftest->k) {
+		BPF_ANCILLARY(PROTOCOL);
+		BPF_ANCILLARY(PKTTYPE);
+		BPF_ANCILLARY(IFINDEX);
+		BPF_ANCILLARY(NLATTR);
+		BPF_ANCILLARY(NLATTR_NEST);
+		BPF_ANCILLARY(MARK);
+		BPF_ANCILLARY(QUEUE);
+		BPF_ANCILLARY(HATYPE);
+		BPF_ANCILLARY(RXHASH);
+		BPF_ANCILLARY(CPU);
+		BPF_ANCILLARY(ALU_XOR_X);
+		BPF_ANCILLARY(VLAN_TAG);
+		BPF_ANCILLARY(VLAN_TAG_PRESENT);
+		BPF_ANCILLARY(PAY_OFFSET);
+		BPF_ANCILLARY(RANDOM);
+		}
+		/* Fallthrough. */
+	default:
+		return ftest->code;
+	}
+}
+
 #ifdef CONFIG_BPF_JIT
 #include <stdarg.h>
 #include <linux/linkage.h>
@@ -224,86 +258,20 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
 }
 #else
 #include <linux/slab.h>
+
 static inline void bpf_jit_compile(struct sk_filter *fp)
 {
 }
+
 static inline void bpf_jit_free(struct sk_filter *fp)
 {
 	kfree(fp);
 }
-#endif
+#endif /* CONFIG_BPF_JIT */
 
 static inline int bpf_tell_extensions(void)
 {
 	return SKF_AD_MAX;
 }
 
-enum {
-	BPF_S_RET_K = 1,
-	BPF_S_RET_A,
-	BPF_S_ALU_ADD_K,
-	BPF_S_ALU_ADD_X,
-	BPF_S_ALU_SUB_K,
-	BPF_S_ALU_SUB_X,
-	BPF_S_ALU_MUL_K,
-	BPF_S_ALU_MUL_X,
-	BPF_S_ALU_DIV_X,
-	BPF_S_ALU_MOD_K,
-	BPF_S_ALU_MOD_X,
-	BPF_S_ALU_AND_K,
-	BPF_S_ALU_AND_X,
-	BPF_S_ALU_OR_K,
-	BPF_S_ALU_OR_X,
-	BPF_S_ALU_XOR_K,
-	BPF_S_ALU_XOR_X,
-	BPF_S_ALU_LSH_K,
-	BPF_S_ALU_LSH_X,
-	BPF_S_ALU_RSH_K,
-	BPF_S_ALU_RSH_X,
-	BPF_S_ALU_NEG,
-	BPF_S_LD_W_ABS,
-	BPF_S_LD_H_ABS,
-	BPF_S_LD_B_ABS,
-	BPF_S_LD_W_LEN,
-	BPF_S_LD_W_IND,
-	BPF_S_LD_H_IND,
-	BPF_S_LD_B_IND,
-	BPF_S_LD_IMM,
-	BPF_S_LDX_W_LEN,
-	BPF_S_LDX_B_MSH,
-	BPF_S_LDX_IMM,
-	BPF_S_MISC_TAX,
-	BPF_S_MISC_TXA,
-	BPF_S_ALU_DIV_K,
-	BPF_S_LD_MEM,
-	BPF_S_LDX_MEM,
-	BPF_S_ST,
-	BPF_S_STX,
-	BPF_S_JMP_JA,
-	BPF_S_JMP_JEQ_K,
-	BPF_S_JMP_JEQ_X,
-	BPF_S_JMP_JGE_K,
-	BPF_S_JMP_JGE_X,
-	BPF_S_JMP_JGT_K,
-	BPF_S_JMP_JGT_X,
-	BPF_S_JMP_JSET_K,
-	BPF_S_JMP_JSET_X,
-	/* Ancillary data */
-	BPF_S_ANC_PROTOCOL,
-	BPF_S_ANC_PKTTYPE,
-	BPF_S_ANC_IFINDEX,
-	BPF_S_ANC_NLATTR,
-	BPF_S_ANC_NLATTR_NEST,
-	BPF_S_ANC_MARK,
-	BPF_S_ANC_QUEUE,
-	BPF_S_ANC_HATYPE,
-	BPF_S_ANC_RXHASH,
-	BPF_S_ANC_CPU,
-	BPF_S_ANC_ALU_XOR_X,
-	BPF_S_ANC_VLAN_TAG,
-	BPF_S_ANC_VLAN_TAG_PRESENT,
-	BPF_S_ANC_PAY_OFFSET,
-	BPF_S_ANC_RANDOM,
-};
-
 #endif /* __LINUX_FILTER_H__ */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 1036b6f2fded..44e69483b383 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -103,60 +103,59 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
 		u32 k = ftest->k;
 
 		switch (code) {
-		case BPF_S_LD_W_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
 			ftest->code = BPF_LDX | BPF_W | BPF_ABS;
 			/* 32-bit aligned and not out of bounds. */
 			if (k >= sizeof(struct seccomp_data) || k & 3)
 				return -EINVAL;
 			continue;
-		case BPF_S_LD_W_LEN:
+		case BPF_LD | BPF_W | BPF_LEN:
 			ftest->code = BPF_LD | BPF_IMM;
 			ftest->k = sizeof(struct seccomp_data);
 			continue;
-		case BPF_S_LDX_W_LEN:
+		case BPF_LDX | BPF_W | BPF_LEN:
 			ftest->code = BPF_LDX | BPF_IMM;
 			ftest->k = sizeof(struct seccomp_data);
 			continue;
 		/* Explicitly include allowed calls. */
-		case BPF_S_RET_K:
-		case BPF_S_RET_A:
-		case BPF_S_ALU_ADD_K:
-		case BPF_S_ALU_ADD_X:
-		case BPF_S_ALU_SUB_K:
-		case BPF_S_ALU_SUB_X:
-		case BPF_S_ALU_MUL_K:
-		case BPF_S_ALU_MUL_X:
-		case BPF_S_ALU_DIV_X:
-		case BPF_S_ALU_AND_K:
-		case BPF_S_ALU_AND_X:
-		case BPF_S_ALU_OR_K:
-		case BPF_S_ALU_OR_X:
-		case BPF_S_ALU_XOR_K:
-		case BPF_S_ALU_XOR_X:
-		case BPF_S_ALU_LSH_K:
-		case BPF_S_ALU_LSH_X:
-		case BPF_S_ALU_RSH_K:
-		case BPF_S_ALU_RSH_X:
-		case BPF_S_ALU_NEG:
-		case BPF_S_LD_IMM:
-		case BPF_S_LDX_IMM:
-		case BPF_S_MISC_TAX:
-		case BPF_S_MISC_TXA:
-		case BPF_S_ALU_DIV_K:
-		case BPF_S_LD_MEM:
-		case BPF_S_LDX_MEM:
-		case BPF_S_ST:
-		case BPF_S_STX:
-		case BPF_S_JMP_JA:
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
-		case BPF_S_JMP_JSET_K:
-		case BPF_S_JMP_JSET_X:
-			sk_decode_filter(ftest, ftest);
+		case BPF_RET | BPF_K:
+		case BPF_RET | BPF_A:
+		case BPF_ALU | BPF_ADD | BPF_K:
+		case BPF_ALU | BPF_ADD | BPF_X:
+		case BPF_ALU | BPF_SUB | BPF_K:
+		case BPF_ALU | BPF_SUB | BPF_X:
+		case BPF_ALU | BPF_MUL | BPF_K:
+		case BPF_ALU | BPF_MUL | BPF_X:
+		case BPF_ALU | BPF_DIV | BPF_K:
+		case BPF_ALU | BPF_DIV | BPF_X:
+		case BPF_ALU | BPF_AND | BPF_K:
+		case BPF_ALU | BPF_AND | BPF_X:
+		case BPF_ALU | BPF_OR | BPF_K:
+		case BPF_ALU | BPF_OR | BPF_X:
+		case BPF_ALU | BPF_XOR | BPF_K:
+		case BPF_ALU | BPF_XOR | BPF_X:
+		case BPF_ALU | BPF_LSH | BPF_K:
+		case BPF_ALU | BPF_LSH | BPF_X:
+		case BPF_ALU | BPF_RSH | BPF_K:
+		case BPF_ALU | BPF_RSH | BPF_X:
+		case BPF_ALU | BPF_NEG:
+		case BPF_LD | BPF_IMM:
+		case BPF_LDX | BPF_IMM:
+		case BPF_MISC | BPF_TAX:
+		case BPF_MISC | BPF_TXA:
+		case BPF_LD | BPF_MEM:
+		case BPF_LDX | BPF_MEM:
+		case BPF_ST:
+		case BPF_STX:
+		case BPF_JMP | BPF_JA:
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
 			continue;
 		default:
 			return -EINVAL;
diff --git a/net/core/filter.c b/net/core/filter.c
index 2c2d35d9d101..328aaf6ff4d1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -536,11 +536,13 @@ load_word:
 		 * Output:
 		 *   BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
 		 */
+
 		ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
 		if (likely(ptr != NULL)) {
 			BPF_R0 = get_unaligned_be32(ptr);
 			CONT;
 		}
+
 		return 0;
 	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */
 		off = K;
@@ -550,6 +552,7 @@ load_half:
 			BPF_R0 = get_unaligned_be16(ptr);
 			CONT;
 		}
+
 		return 0;
 	LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */
 		off = K;
@@ -559,6 +562,7 @@ load_byte:
 			BPF_R0 = *(u8 *)ptr;
 			CONT;
 		}
+
 		return 0;
 	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */
 		off = K + X;
@@ -1136,44 +1140,46 @@ err:
  */
 static int check_load_and_stores(struct sock_filter *filter, int flen)
 {
-	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
 	int pc, ret = 0;
 
 	BUILD_BUG_ON(BPF_MEMWORDS > 16);
+
 	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
 	if (!masks)
 		return -ENOMEM;
+
 	memset(masks, 0xff, flen * sizeof(*masks));
 
 	for (pc = 0; pc < flen; pc++) {
 		memvalid &= masks[pc];
 
 		switch (filter[pc].code) {
-		case BPF_S_ST:
-		case BPF_S_STX:
+		case BPF_ST:
+		case BPF_STX:
 			memvalid |= (1 << filter[pc].k);
 			break;
-		case BPF_S_LD_MEM:
-		case BPF_S_LDX_MEM:
+		case BPF_LD | BPF_MEM:
+		case BPF_LDX | BPF_MEM:
 			if (!(memvalid & (1 << filter[pc].k))) {
 				ret = -EINVAL;
 				goto error;
 			}
 			break;
-		case BPF_S_JMP_JA:
-			/* a jump must set masks on target */
+		case BPF_JMP | BPF_JA:
+			/* A jump must set masks on target */
 			masks[pc + 1 + filter[pc].k] &= memvalid;
 			memvalid = ~0;
 			break;
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
-		case BPF_S_JMP_JSET_X:
-		case BPF_S_JMP_JSET_K:
-			/* a jump must set masks on targets */
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
+			/* A jump must set masks on targets */
 			masks[pc + 1 + filter[pc].jt] &= memvalid;
 			masks[pc + 1 + filter[pc].jf] &= memvalid;
 			memvalid = ~0;
@@ -1185,6 +1191,72 @@ error:
 	return ret;
 }
 
+static bool chk_code_allowed(u16 code_to_probe)
+{
+	static const bool codes[] = {
+		/* 32 bit ALU operations */
+		[BPF_ALU | BPF_ADD | BPF_K] = true,
+		[BPF_ALU | BPF_ADD | BPF_X] = true,
+		[BPF_ALU | BPF_SUB | BPF_K] = true,
+		[BPF_ALU | BPF_SUB | BPF_X] = true,
+		[BPF_ALU | BPF_MUL | BPF_K] = true,
+		[BPF_ALU | BPF_MUL | BPF_X] = true,
+		[BPF_ALU | BPF_DIV | BPF_K] = true,
+		[BPF_ALU | BPF_DIV | BPF_X] = true,
+		[BPF_ALU | BPF_MOD | BPF_K] = true,
+		[BPF_ALU | BPF_MOD | BPF_X] = true,
+		[BPF_ALU | BPF_AND | BPF_K] = true,
+		[BPF_ALU | BPF_AND | BPF_X] = true,
+		[BPF_ALU | BPF_OR | BPF_K] = true,
+		[BPF_ALU | BPF_OR | BPF_X] = true,
+		[BPF_ALU | BPF_XOR | BPF_K] = true,
+		[BPF_ALU | BPF_XOR | BPF_X] = true,
+		[BPF_ALU | BPF_LSH | BPF_K] = true,
+		[BPF_ALU | BPF_LSH | BPF_X] = true,
+		[BPF_ALU | BPF_RSH | BPF_K] = true,
+		[BPF_ALU | BPF_RSH | BPF_X] = true,
+		[BPF_ALU | BPF_NEG] = true,
+		/* Load instructions */
+		[BPF_LD | BPF_W | BPF_ABS] = true,
+		[BPF_LD | BPF_H | BPF_ABS] = true,
+		[BPF_LD | BPF_B | BPF_ABS] = true,
+		[BPF_LD | BPF_W | BPF_LEN] = true,
+		[BPF_LD | BPF_W | BPF_IND] = true,
+		[BPF_LD | BPF_H | BPF_IND] = true,
+		[BPF_LD | BPF_B | BPF_IND] = true,
+		[BPF_LD | BPF_IMM] = true,
+		[BPF_LD | BPF_MEM] = true,
+		[BPF_LDX | BPF_W | BPF_LEN] = true,
+		[BPF_LDX | BPF_B | BPF_MSH] = true,
+		[BPF_LDX | BPF_IMM] = true,
+		[BPF_LDX | BPF_MEM] = true,
+		/* Store instructions */
+		[BPF_ST] = true,
+		[BPF_STX] = true,
+		/* Misc instructions */
+		[BPF_MISC | BPF_TAX] = true,
+		[BPF_MISC | BPF_TXA] = true,
+		/* Return instructions */
+		[BPF_RET | BPF_K] = true,
+		[BPF_RET | BPF_A] = true,
+		/* Jump instructions */
+		[BPF_JMP | BPF_JA] = true,
+		[BPF_JMP | BPF_JEQ | BPF_K] = true,
+		[BPF_JMP | BPF_JEQ | BPF_X] = true,
+		[BPF_JMP | BPF_JGE | BPF_K] = true,
+		[BPF_JMP | BPF_JGE | BPF_X] = true,
+		[BPF_JMP | BPF_JGT | BPF_K] = true,
+		[BPF_JMP | BPF_JGT | BPF_X] = true,
+		[BPF_JMP | BPF_JSET | BPF_K] = true,
+		[BPF_JMP | BPF_JSET | BPF_X] = true,
+	};
+
+	if (code_to_probe >= ARRAY_SIZE(codes))
+		return false;
+
+	return codes[code_to_probe];
+}
+
 /**
  *	sk_chk_filter - verify socket filter code
  *	@filter: filter to verify
@@ -1201,154 +1273,76 @@ error:
  */
 int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 {
-	/*
-	 * Valid instructions are initialized to non-0.
-	 * Invalid instructions are initialized to 0.
-	 */
-	static const u8 codes[] = {
-		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
-		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
-		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
-		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
-		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
-		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
-		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
-		[BPF_ALU|BPF_MOD|BPF_K]  = BPF_S_ALU_MOD_K,
-		[BPF_ALU|BPF_MOD|BPF_X]  = BPF_S_ALU_MOD_X,
-		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
-		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
-		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
-		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
-		[BPF_ALU|BPF_XOR|BPF_K]  = BPF_S_ALU_XOR_K,
-		[BPF_ALU|BPF_XOR|BPF_X]  = BPF_S_ALU_XOR_X,
-		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
-		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
-		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
-		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
-		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
-		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
-		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
-		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
-		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
-		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
-		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
-		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
-		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
-		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
-		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
-		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
-		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
-		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
-		[BPF_RET|BPF_K]          = BPF_S_RET_K,
-		[BPF_RET|BPF_A]          = BPF_S_RET_A,
-		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
-		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
-		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
-		[BPF_ST]                 = BPF_S_ST,
-		[BPF_STX]                = BPF_S_STX,
-		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
-		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
-		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
-		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
-		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
-		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
-		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
-		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
-		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
-	};
-	int pc;
 	bool anc_found;
+	int pc;
 
 	if (flen == 0 || flen > BPF_MAXINSNS)
 		return -EINVAL;
 
-	/* check the filter code now */
+	/* Check the filter code now */
 	for (pc = 0; pc < flen; pc++) {
 		struct sock_filter *ftest = &filter[pc];
-		u16 code = ftest->code;
 
-		if (code >= ARRAY_SIZE(codes))
-			return -EINVAL;
-		code = codes[code];
-		if (!code)
+		/* May we actually operate on this code? */
+		if (!chk_code_allowed(ftest->code))
 			return -EINVAL;
+
 		/* Some instructions need special checks */
-		switch (code) {
-		case BPF_S_ALU_DIV_K:
-		case BPF_S_ALU_MOD_K:
-			/* check for division by zero */
+		switch (ftest->code) {
+		case BPF_ALU | BPF_DIV | BPF_K:
+		case BPF_ALU | BPF_MOD | BPF_K:
+			/* Check for division by zero */
 			if (ftest->k == 0)
 				return -EINVAL;
 			break;
-		case BPF_S_LD_MEM:
-		case BPF_S_LDX_MEM:
-		case BPF_S_ST:
-		case BPF_S_STX:
-			/* check for invalid memory addresses */
+		case BPF_LD | BPF_MEM:
+		case BPF_LDX | BPF_MEM:
+		case BPF_ST:
+		case BPF_STX:
+			/* Check for invalid memory addresses */
 			if (ftest->k >= BPF_MEMWORDS)
 				return -EINVAL;
 			break;
-		case BPF_S_JMP_JA:
-			/*
-			 * Note, the large ftest->k might cause loops.
+		case BPF_JMP | BPF_JA:
+			/* Note, the large ftest->k might cause loops.
 			 * Compare this with conditional jumps below,
 			 * where offsets are limited. --ANK (981016)
 			 */
-			if (ftest->k >= (unsigned int)(flen-pc-1))
+			if (ftest->k >= (unsigned int)(flen - pc - 1))
 				return -EINVAL;
 			break;
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
-		case BPF_S_JMP_JSET_X:
-		case BPF_S_JMP_JSET_K:
-			/* for conditionals both must be safe */
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
+			/* Both conditionals must be safe */
 			if (pc + ftest->jt + 1 >= flen ||
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
 			break;
-		case BPF_S_LD_W_ABS:
-		case BPF_S_LD_H_ABS:
-		case BPF_S_LD_B_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			anc_found = false;
-#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
-				code = BPF_S_ANC_##CODE;	\
-				anc_found = true;		\
-				break
-			switch (ftest->k) {
-			ANCILLARY(PROTOCOL);
-			ANCILLARY(PKTTYPE);
-			ANCILLARY(IFINDEX);
-			ANCILLARY(NLATTR);
-			ANCILLARY(NLATTR_NEST);
-			ANCILLARY(MARK);
-			ANCILLARY(QUEUE);
-			ANCILLARY(HATYPE);
-			ANCILLARY(RXHASH);
-			ANCILLARY(CPU);
-			ANCILLARY(ALU_XOR_X);
-			ANCILLARY(VLAN_TAG);
-			ANCILLARY(VLAN_TAG_PRESENT);
-			ANCILLARY(PAY_OFFSET);
-			ANCILLARY(RANDOM);
-			}
-
-			/* ancillary operation unknown or unsupported */
+			if (bpf_anc_helper(ftest) & BPF_ANC)
+				anc_found = true;
+			/* Ancillary operation unknown or unsupported */
 			if (anc_found == false && ftest->k >= SKF_AD_OFF)
 				return -EINVAL;
 		}
-		ftest->code = code;
 	}
 
-	/* last instruction must be a RET code */
+	/* Last instruction must be a RET code */
 	switch (filter[flen - 1].code) {
-	case BPF_S_RET_K:
-	case BPF_S_RET_A:
+	case BPF_RET | BPF_K:
+	case BPF_RET | BPF_A:
 		return check_load_and_stores(filter, flen);
 	}
+
 	return -EINVAL;
 }
 EXPORT_SYMBOL(sk_chk_filter);
@@ -1448,7 +1442,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
 {
 	struct sock_filter *old_prog;
 	struct sk_filter *old_fp;
-	int i, err, new_len, old_len = fp->len;
+	int err, new_len, old_len = fp->len;
 
 	/* We are free to overwrite insns et al right here as it
 	 * won't be used at this point in time anymore internally
@@ -1458,13 +1452,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
 	BUILD_BUG_ON(sizeof(struct sock_filter) !=
 		     sizeof(struct sock_filter_int));
 
-	/* For now, we need to unfiddle BPF_S_* identifiers in place.
-	 * This can sooner or later on be subject to removal, e.g. when
-	 * JITs have been converted.
-	 */
-	for (i = 0; i < fp->len; i++)
-		sk_decode_filter(&fp->insns[i], &fp->insns[i]);
-
 	/* Conversion cannot happen on overlapping memory areas,
 	 * so we need to keep the user BPF around until the 2nd
 	 * pass. At this time, the user BPF is stored in fp->insns.
@@ -1706,84 +1693,6 @@ int sk_detach_filter(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(sk_detach_filter);
 
-void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
-{
-	static const u16 decodes[] = {
-		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
-		[BPF_S_ALU_ADD_X]	= BPF_ALU|BPF_ADD|BPF_X,
-		[BPF_S_ALU_SUB_K]	= BPF_ALU|BPF_SUB|BPF_K,
-		[BPF_S_ALU_SUB_X]	= BPF_ALU|BPF_SUB|BPF_X,
-		[BPF_S_ALU_MUL_K]	= BPF_ALU|BPF_MUL|BPF_K,
-		[BPF_S_ALU_MUL_X]	= BPF_ALU|BPF_MUL|BPF_X,
-		[BPF_S_ALU_DIV_X]	= BPF_ALU|BPF_DIV|BPF_X,
-		[BPF_S_ALU_MOD_K]	= BPF_ALU|BPF_MOD|BPF_K,
-		[BPF_S_ALU_MOD_X]	= BPF_ALU|BPF_MOD|BPF_X,
-		[BPF_S_ALU_AND_K]	= BPF_ALU|BPF_AND|BPF_K,
-		[BPF_S_ALU_AND_X]	= BPF_ALU|BPF_AND|BPF_X,
-		[BPF_S_ALU_OR_K]	= BPF_ALU|BPF_OR|BPF_K,
-		[BPF_S_ALU_OR_X]	= BPF_ALU|BPF_OR|BPF_X,
-		[BPF_S_ALU_XOR_K]	= BPF_ALU|BPF_XOR|BPF_K,
-		[BPF_S_ALU_XOR_X]	= BPF_ALU|BPF_XOR|BPF_X,
-		[BPF_S_ALU_LSH_K]	= BPF_ALU|BPF_LSH|BPF_K,
-		[BPF_S_ALU_LSH_X]	= BPF_ALU|BPF_LSH|BPF_X,
-		[BPF_S_ALU_RSH_K]	= BPF_ALU|BPF_RSH|BPF_K,
-		[BPF_S_ALU_RSH_X]	= BPF_ALU|BPF_RSH|BPF_X,
-		[BPF_S_ALU_NEG]		= BPF_ALU|BPF_NEG,
-		[BPF_S_LD_W_ABS]	= BPF_LD|BPF_W|BPF_ABS,
-		[BPF_S_LD_H_ABS]	= BPF_LD|BPF_H|BPF_ABS,
-		[BPF_S_LD_B_ABS]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_PROTOCOL]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_PKTTYPE]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_IFINDEX]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_NLATTR]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_NLATTR_NEST]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_MARK]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_QUEUE]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_HATYPE]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_RXHASH]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_CPU]		= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_ALU_XOR_X]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_PAY_OFFSET]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_RANDOM]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
-		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
-		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
-		[BPF_S_LD_B_IND]	= BPF_LD|BPF_B|BPF_IND,
-		[BPF_S_LD_IMM]		= BPF_LD|BPF_IMM,
-		[BPF_S_LDX_W_LEN]	= BPF_LDX|BPF_W|BPF_LEN,
-		[BPF_S_LDX_B_MSH]	= BPF_LDX|BPF_B|BPF_MSH,
-		[BPF_S_LDX_IMM]		= BPF_LDX|BPF_IMM,
-		[BPF_S_MISC_TAX]	= BPF_MISC|BPF_TAX,
-		[BPF_S_MISC_TXA]	= BPF_MISC|BPF_TXA,
-		[BPF_S_RET_K]		= BPF_RET|BPF_K,
-		[BPF_S_RET_A]		= BPF_RET|BPF_A,
-		[BPF_S_ALU_DIV_K]	= BPF_ALU|BPF_DIV|BPF_K,
-		[BPF_S_LD_MEM]		= BPF_LD|BPF_MEM,
-		[BPF_S_LDX_MEM]		= BPF_LDX|BPF_MEM,
-		[BPF_S_ST]		= BPF_ST,
-		[BPF_S_STX]		= BPF_STX,
-		[BPF_S_JMP_JA]		= BPF_JMP|BPF_JA,
-		[BPF_S_JMP_JEQ_K]	= BPF_JMP|BPF_JEQ|BPF_K,
-		[BPF_S_JMP_JEQ_X]	= BPF_JMP|BPF_JEQ|BPF_X,
-		[BPF_S_JMP_JGE_K]	= BPF_JMP|BPF_JGE|BPF_K,
-		[BPF_S_JMP_JGE_X]	= BPF_JMP|BPF_JGE|BPF_X,
-		[BPF_S_JMP_JGT_K]	= BPF_JMP|BPF_JGT|BPF_K,
-		[BPF_S_JMP_JGT_X]	= BPF_JMP|BPF_JGT|BPF_X,
-		[BPF_S_JMP_JSET_K]	= BPF_JMP|BPF_JSET|BPF_K,
-		[BPF_S_JMP_JSET_X]	= BPF_JMP|BPF_JSET|BPF_X,
-	};
-	u16 code;
-
-	code = filt->code;
-
-	to->code = decodes[code];
-	to->jt = filt->jt;
-	to->jf = filt->jf;
-	to->k = filt->k;
-}
-
 int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
 		  unsigned int len)
 {
-- 
cgit v1.2.3-71-gd317


From f8f6d679aaa78b989d9aee8d2935066fbdca2a30 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 29 May 2014 10:22:51 +0200
Subject: net: filter: improve filter block macros

Commit 9739eef13c92 ("net: filter: make BPF conversion more readable")
started to introduce helper macros similar to BPF_STMT()/BPF_JUMP()
macros from classic BPF.

However, quite some statements in the filter conversion functions
remained in the old style which gives a mixture of block macros and
non block macros in the code. This patch makes the block macros itself
more readable by using explicit member initialization, and converts
the remaining ones where possible to remain in a more consistent state.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 255 +++++++++++++++++++++++++++++++++++++++----------
 net/core/filter.c      | 196 ++++++++++++++-----------------------
 2 files changed, 277 insertions(+), 174 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 49ef7a298c92..f0c2ad43b4af 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -76,56 +76,211 @@ enum {
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
-/* bpf_add|sub|...: a += x, bpf_mov: a = x */
-#define BPF_ALU64_REG(op, a, x) \
-	((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_X, a, x, 0, 0})
-#define BPF_ALU32_REG(op, a, x) \
-	((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_X, a, x, 0, 0})
-
-/* bpf_add|sub|...: a += imm, bpf_mov: a = imm */
-#define BPF_ALU64_IMM(op, a, imm) \
-	((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_K, a, 0, 0, imm})
-#define BPF_ALU32_IMM(op, a, imm) \
-	((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_K, a, 0, 0, imm})
-
-/* R0 = *(uint *) (skb->data + off) */
-#define BPF_LD_ABS(size, off) \
-	((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_ABS, 0, 0, 0, off})
-
-/* R0 = *(uint *) (skb->data + x + off) */
-#define BPF_LD_IND(size, x, off) \
-	((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_IND, 0, x, 0, off})
-
-/* a = *(uint *) (x + off) */
-#define BPF_LDX_MEM(sz, a, x, off) \
-	((struct sock_filter_int) {BPF_LDX|BPF_SIZE(sz)|BPF_MEM, a, x, off, 0})
-
-/* if (a 'op' x) goto pc+off */
-#define BPF_JMP_REG(op, a, x, off) \
-	((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_X, a, x, off, 0})
-
-/* if (a 'op' imm) goto pc+off */
-#define BPF_JMP_IMM(op, a, imm, off) \
-	((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_K, a, 0, off, imm})
-
-#define BPF_EXIT_INSN() \
-	((struct sock_filter_int) {BPF_JMP|BPF_EXIT, 0, 0, 0, 0})
-
-static inline int size_to_bpf(int size)
-{
-	switch (size) {
-	case 1:
-		return BPF_B;
-	case 2:
-		return BPF_H;
-	case 4:
-		return BPF_W;
-	case 8:
-		return BPF_DW;
-	default:
-		return -EINVAL;
-	}
-}
+/* Helper macros for filter block array initializers. */
+
+/* ALU ops on registers, bpf_add|sub|...: A += X */
+
+#define BPF_ALU64_REG(OP, A, X)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,	\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+#define BPF_ALU32_REG(OP, A, X)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_OP(OP) | BPF_X,		\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: A += IMM */
+
+#define BPF_ALU64_IMM(OP, A, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_ALU32_IMM(OP, A, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
+
+#define BPF_ENDIAN(TYPE, A, LEN)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_END | BPF_SRC(TYPE),	\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = LEN })
+
+/* Short form of mov, A = X */
+
+#define BPF_MOV64_REG(A, X)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+#define BPF_MOV32_REG(A, X)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+/* Short form of mov, A = IMM */
+
+#define BPF_MOV64_IMM(A, IMM)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_K,		\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_MOV32_IMM(A, IMM)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_MOV | BPF_K,		\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Short form of mov based on type, BPF_X: A = X,  BPF_K: A = IMM */
+
+#define BPF_MOV64_RAW(TYPE, A, X, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE),	\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_MOV32_RAW(TYPE, A, X, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_MOV | BPF_SRC(TYPE),	\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Direct packet access, R0 = *(uint *) (skb->data + OFF) */
+
+#define BPF_LD_ABS(SIZE, OFF)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,	\
+		.a_reg = 0,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = OFF })
+
+/* Indirect packet access, R0 = *(uint *) (skb->data + X + OFF) */
+
+#define BPF_LD_IND(SIZE, X, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_IND,	\
+		.a_reg = 0,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = OFF })
+
+/* Memory store, A = *(uint *) (X + OFF), and vice versa */
+
+#define BPF_LDX_MEM(SIZE, A, X, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+#define BPF_STX_MEM(SIZE, A, X, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Conditional jumps against registers, if (A 'op' X) goto pc + OFF */
+
+#define BPF_JMP_REG(OP, A, X, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_OP(OP) | BPF_X,		\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Conditional jumps against immediates, if (A 'op' IMM) goto pc + OFF */
+
+#define BPF_JMP_IMM(OP, A, IMM, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_OP(OP) | BPF_K,		\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Function call */
+
+#define BPF_EMIT_CALL(FUNC)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_CALL,			\
+		.a_reg = 0,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = ((FUNC) - __bpf_call_base) })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, A, X, OFF, IMM)			\
+	((struct sock_filter_int) {				\
+		.code  = CODE,					\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN()						\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_EXIT,			\
+		.a_reg = 0,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+#define bytes_to_bpf_size(bytes)				\
+({								\
+	int bpf_size = -EINVAL;					\
+								\
+	if (bytes == sizeof(u8))				\
+		bpf_size = BPF_B;				\
+	else if (bytes == sizeof(u16))				\
+		bpf_size = BPF_H;				\
+	else if (bytes == sizeof(u32))				\
+		bpf_size = BPF_W;				\
+	else if (bytes == sizeof(u64))				\
+		bpf_size = BPF_DW;				\
+								\
+	bpf_size;						\
+})
 
 /* Macro to invoke filter function. */
 #define SK_RUN_FILTER(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
diff --git a/net/core/filter.c b/net/core/filter.c
index 328aaf6ff4d1..842f8393121d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -672,14 +672,10 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 
 		/* A = *(u16 *) (ctx + offsetof(protocol)) */
-		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
-				    offsetof(struct sk_buff, protocol));
-		insn++;
-
+		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				      offsetof(struct sk_buff, protocol));
 		/* A = ntohs(A) [emitting a nop or swap16] */
-		insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
-		insn->a_reg = BPF_REG_A;
-		insn->imm = 16;
+		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
@@ -688,37 +684,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		if (insn->off < 0)
 			return false;
 		insn++;
-
 		*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
 		break;
 
 	case SKF_AD_OFF + SKF_AD_IFINDEX:
 	case SKF_AD_OFF + SKF_AD_HATYPE:
-		*insn = BPF_LDX_MEM(size_to_bpf(FIELD_SIZEOF(struct sk_buff, dev)),
-				    BPF_REG_TMP, BPF_REG_CTX,
-				    offsetof(struct sk_buff, dev));
-		insn++;
-
-		/* if (tmp != 0) goto pc+1 */
-		*insn = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
-		insn++;
-
-		*insn = BPF_EXIT_INSN();
-		insn++;
-
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
-
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_TMP;
-
-		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->off = offsetof(struct net_device, ifindex);
-		} else {
-			insn->code = BPF_LDX | BPF_MEM | BPF_H;
-			insn->off = offsetof(struct net_device, type);
-		}
+		BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
+
+		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+				      BPF_REG_TMP, BPF_REG_CTX,
+				      offsetof(struct sk_buff, dev));
+		/* if (tmp != 0) goto pc + 1 */
+		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
+		*insn++ = BPF_EXIT_INSN();
+		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
+			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
+					    offsetof(struct net_device, ifindex));
+		else
+			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
+					    offsetof(struct net_device, type));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_MARK:
@@ -745,22 +731,17 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-
-		/* A = *(u16 *) (ctx + offsetof(vlan_tci)) */
-		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
-				    offsetof(struct sk_buff, vlan_tci));
-		insn++;
-
 		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
 
+		/* A = *(u16 *) (ctx + offsetof(vlan_tci)) */
+		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				      offsetof(struct sk_buff, vlan_tci));
 		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
 			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
 					      ~VLAN_TAG_PRESENT);
 		} else {
 			/* A >>= 12 */
-			*insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
-			insn++;
-
+			*insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
 			/* A &= 1 */
 			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
 		}
@@ -772,34 +753,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_CPU:
 	case SKF_AD_OFF + SKF_AD_RANDOM:
 		/* arg1 = ctx */
-		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG1, BPF_REG_CTX);
-		insn++;
-
+		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
 		/* arg2 = A */
-		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG2, BPF_REG_A);
-		insn++;
-
+		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
 		/* arg3 = X */
-		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG3, BPF_REG_X);
-		insn++;
-
+		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
 		/* Emit call(ctx, arg2=A, arg3=X) */
-		insn->code = BPF_JMP | BPF_CALL;
 		switch (fp->k) {
 		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
-			insn->imm = __skb_get_pay_offset - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
 			break;
 		case SKF_AD_OFF + SKF_AD_NLATTR:
-			insn->imm = __skb_get_nlattr - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__skb_get_nlattr);
 			break;
 		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
-			insn->imm = __skb_get_nlattr_nest - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
 			break;
 		case SKF_AD_OFF + SKF_AD_CPU:
-			insn->imm = __get_raw_cpu_id - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
 			break;
 		case SKF_AD_OFF + SKF_AD_RANDOM:
-			insn->imm = __get_random_u32 - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__get_random_u32);
 			break;
 		}
 		break;
@@ -871,9 +845,8 @@ do_pass:
 	new_insn = new_prog;
 	fp = prog;
 
-	if (new_insn) {
-		*new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1);
-	}
+	if (new_insn)
+		*new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
 	new_insn++;
 
 	for (i = 0; i < len; fp++, i++) {
@@ -921,17 +894,16 @@ do_pass:
 			    convert_bpf_extensions(fp, &insn))
 				break;
 
-			insn->code = fp->code;
-			insn->a_reg = BPF_REG_A;
-			insn->x_reg = BPF_REG_X;
-			insn->imm = fp->k;
+			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
 			break;
 
-		/* Jump opcodes map as-is, but offsets need adjustment. */
-		case BPF_JMP | BPF_JA:
-			target = i + fp->k + 1;
-			insn->code = fp->code;
-#define EMIT_JMP							\
+		/* Jump transformation cannot use BPF block macros
+		 * everywhere as offset calculation and target updates
+		 * require a bit more work than the rest, i.e. jump
+		 * opcodes map as-is, but offsets need adjustment.
+		 */
+
+#define BPF_EMIT_JMP							\
 	do {								\
 		if (target >= len || target < 0)			\
 			goto err;					\
@@ -940,7 +912,10 @@ do_pass:
 		insn->off -= insn - tmp_insns;				\
 	} while (0)
 
-			EMIT_JMP;
+		case BPF_JMP | BPF_JA:
+			target = i + fp->k + 1;
+			insn->code = fp->code;
+			BPF_EMIT_JMP;
 			break;
 
 		case BPF_JMP | BPF_JEQ | BPF_K:
@@ -956,10 +931,7 @@ do_pass:
 				 * immediate into tmp register and use it
 				 * in compare insn.
 				 */
-				insn->code = BPF_ALU | BPF_MOV | BPF_K;
-				insn->a_reg = BPF_REG_TMP;
-				insn->imm = fp->k;
-				insn++;
+				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
 
 				insn->a_reg = BPF_REG_A;
 				insn->x_reg = BPF_REG_TMP;
@@ -975,7 +947,7 @@ do_pass:
 			if (fp->jf == 0) {
 				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
 				target = i + fp->jt + 1;
-				EMIT_JMP;
+				BPF_EMIT_JMP;
 				break;
 			}
 
@@ -983,116 +955,94 @@ do_pass:
 			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
 				insn->code = BPF_JMP | BPF_JNE | bpf_src;
 				target = i + fp->jf + 1;
-				EMIT_JMP;
+				BPF_EMIT_JMP;
 				break;
 			}
 
 			/* Other jumps are mapped into two insns: Jxx and JA. */
 			target = i + fp->jt + 1;
 			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
-			EMIT_JMP;
+			BPF_EMIT_JMP;
 			insn++;
 
 			insn->code = BPF_JMP | BPF_JA;
 			target = i + fp->jf + 1;
-			EMIT_JMP;
+			BPF_EMIT_JMP;
 			break;
 
 		/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
 		case BPF_LDX | BPF_MSH | BPF_B:
 			/* tmp = A */
-			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_TMP, BPF_REG_A);
-			insn++;
-
+			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
 			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
-			*insn = BPF_LD_ABS(BPF_B, fp->k);
-			insn++;
-
+			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
 			/* A &= 0xf */
-			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
-			insn++;
-
+			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
 			/* A <<= 2 */
-			*insn = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
-			insn++;
-
+			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
 			/* X = A */
-			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
-			insn++;
-
+			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
 			/* A = tmp */
-			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_TMP);
+			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
 			break;
 
 		/* RET_K, RET_A are remaped into 2 insns. */
 		case BPF_RET | BPF_A:
 		case BPF_RET | BPF_K:
-			insn->code = BPF_ALU | BPF_MOV |
-				     (BPF_RVAL(fp->code) == BPF_K ?
-				      BPF_K : BPF_X);
-			insn->a_reg = 0;
-			insn->x_reg = BPF_REG_A;
-			insn->imm = fp->k;
-			insn++;
-
+			*insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
+						BPF_K : BPF_X, BPF_REG_0,
+						BPF_REG_A, fp->k);
 			*insn = BPF_EXIT_INSN();
 			break;
 
 		/* Store to stack. */
 		case BPF_ST:
 		case BPF_STX:
-			insn->code = BPF_STX | BPF_MEM | BPF_W;
-			insn->a_reg = BPF_REG_FP;
-			insn->x_reg = fp->code == BPF_ST ?
-				      BPF_REG_A : BPF_REG_X;
-			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
+					    BPF_ST ? BPF_REG_A : BPF_REG_X,
+					    -(BPF_MEMWORDS - fp->k) * 4);
 			break;
 
 		/* Load from stack. */
 		case BPF_LD | BPF_MEM:
 		case BPF_LDX | BPF_MEM:
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      BPF_REG_A : BPF_REG_X;
-			insn->x_reg = BPF_REG_FP;
-			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD  ?
+					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
+					    -(BPF_MEMWORDS - fp->k) * 4);
 			break;
 
 		/* A = K or X = K */
 		case BPF_LD | BPF_IMM:
 		case BPF_LDX | BPF_IMM:
-			insn->code = BPF_ALU | BPF_MOV | BPF_K;
-			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      BPF_REG_A : BPF_REG_X;
-			insn->imm = fp->k;
+			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
+					      BPF_REG_A : BPF_REG_X, fp->k);
 			break;
 
 		/* X = A */
 		case BPF_MISC | BPF_TAX:
-			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
+			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
 			break;
 
 		/* A = X */
 		case BPF_MISC | BPF_TXA:
-			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_X);
+			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
 			break;
 
 		/* A = skb->len or X = skb->len */
 		case BPF_LD | BPF_W | BPF_LEN:
 		case BPF_LDX | BPF_W | BPF_LEN:
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      BPF_REG_A : BPF_REG_X;
-			insn->x_reg = BPF_REG_CTX;
-			insn->off = offsetof(struct sk_buff, len);
+			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
+					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
+					    offsetof(struct sk_buff, len));
 			break;
 
-		/* access seccomp_data fields */
+		/* Access seccomp_data fields. */
 		case BPF_LDX | BPF_ABS | BPF_W:
 			/* A = *(u32 *) (ctx + K) */
 			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
 			break;
 
+		/* Unkown instruction. */
 		default:
 			goto err;
 		}
@@ -1101,7 +1051,6 @@ do_pass:
 		if (new_prog)
 			memcpy(new_insn, tmp_insns,
 			       sizeof(*insn) * (insn - tmp_insns));
-
 		new_insn += insn - tmp_insns;
 	}
 
@@ -1116,7 +1065,6 @@ do_pass:
 		new_flen = new_insn - new_prog;
 		if (pass > 2)
 			goto err;
-
 		goto do_pass;
 	}
 
-- 
cgit v1.2.3-71-gd317


From 96b2e73c5471542cb9c622c4360716684f8797ed Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 2 Jun 2014 00:18:48 -0700
Subject: Revert "net/mlx4_en: Use affinity hint"

This reverts commit 70a640d0dae3a9b1b222ce673eb5d92c263ddd61.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c              |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_cq.c     |  6 +-----
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 30 --------------------------
 drivers/net/ethernet/mellanox/mlx4/eq.c        | 13 +----------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  1 -
 include/linux/mlx4/device.h                    |  2 +-
 6 files changed, 4 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 58b1f239ac2b..199c7896f081 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1897,7 +1897,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 				 i, j, dev->pdev->bus->name);
 			/* Set IRQ for specific name (per ring) */
 			if (mlx4_assign_eq(dev, name, NULL,
-					   &ibdev->eq_table[eq], NULL)) {
+					   &ibdev->eq_table[eq])) {
 				/* Use legacy (same as mlx4_en driver) */
 				pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
 				ibdev->eq_table[eq] =
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index ea2cd72e5368..636963db598a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -118,15 +118,11 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 	if (cq->is_tx == RX) {
 		if (mdev->dev->caps.comp_pool) {
 			if (!cq->vector) {
-				struct mlx4_en_rx_ring *ring =
-					priv->rx_ring[cq->ring];
-
 				sprintf(name, "%s-%d", priv->dev->name,
 					cq->ring);
 				/* Set IRQ for specific name (per ring) */
 				if (mlx4_assign_eq(mdev->dev, name, rmap,
-						   &cq->vector,
-						   ring->affinity_mask)) {
+						   &cq->vector)) {
 					cq->vector = (cq->ring + 1 + priv->port)
 					    % mdev->dev->caps.num_comp_vectors;
 					mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 05d135572abc..58209bd0c94c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1526,32 +1526,6 @@ static void mlx4_en_linkstate(struct work_struct *work)
 	mutex_unlock(&mdev->state_lock);
 }
 
-static void mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
-{
-	struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
-	int numa_node = priv->mdev->dev->numa_node;
-
-	if (numa_node == -1)
-		return;
-
-	if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) {
-		en_err(priv, "Failed to allocate core mask\n");
-		return;
-	}
-
-	if (cpumask_set_cpu_local_first(ring_idx, numa_node,
-					ring->affinity_mask)) {
-		en_err(priv, "Failed setting affinity hint\n");
-		free_cpumask_var(ring->affinity_mask);
-		ring->affinity_mask = NULL;
-	}
-}
-
-static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
-{
-	free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask);
-	priv->rx_ring[ring_idx]->affinity_mask = NULL;
-}
 
 int mlx4_en_start_port(struct net_device *dev)
 {
@@ -1593,8 +1567,6 @@ int mlx4_en_start_port(struct net_device *dev)
 
 		mlx4_en_cq_init_lock(cq);
 
-		mlx4_en_init_affinity_hint(priv, i);
-
 		err = mlx4_en_activate_cq(priv, cq, i);
 		if (err) {
 			en_err(priv, "Failed activating Rx CQ\n");
@@ -1875,8 +1847,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 			msleep(1);
 		mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
 		mlx4_en_deactivate_cq(priv, cq);
-
-		mlx4_en_free_affinity_hint(priv, i);
 	}
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index f91659e5fa13..d954ec1eac17 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1376,7 +1376,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev)
 EXPORT_SYMBOL(mlx4_test_interrupts);
 
 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
-		   int *vector, cpumask_var_t cpu_hint_mask)
+		   int *vector)
 {
 
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1411,15 +1411,6 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
 			}
 			mlx4_assign_irq_notifier(priv, dev,
 						 priv->eq_table.eq[vec].irq);
-			if (cpu_hint_mask) {
-				err = irq_set_affinity_hint(
-						priv->eq_table.eq[vec].irq,
-						cpu_hint_mask);
-				if (err) {
-					mlx4_warn(dev, "Failed setting affinity hint\n");
-					/*we dont want to break here*/
-				}
-			}
 
 			eq_set_ci(&priv->eq_table.eq[vec], 1);
 		}
@@ -1450,8 +1441,6 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec)
 			irq_set_affinity_notifier(
 				priv->eq_table.eq[vec].irq,
 				NULL);
-			irq_set_affinity_hint(priv->eq_table.eq[vec].irq,
-					      NULL);
 			free_irq(priv->eq_table.eq[vec].irq,
 				 &priv->eq_table.eq[vec]);
 			priv->msix_ctl.pool_bm &= ~(1ULL << i);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 0e15295bedd6..b5db1bf361dc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -313,7 +313,6 @@ struct mlx4_en_rx_ring {
 	unsigned long csum_ok;
 	unsigned long csum_none;
 	int hwtstamp_rx_filter;
-	cpumask_var_t affinity_mask;
 };
 
 struct mlx4_en_cq {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b9b70e00e3c1..ca38871a585c 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1161,7 +1161,7 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_SYNC_TPT(struct mlx4_dev *dev);
 int mlx4_test_interrupts(struct mlx4_dev *dev);
 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
-		   int *vector, cpumask_t *cpu_hint_mask);
+		   int *vector);
 void mlx4_release_eq(struct mlx4_dev *dev, int vec);
 
 int mlx4_get_phys_port_id(struct mlx4_dev *dev);
-- 
cgit v1.2.3-71-gd317


From f220e62659e968c3bbe4d96d73008832d19b9f77 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 13 May 2014 12:47:42 +0200
Subject: drm/plane: Fix sparse warnings

Include the drm_plane_helper.h header file to fix the following sparse
warnings:

	  CHECK   drivers/gpu/drm/drm_plane_helper.c
	drivers/gpu/drm/drm_plane_helper.c:102:5: warning: symbol 'drm_primary_helper_update' was not declared. Should it be static?
	drivers/gpu/drm/drm_plane_helper.c:219:5: warning: symbol 'drm_primary_helper_disable' was not declared. Should it be static?
	drivers/gpu/drm/drm_plane_helper.c:233:6: warning: symbol 'drm_primary_helper_destroy' was not declared. Should it be static?
	drivers/gpu/drm/drm_plane_helper.c:241:30: warning: symbol 'drm_primary_helper_funcs' was not declared. Should it be static?
	drivers/gpu/drm/drm_plane_helper.c:259:18: warning: symbol 'drm_primary_helper_create_plane' was not declared. Should it be static?

Doing that makes gcc complain as follows:

	  CC      drivers/gpu/drm/drm_plane_helper.o
	drivers/gpu/drm/drm_plane_helper.c:260:19: error: conflicting types for 'drm_primary_helper_create_plane'
	 struct drm_plane *drm_primary_helper_create_plane(struct drm_device *dev,
	                   ^
	In file included from drivers/gpu/drm/drm_plane_helper.c:29:0:
	include/drm/drm_plane_helper.h:42:19: note: previous declaration of 'drm_primary_helper_create_plane' was here
	 struct drm_plane *drm_primary_helper_create_plane(struct drm_device *dev,
	                   ^
	drivers/gpu/drm/drm_plane_helper.c: In function 'drm_primary_helper_create_plane':
	drivers/gpu/drm/drm_plane_helper.c:274:11: warning: assignment discards 'const' qualifier from pointer target type
	   formats = safe_modeset_formats;
	           ^
	In file included from include/linux/linkage.h:6:0,
	                 from include/linux/kernel.h:6,
	                 from include/drm/drmP.h:45,
	                 from drivers/gpu/drm/drm_plane_helper.c:27:
	drivers/gpu/drm/drm_plane_helper.c: At top level:
	drivers/gpu/drm/drm_plane_helper.c:289:15: error: conflicting types for 'drm_primary_helper_create_plane'
	 EXPORT_SYMBOL(drm_primary_helper_create_plane);
	               ^
	include/linux/export.h:57:21: note: in definition of macro '__EXPORT_SYMBOL'
	  extern typeof(sym) sym;     \
	                     ^
	drivers/gpu/drm/drm_plane_helper.c:289:1: note: in expansion of macro 'EXPORT_SYMBOL'
	 EXPORT_SYMBOL(drm_primary_helper_create_plane);
	 ^
	In file included from drivers/gpu/drm/drm_plane_helper.c:29:0:
	include/drm/drm_plane_helper.h:42:19: note: previous declaration of 'drm_primary_helper_create_plane' was here
	 struct drm_plane *drm_primary_helper_create_plane(struct drm_device *dev,
	                   ^

Which can easily be fixed by making the signatures of the implementation
and the prototype match.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_plane_helper.c | 1 +
 include/drm/drm_plane_helper.h     | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c
index d966afa7ecae..f16bb92db577 100644
--- a/drivers/gpu/drm/drm_plane_helper.c
+++ b/drivers/gpu/drm/drm_plane_helper.c
@@ -26,6 +26,7 @@
 #include <linux/list.h>
 #include <drm/drmP.h>
 #include <drm/drm_rect.h>
+#include <drm/drm_plane_helper.h>
 
 #define SUBPIXEL_MASK 0xffff
 
diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h
index 09824becee3e..c5e7ab9503c8 100644
--- a/include/drm/drm_plane_helper.h
+++ b/include/drm/drm_plane_helper.h
@@ -42,7 +42,7 @@ extern int drm_primary_helper_disable(struct drm_plane *plane);
 extern void drm_primary_helper_destroy(struct drm_plane *plane);
 extern const struct drm_plane_funcs drm_primary_helper_funcs;
 extern struct drm_plane *drm_primary_helper_create_plane(struct drm_device *dev,
-							 uint32_t *formats,
+							 const uint32_t *formats,
 							 int num_formats);
 
 
-- 
cgit v1.2.3-71-gd317


From 63ff05910d9652f8a0c4dbb6c7772cfca5364f12 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Sun, 1 Jun 2014 16:06:53 +0200
Subject: include/scsi/osd_protocol.h: remove unnecessary __constant

__constant_cpu_to_be16 converted to cpu_to_be16

This patch fixes checkpatch warnings:

"WARNING: __constant_cpu_to_be16 should be cpu_to_be16"

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Ack-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/scsi/osd_protocol.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/scsi/osd_protocol.h b/include/scsi/osd_protocol.h
index 25ac6283b9c7..a2594afe05c7 100644
--- a/include/scsi/osd_protocol.h
+++ b/include/scsi/osd_protocol.h
@@ -263,16 +263,16 @@ static inline struct osd_cdb_head *osd_cdb_head(struct osd_cdb *ocdb)
  * Ex name = FORMAT_OSD we have OSD_ACT_FORMAT_OSD && OSDv1_ACT_FORMAT_OSD
  */
 #define OSD_ACT___(Name, Num) \
-	OSD_ACT_##Name = __constant_cpu_to_be16(0x8880 + Num), \
-	OSDv1_ACT_##Name = __constant_cpu_to_be16(0x8800 + Num),
+	OSD_ACT_##Name = cpu_to_be16(0x8880 + Num), \
+	OSDv1_ACT_##Name = cpu_to_be16(0x8800 + Num),
 
 /* V2 only actions */
 #define OSD_ACT_V2(Name, Num) \
-	OSD_ACT_##Name = __constant_cpu_to_be16(0x8880 + Num),
+	OSD_ACT_##Name = cpu_to_be16(0x8880 + Num),
 
 #define OSD_ACT_V1_V2(Name, Num1, Num2) \
-	OSD_ACT_##Name = __constant_cpu_to_be16(Num2), \
-	OSDv1_ACT_##Name = __constant_cpu_to_be16(Num1),
+	OSD_ACT_##Name = cpu_to_be16(Num2), \
+	OSDv1_ACT_##Name = cpu_to_be16(Num1),
 
 enum osd_service_actions {
 	OSD_ACT_V2(OBJECT_STRUCTURE_CHECK,	0x00)
-- 
cgit v1.2.3-71-gd317


From 670e5b8eaf85704742bc3cb1df51fdd3ce08fc15 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 28 May 2014 18:44:46 -0700
Subject: net: Add support for device specific address syncing

This change provides a function to be used in order to break the
ndo_set_rx_mode call into a set of address add and remove calls.  The code
is based on the implementation of dev_uc_sync/dev_mc_sync.  Since they
essentially do the same thing but with only one dev I simply named my
functions __dev_uc_sync/__dev_mc_sync.

I also implemented an unsync version of the functions as well to allow for
cleanup on close.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 73 ++++++++++++++++++++++++++++++++++++++++
 net/core/dev_addr_lists.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2db1610bf109..774e5391eb8e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3003,6 +3003,15 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
 		   struct netdev_hw_addr_list *from_list, int addr_len);
 void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
 		      struct netdev_hw_addr_list *from_list, int addr_len);
+int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
+		       struct net_device *dev,
+		       int (*sync)(struct net_device *, const unsigned char *),
+		       int (*unsync)(struct net_device *,
+				     const unsigned char *));
+void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
+			  struct net_device *dev,
+			  int (*unsync)(struct net_device *,
+					const unsigned char *));
 void __hw_addr_init(struct netdev_hw_addr_list *list);
 
 /* Functions used for device addresses handling */
@@ -3023,6 +3032,38 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from);
 void dev_uc_flush(struct net_device *dev);
 void dev_uc_init(struct net_device *dev);
 
+/**
+ *  __dev_uc_sync - Synchonize device's unicast list
+ *  @dev:  device to sync
+ *  @sync: function to call if address should be added
+ *  @unsync: function to call if address should be removed
+ *
+ *  Add newly added addresses to the interface, and release
+ *  addresses that have been deleted.
+ **/
+static inline int __dev_uc_sync(struct net_device *dev,
+				int (*sync)(struct net_device *,
+					    const unsigned char *),
+				int (*unsync)(struct net_device *,
+					      const unsigned char *))
+{
+	return __hw_addr_sync_dev(&dev->uc, dev, sync, unsync);
+}
+
+/**
+ *  __dev_uc_unsync - Remove synchonized addresses from device
+ *  @dev:  device to sync
+ *  @unsync: function to call if address should be removed
+ *
+ *  Remove all addresses that were added to the device by dev_uc_sync().
+ **/
+static inline void __dev_uc_unsync(struct net_device *dev,
+				   int (*unsync)(struct net_device *,
+						 const unsigned char *))
+{
+	__hw_addr_unsync_dev(&dev->uc, dev, unsync);
+}
+
 /* Functions used for multicast addresses handling */
 int dev_mc_add(struct net_device *dev, const unsigned char *addr);
 int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
@@ -3035,6 +3076,38 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from);
 void dev_mc_flush(struct net_device *dev);
 void dev_mc_init(struct net_device *dev);
 
+/**
+ *  __dev_mc_sync - Synchonize device's multicast list
+ *  @dev:  device to sync
+ *  @sync: function to call if address should be added
+ *  @unsync: function to call if address should be removed
+ *
+ *  Add newly added addresses to the interface, and release
+ *  addresses that have been deleted.
+ **/
+static inline int __dev_mc_sync(struct net_device *dev,
+				int (*sync)(struct net_device *,
+					    const unsigned char *),
+				int (*unsync)(struct net_device *,
+					      const unsigned char *))
+{
+	return __hw_addr_sync_dev(&dev->mc, dev, sync, unsync);
+}
+
+/**
+ *  __dev_mc_unsync - Remove synchonized addresses from device
+ *  @dev:  device to sync
+ *  @unsync: function to call if address should be removed
+ *
+ *  Remove all addresses that were added to the device by dev_mc_sync().
+ **/
+static inline void __dev_mc_unsync(struct net_device *dev,
+				   int (*unsync)(struct net_device *,
+						 const unsigned char *))
+{
+	__hw_addr_unsync_dev(&dev->mc, dev, unsync);
+}
+
 /* Functions used for secondary unicast and multicast support */
 void dev_set_rx_mode(struct net_device *dev);
 void __dev_set_rx_mode(struct net_device *dev);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 329d5794e7dc..b6b230600b97 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -225,6 +225,91 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
 }
 EXPORT_SYMBOL(__hw_addr_unsync);
 
+/**
+ *  __hw_addr_sync_dev - Synchonize device's multicast list
+ *  @list: address list to syncronize
+ *  @dev:  device to sync
+ *  @sync: function to call if address should be added
+ *  @unsync: function to call if address should be removed
+ *
+ *  This funciton is intended to be called from the ndo_set_rx_mode
+ *  function of devices that require explicit address add/remove
+ *  notifications.  The unsync function may be NULL in which case
+ *  the addresses requiring removal will simply be removed without
+ *  any notification to the device.
+ **/
+int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
+		       struct net_device *dev,
+		       int (*sync)(struct net_device *, const unsigned char *),
+		       int (*unsync)(struct net_device *,
+				     const unsigned char *))
+{
+	struct netdev_hw_addr *ha, *tmp;
+	int err;
+
+	/* first go through and flush out any stale entries */
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		if (!ha->sync_cnt || ha->refcount != 1)
+			continue;
+
+		/* if unsync is defined and fails defer unsyncing address */
+		if (unsync && unsync(dev, ha->addr))
+			continue;
+
+		ha->sync_cnt--;
+		__hw_addr_del_entry(list, ha, false, false);
+	}
+
+	/* go through and sync new entries to the list */
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		if (ha->sync_cnt)
+			continue;
+
+		err = sync(dev, ha->addr);
+		if (err)
+			return err;
+
+		ha->sync_cnt++;
+		ha->refcount++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(__hw_addr_sync_dev);
+
+/**
+ *  __hw_addr_unsync_dev - Remove synchonized addresses from device
+ *  @list: address list to remove syncronized addresses from
+ *  @dev:  device to sync
+ *  @unsync: function to call if address should be removed
+ *
+ *  Remove all addresses that were added to the device by __hw_addr_sync_dev().
+ *  This function is intended to be called from the ndo_stop or ndo_open
+ *  functions on devices that require explicit address add/remove
+ *  notifications.  If the unsync function pointer is NULL then this function
+ *  can be used to just reset the sync_cnt for the addresses in the list.
+ **/
+void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
+			  struct net_device *dev,
+			  int (*unsync)(struct net_device *,
+					const unsigned char *))
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		if (!ha->sync_cnt)
+			continue;
+
+		/* if unsync is defined and fails defer unsyncing address */
+		if (unsync && unsync(dev, ha->addr))
+			continue;
+
+		ha->sync_cnt--;
+		__hw_addr_del_entry(list, ha, false, false);
+	}
+}
+EXPORT_SYMBOL(__hw_addr_unsync_dev);
+
 static void __hw_addr_flush(struct netdev_hw_addr_list *list)
 {
 	struct netdev_hw_addr *ha, *tmp;
-- 
cgit v1.2.3-71-gd317


From 73f156a6e8c1074ac6327e0abd1169e95eb66463 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 2 Jun 2014 05:26:03 -0700
Subject: inetpeer: get rid of ip_id_count

Ideally, we would need to generate IP ID using a per destination IP
generator.

linux kernels used inet_peer cache for this purpose, but this had a huge
cost on servers disabling MTU discovery.

1) each inet_peer struct consumes 192 bytes

2) inetpeer cache uses a binary tree of inet_peer structs,
   with a nominal size of ~66000 elements under load.

3) lookups in this tree are hitting a lot of cache lines, as tree depth
   is about 20.

4) If server deals with many tcp flows, we have a high probability of
   not finding the inet_peer, allocating a fresh one, inserting it in
   the tree with same initial ip_id_count, (cf secure_ip_id())

5) We garbage collect inet_peer aggressively.

IP ID generation do not have to be 'perfect'

Goal is trying to avoid duplicates in a short period of time,
so that reassembly units have a chance to complete reassembly of
fragments belonging to one message before receiving other fragments
with a recycled ID.

We simply use an array of generators, and a Jenkin hash using the dst IP
as a key.

ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it
belongs (it is only used from this file)

secure_ip_id() and secure_ipv6_id() no longer are needed.

Rename ip_select_ident_more() to ip_select_ident_segs() to avoid
unnecessary decrement/increment of the number of segments.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pptp.c          |  2 +-
 include/net/inetpeer.h          | 23 +++------------------
 include/net/ip.h                | 40 ++++++++++++++++++++----------------
 include/net/ipv6.h              |  2 --
 include/net/secure_seq.h        |  2 --
 net/core/secure_seq.c           | 25 -----------------------
 net/ipv4/igmp.c                 |  4 ++--
 net/ipv4/inetpeer.c             | 18 -----------------
 net/ipv4/ip_output.c            |  7 +++----
 net/ipv4/ip_tunnel_core.c       |  2 +-
 net/ipv4/ipmr.c                 |  2 +-
 net/ipv4/raw.c                  |  2 +-
 net/ipv4/route.c                | 45 +++++++++++++++--------------------------
 net/ipv4/xfrm4_mode_tunnel.c    |  2 +-
 net/ipv6/ip6_output.c           | 12 +++++++++++
 net/ipv6/output_core.c          | 30 ---------------------------
 net/netfilter/ipvs/ip_vs_xmit.c |  2 +-
 17 files changed, 65 insertions(+), 155 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index 01805319e1e0..1aff970be33e 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -281,7 +281,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	nf_reset(skb);
 
 	skb->ip_summed = CHECKSUM_NONE;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	ip_send_check(iph);
 
 	ip_local_out(skb);
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 6efe73c79c52..823ec7bb9c67 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -41,14 +41,13 @@ struct inet_peer {
 		struct rcu_head     gc_rcu;
 	};
 	/*
-	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
-	 * are not available: rid, ip_id_count
+	 * Once inet_peer is queued for deletion (refcnt == -1), following field
+	 * is not available: rid
 	 * We can share memory with rcu_head to help keep inet_peer small.
 	 */
 	union {
 		struct {
 			atomic_t			rid;		/* Frag reception counter */
-			atomic_t			ip_id_count;	/* IP ID for the next packet */
 		};
 		struct rcu_head         rcu;
 		struct inet_peer	*gc_next;
@@ -165,7 +164,7 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 void inetpeer_invalidate_tree(struct inet_peer_base *);
 
 /*
- * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
+ * temporary check to make sure we dont access rid, tcp_ts,
  * tcp_ts_stamp if no refcount is taken on inet_peer
  */
 static inline void inet_peer_refcheck(const struct inet_peer *p)
@@ -173,20 +172,4 @@ static inline void inet_peer_refcheck(const struct inet_peer *p)
 	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
 }
 
-
-/* can be called with or without local BH being disabled */
-static inline int inet_getid(struct inet_peer *p, int more)
-{
-	int old, new;
-	more++;
-	inet_peer_refcheck(p);
-	do {
-		old = atomic_read(&p->ip_id_count);
-		new = old + more;
-		if (!new)
-			new = 1;
-	} while (atomic_cmpxchg(&p->ip_id_count, old, new) != old);
-	return new;
-}
-
 #endif /* _NET_INETPEER_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 2e4947895d75..0e795df05ec9 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -309,9 +309,19 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
 	}
 }
 
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
+#define IP_IDENTS_SZ 2048u
+extern atomic_t *ip_idents;
 
-static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
+static inline u32 ip_idents_reserve(u32 hash, int segs)
+{
+	atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;
+
+	return atomic_add_return(segs, id_ptr) - segs;
+}
+
+void __ip_select_ident(struct iphdr *iph, int segs);
+
+static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
 {
 	struct iphdr *iph = ip_hdr(skb);
 
@@ -321,24 +331,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s
 		 * does not change, they drop every other packet in
 		 * a TCP stream using header compression.
 		 */
-		iph->id = (sk && inet_sk(sk)->inet_daddr) ?
-					htons(inet_sk(sk)->inet_id++) : 0;
-	} else
-		__ip_select_ident(iph, dst, 0);
-}
-
-static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more)
-{
-	struct iphdr *iph = ip_hdr(skb);
-
-	if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
 		if (sk && inet_sk(sk)->inet_daddr) {
 			iph->id = htons(inet_sk(sk)->inet_id);
-			inet_sk(sk)->inet_id += 1 + more;
-		} else
+			inet_sk(sk)->inet_id += segs;
+		} else {
 			iph->id = 0;
-	} else
-		__ip_select_ident(iph, dst, more);
+		}
+	} else {
+		__ip_select_ident(iph, segs);
+	}
+}
+
+static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk)
+{
+	ip_select_ident_segs(skb, sk, 1);
 }
 
 static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ba810d0546bc..574337fe72dd 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -668,8 +668,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add
 	return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr));
 }
 
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt);
-
 int ip6_dst_hoplimit(struct dst_entry *dst);
 
 static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index f257486f17be..3f36d45b714a 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -3,8 +3,6 @@
 
 #include <linux/types.h>
 
-__u32 secure_ip_id(__be32 daddr);
-__u32 secure_ipv6_id(const __be32 daddr[4]);
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 			       __be16 dport);
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 897da56f3aff..ba71212f0251 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -85,31 +85,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #endif
 
 #ifdef CONFIG_INET
-__u32 secure_ip_id(__be32 daddr)
-{
-	u32 hash[MD5_DIGEST_WORDS];
-
-	net_secret_init();
-	hash[0] = (__force __u32) daddr;
-	hash[1] = net_secret[13];
-	hash[2] = net_secret[14];
-	hash[3] = net_secret[15];
-
-	md5_transform(hash, net_secret);
-
-	return hash[0];
-}
-
-__u32 secure_ipv6_id(const __be32 daddr[4])
-{
-	__u32 hash[4];
-
-	net_secret_init();
-	memcpy(hash, daddr, 16);
-	md5_transform(hash, net_secret);
-
-	return hash[0];
-}
 
 __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 				 __be16 sport, __be16 dport)
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 17d34e3c2ac3..6748d420f714 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	pip->saddr    = fl4.saddr;
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len  = 0;	/* filled in later */
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	((u8 *)&pip[1])[0] = IPOPT_RA;
 	((u8 *)&pip[1])[1] = 4;
 	((u8 *)&pip[1])[2] = 0;
@@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	iph->daddr    = dst;
 	iph->saddr    = fl4.saddr;
 	iph->protocol = IPPROTO_IGMP;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 	((u8 *)&iph[1])[0] = IPOPT_RA;
 	((u8 *)&iph[1])[1] = 4;
 	((u8 *)&iph[1])[2] = 0;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index c98cf141f4ed..4ced1b9a97f0 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -26,20 +26,7 @@
  *  Theory of operations.
  *  We keep one entry for each peer IP address.  The nodes contains long-living
  *  information about the peer which doesn't depend on routes.
- *  At this moment this information consists only of ID field for the next
- *  outgoing IP packet.  This field is incremented with each packet as encoded
- *  in inet_getid() function (include/net/inetpeer.h).
- *  At the moment of writing this notes identifier of IP packets is generated
- *  to be unpredictable using this code only for packets subjected
- *  (actually or potentially) to defragmentation.  I.e. DF packets less than
- *  PMTU in size when local fragmentation is disabled use a constant ID and do
- *  not use this code (see ip_select_ident() in include/net/ip.h).
  *
- *  Route cache entries hold references to our nodes.
- *  New cache entries get references via lookup by destination IP address in
- *  the avl tree.  The reference is grabbed only when it's needed i.e. only
- *  when we try to output IP packet which needs an unpredictable ID (see
- *  __ip_select_ident() in net/ipv4/route.c).
  *  Nodes are removed only when reference counter goes to 0.
  *  When it's happened the node may be removed when a sufficient amount of
  *  time has been passed since its last use.  The less-recently-used entry can
@@ -62,7 +49,6 @@
  *		refcnt: atomically against modifications on other CPU;
  *		   usually under some other lock to prevent node disappearing
  *		daddr: unchangeable
- *		ip_id_count: atomic value (no lock needed)
  */
 
 static struct kmem_cache *peer_cachep __read_mostly;
@@ -497,10 +483,6 @@ relookup:
 		p->daddr = *daddr;
 		atomic_set(&p->refcnt, 1);
 		atomic_set(&p->rid, 0);
-		atomic_set(&p->ip_id_count,
-				(daddr->family == AF_INET) ?
-					secure_ip_id(daddr->addr.a4) :
-					secure_ipv6_id(daddr->addr.a6));
 		p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
 		p->rate_tokens = 0;
 		/* 60*HZ is arbitrary, but chosen enough high so that the first
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6e231ab58d65..8d3b6b0e9857 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->daddr    = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
 	iph->saddr    = saddr;
 	iph->protocol = sk->sk_protocol;
-	ip_select_ident(skb, &rt->dst, sk);
+	ip_select_ident(skb, sk);
 
 	if (opt && opt->opt.optlen) {
 		iph->ihl += opt->opt.optlen>>2;
@@ -430,8 +430,7 @@ packet_routed:
 		ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
 	}
 
-	ip_select_ident_more(skb, &rt->dst, sk,
-			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+	ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
 
 	/* TODO : should we use skb->sk here instead of sk ? */
 	skb->priority = sk->sk_priority;
@@ -1379,7 +1378,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	ip_copy_addrs(iph, fl4);
-	ip_select_ident(skb, &rt->dst, sk);
+	ip_select_ident(skb, sk);
 
 	if (opt) {
 		iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index bcf206c79005..847e69cbff7e 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -74,7 +74,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 	iph->daddr	=	dst;
 	iph->saddr	=	src;
 	iph->ttl	=	ttl;
-	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
 
 	err = ip_local_out_sk(sk, skb);
 	if (unlikely(net_xmit_eval(err)))
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2bc9cc47f246..65bcaa789043 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 	iph->protocol	=	IPPROTO_IPIP;
 	iph->ihl	=	5;
 	iph->tot_len	=	htons(skb->len);
-	ip_select_ident(skb, skb_dst(skb), NULL);
+	ip_select_ident(skb, NULL);
 	ip_send_check(iph);
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a9dbe58bdfe7..2c65160565e1 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 		iph->check   = 0;
 		iph->tot_len = htons(length);
 		if (!iph->id)
-			ip_select_ident(skb, &rt->dst, NULL);
+			ip_select_ident(skb, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4154eb76b0ad..082239ffe34a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -89,6 +89,7 @@
 #include <linux/rcupdate.h>
 #include <linux/times.h>
 #include <linux/slab.h>
+#include <linux/jhash.h>
 #include <net/dst.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
@@ -456,39 +457,19 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 	return neigh_create(&arp_tbl, pkey, dev);
 }
 
-/*
- * Peer allocation may fail only in serious out-of-memory conditions.  However
- * we still can generate some output.
- * Random ID selection looks a bit dangerous because we have no chances to
- * select ID being unique in a reasonable period of time.
- * But broken packet identifier may be better than no packet at all.
- */
-static void ip_select_fb_ident(struct iphdr *iph)
-{
-	static DEFINE_SPINLOCK(ip_fb_id_lock);
-	static u32 ip_fallback_id;
-	u32 salt;
+atomic_t *ip_idents __read_mostly;
+EXPORT_SYMBOL(ip_idents);
 
-	spin_lock_bh(&ip_fb_id_lock);
-	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
-	iph->id = htons(salt & 0xFFFF);
-	ip_fallback_id = salt;
-	spin_unlock_bh(&ip_fb_id_lock);
-}
-
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
+void __ip_select_ident(struct iphdr *iph, int segs)
 {
-	struct net *net = dev_net(dst->dev);
-	struct inet_peer *peer;
+	static u32 ip_idents_hashrnd __read_mostly;
+	u32 hash, id;
 
-	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
-	if (peer) {
-		iph->id = htons(inet_getid(peer, more));
-		inet_putpeer(peer);
-		return;
-	}
+	net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
 
-	ip_select_fb_ident(iph);
+	hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd);
+	id = ip_idents_reserve(hash, segs);
+	iph->id = htons(id);
 }
 EXPORT_SYMBOL(__ip_select_ident);
 
@@ -2711,6 +2692,12 @@ int __init ip_rt_init(void)
 {
 	int rc = 0;
 
+	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+	if (!ip_idents)
+		panic("IP: failed to allocate ip_idents\n");
+
+	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
 	if (!ip_rt_acct)
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 05f2b484954f..91771a7c802f 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -58,12 +58,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
 		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
-	ip_select_ident(skb, dst->child, NULL);
 
 	top_iph->ttl = ip4_dst_hoplimit(dst->child);
 
 	top_iph->saddr = x->props.saddr.a4;
 	top_iph->daddr = x->id.daddr.a4;
+	ip_select_ident(skb, NULL);
 
 	return 0;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 85aaeca1f7f3..cb9df0eb4023 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -537,6 +537,18 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	skb_copy_secmark(to, from);
 }
 
+static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
+{
+	static u32 ip6_idents_hashrnd __read_mostly;
+	u32 hash, id;
+
+	net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
+
+	hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
+	id = ip_idents_reserve(hash, 1);
+	fhdr->identification = htonl(id);
+}
+
 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
 	struct sk_buff *frag;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 6313abd53c9d..6179ac186ab9 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -8,36 +8,6 @@
 #include <net/addrconf.h>
 #include <net/secure_seq.h>
 
-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
-{
-	static atomic_t ipv6_fragmentation_id;
-	struct in6_addr addr;
-	int old, new;
-
-#if IS_ENABLED(CONFIG_IPV6)
-	struct inet_peer *peer;
-	struct net *net;
-
-	net = dev_net(rt->dst.dev);
-	peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
-	if (peer) {
-		fhdr->identification = htonl(inet_getid(peer, 0));
-		inet_putpeer(peer);
-		return;
-	}
-#endif
-	do {
-		old = atomic_read(&ipv6_fragmentation_id);
-		new = old + 1;
-		if (!new)
-			new = 1;
-	} while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
-
-	addr = rt->rt6i_dst.addr;
-	addr.s6_addr32[0] ^= (__force __be32)new;
-	fhdr->identification = htonl(secure_ipv6_id(addr.s6_addr32));
-}
-EXPORT_SYMBOL(ipv6_select_ident);
 
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 487b55e04337..73ba1cc7a88d 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->daddr		=	cp->daddr.ip;
 	iph->saddr		=	saddr;
 	iph->ttl		=	old_iph->ttl;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->ignore_df = 1;
-- 
cgit v1.2.3-71-gd317


From 1b49dcf3d7c765ad18ca7167a0e441824eb1f7af Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sun, 16 Mar 2014 14:51:35 -0700
Subject: virtio-scsi.h: Add virtio_scsi_cmd_req_pi + VIRTIO_SCSI_F_T10_PI bits

This patch adds a virtio_scsi_cmd_req_pi header as recommened by
Paolo that contains pi_bytesout + pi_bytesin elements used for
signaling when protection information buffers (in bytes) are
expected to preceed the data payload buffers.

Also add new VIRTIO_SCSI_F_T10_PI feature bit to be used to signal
host support.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Sagi Grimberg <sagig@dev.mellanox.co.il>
Cc: H. Peter Anvin <hpa@zytor.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/linux/virtio_scsi.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
index 4195b97a3def..de429d1f4357 100644
--- a/include/linux/virtio_scsi.h
+++ b/include/linux/virtio_scsi.h
@@ -35,11 +35,23 @@ struct virtio_scsi_cmd_req {
 	u8 lun[8];		/* Logical Unit Number */
 	u64 tag;		/* Command identifier */
 	u8 task_attr;		/* Task attribute */
-	u8 prio;
+	u8 prio;		/* SAM command priority field */
 	u8 crn;
 	u8 cdb[VIRTIO_SCSI_CDB_SIZE];
 } __packed;
 
+/* SCSI command request, followed by protection information */
+struct virtio_scsi_cmd_req_pi {
+	u8 lun[8];		/* Logical Unit Number */
+	u64 tag;		/* Command identifier */
+	u8 task_attr;		/* Task attribute */
+	u8 prio;		/* SAM command priority field */
+	u8 crn;
+	u32 pi_bytesout;	/* DataOUT PI Number of bytes */
+	u32 pi_bytesin;		/* DataIN PI Number of bytes */
+	u8 cdb[VIRTIO_SCSI_CDB_SIZE];
+} __packed;
+
 /* Response, followed by sense data and data-in */
 struct virtio_scsi_cmd_resp {
 	u32 sense_len;		/* Sense data length */
@@ -97,6 +109,7 @@ struct virtio_scsi_config {
 #define VIRTIO_SCSI_F_INOUT                    0
 #define VIRTIO_SCSI_F_HOTPLUG                  1
 #define VIRTIO_SCSI_F_CHANGE                   2
+#define VIRTIO_SCSI_F_T10_PI                   3
 
 /* Response codes */
 #define VIRTIO_SCSI_S_OK                       0
-- 
cgit v1.2.3-71-gd317


From 09b93088d75009807b72293f26e2634430ce5ba9 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sun, 11 May 2014 15:15:11 +0300
Subject: IB: Add a QP creation flag to use GFP_NOIO allocations

This addresses a problem where NFS client writes over IPoIB connected
mode may deadlock on memory allocation/writeback.

The problem is not directly memory reclamation.  There is an indirect
dependency between network filesystems writing back pages and
ipoib_cm_tx_init() due to how a kworker is used.  Page reclaim cannot
make forward progress until ipoib_cm_tx_init() succeeds and it is
stuck in page reclaim itself waiting for network transmission.
Ordinarily this situation may be avoided by having the caller use
GFP_NOFS but ipoib_cm_tx_init() does not have that information.

To address this, take a general approach and add a new QP creation
flag that tells the low-level hardware driver to use GFP_NOIO for the
memory allocations related to the new QP.

Use the new flag in the ipoib connected mode path, and if the driver
doesn't support it, re-issue the QP creation without the flag.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 18 +++++++++++++++---
 include/rdma/ib_verbs.h                 |  1 +
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 1377f85911c2..933efcea0d03 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1030,10 +1030,20 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
 		.cap.max_send_sge	= 1,
 		.sq_sig_type		= IB_SIGNAL_ALL_WR,
 		.qp_type		= IB_QPT_RC,
-		.qp_context		= tx
+		.qp_context		= tx,
+		.create_flags		= IB_QP_CREATE_USE_GFP_NOIO
 	};
 
-	return ib_create_qp(priv->pd, &attr);
+	struct ib_qp *tx_qp;
+
+	tx_qp = ib_create_qp(priv->pd, &attr);
+	if (PTR_ERR(tx_qp) == -EINVAL) {
+		ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n",
+			   priv->ca->name);
+		attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
+		tx_qp = ib_create_qp(priv->pd, &attr);
+	}
+	return tx_qp;
 }
 
 static int ipoib_cm_send_req(struct net_device *dev,
@@ -1104,12 +1114,14 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
 	struct ipoib_dev_priv *priv = netdev_priv(p->dev);
 	int ret;
 
-	p->tx_ring = vzalloc(ipoib_sendq_size * sizeof *p->tx_ring);
+	p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
+			       GFP_NOIO, PAGE_KERNEL);
 	if (!p->tx_ring) {
 		ipoib_warn(priv, "failed to allocate tx ring\n");
 		ret = -ENOMEM;
 		goto err_tx;
 	}
+	memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
 
 	p->qp = ipoib_cm_create_tx_qp(p->dev, p);
 	if (IS_ERR(p->qp)) {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index acd825182977..d75b02f9c80d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -783,6 +783,7 @@ enum ib_qp_create_flags {
 	IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK	= 1 << 1,
 	IB_QP_CREATE_NETIF_QP			= 1 << 5,
 	IB_QP_CREATE_SIGNATURE_EN		= 1 << 6,
+	IB_QP_CREATE_USE_GFP_NOIO		= 1 << 7,
 	/* reserve bits 26-31 for low level drivers' internal use */
 	IB_QP_CREATE_RESERVED_START		= 1 << 26,
 	IB_QP_CREATE_RESERVED_END		= 1 << 31,
-- 
cgit v1.2.3-71-gd317


From 40f2287bd583f4df4c602c1a29a48df2730fb6d4 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Sun, 11 May 2014 15:15:12 +0300
Subject: IB/mlx4: Implement IB_QP_CREATE_USE_GFP_NOIO

Modify the various routines used to allocate memory resources which
serve QPs in mlx4 to get an input GFP directive.  Have the Ethernet
driver to use GFP_KERNEL in it's QP allocations as done prior to this
commit, and the IB driver to use GFP_NOIO when the IB verbs
IB_QP_CREATE_USE_GFP_NOIO QP creation flag is provided.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/cq.c                    |  6 ++---
 drivers/infiniband/hw/mlx4/mlx4_ib.h               |  1 +
 drivers/infiniband/hw/mlx4/qp.c                    | 30 ++++++++++++----------
 drivers/infiniband/hw/mlx4/srq.c                   |  7 ++---
 drivers/net/ethernet/mellanox/mlx4/alloc.c         | 27 +++++++++----------
 drivers/net/ethernet/mellanox/mlx4/cq.c            |  4 +--
 drivers/net/ethernet/mellanox/mlx4/en_rx.c         |  6 ++---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c         |  2 +-
 drivers/net/ethernet/mellanox/mlx4/icm.c           |  7 ++---
 drivers/net/ethernet/mellanox/mlx4/icm.h           |  3 ++-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  4 +--
 drivers/net/ethernet/mellanox/mlx4/mr.c            | 17 ++++++------
 drivers/net/ethernet/mellanox/mlx4/qp.c            | 20 +++++++--------
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |  4 +--
 drivers/net/ethernet/mellanox/mlx4/srq.c           |  4 +--
 include/linux/mlx4/device.h                        | 10 +++++---
 16 files changed, 82 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 5f640814cc81..1066eec854a9 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 	int err;
 
 	err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
-			     PAGE_SIZE * 2, &buf->buf);
+			     PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
 
 	if (err)
 		goto out;
@@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 	if (err)
 		goto err_buf;
 
-	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
+	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
 	if (err)
 		goto err_mtt;
 
@@ -209,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 
 		uar = &to_mucontext(context)->uar;
 	} else {
-		err = mlx4_db_alloc(dev->dev, &cq->db, 1);
+		err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
 		if (err)
 			goto err_cq;
 
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index f589522fddfd..bb8c9dd442ae 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -156,6 +156,7 @@ enum mlx4_ib_qp_flags {
 	MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
 	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
 	MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+	MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
 	MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
 	MLX4_IB_SRIOV_SQP = 1 << 31,
 };
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 41308af4163c..8710baf60bb9 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -610,7 +610,8 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
 
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
+			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
+			    gfp_t gfp)
 {
 	int qpn;
 	int err;
@@ -748,14 +749,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			goto err;
 
 		if (qp_has_rq(init_attr)) {
-			err = mlx4_db_alloc(dev->dev, &qp->db, 0);
+			err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
 			if (err)
 				goto err;
 
 			*qp->db.db = 0;
 		}
 
-		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
+		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
 			err = -ENOMEM;
 			goto err_db;
 		}
@@ -765,13 +766,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		if (err)
 			goto err_buf;
 
-		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
+		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
 		if (err)
 			goto err_mtt;
 
-		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-
+		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
+		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
 		if (!qp->sq.wrid || !qp->rq.wrid) {
 			err = -ENOMEM;
 			goto err_wrid;
@@ -801,7 +801,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			goto err_proxy;
 	}
 
-	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
 	if (err)
 		goto err_qpn;
 
@@ -1040,7 +1040,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 	struct mlx4_ib_qp *qp = NULL;
 	int err;
 	u16 xrcdn = 0;
+	gfp_t gfp;
 
+	gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
+		GFP_NOIO : GFP_KERNEL;
 	/*
 	 * We only support LSO, vendor flag1, and multicast loopback blocking,
 	 * and only for kernel UD QPs.
@@ -1049,7 +1052,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 					MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
 					MLX4_IB_SRIOV_TUNNEL_QP |
 					MLX4_IB_SRIOV_SQP |
-					MLX4_IB_QP_NETIF))
+					MLX4_IB_QP_NETIF |
+					MLX4_IB_QP_CREATE_USE_GFP_NOIO))
 		return ERR_PTR(-EINVAL);
 
 	if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
@@ -1059,7 +1063,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 
 	if (init_attr->create_flags &&
 	    (udata ||
-	     ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
+	     ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
 	      init_attr->qp_type != IB_QPT_UD) ||
 	     ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
 	      init_attr->qp_type > IB_QPT_GSI)))
@@ -1079,7 +1083,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 	case IB_QPT_RAW_PACKET:
-		qp = kzalloc(sizeof *qp, GFP_KERNEL);
+		qp = kzalloc(sizeof *qp, gfp);
 		if (!qp)
 			return ERR_PTR(-ENOMEM);
 		qp->pri.vid = 0xFFFF;
@@ -1088,7 +1092,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 	case IB_QPT_UD:
 	{
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr,
-				       udata, 0, &qp);
+				       udata, 0, &qp, gfp);
 		if (err)
 			return ERR_PTR(err);
 
@@ -1106,7 +1110,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
 				       get_sqp_num(to_mdev(pd->device), init_attr),
-				       &qp);
+				       &qp, gfp);
 		if (err)
 			return ERR_PTR(err);
 
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 60c5fb025fc7..62d9285300af 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -134,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 		if (err)
 			goto err_mtt;
 	} else {
-		err = mlx4_db_alloc(dev->dev, &srq->db, 0);
+		err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
 		if (err)
 			goto err_srq;
 
 		*srq->db.db = 0;
 
-		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
+				   GFP_KERNEL)) {
 			err = -ENOMEM;
 			goto err_db;
 		}
@@ -165,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 		if (err)
 			goto err_buf;
 
-		err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
+		err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
 		if (err)
 			goto err_mtt;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index c3ad464d0627..b0297da50304 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -171,7 +171,7 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap)
  */
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-		   struct mlx4_buf *buf)
+		   struct mlx4_buf *buf, gfp_t gfp)
 {
 	dma_addr_t t;
 
@@ -180,7 +180,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		buf->npages       = 1;
 		buf->page_shift   = get_order(size) + PAGE_SHIFT;
 		buf->direct.buf   = dma_alloc_coherent(&dev->pdev->dev,
-						       size, &t, GFP_KERNEL);
+						       size, &t, gfp);
 		if (!buf->direct.buf)
 			return -ENOMEM;
 
@@ -200,14 +200,14 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		buf->npages      = buf->nbufs;
 		buf->page_shift  = PAGE_SHIFT;
 		buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
-					   GFP_KERNEL);
+					   gfp);
 		if (!buf->page_list)
 			return -ENOMEM;
 
 		for (i = 0; i < buf->nbufs; ++i) {
 			buf->page_list[i].buf =
 				dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
-						   &t, GFP_KERNEL);
+						   &t, gfp);
 			if (!buf->page_list[i].buf)
 				goto err_free;
 
@@ -218,7 +218,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 
 		if (BITS_PER_LONG == 64) {
 			struct page **pages;
-			pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
+			pages = kmalloc(sizeof *pages * buf->nbufs, gfp);
 			if (!pages)
 				goto err_free;
 			for (i = 0; i < buf->nbufs; ++i)
@@ -260,11 +260,12 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
 }
 EXPORT_SYMBOL_GPL(mlx4_buf_free);
 
-static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
+static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device,
+						 gfp_t gfp)
 {
 	struct mlx4_db_pgdir *pgdir;
 
-	pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL);
+	pgdir = kzalloc(sizeof *pgdir, gfp);
 	if (!pgdir)
 		return NULL;
 
@@ -272,7 +273,7 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
 	pgdir->bits[0] = pgdir->order0;
 	pgdir->bits[1] = pgdir->order1;
 	pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
-					    &pgdir->db_dma, GFP_KERNEL);
+					    &pgdir->db_dma, gfp);
 	if (!pgdir->db_page) {
 		kfree(pgdir);
 		return NULL;
@@ -312,7 +313,7 @@ found:
 	return 0;
 }
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order)
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_db_pgdir *pgdir;
@@ -324,7 +325,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order)
 		if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
 			goto out;
 
-	pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev));
+	pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp);
 	if (!pgdir) {
 		ret = -ENOMEM;
 		goto out;
@@ -376,13 +377,13 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
 {
 	int err;
 
-	err = mlx4_db_alloc(dev, &wqres->db, 1);
+	err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL);
 	if (err)
 		return err;
 
 	*wqres->db.db = 0;
 
-	err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf);
+	err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL);
 	if (err)
 		goto err_db;
 
@@ -391,7 +392,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
 	if (err)
 		goto err_buf;
 
-	err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf);
+	err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL);
 	if (err)
 		goto err_mtt;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 0487121e4a0f..c90cde5b4aee 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -173,11 +173,11 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
 	if (*cqn == -1)
 		return -ENOMEM;
 
-	err = mlx4_table_get(dev, &cq_table->table, *cqn);
+	err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn);
+	err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL);
 	if (err)
 		goto err_put;
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index ba049ae88749..87857a6463eb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -972,7 +972,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
 	if (!context)
 		return -ENOMEM;
 
-	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
+	err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed to allocate qp #%x\n", qpn);
 		goto out;
@@ -1012,7 +1012,7 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
 		en_err(priv, "Failed reserving drop qpn\n");
 		return err;
 	}
-	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp);
+	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed allocating drop qp\n");
 		mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
@@ -1071,7 +1071,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 	}
 
 	/* Configure RSS indirection qp */
-	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
+	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed to allocate RSS indirection QP\n");
 		goto rss_err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index dd1f6d346459..bc0cc1eb214d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -113,7 +113,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 	       ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);
 
 	ring->qpn = qpn;
-	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp);
+	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
 		goto err_map;
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 5fbf4924c272..eb1747e1937d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -245,7 +245,8 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
 			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
+		   int gfp)
 {
 	u32 i = (obj & (table->num_obj - 1)) /
 			(MLX4_TABLE_CHUNK_SIZE / table->obj_size);
@@ -259,7 +260,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
 	}
 
 	table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
-				       (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+				       (table->lowmem ? gfp : GFP_HIGHUSER) |
 				       __GFP_NOWARN, table->coherent);
 	if (!table->icm[i]) {
 		ret = -ENOMEM;
@@ -356,7 +357,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 	u32 i;
 
 	for (i = start; i <= end; i += inc) {
-		err = mlx4_table_get(dev, table, i);
+		err = mlx4_table_get(dev, table, i, GFP_KERNEL);
 		if (err)
 			goto fail;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h
index dee67fa39107..067e6e0af36c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.h
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.h
@@ -71,7 +71,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 				gfp_t gfp_mask, int coherent);
 void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
+		   int gfp);
 void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 			 u32 start, u32 end);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index f9c465101963..627a54ef2955 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -888,7 +888,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
 void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn);
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp);
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn);
 int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn);
 void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn);
@@ -896,7 +896,7 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn);
 void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn);
 int __mlx4_mpt_reserve(struct mlx4_dev *dev);
 void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index);
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index);
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp);
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index);
 u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order);
 void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 24835853b753..4c71dafad217 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -364,14 +364,14 @@ static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
 	__mlx4_mpt_release(dev, index);
 }
 
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
 {
 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
 
-	return mlx4_table_get(dev, &mr_table->dmpt_table, index);
+	return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp);
 }
 
-static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
 {
 	u64 param = 0;
 
@@ -382,7 +382,7 @@ static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
 							MLX4_CMD_TIME_CLASS_A,
 							MLX4_CMD_WRAPPED);
 	}
-	return __mlx4_mpt_alloc_icm(dev, index);
+	return __mlx4_mpt_alloc_icm(dev, index, gfp);
 }
 
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
@@ -469,7 +469,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 	struct mlx4_mpt_entry *mpt_entry;
 	int err;
 
-	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key));
+	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL);
 	if (err)
 		return err;
 
@@ -627,13 +627,14 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 EXPORT_SYMBOL_GPL(mlx4_write_mtt);
 
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-		       struct mlx4_buf *buf)
+		       struct mlx4_buf *buf, gfp_t gfp)
 {
 	u64 *page_list;
 	int err;
 	int i;
 
-	page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL);
+	page_list = kmalloc(buf->npages * sizeof *page_list,
+			    gfp);
 	if (!page_list)
 		return -ENOMEM;
 
@@ -680,7 +681,7 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw)
 	struct mlx4_mpt_entry *mpt_entry;
 	int err;
 
-	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key));
+	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 61d64ebffd56..917f0d0ba7c6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -272,29 +272,29 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
 
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_qp_table *qp_table = &priv->qp_table;
 	int err;
 
-	err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp);
 	if (err)
 		goto err_put_qp;
 
-	err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp);
 	if (err)
 		goto err_put_auxc;
 
-	err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp);
 	if (err)
 		goto err_put_altc;
 
-	err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp);
 	if (err)
 		goto err_put_rdmarc;
 
@@ -316,7 +316,7 @@ err_out:
 	return err;
 }
 
-static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
+static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, int gfp)
 {
 	u64 param = 0;
 
@@ -326,7 +326,7 @@ static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
 				    MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
 				    MLX4_CMD_WRAPPED);
 	}
-	return __mlx4_qp_alloc_icm(dev, qpn);
+	return __mlx4_qp_alloc_icm(dev, qpn, gfp);
 }
 
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
@@ -355,7 +355,7 @@ static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
 		__mlx4_qp_free_icm(dev, qpn);
 }
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_qp_table *qp_table = &priv->qp_table;
@@ -366,7 +366,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
 
 	qp->qpn = qpn;
 
-	err = mlx4_qp_alloc_icm(dev, qpn);
+	err = mlx4_qp_alloc_icm(dev, qpn, gfp);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 1c3fdd4a1f7d..45da913b5679 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1532,7 +1532,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 			return err;
 
 		if (!fw_reserved(dev, qpn)) {
-			err = __mlx4_qp_alloc_icm(dev, qpn);
+			err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL);
 			if (err) {
 				res_abort_move(dev, slave, RES_QP, qpn);
 				return err;
@@ -1619,7 +1619,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 		if (err)
 			return err;
 
-		err = __mlx4_mpt_alloc_icm(dev, mpt->key);
+		err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL);
 		if (err) {
 			res_abort_move(dev, slave, RES_MPT, id);
 			return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 98faf870b0b0..67146624eb58 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -103,11 +103,11 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
 	if (*srqn == -1)
 		return -ENOMEM;
 
-	err = mlx4_table_get(dev, &srq_table->table, *srqn);
+	err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn);
+	err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL);
 	if (err)
 		goto err_put;
 	return 0;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ba87bd21295a..be60b002bb37 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -837,7 +837,7 @@ static inline int mlx4_is_slave(struct mlx4_dev *dev)
 }
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-		   struct mlx4_buf *buf);
+		   struct mlx4_buf *buf, gfp_t gfp);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
 static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
 {
@@ -874,9 +874,10 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw);
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		   int start_index, int npages, u64 *page_list);
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-		       struct mlx4_buf *buf);
+		       struct mlx4_buf *buf, gfp_t gfp);
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order);
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order,
+		  gfp_t gfp);
 void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
 
 int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
@@ -892,7 +893,8 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
 void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp,
+		  gfp_t gfp);
 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
-- 
cgit v1.2.3-71-gd317


From d0c94692e0a360828745a469bcf90b5c4f9273d0 Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de@gmail.com>
Date: Thu, 27 Mar 2014 19:59:39 +0100
Subject: drm/edid: Parse and handle HDMI deep color modes.

Check the HDMI cea block for deep color mode bits. If available,
assign the highest supported bpc for a hdmi display, corresponding
to the given deep color modes.

Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/drm_edid.c | 110 ++++++++++++++++++++++++++++++++++++++++++++-
 include/drm/drm_edid.h     |   5 +++
 2 files changed, 113 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index d74239fec291..3989c185579f 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -3422,17 +3422,117 @@ bool drm_rgb_quant_range_selectable(struct edid *edid)
 }
 EXPORT_SYMBOL(drm_rgb_quant_range_selectable);
 
+/**
+ * drm_assign_hdmi_deep_color_info - detect whether monitor supports
+ * hdmi deep color modes and update drm_display_info if so.
+ *
+ * @edid: monitor EDID information
+ * @info: Updated with maximum supported deep color bpc and color format
+ *        if deep color supported.
+ *
+ * Parse the CEA extension according to CEA-861-B.
+ * Return true if HDMI deep color supported, false if not or unknown.
+ */
+static bool drm_assign_hdmi_deep_color_info(struct edid *edid,
+                                            struct drm_display_info *info,
+                                            struct drm_connector *connector)
+{
+	u8 *edid_ext, *hdmi;
+	int i;
+	int start_offset, end_offset;
+	unsigned int dc_bpc = 0;
+
+	edid_ext = drm_find_cea_extension(edid);
+	if (!edid_ext)
+		return false;
+
+	if (cea_db_offsets(edid_ext, &start_offset, &end_offset))
+		return false;
+
+	/*
+	 * Because HDMI identifier is in Vendor Specific Block,
+	 * search it from all data blocks of CEA extension.
+	 */
+	for_each_cea_db(edid_ext, i, start_offset, end_offset) {
+		if (cea_db_is_hdmi_vsdb(&edid_ext[i])) {
+			/* HDMI supports at least 8 bpc */
+			info->bpc = 8;
+
+			hdmi = &edid_ext[i];
+			if (cea_db_payload_len(hdmi) < 6)
+				return false;
+
+			if (hdmi[6] & DRM_EDID_HDMI_DC_30) {
+				dc_bpc = 10;
+				DRM_DEBUG("%s: HDMI sink does deep color 30.\n",
+						  drm_get_connector_name(connector));
+			}
+
+			if (hdmi[6] & DRM_EDID_HDMI_DC_36) {
+				dc_bpc = 12;
+				DRM_DEBUG("%s: HDMI sink does deep color 36.\n",
+						  drm_get_connector_name(connector));
+			}
+
+			if (hdmi[6] & DRM_EDID_HDMI_DC_48) {
+				dc_bpc = 16;
+				DRM_DEBUG("%s: HDMI sink does deep color 48.\n",
+						  drm_get_connector_name(connector));
+			}
+
+			if (dc_bpc > 0) {
+				DRM_DEBUG("%s: Assigning HDMI sink color depth as %d bpc.\n",
+						  drm_get_connector_name(connector), dc_bpc);
+				info->bpc = dc_bpc;
+
+				/*
+				 * Deep color support mandates RGB444 support for all video
+				 * modes and forbids YCRCB422 support for all video modes per
+				 * HDMI 1.3 spec.
+				 */
+				info->color_formats = DRM_COLOR_FORMAT_RGB444;
+
+				/* YCRCB444 is optional according to spec. */
+				if (hdmi[6] & DRM_EDID_HDMI_DC_Y444) {
+					info->color_formats |= DRM_COLOR_FORMAT_YCRCB444;
+					DRM_DEBUG("%s: HDMI sink does YCRCB444 in deep color.\n",
+							  drm_get_connector_name(connector));
+				}
+
+				/*
+				 * Spec says that if any deep color mode is supported at all,
+				 * then deep color 36 bit must be supported.
+				 */
+				if (!(hdmi[6] & DRM_EDID_HDMI_DC_36)) {
+					DRM_DEBUG("%s: HDMI sink should do DC_36, but does not!\n",
+							  drm_get_connector_name(connector));
+				}
+
+				return true;
+			}
+			else {
+				DRM_DEBUG("%s: No deep color support on this HDMI sink.\n",
+						  drm_get_connector_name(connector));
+			}
+		}
+	}
+
+	return false;
+}
+
 /**
  * drm_add_display_info - pull display info out if present
  * @edid: EDID data
  * @info: display info (attached to connector)
+ * @connector: connector whose edid is used to build display info
  *
  * Grab any available display info and stuff it into the drm_display_info
  * structure that's part of the connector.  Useful for tracking bpp and
  * color spaces.
  */
 static void drm_add_display_info(struct edid *edid,
-				 struct drm_display_info *info)
+                                 struct drm_display_info *info,
+                                 struct drm_connector *connector)
 {
 	u8 *edid_ext;
 
@@ -3462,6 +3562,9 @@ static void drm_add_display_info(struct edid *edid,
 			info->color_formats |= DRM_COLOR_FORMAT_YCRCB422;
 	}
 
+	/* HDMI deep color modes supported? Assign to info, if so */
+	drm_assign_hdmi_deep_color_info(edid, info, connector);
+
 	/* Only defined for 1.4 with digital displays */
 	if (edid->revision < 4)
 		return;
@@ -3491,6 +3594,9 @@ static void drm_add_display_info(struct edid *edid,
 		break;
 	}
 
+	DRM_DEBUG("%s: Assigning EDID-1.4 digital sink color depth as %d bpc.\n",
+			  drm_get_connector_name(connector), info->bpc);
+
 	info->color_formats |= DRM_COLOR_FORMAT_RGB444;
 	if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB444)
 		info->color_formats |= DRM_COLOR_FORMAT_YCRCB444;
@@ -3549,7 +3655,7 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
 	if (quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75))
 		edid_fixup_preferred(connector, quirks);
 
-	drm_add_display_info(edid, &connector->display_info);
+	drm_add_display_info(edid, &connector->display_info, connector);
 
 	if (quirks & EDID_QUIRK_FORCE_8BPC)
 		connector->display_info.bpc = 8;
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index a1441c5ac63d..b96031d947a0 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -202,6 +202,11 @@ struct detailed_timing {
 #define DRM_EDID_FEATURE_PM_SUSPEND       (1 << 6)
 #define DRM_EDID_FEATURE_PM_STANDBY       (1 << 7)
 
+#define DRM_EDID_HDMI_DC_48               (1 << 6)
+#define DRM_EDID_HDMI_DC_36               (1 << 5)
+#define DRM_EDID_HDMI_DC_30               (1 << 4)
+#define DRM_EDID_HDMI_DC_Y444             (1 << 3)
+
 struct edid {
 	u8 header[8];
 	/* Vendor & product info */
-- 
cgit v1.2.3-71-gd317


From 688cea83f4396fa98b77a126ed278b89daccccdc Mon Sep 17 00:00:00 2001
From: dingtianhong <dingtianhong@huawei.com>
Date: Fri, 30 May 2014 16:00:56 +0800
Subject: macvlan: add netpoll support

Add netpoll support to macvlan devices. Based on the netpoll support in the 802.1q vlan code.

Tested and macvlan could work well with netconsole.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      | 66 +++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/if_macvlan.h |  3 +++
 2 files changed, 68 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d2dbcfc68ee4..eee9106d1da1 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -33,6 +33,7 @@
 #include <linux/workqueue.h>
 #include <net/rtnetlink.h>
 #include <net/xfrm.h>
+#include <linux/netpoll.h>
 
 #define MACVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
 
@@ -357,12 +358,26 @@ xmit_world:
 	return dev_queue_xmit(skb);
 }
 
+static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (vlan->netpoll)
+		netpoll_send_skb(vlan->netpoll, skb);
+#else
+	BUG();
+#endif
+	return NETDEV_TX_OK;
+}
+
 static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	unsigned int len = skb->len;
 	int ret;
-	const struct macvlan_dev *vlan = netdev_priv(dev);
+	struct macvlan_dev *vlan = netdev_priv(dev);
+
+	if (unlikely(netpoll_tx_running(dev)))
+		return macvlan_netpoll_send_skb(vlan, skb);
 
 	if (vlan->fwd_priv) {
 		skb->dev = vlan->lowerdev;
@@ -788,6 +803,50 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev,
 	return features;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void macvlan_dev_poll_controller(struct net_device *dev)
+{
+	return;
+}
+
+static int macvlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct net_device *real_dev = vlan->lowerdev;
+	struct netpoll *netpoll;
+	int err = 0;
+
+	netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL);
+	err = -ENOMEM;
+	if (!netpoll)
+		goto out;
+
+	err = __netpoll_setup(netpoll, real_dev);
+	if (err) {
+		kfree(netpoll);
+		goto out;
+	}
+
+	vlan->netpoll = netpoll;
+
+out:
+	return err;
+}
+
+static void macvlan_dev_netpoll_cleanup(struct net_device *dev)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct netpoll *netpoll = vlan->netpoll;
+
+	if (!netpoll)
+		return;
+
+	vlan->netpoll = NULL;
+
+	__netpoll_free_async(netpoll);
+}
+#endif	/* CONFIG_NET_POLL_CONTROLLER */
+
 static const struct ethtool_ops macvlan_ethtool_ops = {
 	.get_link		= ethtool_op_get_link,
 	.get_settings		= macvlan_ethtool_get_settings,
@@ -813,6 +872,11 @@ static const struct net_device_ops macvlan_netdev_ops = {
 	.ndo_fdb_del		= macvlan_fdb_del,
 	.ndo_fdb_dump		= ndo_dflt_fdb_dump,
 	.ndo_get_lock_subclass  = macvlan_get_nest_level,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= macvlan_dev_poll_controller,
+	.ndo_netpoll_setup	= macvlan_dev_netpoll_setup,
+	.ndo_netpoll_cleanup	= macvlan_dev_netpoll_cleanup,
+#endif
 };
 
 void macvlan_common_setup(struct net_device *dev)
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index a9a53b12397b..6b2c7cf352a5 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -57,6 +57,9 @@ struct macvlan_dev {
 	netdev_features_t	tap_features;
 	int			minor;
 	int			nest_level;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	struct netpoll		*netpoll;
+#endif
 };
 
 static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
-- 
cgit v1.2.3-71-gd317


From 41c389d72cf0756957450c25c1dbc7d026324df8 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Tue, 27 May 2014 22:39:37 -0700
Subject: bridge: Add bridge ifindex to bridge fdb notify msgs

(This patch was previously posted as RFC at
http://patchwork.ozlabs.org/patch/352677/)

This patch adds NDA_MASTER attribute to neighbour attributes enum for
bridge/master ifindex. And adds NDA_MASTER to bridge fdb notify msgs.

Today bridge fdb notifications dont contain bridge information.
Userspace can derive it from the port information in the fdb
notification. However this is tricky in some scenarious.

Example, bridge port delete notification comes before bridge fdb
delete notifications. And we have seen problems in userspace
when using libnl where, the bridge fdb delete notification handling code
does not understand which bridge this fdb entry is part of because
the bridge and port association has already been deleted.
And these notifications (port membership and fdb) are generated on
separate rtnl groups.

Fixing the order of notifications could possibly solve the problem
for some cases (I can submit a separate patch for that).

This patch chooses to add NDA_MASTER to bridge fdb notify msgs
because it not only solves the problem described above, but also helps
userspace avoid another lookup into link msgs to derive the master index.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/neighbour.h | 1 +
 net/bridge/br_fdb.c            | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index d3ef583104e0..4a1d7e96dfe3 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -24,6 +24,7 @@ enum {
 	NDA_PORT,
 	NDA_VNI,
 	NDA_IFINDEX,
+	NDA_MASTER,
 	__NDA_MAX
 };
 
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 648d0e849595..2c45c069ea1a 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -616,6 +616,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
 
 	if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr))
 		goto nla_put_failure;
+	if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex))
+		goto nla_put_failure;
 	ci.ndm_used	 = jiffies_to_clock_t(now - fdb->used);
 	ci.ndm_confirmed = 0;
 	ci.ndm_updated	 = jiffies_to_clock_t(now - fdb->updated);
@@ -637,6 +639,7 @@ static inline size_t fdb_nlmsg_size(void)
 {
 	return NLMSG_ALIGN(sizeof(struct ndmsg))
 		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+		+ nla_total_size(sizeof(u32)) /* NDA_MASTER */
 		+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
 		+ nla_total_size(sizeof(struct nda_cacheinfo));
 }
-- 
cgit v1.2.3-71-gd317


From fe62d001372388abb15a324148c913f9b43722a8 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Thu, 15 May 2014 01:25:27 +0100
Subject: ethtool: Replace ethtool_ops::{get,set}_rxfh_indir() with
 {get,set}_rxfh()

ETHTOOL_{G,S}RXFHINDIR and ETHTOOL_{G,S}RSSH should work for drivers
regardless of whether they expose the hash key, unless you try to
set a hash key for a driver that doesn't expose it.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 15 ++++++++-------
 drivers/net/ethernet/broadcom/tg3.c                 |  8 ++++----
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c     |  8 ++++----
 drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c  | 15 +++++++++------
 drivers/net/ethernet/intel/igb/igb_ethtool.c        |  9 +++++----
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c     | 10 +++++-----
 drivers/net/ethernet/sfc/ethtool.c                  | 10 +++++-----
 drivers/net/vmxnet3/vmxnet3_ethtool.c               |  8 ++++----
 include/linux/ethtool.h                             |  6 ------
 net/core/ethtool.c                                  |  8 ++++----
 10 files changed, 48 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 03224090ecf9..af138f8aa361 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -3316,7 +3316,7 @@ static u32 bnx2x_get_rxfh_indir_size(struct net_device *dev)
 	return T_ETH_INDIRECTION_TABLE_SIZE;
 }
 
-static int bnx2x_get_rxfh_indir(struct net_device *dev, u32 *indir)
+static int bnx2x_get_rxfh(struct net_device *dev, u32 *indir, u8 *key)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
@@ -3340,14 +3340,15 @@ static int bnx2x_get_rxfh_indir(struct net_device *dev, u32 *indir)
 	return 0;
 }
 
-static int bnx2x_set_rxfh_indir(struct net_device *dev, const u32 *indir)
+static int bnx2x_set_rxfh(struct net_device *dev, const u32 *indir,
+			  const u8 *key)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	size_t i;
 
 	for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) {
 		/*
-		 * The same as in bnx2x_get_rxfh_indir: we can't use a memcpy()
+		 * The same as in bnx2x_get_rxfh: we can't use a memcpy()
 		 * as an internal storage of an indirection table is a u8 array
 		 * while indir->ring_index points to an array of u32.
 		 *
@@ -3471,8 +3472,8 @@ static const struct ethtool_ops bnx2x_ethtool_ops = {
 	.get_rxnfc		= bnx2x_get_rxnfc,
 	.set_rxnfc		= bnx2x_set_rxnfc,
 	.get_rxfh_indir_size	= bnx2x_get_rxfh_indir_size,
-	.get_rxfh_indir		= bnx2x_get_rxfh_indir,
-	.set_rxfh_indir		= bnx2x_set_rxfh_indir,
+	.get_rxfh		= bnx2x_get_rxfh,
+	.set_rxfh		= bnx2x_set_rxfh,
 	.get_channels		= bnx2x_get_channels,
 	.set_channels		= bnx2x_set_channels,
 	.get_module_info	= bnx2x_get_module_info,
@@ -3498,8 +3499,8 @@ static const struct ethtool_ops bnx2x_vf_ethtool_ops = {
 	.get_rxnfc		= bnx2x_get_rxnfc,
 	.set_rxnfc		= bnx2x_set_rxnfc,
 	.get_rxfh_indir_size	= bnx2x_get_rxfh_indir_size,
-	.get_rxfh_indir		= bnx2x_get_rxfh_indir,
-	.set_rxfh_indir		= bnx2x_set_rxfh_indir,
+	.get_rxfh		= bnx2x_get_rxfh,
+	.set_rxfh		= bnx2x_set_rxfh,
 	.get_channels		= bnx2x_get_channels,
 	.set_channels		= bnx2x_set_channels,
 };
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index ccd90156aebc..8c2314ed260c 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12501,7 +12501,7 @@ static u32 tg3_get_rxfh_indir_size(struct net_device *dev)
 	return size;
 }
 
-static int tg3_get_rxfh_indir(struct net_device *dev, u32 *indir)
+static int tg3_get_rxfh(struct net_device *dev, u32 *indir, u8 *key)
 {
 	struct tg3 *tp = netdev_priv(dev);
 	int i;
@@ -12512,7 +12512,7 @@ static int tg3_get_rxfh_indir(struct net_device *dev, u32 *indir)
 	return 0;
 }
 
-static int tg3_set_rxfh_indir(struct net_device *dev, const u32 *indir)
+static int tg3_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key)
 {
 	struct tg3 *tp = netdev_priv(dev);
 	size_t i;
@@ -14044,8 +14044,8 @@ static const struct ethtool_ops tg3_ethtool_ops = {
 	.get_sset_count		= tg3_get_sset_count,
 	.get_rxnfc		= tg3_get_rxnfc,
 	.get_rxfh_indir_size    = tg3_get_rxfh_indir_size,
-	.get_rxfh_indir		= tg3_get_rxfh_indir,
-	.set_rxfh_indir		= tg3_set_rxfh_indir,
+	.get_rxfh		= tg3_get_rxfh,
+	.set_rxfh		= tg3_set_rxfh,
 	.get_channels		= tg3_get_channels,
 	.set_channels		= tg3_set_channels,
 	.get_ts_info		= tg3_get_ts_info,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 266a5bc6aedf..8cf6be93f491 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2739,7 +2739,7 @@ static u32 get_rss_table_size(struct net_device *dev)
 	return pi->rss_size;
 }
 
-static int get_rss_table(struct net_device *dev, u32 *p)
+static int get_rss_table(struct net_device *dev, u32 *p, u8 *key)
 {
 	const struct port_info *pi = netdev_priv(dev);
 	unsigned int n = pi->rss_size;
@@ -2749,7 +2749,7 @@ static int get_rss_table(struct net_device *dev, u32 *p)
 	return 0;
 }
 
-static int set_rss_table(struct net_device *dev, const u32 *p)
+static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key)
 {
 	unsigned int i;
 	struct port_info *pi = netdev_priv(dev);
@@ -2851,8 +2851,8 @@ static const struct ethtool_ops cxgb_ethtool_ops = {
 	.set_wol           = set_wol,
 	.get_rxnfc         = get_rxnfc,
 	.get_rxfh_indir_size = get_rss_table_size,
-	.get_rxfh_indir    = get_rss_table,
-	.set_rxfh_indir    = set_rss_table,
+	.get_rxfh	   = get_rss_table,
+	.set_rxfh	   = set_rss_table,
 	.flash_device      = set_flash,
 };
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 77e786d2d0e0..dbc8986c2dae 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -626,13 +626,14 @@ static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev)
 }
 
 /**
- * i40evf_get_rxfh_indir - get the rx flow hash indirection table
+ * i40evf_get_rxfh - get the rx flow hash indirection table
  * @netdev: network interface device structure
  * @indir: indirection table
+ * @key: hash key (will be %NULL until get_rxfh_key_size is implemented)
  *
  * Reads the indirection table directly from the hardware. Always returns 0.
  **/
-static int i40evf_get_rxfh_indir(struct net_device *netdev, u32 *indir)
+static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 	struct i40e_hw *hw = &adapter->hw;
@@ -650,14 +651,16 @@ static int i40evf_get_rxfh_indir(struct net_device *netdev, u32 *indir)
 }
 
 /**
- * i40evf_set_rxfh_indir - set the rx flow hash indirection table
+ * i40evf_set_rxfh - set the rx flow hash indirection table
  * @netdev: network interface device structure
  * @indir: indirection table
+ * @key: hash key (will be %NULL until get_rxfh_key_size is implemented)
  *
  * Returns -EINVAL if the table specifies an inavlid queue id, otherwise
  * returns 0 after programming the table.
  **/
-static int i40evf_set_rxfh_indir(struct net_device *netdev, const u32 *indir)
+static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir,
+			   const u8 *key)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 	struct i40e_hw *hw = &adapter->hw;
@@ -691,8 +694,8 @@ static struct ethtool_ops i40evf_ethtool_ops = {
 	.get_rxnfc		= i40evf_get_rxnfc,
 	.set_rxnfc		= i40evf_set_rxnfc,
 	.get_rxfh_indir_size	= i40evf_get_rxfh_indir_size,
-	.get_rxfh_indir		= i40evf_get_rxfh_indir,
-	.set_rxfh_indir		= i40evf_set_rxfh_indir,
+	.get_rxfh		= i40evf_get_rxfh,
+	.set_rxfh		= i40evf_set_rxfh,
 	.get_channels		= i40evf_get_channels,
 };
 
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index a84297c85fb1..d8bbcf1873ca 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -2830,7 +2830,7 @@ static u32 igb_get_rxfh_indir_size(struct net_device *netdev)
 	return IGB_RETA_SIZE;
 }
 
-static int igb_get_rxfh_indir(struct net_device *netdev, u32 *indir)
+static int igb_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	int i;
@@ -2876,7 +2876,8 @@ void igb_write_rss_indir_tbl(struct igb_adapter *adapter)
 	}
 }
 
-static int igb_set_rxfh_indir(struct net_device *netdev, const u32 *indir)
+static int igb_set_rxfh(struct net_device *netdev, const u32 *indir,
+			const u8 *key)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -3025,8 +3026,8 @@ static const struct ethtool_ops igb_ethtool_ops = {
 	.get_module_info	= igb_get_module_info,
 	.get_module_eeprom	= igb_get_module_eeprom,
 	.get_rxfh_indir_size	= igb_get_rxfh_indir_size,
-	.get_rxfh_indir		= igb_get_rxfh_indir,
-	.set_rxfh_indir		= igb_set_rxfh_indir,
+	.get_rxfh		= igb_get_rxfh,
+	.set_rxfh		= igb_set_rxfh,
 	.get_channels		= igb_get_channels,
 	.set_channels		= igb_set_channels,
 	.begin			= igb_ethtool_begin,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index a72d99fd7a2d..263a1c7a3370 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -564,7 +564,7 @@ static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev)
 	return priv->rx_ring_num;
 }
 
-static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index)
+static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
@@ -582,8 +582,8 @@ static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index)
 	return err;
 }
 
-static int mlx4_en_set_rxfh_indir(struct net_device *dev,
-		const u32 *ring_index)
+static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index,
+			    const u8 *key)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
@@ -1224,8 +1224,8 @@ const struct ethtool_ops mlx4_en_ethtool_ops = {
 	.get_rxnfc = mlx4_en_get_rxnfc,
 	.set_rxnfc = mlx4_en_set_rxnfc,
 	.get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size,
-	.get_rxfh_indir = mlx4_en_get_rxfh_indir,
-	.set_rxfh_indir = mlx4_en_set_rxfh_indir,
+	.get_rxfh = mlx4_en_get_rxfh,
+	.set_rxfh = mlx4_en_set_rxfh,
 	.get_channels = mlx4_en_get_channels,
 	.set_channels = mlx4_en_set_channels,
 	.get_ts_info = mlx4_en_get_ts_info,
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 0de8b07c24c2..74739c4b9997 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -1033,7 +1033,7 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
 		0 : ARRAY_SIZE(efx->rx_indir_table));
 }
 
-static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, u32 *indir)
+static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
@@ -1041,8 +1041,8 @@ static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, u32 *indir)
 	return 0;
 }
 
-static int efx_ethtool_set_rxfh_indir(struct net_device *net_dev,
-				      const u32 *indir)
+static int efx_ethtool_set_rxfh(struct net_device *net_dev,
+				const u32 *indir, const u8 *key)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
@@ -1125,8 +1125,8 @@ const struct ethtool_ops efx_ethtool_ops = {
 	.get_rxnfc		= efx_ethtool_get_rxnfc,
 	.set_rxnfc		= efx_ethtool_set_rxnfc,
 	.get_rxfh_indir_size	= efx_ethtool_get_rxfh_indir_size,
-	.get_rxfh_indir		= efx_ethtool_get_rxfh_indir,
-	.set_rxfh_indir		= efx_ethtool_set_rxfh_indir,
+	.get_rxfh		= efx_ethtool_get_rxfh,
+	.set_rxfh		= efx_ethtool_set_rxfh,
 	.get_ts_info		= efx_ethtool_get_ts_info,
 	.get_module_info	= efx_ethtool_get_module_info,
 	.get_module_eeprom	= efx_ethtool_get_module_eeprom,
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 00e120296e92..9396cca93b09 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -579,7 +579,7 @@ vmxnet3_get_rss_indir_size(struct net_device *netdev)
 }
 
 static int
-vmxnet3_get_rss_indir(struct net_device *netdev, u32 *p)
+vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	struct UPT1_RSSConf *rssConf = adapter->rss_conf;
@@ -592,7 +592,7 @@ vmxnet3_get_rss_indir(struct net_device *netdev, u32 *p)
 }
 
 static int
-vmxnet3_set_rss_indir(struct net_device *netdev, const u32 *p)
+vmxnet3_set_rss(struct net_device *netdev, const u32 *p, const u8 *key)
 {
 	unsigned int i;
 	unsigned long flags;
@@ -628,8 +628,8 @@ static const struct ethtool_ops vmxnet3_ethtool_ops = {
 	.get_rxnfc         = vmxnet3_get_rxnfc,
 #ifdef VMXNET3_RSS
 	.get_rxfh_indir_size = vmxnet3_get_rss_indir_size,
-	.get_rxfh_indir    = vmxnet3_get_rss_indir,
-	.set_rxfh_indir    = vmxnet3_set_rss_indir,
+	.get_rxfh          = vmxnet3_get_rss,
+	.set_rxfh          = vmxnet3_set_rss,
 #endif
 };
 
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 874fde01d398..e658229fee39 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -158,15 +158,11 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
  *	Returns zero if not supported for this specific device.
  * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
  *	Returns zero if not supported for this specific device.
- * @get_rxfh_indir: Get the contents of the RX flow hash indirection table.
- *	Will not be called if @get_rxfh_indir_size returns zero.
  * @get_rxfh: Get the contents of the RX flow hash indirection table and hash
  *	key.
  *	Will only be called if one or both of @get_rxfh_indir_size and
  *	@get_rxfh_key_size are implemented and return non-zero.
  *	Returns a negative error code or zero.
- * @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
- *	Will not be called if @get_rxfh_indir_size returns zero.
  * @set_rxfh: Set the contents of the RX flow hash indirection table and/or
  *	hash key.  In case only the indirection table or hash key is to be
  *	changed, the other argument will be %NULL.
@@ -248,8 +244,6 @@ struct ethtool_ops {
 	int	(*get_rxfh)(struct net_device *, u32 *indir, u8 *key);
 	int	(*set_rxfh)(struct net_device *, const u32 *indir,
 			    const u8 *key);
-	int	(*get_rxfh_indir)(struct net_device *, u32 *);
-	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*set_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*get_dump_flag)(struct net_device *, struct ethtool_dump *);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index b8857348bdf3..8ae452afb545 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -582,7 +582,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
 	int ret;
 
 	if (!dev->ethtool_ops->get_rxfh_indir_size ||
-	    !dev->ethtool_ops->get_rxfh_indir)
+	    !dev->ethtool_ops->get_rxfh)
 		return -EOPNOTSUPP;
 	dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
 	if (dev_size == 0)
@@ -608,7 +608,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
 	if (!indir)
 		return -ENOMEM;
 
-	ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
+	ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL);
 	if (ret)
 		goto out;
 
@@ -632,7 +632,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 	int ret;
 	u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
 
-	if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
+	if (!ops->get_rxfh_indir_size || !ops->set_rxfh ||
 	    !ops->get_rxnfc)
 		return -EOPNOTSUPP;
 
@@ -669,7 +669,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 			goto out;
 	}
 
-	ret = ops->set_rxfh_indir(dev, indir);
+	ret = ops->set_rxfh(dev, indir, NULL);
 
 out:
 	kfree(indir);
-- 
cgit v1.2.3-71-gd317


From 23372af15e638bf3ce0764554db3b5e58bf7ceb8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Fri, 9 May 2014 22:45:08 -0400
Subject: NVMe: Update data structures for NVMe 1.2

Include changes from the current set of ratified Technical Proposals
for NVMe 1.2.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/uapi/linux/nvme.h | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index e74da782d69c..a6fb2a360577 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -27,7 +27,12 @@ struct nvme_id_power_state {
 	__u8			read_lat;
 	__u8			write_tput;
 	__u8			write_lat;
-	__u8			rsvd16[16];
+	__le16			idle_power;
+	__u8			idle_scale;
+	__u8			rsvd19;
+	__le16			active_power;
+	__u8			active_work_scale;
+	__u8			rsvd23[9];
 };
 
 enum {
@@ -46,7 +51,8 @@ struct nvme_id_ctrl {
 	__u8			mic;
 	__u8			mdts;
 	__u16			cntlid;
-	__u8			rsvd80[176];
+	__u32			ver;
+	__u8			rsvd84[172];
 	__le16			oacs;
 	__u8			acl;
 	__u8			aerl;
@@ -56,7 +62,9 @@ struct nvme_id_ctrl {
 	__u8			npss;
 	__u8			avscc;
 	__u8			apsta;
-	__u8			rsvd266[246];
+	__le16			wctemp;
+	__le16			cctemp;
+	__u8			rsvd270[242];
 	__u8			sqes;
 	__u8			cqes;
 	__u8			rsvd514[2];
@@ -102,7 +110,12 @@ struct nvme_id_ns {
 	__u8			dps;
 	__u8			nmic;
 	__u8			rescap;
-	__u8			rsvd32[88];
+	__u8			fpi;
+	__u8			rsvd33;
+	__le16			nawun;
+	__le16			nawupf;
+	__le16			nacwu;
+	__u8			rsvd40[80];
 	__u8			eui64[8];
 	struct nvme_lbaf	lbaf[16];
 	__u8			rsvd192[192];
@@ -134,7 +147,10 @@ struct nvme_smart_log {
 	__u8			unsafe_shutdowns[16];
 	__u8			media_errors[16];
 	__u8			num_err_log_entries[16];
-	__u8			rsvd192[320];
+	__le32			warning_temp_time;
+	__le32			critical_comp_time;
+	__le16			temp_sensor[8];
+	__u8			rsvd216[296];
 };
 
 enum {
-- 
cgit v1.2.3-71-gd317


From 1dd40c3ecd9b8a4ab91dbf2e6ce10b82a3b5ae63 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 14 Mar 2014 18:41:24 -0400
Subject: dm: introduce dm_accept_partial_bio

The function dm_accept_partial_bio allows the target to specify how many
sectors of the current bio it will process.  If the target only wants to
accept part of the bio, it calls dm_accept_partial_bio and the DM core
sends the rest of the data in next bio.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c               | 59 +++++++++++++++++++++++++++++++++++++------
 include/linux/device-mapper.h |  2 ++
 2 files changed, 53 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 368a20dd85c2..97940fc8c302 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1110,6 +1110,46 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
 }
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
 
+/*
+ * A target may call dm_accept_partial_bio only from the map routine.  It is
+ * allowed for all bio types except REQ_FLUSH.
+ *
+ * dm_accept_partial_bio informs the dm that the target only wants to process
+ * additional n_sectors sectors of the bio and the rest of the data should be
+ * sent in a next bio.
+ *
+ * A diagram that explains the arithmetics:
+ * +--------------------+---------------+-------+
+ * |         1          |       2       |   3   |
+ * +--------------------+---------------+-------+
+ *
+ * <-------------- *tio->len_ptr --------------->
+ *                      <------- bi_size ------->
+ *                      <-- n_sectors -->
+ *
+ * Region 1 was already iterated over with bio_advance or similar function.
+ *	(it may be empty if the target doesn't use bio_advance)
+ * Region 2 is the remaining bio size that the target wants to process.
+ *	(it may be empty if region 1 is non-empty, although there is no reason
+ *	 to make it empty)
+ * The target requires that region 3 is to be sent in the next bio.
+ *
+ * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
+ * the partially processed part (the sum of regions 1+2) must be the same for all
+ * copies of the bio.
+ */
+void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
+{
+	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+	BUG_ON(bio->bi_rw & REQ_FLUSH);
+	BUG_ON(bi_size > *tio->len_ptr);
+	BUG_ON(n_sectors > bi_size);
+	*tio->len_ptr -= bi_size - n_sectors;
+	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
+}
+EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
+
 static void __map_bio(struct dm_target_io *tio)
 {
 	int r;
@@ -1200,11 +1240,13 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
 
 static void __clone_and_map_simple_bio(struct clone_info *ci,
 				       struct dm_target *ti,
-				       unsigned target_bio_nr, unsigned len)
+				       unsigned target_bio_nr, unsigned *len)
 {
 	struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr);
 	struct bio *clone = &tio->clone;
 
+	tio->len_ptr = len;
+
 	/*
 	 * Discard requests require the bio's inline iovecs be initialized.
 	 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
@@ -1212,13 +1254,13 @@ static void __clone_and_map_simple_bio(struct clone_info *ci,
 	 */
 	 __bio_clone_fast(clone, ci->bio);
 	if (len)
-		bio_setup_sector(clone, ci->sector, len);
+		bio_setup_sector(clone, ci->sector, *len);
 
 	__map_bio(tio);
 }
 
 static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
-				  unsigned num_bios, unsigned len)
+				  unsigned num_bios, unsigned *len)
 {
 	unsigned target_bio_nr;
 
@@ -1233,13 +1275,13 @@ static int __send_empty_flush(struct clone_info *ci)
 
 	BUG_ON(bio_has_data(ci->bio));
 	while ((ti = dm_table_get_target(ci->map, target_nr++)))
-		__send_duplicate_bios(ci, ti, ti->num_flush_bios, 0);
+		__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
 
 	return 0;
 }
 
 static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
-				     sector_t sector, unsigned len)
+				     sector_t sector, unsigned *len)
 {
 	struct bio *bio = ci->bio;
 	struct dm_target_io *tio;
@@ -1254,7 +1296,8 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti
 
 	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
 		tio = alloc_tio(ci, ti, 0, target_bio_nr);
-		clone_bio(tio, bio, sector, len);
+		tio->len_ptr = len;
+		clone_bio(tio, bio, sector, *len);
 		__map_bio(tio);
 	}
 }
@@ -1306,7 +1349,7 @@ static int __send_changing_extent_only(struct clone_info *ci,
 		else
 			len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
 
-		__send_duplicate_bios(ci, ti, num_bios, len);
+		__send_duplicate_bios(ci, ti, num_bios, &len);
 
 		ci->sector += len;
 	} while (ci->sector_count -= len);
@@ -1345,7 +1388,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
 
 	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
 
-	__clone_and_map_data_bio(ci, ti, ci->sector, len);
+	__clone_and_map_data_bio(ci, ti, ci->sector, &len);
 
 	ci->sector += len;
 	ci->sector_count -= len;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 63da56ed9796..0adca299f238 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -291,6 +291,7 @@ struct dm_target_io {
 	struct dm_io *io;
 	struct dm_target *ti;
 	unsigned target_bio_nr;
+	unsigned *len_ptr;
 	struct bio clone;
 };
 
@@ -401,6 +402,7 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid);
 struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct dm_target *ti);
 int dm_noflush_suspending(struct dm_target *ti);
+void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
 
 struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
-- 
cgit v1.2.3-71-gd317


From bd67608a6127c994e897c49cc4f72d9095925301 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Tue, 3 Jun 2014 23:04:30 -0400
Subject: NVMe: Rename io_timeout to nvme_io_timeout

It's positively immoral to have a global variable called 'io_timeout'.
Keep the module parameter called io_timeout, though.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme-core.c | 4 ++--
 include/linux/nvme.h      | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index bb6ce311ad44..2af079e571fc 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -54,8 +54,8 @@ static unsigned char admin_timeout = 60;
 module_param(admin_timeout, byte, 0644);
 MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
 
-unsigned char io_timeout = 30;
-module_param(io_timeout, byte, 0644);
+unsigned char nvme_io_timeout = 30;
+module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
 
 static unsigned char retry_time = 30;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 1813cfdb7e80..8541dd920bb7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -62,8 +62,8 @@ enum {
 
 #define NVME_VS(major, minor)	(major << 16 | minor)
 
-extern unsigned char io_timeout;
-#define NVME_IO_TIMEOUT	(io_timeout * HZ)
+extern unsigned char nvme_io_timeout;
+#define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)
 
 /*
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
-- 
cgit v1.2.3-71-gd317


From d5ab2b430b3cec65ba5b30b2e1de8ea46715cb93 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Tue, 3 Jun 2014 14:56:23 +0300
Subject: drm: drop drm_get_connector_name() and drm_get_encoder_name()

No longer used or needed as the structs have a name field.

Acked-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc.c | 20 --------------------
 include/drm/drm_crtc.h     |  2 --
 2 files changed, 22 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 986531395872..f6664a75ad57 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -256,26 +256,6 @@ void drm_connector_ida_destroy(void)
 		ida_destroy(&drm_connector_enum_list[i].ida);
 }
 
-/**
- * drm_get_encoder_name - return a string for encoder
- * @encoder: the encoder to get name for
- */
-const char *drm_get_encoder_name(const struct drm_encoder *encoder)
-{
-	return encoder->name;
-}
-EXPORT_SYMBOL(drm_get_encoder_name);
-
-/**
- * drm_get_connector_name - return a string for connector
- * @connector: the connector to get name for
- */
-const char *drm_get_connector_name(const struct drm_connector *connector)
-{
-	return connector->name;
-}
-EXPORT_SYMBOL(drm_get_connector_name);
-
 /**
  * drm_get_connector_status_name - return a string for connector status
  * @status: connector status to compute name of
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 5c1c31cc11cd..e8fe9d8e135c 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -909,7 +909,6 @@ extern int drm_crtc_check_viewport(const struct drm_crtc *crtc,
 
 extern void drm_encoder_cleanup(struct drm_encoder *encoder);
 
-extern const char *drm_get_connector_name(const struct drm_connector *connector);
 extern const char *drm_get_connector_status_name(enum drm_connector_status status);
 extern const char *drm_get_subpixel_order_name(enum subpixel_order order);
 extern const char *drm_get_dpms_name(int val);
@@ -972,7 +971,6 @@ extern int drm_mode_create_tv_properties(struct drm_device *dev, int num_formats
 				     char *formats[]);
 extern int drm_mode_create_scaling_mode_property(struct drm_device *dev);
 extern int drm_mode_create_dirty_info_property(struct drm_device *dev);
-extern const char *drm_get_encoder_name(const struct drm_encoder *encoder);
 
 extern int drm_mode_connector_attach_encoder(struct drm_connector *connector,
 					     struct drm_encoder *encoder);
-- 
cgit v1.2.3-71-gd317


From a2b34e226ac9fbd20179091fad0ee1a24ad48669 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Sat, 5 Oct 2013 16:36:52 -0400
Subject: drm: helpers to find mode objects

Add a few more useful helpers to find mode objects.

Signed-off-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_crtc.c | 90 +++++++++++++++-------------------------------
 include/drm/drm_crtc.h     | 33 +++++++++++++++++
 2 files changed, 61 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index f6664a75ad57..c4b44be7d464 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -1692,7 +1692,6 @@ int drm_mode_getcrtc(struct drm_device *dev,
 {
 	struct drm_mode_crtc *crtc_resp = data;
 	struct drm_crtc *crtc;
-	struct drm_mode_object *obj;
 	int ret = 0;
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
@@ -1700,13 +1699,11 @@ int drm_mode_getcrtc(struct drm_device *dev,
 
 	drm_modeset_lock_all(dev);
 
-	obj = drm_mode_object_find(dev, crtc_resp->crtc_id,
-				   DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, crtc_resp->crtc_id);
+	if (!crtc) {
 		ret = -ENOENT;
 		goto out;
 	}
-	crtc = obj_to_crtc(obj);
 
 	crtc_resp->x = crtc->x;
 	crtc_resp->y = crtc->y;
@@ -1760,7 +1757,6 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv)
 {
 	struct drm_mode_get_connector *out_resp = data;
-	struct drm_mode_object *obj;
 	struct drm_connector *connector;
 	struct drm_display_mode *mode;
 	int mode_count = 0;
@@ -1784,13 +1780,11 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 
 	mutex_lock(&dev->mode_config.mutex);
 
-	obj = drm_mode_object_find(dev, out_resp->connector_id,
-				   DRM_MODE_OBJECT_CONNECTOR);
-	if (!obj) {
+	connector = drm_connector_find(dev, out_resp->connector_id);
+	if (!connector) {
 		ret = -ENOENT;
 		goto out;
 	}
-	connector = obj_to_connector(obj);
 
 	props_count = connector->properties.count;
 
@@ -1905,7 +1899,6 @@ int drm_mode_getencoder(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
 {
 	struct drm_mode_get_encoder *enc_resp = data;
-	struct drm_mode_object *obj;
 	struct drm_encoder *encoder;
 	int ret = 0;
 
@@ -1913,13 +1906,11 @@ int drm_mode_getencoder(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, enc_resp->encoder_id,
-				   DRM_MODE_OBJECT_ENCODER);
-	if (!obj) {
+	encoder = drm_encoder_find(dev, enc_resp->encoder_id);
+	if (!encoder) {
 		ret = -ENOENT;
 		goto out;
 	}
-	encoder = obj_to_encoder(obj);
 
 	if (encoder->crtc)
 		enc_resp->crtc_id = encoder->crtc->base.id;
@@ -2017,7 +2008,6 @@ int drm_mode_getplane(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv)
 {
 	struct drm_mode_get_plane *plane_resp = data;
-	struct drm_mode_object *obj;
 	struct drm_plane *plane;
 	uint32_t __user *format_ptr;
 	int ret = 0;
@@ -2026,13 +2016,11 @@ int drm_mode_getplane(struct drm_device *dev, void *data,
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, plane_resp->plane_id,
-				   DRM_MODE_OBJECT_PLANE);
-	if (!obj) {
+	plane = drm_plane_find(dev, plane_resp->plane_id);
+	if (!plane) {
 		ret = -ENOENT;
 		goto out;
 	}
-	plane = obj_to_plane(obj);
 
 	if (plane->crtc)
 		plane_resp->crtc_id = plane->crtc->base.id;
@@ -2085,7 +2073,6 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv)
 {
 	struct drm_mode_set_plane *plane_req = data;
-	struct drm_mode_object *obj;
 	struct drm_plane *plane;
 	struct drm_crtc *crtc;
 	struct drm_framebuffer *fb = NULL, *old_fb = NULL;
@@ -2100,14 +2087,12 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
 	 * First, find the plane, crtc, and fb objects.  If not available,
 	 * we don't bother to call the driver.
 	 */
-	obj = drm_mode_object_find(dev, plane_req->plane_id,
-				   DRM_MODE_OBJECT_PLANE);
-	if (!obj) {
+	plane = drm_plane_find(dev, plane_req->plane_id);
+	if (!plane) {
 		DRM_DEBUG_KMS("Unknown plane ID %d\n",
 			      plane_req->plane_id);
 		return -ENOENT;
 	}
-	plane = obj_to_plane(obj);
 
 	/* No fb means shut it down */
 	if (!plane_req->fb_id) {
@@ -2124,15 +2109,13 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	obj = drm_mode_object_find(dev, plane_req->crtc_id,
-				   DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, plane_req->crtc_id);
+	if (!crtc) {
 		DRM_DEBUG_KMS("Unknown crtc ID %d\n",
 			      plane_req->crtc_id);
 		ret = -ENOENT;
 		goto out;
 	}
-	crtc = obj_to_crtc(obj);
 
 	fb = drm_framebuffer_lookup(dev, plane_req->fb_id);
 	if (!fb) {
@@ -2319,7 +2302,6 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
 {
 	struct drm_mode_config *config = &dev->mode_config;
 	struct drm_mode_crtc *crtc_req = data;
-	struct drm_mode_object *obj;
 	struct drm_crtc *crtc;
 	struct drm_connector **connector_set = NULL, *connector;
 	struct drm_framebuffer *fb = NULL;
@@ -2337,14 +2319,12 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
 		return -ERANGE;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, crtc_req->crtc_id,
-				   DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, crtc_req->crtc_id);
+	if (!crtc) {
 		DRM_DEBUG_KMS("Unknown CRTC ID %d\n", crtc_req->crtc_id);
 		ret = -ENOENT;
 		goto out;
 	}
-	crtc = obj_to_crtc(obj);
 	DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id);
 
 	if (crtc_req->mode_valid) {
@@ -2427,15 +2407,13 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
 				goto out;
 			}
 
-			obj = drm_mode_object_find(dev, out_id,
-						   DRM_MODE_OBJECT_CONNECTOR);
-			if (!obj) {
+			connector = drm_connector_find(dev, out_id);
+			if (!connector) {
 				DRM_DEBUG_KMS("Connector id %d unknown\n",
 						out_id);
 				ret = -ENOENT;
 				goto out;
 			}
-			connector = obj_to_connector(obj);
 			DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 					connector->base.id,
 					connector->name);
@@ -2467,7 +2445,6 @@ static int drm_mode_cursor_common(struct drm_device *dev,
 				  struct drm_mode_cursor2 *req,
 				  struct drm_file *file_priv)
 {
-	struct drm_mode_object *obj;
 	struct drm_crtc *crtc;
 	int ret = 0;
 
@@ -2477,12 +2454,11 @@ static int drm_mode_cursor_common(struct drm_device *dev,
 	if (!req->flags || (~DRM_MODE_CURSOR_FLAGS & req->flags))
 		return -EINVAL;
 
-	obj = drm_mode_object_find(dev, req->crtc_id, DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, req->crtc_id);
+	if (!crtc) {
 		DRM_DEBUG_KMS("Unknown CRTC ID %d\n", req->crtc_id);
 		return -ENOENT;
 	}
-	crtc = obj_to_crtc(obj);
 
 	mutex_lock(&crtc->mutex);
 	if (req->flags & DRM_MODE_CURSOR_BO) {
@@ -3439,7 +3415,6 @@ EXPORT_SYMBOL(drm_object_property_get_value);
 int drm_mode_getproperty_ioctl(struct drm_device *dev,
 			       void *data, struct drm_file *file_priv)
 {
-	struct drm_mode_object *obj;
 	struct drm_mode_get_property *out_resp = data;
 	struct drm_property *property;
 	int enum_count = 0;
@@ -3458,12 +3433,11 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev,
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, out_resp->prop_id, DRM_MODE_OBJECT_PROPERTY);
-	if (!obj) {
+	property = drm_property_find(dev, out_resp->prop_id);
+	if (!property) {
 		ret = -ENOENT;
 		goto done;
 	}
-	property = obj_to_property(obj);
 
 	if (property->flags & (DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BITMASK)) {
 		list_for_each_entry(prop_enum, &property->enum_blob_list, head)
@@ -3591,7 +3565,6 @@ static void drm_property_destroy_blob(struct drm_device *dev,
 int drm_mode_getblob_ioctl(struct drm_device *dev,
 			   void *data, struct drm_file *file_priv)
 {
-	struct drm_mode_object *obj;
 	struct drm_mode_get_blob *out_resp = data;
 	struct drm_property_blob *blob;
 	int ret = 0;
@@ -3601,12 +3574,11 @@ int drm_mode_getblob_ioctl(struct drm_device *dev,
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, out_resp->blob_id, DRM_MODE_OBJECT_BLOB);
-	if (!obj) {
+	blob = drm_property_blob_find(dev, out_resp->blob_id);
+	if (!blob) {
 		ret = -ENOENT;
 		goto done;
 	}
-	blob = obj_to_blob(obj);
 
 	if (out_resp->length == blob->length) {
 		blob_ptr = (void __user *)(unsigned long)out_resp->data;
@@ -3988,7 +3960,6 @@ int drm_mode_gamma_set_ioctl(struct drm_device *dev,
 			     void *data, struct drm_file *file_priv)
 {
 	struct drm_mode_crtc_lut *crtc_lut = data;
-	struct drm_mode_object *obj;
 	struct drm_crtc *crtc;
 	void *r_base, *g_base, *b_base;
 	int size;
@@ -3998,12 +3969,11 @@ int drm_mode_gamma_set_ioctl(struct drm_device *dev,
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, crtc_lut->crtc_id);
+	if (!crtc) {
 		ret = -ENOENT;
 		goto out;
 	}
-	crtc = obj_to_crtc(obj);
 
 	if (crtc->funcs->gamma_set == NULL) {
 		ret = -ENOSYS;
@@ -4062,7 +4032,6 @@ int drm_mode_gamma_get_ioctl(struct drm_device *dev,
 			     void *data, struct drm_file *file_priv)
 {
 	struct drm_mode_crtc_lut *crtc_lut = data;
-	struct drm_mode_object *obj;
 	struct drm_crtc *crtc;
 	void *r_base, *g_base, *b_base;
 	int size;
@@ -4072,12 +4041,11 @@ int drm_mode_gamma_get_ioctl(struct drm_device *dev,
 		return -EINVAL;
 
 	drm_modeset_lock_all(dev);
-	obj = drm_mode_object_find(dev, crtc_lut->crtc_id, DRM_MODE_OBJECT_CRTC);
-	if (!obj) {
+	crtc = drm_crtc_find(dev, crtc_lut->crtc_id);
+	if (!crtc) {
 		ret = -ENOENT;
 		goto out;
 	}
-	crtc = obj_to_crtc(obj);
 
 	/* memcpy into gamma store */
 	if (crtc_lut->gamma_size != crtc->gamma_size) {
@@ -4130,7 +4098,6 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 			     void *data, struct drm_file *file_priv)
 {
 	struct drm_mode_crtc_page_flip *page_flip = data;
-	struct drm_mode_object *obj;
 	struct drm_crtc *crtc;
 	struct drm_framebuffer *fb = NULL, *old_fb = NULL;
 	struct drm_pending_vblank_event *e = NULL;
@@ -4144,10 +4111,9 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 	if ((page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC) && !dev->mode_config.async_page_flip)
 		return -EINVAL;
 
-	obj = drm_mode_object_find(dev, page_flip->crtc_id, DRM_MODE_OBJECT_CRTC);
-	if (!obj)
+	crtc = drm_crtc_find(dev, page_flip->crtc_id);
+	if (!crtc)
 		return -ENOENT;
-	crtc = obj_to_crtc(obj);
 
 	mutex_lock(&crtc->mutex);
 	if (crtc->primary->fb == NULL) {
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index e8fe9d8e135c..90bd1a25e2c6 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -1061,6 +1061,15 @@ extern int drm_format_vert_chroma_subsampling(uint32_t format);
 extern const char *drm_get_format_name(uint32_t format);
 
 /* Helpers */
+
+static inline struct drm_plane *drm_plane_find(struct drm_device *dev,
+		uint32_t id)
+{
+	struct drm_mode_object *mo;
+	mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_PLANE);
+	return mo ? obj_to_plane(mo) : NULL;
+}
+
 static inline struct drm_crtc *drm_crtc_find(struct drm_device *dev,
 	uint32_t id)
 {
@@ -1077,6 +1086,30 @@ static inline struct drm_encoder *drm_encoder_find(struct drm_device *dev,
 	return mo ? obj_to_encoder(mo) : NULL;
 }
 
+static inline struct drm_connector *drm_connector_find(struct drm_device *dev,
+		uint32_t id)
+{
+	struct drm_mode_object *mo;
+	mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_CONNECTOR);
+	return mo ? obj_to_connector(mo) : NULL;
+}
+
+static inline struct drm_property *drm_property_find(struct drm_device *dev,
+		uint32_t id)
+{
+	struct drm_mode_object *mo;
+	mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_PROPERTY);
+	return mo ? obj_to_property(mo) : NULL;
+}
+
+static inline struct drm_property_blob *
+drm_property_blob_find(struct drm_device *dev, uint32_t id)
+{
+	struct drm_mode_object *mo;
+	mo = drm_mode_object_find(dev, id, DRM_MODE_OBJECT_BLOB);
+	return mo ? obj_to_blob(mo) : NULL;
+}
+
 /* Plane list iterator for legacy (overlay only) planes. */
 #define drm_for_each_legacy_plane(plane, planelist) \
 	list_for_each_entry(plane, planelist, head) \
-- 
cgit v1.2.3-71-gd317


From 5ea22f24d77b511d68c4ecaf4e6fd5d6ab462b8f Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Fri, 30 May 2014 11:34:01 -0400
Subject: drm: add extended property types

If we continue to use bitmask for type, we will quickly run out of room
to add new types.  Split this up so existing part of bitmask range
continues to function as before, but reserve a chunk of the remaining
space for an integer type-id.  Wrap this all up in some type-check
helpers to keep the backwards-compat uglyness contained.

Signed-off-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_crtc.c  | 26 +++++++++++++++++---------
 include/drm/drm_crtc.h      | 17 +++++++++++++++++
 include/uapi/drm/drm_mode.h | 13 +++++++++++++
 3 files changed, 47 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index c4b44be7d464..73f543af4924 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -3094,6 +3094,9 @@ struct drm_property *drm_property_create(struct drm_device *dev, int flags,
 	}
 
 	list_add_tail(&property->head, &dev->mode_config.property_list);
+
+	WARN_ON(!drm_property_type_valid(property));
+
 	return property;
 fail:
 	kfree(property->values);
@@ -3251,14 +3254,16 @@ int drm_property_add_enum(struct drm_property *property, int index,
 {
 	struct drm_property_enum *prop_enum;
 
-	if (!(property->flags & (DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BITMASK)))
+	if (!(drm_property_type_is(property, DRM_MODE_PROP_ENUM) ||
+			drm_property_type_is(property, DRM_MODE_PROP_BITMASK)))
 		return -EINVAL;
 
 	/*
 	 * Bitmask enum properties have the additional constraint of values
 	 * from 0 to 63
 	 */
-	if ((property->flags & DRM_MODE_PROP_BITMASK) && (value > 63))
+	if (drm_property_type_is(property, DRM_MODE_PROP_BITMASK) &&
+			(value > 63))
 		return -EINVAL;
 
 	if (!list_empty(&property->enum_blob_list)) {
@@ -3439,10 +3444,11 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev,
 		goto done;
 	}
 
-	if (property->flags & (DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BITMASK)) {
+	if (drm_property_type_is(property, DRM_MODE_PROP_ENUM) ||
+			drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) {
 		list_for_each_entry(prop_enum, &property->enum_blob_list, head)
 			enum_count++;
-	} else if (property->flags & DRM_MODE_PROP_BLOB) {
+	} else if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) {
 		list_for_each_entry(prop_blob, &property->enum_blob_list, head)
 			blob_count++;
 	}
@@ -3464,7 +3470,8 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev,
 	}
 	out_resp->count_values = value_count;
 
-	if (property->flags & (DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BITMASK)) {
+	if (drm_property_type_is(property, DRM_MODE_PROP_ENUM) ||
+			drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) {
 		if ((out_resp->count_enum_blobs >= enum_count) && enum_count) {
 			copied = 0;
 			enum_ptr = (struct drm_mode_property_enum __user *)(unsigned long)out_resp->enum_blob_ptr;
@@ -3486,7 +3493,7 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev,
 		out_resp->count_enum_blobs = enum_count;
 	}
 
-	if (property->flags & DRM_MODE_PROP_BLOB) {
+	if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) {
 		if ((out_resp->count_enum_blobs >= blob_count) && blob_count) {
 			copied = 0;
 			blob_id_ptr = (uint32_t __user *)(unsigned long)out_resp->enum_blob_ptr;
@@ -3640,17 +3647,18 @@ static bool drm_property_change_is_valid(struct drm_property *property,
 {
 	if (property->flags & DRM_MODE_PROP_IMMUTABLE)
 		return false;
-	if (property->flags & DRM_MODE_PROP_RANGE) {
+
+	if (drm_property_type_is(property, DRM_MODE_PROP_RANGE)) {
 		if (value < property->values[0] || value > property->values[1])
 			return false;
 		return true;
-	} else if (property->flags & DRM_MODE_PROP_BITMASK) {
+	} else if (drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) {
 		int i;
 		uint64_t valid_mask = 0;
 		for (i = 0; i < property->num_values; i++)
 			valid_mask |= (1ULL << property->values[i]);
 		return !(value & ~valid_mask);
-	} else if (property->flags & DRM_MODE_PROP_BLOB) {
+	} else if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) {
 		/* Only the driver knows */
 		return true;
 	} else {
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 90bd1a25e2c6..92f6ec2de86a 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -930,6 +930,23 @@ extern void drm_mode_config_cleanup(struct drm_device *dev);
 
 extern int drm_mode_connector_update_edid_property(struct drm_connector *connector,
 						struct edid *edid);
+
+static inline bool drm_property_type_is(struct drm_property *property,
+		uint32_t type)
+{
+	/* instanceof for props.. handles extended type vs original types: */
+	if (property->flags & DRM_MODE_PROP_EXTENDED_TYPE)
+		return (property->flags & DRM_MODE_PROP_EXTENDED_TYPE) == type;
+	return property->flags & type;
+}
+
+static inline bool drm_property_type_valid(struct drm_property *property)
+{
+	if (property->flags & DRM_MODE_PROP_EXTENDED_TYPE)
+		return !(property->flags & DRM_MODE_PROP_LEGACY_TYPE);
+	return !!(property->flags & DRM_MODE_PROP_LEGACY_TYPE);
+}
+
 extern int drm_object_property_set_value(struct drm_mode_object *obj,
 					 struct drm_property *property,
 					 uint64_t val);
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 719add464f95..caeaa6f7ebde 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -252,6 +252,19 @@ struct drm_mode_get_connector {
 #define DRM_MODE_PROP_BLOB	(1<<4)
 #define DRM_MODE_PROP_BITMASK	(1<<5) /* bitmask of enumerated types */
 
+/* non-extended types: legacy bitmask, one bit per type: */
+#define DRM_MODE_PROP_LEGACY_TYPE  ( \
+		DRM_MODE_PROP_RANGE | \
+		DRM_MODE_PROP_ENUM | \
+		DRM_MODE_PROP_BLOB | \
+		DRM_MODE_PROP_BITMASK)
+
+/* extended-types: rather than continue to consume a bit per type,
+ * grab a chunk of the bits to use as integer type id.
+ */
+#define DRM_MODE_PROP_EXTENDED_TYPE	0x0000ffc0
+#define DRM_MODE_PROP_TYPE(n)		((n) << 6)
+
 struct drm_mode_property_enum {
 	__u64 value;
 	char name[DRM_PROP_NAME_LEN];
-- 
cgit v1.2.3-71-gd317


From 98f75de40e9d83c3a90d294b8fd25fa2874212a9 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Fri, 30 May 2014 11:37:03 -0400
Subject: drm: add object property type

An object property is an id (idr) for a drm mode object.  This
will allow a property to be used set/get a framebuffer, CRTC, etc.

Signed-off-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_crtc.c  | 61 +++++++++++++++++++++++++++++++++++++++------
 include/drm/drm_crtc.h      |  5 ++++
 include/uapi/drm/drm_mode.h |  1 +
 3 files changed, 60 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 73f543af4924..f990d9f180f0 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -370,6 +370,21 @@ void drm_mode_object_put(struct drm_device *dev,
 	mutex_unlock(&dev->mode_config.idr_mutex);
 }
 
+static struct drm_mode_object *_object_find(struct drm_device *dev,
+		uint32_t id, uint32_t type)
+{
+	struct drm_mode_object *obj = NULL;
+
+	mutex_lock(&dev->mode_config.idr_mutex);
+	obj = idr_find(&dev->mode_config.crtc_idr, id);
+	if (!obj || (type != DRM_MODE_OBJECT_ANY && obj->type != type) ||
+	    (obj->id != id))
+		obj = NULL;
+	mutex_unlock(&dev->mode_config.idr_mutex);
+
+	return obj;
+}
+
 /**
  * drm_mode_object_find - look up a drm object with static lifetime
  * @dev: drm device
@@ -377,7 +392,9 @@ void drm_mode_object_put(struct drm_device *dev,
  * @type: type of the mode object
  *
  * Note that framebuffers cannot be looked up with this functions - since those
- * are reference counted, they need special treatment.
+ * are reference counted, they need special treatment.  Even with
+ * DRM_MODE_OBJECT_ANY (although that will simply return NULL
+ * rather than WARN_ON()).
  */
 struct drm_mode_object *drm_mode_object_find(struct drm_device *dev,
 		uint32_t id, uint32_t type)
@@ -387,13 +404,10 @@ struct drm_mode_object *drm_mode_object_find(struct drm_device *dev,
 	/* Framebuffers are reference counted and need their own lookup
 	 * function.*/
 	WARN_ON(type == DRM_MODE_OBJECT_FB);
-
-	mutex_lock(&dev->mode_config.idr_mutex);
-	obj = idr_find(&dev->mode_config.crtc_idr, id);
-	if (!obj || (obj->type != type) || (obj->id != id))
+	obj = _object_find(dev, id, type);
+	/* don't leak out unref'd fb's */
+	if (obj && (obj->type == DRM_MODE_OBJECT_FB))
 		obj = NULL;
-	mutex_unlock(&dev->mode_config.idr_mutex);
-
 	return obj;
 }
 EXPORT_SYMBOL(drm_mode_object_find);
@@ -3074,6 +3088,8 @@ struct drm_property *drm_property_create(struct drm_device *dev, int flags,
 	if (!property)
 		return NULL;
 
+	property->dev = dev;
+
 	if (num_values) {
 		property->values = kzalloc(sizeof(uint64_t)*num_values, GFP_KERNEL);
 		if (!property->values)
@@ -3234,6 +3250,23 @@ struct drm_property *drm_property_create_range(struct drm_device *dev, int flags
 }
 EXPORT_SYMBOL(drm_property_create_range);
 
+struct drm_property *drm_property_create_object(struct drm_device *dev,
+					 int flags, const char *name, uint32_t type)
+{
+	struct drm_property *property;
+
+	flags |= DRM_MODE_PROP_OBJECT;
+
+	property = drm_property_create(dev, flags, name, 1);
+	if (!property)
+		return NULL;
+
+	property->values[0] = type;
+
+	return property;
+}
+EXPORT_SYMBOL(drm_property_create_object);
+
 /**
  * drm_property_add_enum - add a possible value to an enumeration property
  * @property: enumeration property to change
@@ -3661,6 +3694,20 @@ static bool drm_property_change_is_valid(struct drm_property *property,
 	} else if (drm_property_type_is(property, DRM_MODE_PROP_BLOB)) {
 		/* Only the driver knows */
 		return true;
+	} else if (drm_property_type_is(property, DRM_MODE_PROP_OBJECT)) {
+		struct drm_mode_object *obj;
+		/* a zero value for an object property translates to null: */
+		if (value == 0)
+			return true;
+		/*
+		 * NOTE: use _object_find() directly to bypass restriction on
+		 * looking up refcnt'd objects (ie. fb's).  For a refcnt'd
+		 * object this could race against object finalization, so it
+		 * simply tells us that the object *was* valid.  Which is good
+		 * enough.
+		 */
+		obj = _object_find(property->dev, value, property->values[0]);
+		return obj != NULL;
 	} else {
 		int i;
 		for (i = 0; i < property->num_values; i++)
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 92f6ec2de86a..cb2e599f769f 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -50,6 +50,7 @@ struct drm_clip_rect;
 #define DRM_MODE_OBJECT_BLOB 0xbbbbbbbb
 #define DRM_MODE_OBJECT_PLANE 0xeeeeeeee
 #define DRM_MODE_OBJECT_BRIDGE 0xbdbdbdbd
+#define DRM_MODE_OBJECT_ANY 0
 
 struct drm_mode_object {
 	uint32_t id;
@@ -190,6 +191,7 @@ struct drm_property {
 	char name[DRM_PROP_NAME_LEN];
 	uint32_t num_values;
 	uint64_t *values;
+	struct drm_device *dev;
 
 	struct list_head enum_blob_list;
 };
@@ -980,6 +982,8 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev,
 struct drm_property *drm_property_create_range(struct drm_device *dev, int flags,
 					 const char *name,
 					 uint64_t min, uint64_t max);
+struct drm_property *drm_property_create_object(struct drm_device *dev,
+					 int flags, const char *name, uint32_t type);
 extern void drm_property_destroy(struct drm_device *dev, struct drm_property *property);
 extern int drm_property_add_enum(struct drm_property *property, int index,
 				 uint64_t value, const char *name);
@@ -995,6 +999,7 @@ extern int drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc,
 					 int gamma_size);
 extern struct drm_mode_object *drm_mode_object_find(struct drm_device *dev,
 		uint32_t id, uint32_t type);
+
 /* IOCTLs */
 extern int drm_mode_getresources(struct drm_device *dev,
 				 void *data, struct drm_file *file_priv);
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index caeaa6f7ebde..57f46348befe 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -264,6 +264,7 @@ struct drm_mode_get_connector {
  */
 #define DRM_MODE_PROP_EXTENDED_TYPE	0x0000ffc0
 #define DRM_MODE_PROP_TYPE(n)		((n) << 6)
+#define DRM_MODE_PROP_OBJECT		DRM_MODE_PROP_TYPE(1)
 
 struct drm_mode_property_enum {
 	__u64 value;
-- 
cgit v1.2.3-71-gd317


From ebc44cf386734374983922c6fc1ae8ac47f88bef Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Wed, 12 Sep 2012 22:22:31 -0500
Subject: drm: add signed-range property type

Like range, but values are signed.

Signed-off-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
---
 drivers/gpu/drm/drm_crtc.c  | 45 +++++++++++++++++++++++++++++++++------------
 include/drm/drm_crtc.h      | 12 ++++++++++++
 include/uapi/drm/drm_mode.h |  1 +
 3 files changed, 46 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index f990d9f180f0..6127073407e0 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -3213,6 +3213,22 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev,
 }
 EXPORT_SYMBOL(drm_property_create_bitmask);
 
+static struct drm_property *property_create_range(struct drm_device *dev,
+					 int flags, const char *name,
+					 uint64_t min, uint64_t max)
+{
+	struct drm_property *property;
+
+	property = drm_property_create(dev, flags, name, 2);
+	if (!property)
+		return NULL;
+
+	property->values[0] = min;
+	property->values[1] = max;
+
+	return property;
+}
+
 /**
  * drm_property_create - create a new ranged property type
  * @dev: drm device
@@ -3235,21 +3251,20 @@ struct drm_property *drm_property_create_range(struct drm_device *dev, int flags
 					 const char *name,
 					 uint64_t min, uint64_t max)
 {
-	struct drm_property *property;
-
-	flags |= DRM_MODE_PROP_RANGE;
-
-	property = drm_property_create(dev, flags, name, 2);
-	if (!property)
-		return NULL;
-
-	property->values[0] = min;
-	property->values[1] = max;
-
-	return property;
+	return property_create_range(dev, DRM_MODE_PROP_RANGE | flags,
+			name, min, max);
 }
 EXPORT_SYMBOL(drm_property_create_range);
 
+struct drm_property *drm_property_create_signed_range(struct drm_device *dev,
+					 int flags, const char *name,
+					 int64_t min, int64_t max)
+{
+	return property_create_range(dev, DRM_MODE_PROP_SIGNED_RANGE | flags,
+			name, I642U64(min), I642U64(max));
+}
+EXPORT_SYMBOL(drm_property_create_signed_range);
+
 struct drm_property *drm_property_create_object(struct drm_device *dev,
 					 int flags, const char *name, uint32_t type)
 {
@@ -3685,6 +3700,12 @@ static bool drm_property_change_is_valid(struct drm_property *property,
 		if (value < property->values[0] || value > property->values[1])
 			return false;
 		return true;
+	} else if (drm_property_type_is(property, DRM_MODE_PROP_SIGNED_RANGE)) {
+		int64_t svalue = U642I64(value);
+		if (svalue < U642I64(property->values[0]) ||
+				svalue > U642I64(property->values[1]))
+			return false;
+		return true;
 	} else if (drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) {
 		int i;
 		uint64_t valid_mask = 0;
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index cb2e599f769f..849cfdccff09 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -65,6 +65,15 @@ struct drm_object_properties {
 	uint64_t values[DRM_OBJECT_MAX_PROPERTY];
 };
 
+static inline int64_t U642I64(uint64_t val)
+{
+	return (int64_t)*((int64_t *)&val);
+}
+static inline uint64_t I642U64(int64_t val)
+{
+	return (uint64_t)*((uint64_t *)&val);
+}
+
 enum drm_connector_force {
 	DRM_FORCE_UNSPECIFIED,
 	DRM_FORCE_OFF,
@@ -982,6 +991,9 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev,
 struct drm_property *drm_property_create_range(struct drm_device *dev, int flags,
 					 const char *name,
 					 uint64_t min, uint64_t max);
+struct drm_property *drm_property_create_signed_range(struct drm_device *dev,
+					 int flags, const char *name,
+					 int64_t min, int64_t max);
 struct drm_property *drm_property_create_object(struct drm_device *dev,
 					 int flags, const char *name, uint32_t type);
 extern void drm_property_destroy(struct drm_device *dev, struct drm_property *property);
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 57f46348befe..def54f9e07ca 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -265,6 +265,7 @@ struct drm_mode_get_connector {
 #define DRM_MODE_PROP_EXTENDED_TYPE	0x0000ffc0
 #define DRM_MODE_PROP_TYPE(n)		((n) << 6)
 #define DRM_MODE_PROP_OBJECT		DRM_MODE_PROP_TYPE(1)
+#define DRM_MODE_PROP_SIGNED_RANGE	DRM_MODE_PROP_TYPE(2)
 
 struct drm_mode_property_enum {
 	__u64 value;
-- 
cgit v1.2.3-71-gd317


From 6e9f798d91c526982cca0026cd451e8fdbf18aaf Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 29 May 2014 23:54:47 +0200
Subject: drm: Split connection_mutex out of mode_config.mutex (v3)

After the split-out of crtc locks from the big mode_config.mutex
there's still two major areas it protects:
- Various connector probe states, like connector->status, EDID
  properties, probed mode lists and similar information.
- The links from connector->encoder and encoder->crtc and other
  modeset-relevant connector state (e.g. properties which control the
  panel fitter).

The later is used by modeset operations. But they don't really care
about the former since it's allowed to e.g. enable a disconnected VGA
output or with a mode not in the probed list.

Thus far this hasn't been a problem, but for the atomic modeset
conversion Rob Clark needs to convert all modeset relevant locks into
w/w locks. This is required because the order of acquisition is
determined by how userspace supplies the atomic modeset data. This has
run into troubles in the detect path since the i915 load detect code
needs _both_ protections offered by the mode_config.mutex: It updates
probe state and it needs to change the modeset configuration to enable
the temporary load detect pipe.

The big deal here is that for the probe/detect users of this lock a
plain mutex fits best, but for atomic modesets we really want a w/w
mutex. To fix this lets split out a new connection_mutex lock for the
modeset relevant parts.

For simplicity I've decided to only add one additional lock for all
connector/encoder links and modeset configuration states. We have
piles of different modeset objects in addition to those (like bridges
or panels), so adding per-object locks would be much more effort.

Also, we're guaranteed (at least for now) to do a full modeset if we
need to acquire this lock. Which means that fine-grained locking is
fairly irrelevant compared to the amount of time the full modeset will
take.

I've done a full audit, and there's just a few things that justify
special focus:
- Locking in drm_sysfs.c is almost completely absent. We should
  sprinkle mode_config.connection_mutex over this file a bit, but
  since it already lacks mode_config.mutex this patch wont make the
  situation any worse. This is material for a follow-up patch.

- omap has a omap_framebuffer_flush function which walks the
  connector->encoder->crtc links and is called from many contexts.
  Some look like they don't acquire mode_config.mutex, so this is
  already racy. Again fixing this is material for a separate patch.

- The radeon hot_plug function to retrain DP links looks at
  connector->dpms. Currently this happens without any locking, so is
  already racy. I think radeon_hotplug_work_func should gain
  mutex_lock/unlock calls for the mode_config.connection_mutex.

- Same applies to i915's intel_dp_hot_plug. But again, this is already
  racy.

- i915 load_detect code needs to acquire this lock. Which means the
  w/w dance due to Rob's work will be nicely contained to _just_ this
  function.

I've added fixme comments everywhere where it looks suspicious but in
the sysfs code. After a quick irc discussion with Dave Airlie it
sounds like the lack of locking in there is due to sysfs cleanup fun
at module unload.

v1: original (only compile tested)

v2: missing mutex_init(), etc (from Rob Clark)

v3: i915 needs more care in the conversion:
- Protect the edp pp logic with the connection_mutex.
- Use connection_mutex in the backlight code due to
  get_pipe_from_connector.
- Use drm_modeset_lock_all in suspend/resume paths.
- Update lock checks in the overlay code.

Cc: Alex Deucher <alexdeucher@gmail.com>
Cc: Rob Clark <robdclark@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/drm_crtc.c                 |  8 ++++++++
 drivers/gpu/drm/drm_crtc_helper.c          |  2 ++
 drivers/gpu/drm/drm_edid.c                 |  2 ++
 drivers/gpu/drm/drm_plane_helper.c         |  7 +++++++
 drivers/gpu/drm/i915/i915_debugfs.c        |  4 ++--
 drivers/gpu/drm/i915/i915_drv.c            |  6 ++----
 drivers/gpu/drm/i915/intel_display.c       | 16 ++++++++++++----
 drivers/gpu/drm/i915/intel_dp.c            | 15 ++++++++-------
 drivers/gpu/drm/i915/intel_opregion.c      |  4 ++--
 drivers/gpu/drm/i915/intel_overlay.c       |  4 ++--
 drivers/gpu/drm/i915/intel_panel.c         |  8 ++++----
 drivers/gpu/drm/omapdrm/omap_fb.c          |  1 +
 drivers/gpu/drm/radeon/radeon_connectors.c |  1 +
 include/drm/drm_crtc.h                     |  1 +
 14 files changed, 54 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 5b3e2920c376..f3b98d4b6f46 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -54,6 +54,8 @@ void drm_modeset_lock_all(struct drm_device *dev)
 
 	mutex_lock(&dev->mode_config.mutex);
 
+	mutex_lock(&dev->mode_config.connection_mutex);
+
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
 		mutex_lock_nest_lock(&crtc->mutex, &dev->mode_config.mutex);
 }
@@ -72,6 +74,8 @@ void drm_modeset_unlock_all(struct drm_device *dev)
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
 		mutex_unlock(&crtc->mutex);
 
+	mutex_unlock(&dev->mode_config.connection_mutex);
+
 	mutex_unlock(&dev->mode_config.mutex);
 }
 EXPORT_SYMBOL(drm_modeset_unlock_all);
@@ -93,6 +97,7 @@ void drm_warn_on_modeset_not_all_locked(struct drm_device *dev)
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
 		WARN_ON(!mutex_is_locked(&crtc->mutex));
 
+	WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
 	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
 }
 EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked);
@@ -1793,6 +1798,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 	DRM_DEBUG_KMS("[CONNECTOR:%d:?]\n", out_resp->connector_id);
 
 	mutex_lock(&dev->mode_config.mutex);
+	mutex_lock(&dev->mode_config.connection_mutex);
 
 	connector = drm_connector_find(dev, out_resp->connector_id);
 	if (!connector) {
@@ -1891,6 +1897,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 	out_resp->count_encoders = encoders_count;
 
 out:
+	mutex_unlock(&dev->mode_config.connection_mutex);
 	mutex_unlock(&dev->mode_config.mutex);
 
 	return ret;
@@ -4640,6 +4647,7 @@ EXPORT_SYMBOL(drm_format_vert_chroma_subsampling);
 void drm_mode_config_init(struct drm_device *dev)
 {
 	mutex_init(&dev->mode_config.mutex);
+	mutex_init(&dev->mode_config.connection_mutex);
 	mutex_init(&dev->mode_config.idr_mutex);
 	mutex_init(&dev->mode_config.fb_lock);
 	INIT_LIST_HEAD(&dev->mode_config.fb_list);
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 231c9433341f..b55d27c872f6 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -89,6 +89,8 @@ bool drm_helper_encoder_in_use(struct drm_encoder *encoder)
 	struct drm_device *dev = encoder->dev;
 
 	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
+	WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
+
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
 		if (connector->encoder == encoder)
 			return true;
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 085e5550aac5..7be21781d3bd 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -3304,6 +3304,8 @@ struct drm_connector *drm_select_eld(struct drm_encoder *encoder,
 	struct drm_connector *connector;
 	struct drm_device *dev = encoder->dev;
 
+	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
+
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
 		if (connector->encoder == encoder && connector->eld[0])
 			return connector;
diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c
index d966afa7ecae..458d9bf09209 100644
--- a/drivers/gpu/drm/drm_plane_helper.c
+++ b/drivers/gpu/drm/drm_plane_helper.c
@@ -54,6 +54,13 @@ static int get_connectors_for_crtc(struct drm_crtc *crtc,
 	struct drm_connector *connector;
 	int count = 0;
 
+	/*
+	 * Note: Once we change the plane hooks to more fine-grained locking we
+	 * need to grab the connection_mutex here to be able to make these
+	 * checks.
+	 */
+	WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
+
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
 		if (connector->encoder && connector->encoder->crtc == crtc) {
 			if (connector_list != NULL && count < num_connectors)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 36f4c15f538b..169fc2d8c554 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2609,7 +2609,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_device *dev, enum pipe pipe,
 
 	*source = INTEL_PIPE_CRC_SOURCE_PIPE;
 
-	mutex_lock(&dev->mode_config.mutex);
+	drm_modeset_lock_all(dev);
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
 			    base.head) {
 		if (!encoder->base.crtc)
@@ -2645,7 +2645,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_device *dev, enum pipe pipe,
 			break;
 		}
 	}
-	mutex_unlock(&dev->mode_config.mutex);
+	drm_modeset_unlock_all(dev);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 4619c9e51907..5b5b82c3f570 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -533,13 +533,11 @@ static int i915_drm_freeze(struct drm_device *dev)
 		 * Disable CRTCs directly since we want to preserve sw state
 		 * for _thaw.
 		 */
-		mutex_lock(&dev->mode_config.mutex);
+		drm_modeset_lock_all(dev);
 		for_each_crtc(dev, crtc) {
-			mutex_lock(&crtc->mutex);
 			dev_priv->display.crtc_disable(crtc);
-			mutex_unlock(&crtc->mutex);
 		}
-		mutex_unlock(&dev->mode_config.mutex);
+		drm_modeset_unlock_all(dev);
 
 		intel_modeset_suspend_hw(dev);
 	}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e80b2917e3cb..c2976ade823f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8323,6 +8323,8 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 		      connector->base.id, connector->name,
 		      encoder->base.id, encoder->name);
 
+	mutex_lock(&dev->mode_config.connection_mutex);
+
 	/*
 	 * Algorithm gets a little messy:
 	 *
@@ -8365,7 +8367,7 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 	 */
 	if (!crtc) {
 		DRM_DEBUG_KMS("no pipe available for load-detect\n");
-		return false;
+		goto fail_unlock_connector;
 	}
 
 	mutex_lock(&crtc->mutex);
@@ -8419,6 +8421,9 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 	else
 		intel_crtc->new_config = NULL;
 	mutex_unlock(&crtc->mutex);
+fail_unlock_connector:
+	mutex_unlock(&dev->mode_config.connection_mutex);
+
 	return false;
 }
 
@@ -8448,6 +8453,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 		}
 
 		mutex_unlock(&crtc->mutex);
+		mutex_unlock(&connector->dev->mode_config.connection_mutex);
 		return;
 	}
 
@@ -8456,6 +8462,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 		connector->funcs->dpms(connector, old->dpms_mode);
 
 	mutex_unlock(&crtc->mutex);
+	mutex_unlock(&connector->dev->mode_config.connection_mutex);
 }
 
 static int i9xx_pll_refclk(struct drm_device *dev,
@@ -10986,8 +10993,9 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
 enum pipe intel_get_pipe_from_connector(struct intel_connector *connector)
 {
 	struct drm_encoder *encoder = connector->base.encoder;
+	struct drm_device *dev = connector->base.dev;
 
-	WARN_ON(!mutex_is_locked(&connector->base.dev->mode_config.mutex));
+	WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
 
 	if (!encoder)
 		return INVALID_PIPE;
@@ -11756,9 +11764,9 @@ void intel_modeset_init(struct drm_device *dev)
 	/* Just in case the BIOS is doing something questionable. */
 	intel_disable_fbc(dev);
 
-	mutex_lock(&dev->mode_config.mutex);
+	drm_modeset_lock_all(dev);
 	intel_modeset_setup_hw_state(dev, false);
-	mutex_unlock(&dev->mode_config.mutex);
+	drm_modeset_unlock_all(dev);
 
 	for_each_intel_crtc(dev, crtc) {
 		if (!crtc->active)
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index d94f0271e422..c4d883921282 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1154,7 +1154,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
 	u32 pp;
 	u32 pp_stat_reg, pp_ctrl_reg;
 
-	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
+	WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
 
 	if (!intel_dp->want_panel_vdd && edp_have_panel_vdd(intel_dp)) {
 		struct intel_digital_port *intel_dig_port =
@@ -1191,9 +1191,9 @@ static void edp_panel_vdd_work(struct work_struct *__work)
 						 struct intel_dp, panel_vdd_work);
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
-	mutex_lock(&dev->mode_config.mutex);
+	mutex_lock(&dev->mode_config.connection_mutex);
 	edp_panel_vdd_off_sync(intel_dp);
-	mutex_unlock(&dev->mode_config.mutex);
+	mutex_unlock(&dev->mode_config.connection_mutex);
 }
 
 static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync)
@@ -3235,6 +3235,7 @@ intel_dp_check_link_status(struct intel_dp *intel_dp)
 	u8 sink_irq_vector;
 	u8 link_status[DP_LINK_STATUS_SIZE];
 
+	/* FIXME: This access isn't protected by any locks. */
 	if (!intel_encoder->connectors_active)
 		return;
 
@@ -3665,9 +3666,9 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 	drm_encoder_cleanup(encoder);
 	if (is_edp(intel_dp)) {
 		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
-		mutex_lock(&dev->mode_config.mutex);
+		mutex_lock(&dev->mode_config.connection_mutex);
 		edp_panel_vdd_off_sync(intel_dp);
-		mutex_unlock(&dev->mode_config.mutex);
+		mutex_unlock(&dev->mode_config.connection_mutex);
 	}
 	kfree(intel_dig_port);
 }
@@ -4246,9 +4247,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 		drm_dp_aux_unregister_i2c_bus(&intel_dp->aux);
 		if (is_edp(intel_dp)) {
 			cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
-			mutex_lock(&dev->mode_config.mutex);
+			mutex_lock(&dev->mode_config.connection_mutex);
 			edp_panel_vdd_off_sync(intel_dp);
-			mutex_unlock(&dev->mode_config.mutex);
+			mutex_unlock(&dev->mode_config.connection_mutex);
 		}
 		drm_sysfs_connector_remove(connector);
 		drm_connector_cleanup(connector);
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index acde2945eb8a..b812e9d39f38 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -410,7 +410,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
 	if (bclp > 255)
 		return ASLC_BACKLIGHT_FAILED;
 
-	mutex_lock(&dev->mode_config.mutex);
+	mutex_lock(&dev->mode_config.connection_mutex);
 
 	/*
 	 * Update backlight on all connectors that support backlight (usually
@@ -421,7 +421,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
 		intel_panel_set_backlight(intel_connector, bclp, 255);
 	iowrite32(DIV_ROUND_UP(bclp * 100, 255) | ASLE_CBLV_VALID, &asle->cblv);
 
-	mutex_unlock(&dev->mode_config.mutex);
+	mutex_unlock(&dev->mode_config.connection_mutex);
 
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 27eb820d1b19..e97ea33e0117 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -688,7 +688,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	u32 swidth, swidthsw, sheight, ostride;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!mutex_is_locked(&dev->mode_config.mutex));
+	BUG_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
 	BUG_ON(!overlay);
 
 	ret = intel_overlay_release_old_vid(overlay);
@@ -793,7 +793,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
 	int ret;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!mutex_is_locked(&dev->mode_config.mutex));
+	BUG_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
 
 	ret = intel_overlay_recover_from_interrupt(overlay);
 	if (ret != 0)
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 3fb3f939dca5..d4d415665475 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -876,12 +876,12 @@ static int intel_backlight_device_update_status(struct backlight_device *bd)
 	struct intel_connector *connector = bl_get_data(bd);
 	struct drm_device *dev = connector->base.dev;
 
-	mutex_lock(&dev->mode_config.mutex);
+	mutex_lock(&dev->mode_config.connection_mutex);
 	DRM_DEBUG_KMS("updating intel_backlight, brightness=%d/%d\n",
 		      bd->props.brightness, bd->props.max_brightness);
 	intel_panel_set_backlight(connector, bd->props.brightness,
 				  bd->props.max_brightness);
-	mutex_unlock(&dev->mode_config.mutex);
+	mutex_unlock(&dev->mode_config.connection_mutex);
 	return 0;
 }
 
@@ -893,9 +893,9 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
 	int ret;
 
 	intel_runtime_pm_get(dev_priv);
-	mutex_lock(&dev->mode_config.mutex);
+	mutex_lock(&dev->mode_config.connection_mutex);
 	ret = intel_panel_get_backlight(connector);
-	mutex_unlock(&dev->mode_config.mutex);
+	mutex_unlock(&dev->mode_config.connection_mutex);
 	intel_runtime_pm_put(dev_priv);
 
 	return ret;
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c
index 8b019602ffe6..2a5cacdc344b 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c
@@ -346,6 +346,7 @@ void omap_framebuffer_flush(struct drm_framebuffer *fb,
 
 	VERB("flush: %d,%d %dx%d, fb=%p", x, y, w, h, fb);
 
+	/* FIXME: This is racy - no protection against modeset config changes. */
 	while ((connector = omap_framebuffer_get_next_connector(fb, connector))) {
 		/* only consider connectors that are part of a chain */
 		if (connector->encoder && connector->encoder->crtc) {
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 31d4cf60bae3..4522f7dce653 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -48,6 +48,7 @@ void radeon_connector_hotplug(struct drm_connector *connector)
 	radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 
 	/* if the connector is already off, don't turn it back on */
+	/* FIXME: This access isn't protected by any locks. */
 	if (connector->dpms != DRM_MODE_DPMS_ON)
 		return;
 
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 849cfdccff09..6c295df7b0df 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -738,6 +738,7 @@ struct drm_mode_group {
  */
 struct drm_mode_config {
 	struct mutex mutex; /* protects configuration (mode lists etc.) */
+	struct mutex connection_mutex; /* protects connector->encoder and encoder->crtc links */
 	struct mutex idr_mutex; /* for IDR management */
 	struct idr crtc_idr; /* use this idr for all IDs, fb, crtc, connector, modes - just makes life easier */
 	/* this is limited to one for now */
-- 
cgit v1.2.3-71-gd317


From bfec07d0f8ed78b10df3ca3bc23e27de1166ea45 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Wed, 28 May 2014 08:56:09 +0800
Subject: f2fs: avoid overflow when large directory feathure is enabled

When large directory feathure is enable, We have one case which could cause
overflow in dir_buckets() as following:
special case: level + dir_level >= 32 and level < MAX_DIR_HASH_DEPTH / 2.

Here we define MAX_DIR_BUCKETS to limit the return value when the condition
could trigger potential overflow.

Changes from V1
 o modify description of calculation in f2fs.txt suggested by Changman Lee.

Suggested-by: Changman Lee <cm224.lee@samsung.com>
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/filesystems/f2fs.txt | 8 ++++----
 fs/f2fs/dir.c                      | 4 ++--
 include/linux/f2fs_fs.h            | 3 +++
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 25311e113e75..51afba17bbae 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -461,11 +461,11 @@ The number of blocks and buckets are determined by,
   # of blocks in level #n = |
                             `- 4, Otherwise
 
-                             ,- 2^ (n + dir_level),
-			     |            if n < MAX_DIR_HASH_DEPTH / 2,
+                             ,- 2^(n + dir_level),
+			     |        if n + dir_level < MAX_DIR_HASH_DEPTH / 2,
   # of buckets in level #n = |
-                             `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1),
-			                  Otherwise
+                             `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1),
+			              Otherwise
 
 When F2FS finds a file name in a directory, at first a hash value of the file
 name is calculated. Then, F2FS scans the hash table in level #0 to find the
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c3f148555c37..966acb039e3b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -23,10 +23,10 @@ static unsigned long dir_blocks(struct inode *inode)
 
 static unsigned int dir_buckets(unsigned int level, int dir_level)
 {
-	if (level < MAX_DIR_HASH_DEPTH / 2)
+	if (level + dir_level < MAX_DIR_HASH_DEPTH / 2)
 		return 1 << (level + dir_level);
 	else
-		return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1);
+		return MAX_DIR_BUCKETS;
 }
 
 static unsigned int bucket_blocks(unsigned int level)
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 8c03f71307c6..ba6f3127738f 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -394,6 +394,9 @@ typedef __le32	f2fs_hash_t;
 /* MAX level for dir lookup */
 #define MAX_DIR_HASH_DEPTH	63
 
+/* MAX buckets in one level of dir */
+#define MAX_DIR_BUCKETS		(1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
+
 #define SIZE_OF_DIR_ENTRY	11	/* by byte */
 #define SIZE_OF_DENTRY_BITMAP	((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
 					BITS_PER_BYTE)
-- 
cgit v1.2.3-71-gd317


From 1dbe4152168d44fa164edbdc9f1243de70b98f7a Mon Sep 17 00:00:00 2001
From: Changman Lee <cm224.lee@samsung.com>
Date: Mon, 12 May 2014 12:27:43 +0900
Subject: f2fs: large volume support

f2fs's cp has one page which consists of struct f2fs_checkpoint and
version bitmap of sit and nat. To support lots of segments, we need more
blocks for sit bitmap. So let's arrange sit bitmap as following:
+-----------------+------------+
| f2fs_checkpoint | sit bitmap |
| + nat bitmap    |            |
+-----------------+------------+
0                 4k        N blocks

Signed-off-by: Changman Lee <cm224.lee@samsung.com>
[Jaegeuk Kim: simple code change for readability]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c    | 45 ++++++++++++++++++++++++++++++++++++++++-----
 fs/f2fs/f2fs.h          | 13 +++++++++++--
 include/linux/f2fs_fs.h |  2 ++
 3 files changed, 53 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index fe968c7bfc90..ecba8da3308b 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -371,7 +371,9 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
 		return;
 
 	sbi->por_doing = true;
-	start_blk = __start_cp_addr(sbi) + 1;
+
+	start_blk = __start_cp_addr(sbi) + 1 +
+		le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
 	orphan_blkaddr = __start_sum_addr(sbi) - 1;
 
 	ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
@@ -512,8 +514,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
 	unsigned long blk_size = sbi->blocksize;
 	unsigned long long cp1_version = 0, cp2_version = 0;
 	unsigned long long cp_start_blk_no;
+	unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+	block_t cp_blk_no;
+	int i;
 
-	sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
+	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
 	if (!sbi->ckpt)
 		return -ENOMEM;
 	/*
@@ -544,6 +549,23 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
 	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
 	memcpy(sbi->ckpt, cp_block, blk_size);
 
+	if (cp_blks <= 1)
+		goto done;
+
+	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
+	if (cur_page == cp2)
+		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+
+	for (i = 1; i < cp_blks; i++) {
+		void *sit_bitmap_ptr;
+		unsigned char *ckpt = (unsigned char *)sbi->ckpt;
+
+		cur_page = get_meta_page(sbi, cp_blk_no + i);
+		sit_bitmap_ptr = page_address(cur_page);
+		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
+		f2fs_put_page(cur_page, 1);
+	}
+done:
 	f2fs_put_page(cp1, 1);
 	f2fs_put_page(cp2, 1);
 	return 0;
@@ -736,6 +758,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	__u32 crc32 = 0;
 	void *kaddr;
 	int i;
+	int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
 
 	/*
 	 * This avoids to conduct wrong roll-forward operations and uses
@@ -786,16 +809,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 
 	orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
 					/ F2FS_ORPHANS_PER_BLOCK;
-	ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
+	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
+			orphan_blocks);
 
 	if (is_umount) {
 		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
-			data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
+				cp_payload_blks + data_sum_blocks +
+				orphan_blocks + NR_CURSEG_NODE_TYPE);
 	} else {
 		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
-			data_sum_blocks + orphan_blocks);
+				cp_payload_blks + data_sum_blocks +
+				orphan_blocks);
 	}
 
 	if (sbi->n_orphans)
@@ -821,6 +847,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	set_page_dirty(cp_page);
 	f2fs_put_page(cp_page, 1);
 
+	for (i = 1; i < 1 + cp_payload_blks; i++) {
+		cp_page = grab_meta_page(sbi, start_blk++);
+		kaddr = page_address(cp_page);
+		memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
+				(1 << sbi->log_blocksize));
+		set_page_dirty(cp_page);
+		f2fs_put_page(cp_page, 1);
+	}
+
 	if (sbi->n_orphans) {
 		write_orphan_inodes(sbi, start_blk);
 		start_blk += orphan_blocks;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 676a2c6ccec7..9684b1f77a7d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -764,9 +764,18 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
 static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
-	int offset = (flag == NAT_BITMAP) ?
+	int offset;
+
+	if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {
+		if (flag == NAT_BITMAP)
+			return &ckpt->sit_nat_version_bitmap;
+		else
+			return ((unsigned char *)ckpt + F2FS_BLKSIZE);
+	} else {
+		offset = (flag == NAT_BITMAP) ?
 			le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
-	return &ckpt->sit_nat_version_bitmap + offset;
+		return &ckpt->sit_nat_version_bitmap + offset;
+	}
 }
 
 static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index ba6f3127738f..6ff0b0b42d47 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -19,6 +19,7 @@
 #define F2FS_LOG_SECTORS_PER_BLOCK	3	/* 4KB: F2FS_BLKSIZE */
 #define F2FS_BLKSIZE			4096	/* support only 4KB block */
 #define F2FS_MAX_EXTENSION		64	/* # of extension entries */
+#define F2FS_BLK_ALIGN(x)	(((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
 
 #define NULL_ADDR		((block_t)0)	/* used as block_t addresses */
 #define NEW_ADDR		((block_t)-1)	/* used as block_t addresses */
@@ -75,6 +76,7 @@ struct f2fs_super_block {
 	__le16 volume_name[512];	/* volume name */
 	__le32 extension_count;		/* # of extensions below */
 	__u8 extension_list[F2FS_MAX_EXTENSION][8];	/* extension array */
+	__le32 cp_payload;
 } __packed;
 
 /*
-- 
cgit v1.2.3-71-gd317


From 39b9004d1f626b88b775c7655d3f286e135dfec6 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Mon, 30 Sep 2013 16:13:39 +0200
Subject: gpu: ipu-v3: Move i.MX IPUv3 core driver out of staging

The i.MX Image Processing Unit (IPU) contains a number of image processing
blocks that sit right in the middle between DRM and V4L2. Some of the modules,
such as Display Controller, Processor, and Interface (DC, DP, DI) or CMOS
Sensor Interface (CSI) and their FIFOs could be assigned to either framework,
but others, such as the dma controller (IDMAC) and image converter (IC) can
be used by both.
The IPUv3 core driver provides an internal API to access the modules, to be
used by both DRM and V4L2 IPUv3 drivers.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/Makefile                        |    1 +
 drivers/gpu/ipu-v3/Kconfig                  |    7 +
 drivers/gpu/ipu-v3/Makefile                 |    3 +
 drivers/gpu/ipu-v3/ipu-common.c             | 1261 +++++++++++++++++++++++++++
 drivers/gpu/ipu-v3/ipu-dc.c                 |  411 +++++++++
 drivers/gpu/ipu-v3/ipu-di.c                 |  730 ++++++++++++++++
 drivers/gpu/ipu-v3/ipu-dmfc.c               |  415 +++++++++
 drivers/gpu/ipu-v3/ipu-dp.c                 |  338 +++++++
 drivers/gpu/ipu-v3/ipu-prv.h                |  206 +++++
 drivers/staging/imx-drm/Kconfig             |   11 +-
 drivers/staging/imx-drm/Makefile            |    1 -
 drivers/staging/imx-drm/imx-hdmi.c          |    2 +-
 drivers/staging/imx-drm/imx-tve.c           |    2 +-
 drivers/staging/imx-drm/ipu-v3/Makefile     |    3 -
 drivers/staging/imx-drm/ipu-v3/imx-ipu-v3.h |  326 -------
 drivers/staging/imx-drm/ipu-v3/ipu-common.c | 1261 ---------------------------
 drivers/staging/imx-drm/ipu-v3/ipu-dc.c     |  412 ---------
 drivers/staging/imx-drm/ipu-v3/ipu-di.c     |  730 ----------------
 drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c   |  415 ---------
 drivers/staging/imx-drm/ipu-v3/ipu-dp.c     |  338 -------
 drivers/staging/imx-drm/ipu-v3/ipu-prv.h    |  206 -----
 drivers/staging/imx-drm/ipuv3-crtc.c        |    2 +-
 drivers/staging/imx-drm/ipuv3-plane.c       |    2 +-
 drivers/video/Kconfig                       |    1 +
 include/video/imx-ipu-v3.h                  |  326 +++++++
 25 files changed, 3704 insertions(+), 3706 deletions(-)
 create mode 100644 drivers/gpu/ipu-v3/Kconfig
 create mode 100644 drivers/gpu/ipu-v3/Makefile
 create mode 100644 drivers/gpu/ipu-v3/ipu-common.c
 create mode 100644 drivers/gpu/ipu-v3/ipu-dc.c
 create mode 100644 drivers/gpu/ipu-v3/ipu-di.c
 create mode 100644 drivers/gpu/ipu-v3/ipu-dmfc.c
 create mode 100644 drivers/gpu/ipu-v3/ipu-dp.c
 create mode 100644 drivers/gpu/ipu-v3/ipu-prv.h
 delete mode 100644 drivers/staging/imx-drm/ipu-v3/Makefile
 delete mode 100644 drivers/staging/imx-drm/ipu-v3/imx-ipu-v3.h
 delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-common.c
 delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-dc.c
 delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-di.c
 delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c
 delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-dp.c
 delete mode 100644 drivers/staging/imx-drm/ipu-v3/ipu-prv.h
 create mode 100644 include/video/imx-ipu-v3.h

(limited to 'include')

diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index d8a22c2a579d..70da9eb52a42 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -1,2 +1,3 @@
 obj-y			+= drm/ vga/
 obj-$(CONFIG_TEGRA_HOST1X)	+= host1x/
+obj-$(CONFIG_IMX_IPUV3_CORE)	+= ipu-v3/
diff --git a/drivers/gpu/ipu-v3/Kconfig b/drivers/gpu/ipu-v3/Kconfig
new file mode 100644
index 000000000000..2f228a2f2a48
--- /dev/null
+++ b/drivers/gpu/ipu-v3/Kconfig
@@ -0,0 +1,7 @@
+config IMX_IPUV3_CORE
+	tristate "IPUv3 core support"
+	depends on SOC_IMX5 || SOC_IMX6Q || SOC_IMX6SL || ARCH_MULTIPLATFORM
+	depends on RESET_CONTROLLER
+	help
+	  Choose this if you have a i.MX5/6 system and want to use the Image
+	  Processing Unit. This option only enables IPU base support.
diff --git a/drivers/gpu/ipu-v3/Makefile b/drivers/gpu/ipu-v3/Makefile
new file mode 100644
index 000000000000..d21cc37d0498
--- /dev/null
+++ b/drivers/gpu/ipu-v3/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_IMX_IPUV3_CORE) += imx-ipu-v3.o
+
+imx-ipu-v3-objs := ipu-common.o ipu-dc.o ipu-di.o ipu-dp.o ipu-dmfc.o
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
new file mode 100644
index 000000000000..7e1f614e7fbc
--- /dev/null
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -0,0 +1,1261 @@
+/*
+ * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
+ * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/reset.h>
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/list.h>
+#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_device.h>
+
+#include <drm/drm_fourcc.h>
+
+#include <video/imx-ipu-v3.h>
+#include "ipu-prv.h"
+
+static inline u32 ipu_cm_read(struct ipu_soc *ipu, unsigned offset)
+{
+	return readl(ipu->cm_reg + offset);
+}
+
+static inline void ipu_cm_write(struct ipu_soc *ipu, u32 value, unsigned offset)
+{
+	writel(value, ipu->cm_reg + offset);
+}
+
+static inline u32 ipu_idmac_read(struct ipu_soc *ipu, unsigned offset)
+{
+	return readl(ipu->idmac_reg + offset);
+}
+
+static inline void ipu_idmac_write(struct ipu_soc *ipu, u32 value,
+		unsigned offset)
+{
+	writel(value, ipu->idmac_reg + offset);
+}
+
+void ipu_srm_dp_sync_update(struct ipu_soc *ipu)
+{
+	u32 val;
+
+	val = ipu_cm_read(ipu, IPU_SRM_PRI2);
+	val |= 0x8;
+	ipu_cm_write(ipu, val, IPU_SRM_PRI2);
+}
+EXPORT_SYMBOL_GPL(ipu_srm_dp_sync_update);
+
+struct ipu_ch_param __iomem *ipu_get_cpmem(struct ipuv3_channel *channel)
+{
+	struct ipu_soc *ipu = channel->ipu;
+
+	return ipu->cpmem_base + channel->num;
+}
+EXPORT_SYMBOL_GPL(ipu_get_cpmem);
+
+void ipu_cpmem_set_high_priority(struct ipuv3_channel *channel)
+{
+	struct ipu_soc *ipu = channel->ipu;
+	struct ipu_ch_param __iomem *p = ipu_get_cpmem(channel);
+	u32 val;
+
+	if (ipu->ipu_type == IPUV3EX)
+		ipu_ch_param_write_field(p, IPU_FIELD_ID, 1);
+
+	val = ipu_idmac_read(ipu, IDMAC_CHA_PRI(channel->num));
+	val |= 1 << (channel->num % 32);
+	ipu_idmac_write(ipu, val, IDMAC_CHA_PRI(channel->num));
+};
+EXPORT_SYMBOL_GPL(ipu_cpmem_set_high_priority);
+
+void ipu_ch_param_write_field(struct ipu_ch_param __iomem *base, u32 wbs, u32 v)
+{
+	u32 bit = (wbs >> 8) % 160;
+	u32 size = wbs & 0xff;
+	u32 word = (wbs >> 8) / 160;
+	u32 i = bit / 32;
+	u32 ofs = bit % 32;
+	u32 mask = (1 << size) - 1;
+	u32 val;
+
+	pr_debug("%s %d %d %d\n", __func__, word, bit , size);
+
+	val = readl(&base->word[word].data[i]);
+	val &= ~(mask << ofs);
+	val |= v << ofs;
+	writel(val, &base->word[word].data[i]);
+
+	if ((bit + size - 1) / 32 > i) {
+		val = readl(&base->word[word].data[i + 1]);
+		val &= ~(mask >> (ofs ? (32 - ofs) : 0));
+		val |= v >> (ofs ? (32 - ofs) : 0);
+		writel(val, &base->word[word].data[i + 1]);
+	}
+}
+EXPORT_SYMBOL_GPL(ipu_ch_param_write_field);
+
+u32 ipu_ch_param_read_field(struct ipu_ch_param __iomem *base, u32 wbs)
+{
+	u32 bit = (wbs >> 8) % 160;
+	u32 size = wbs & 0xff;
+	u32 word = (wbs >> 8) / 160;
+	u32 i = bit / 32;
+	u32 ofs = bit % 32;
+	u32 mask = (1 << size) - 1;
+	u32 val = 0;
+
+	pr_debug("%s %d %d %d\n", __func__, word, bit , size);
+
+	val = (readl(&base->word[word].data[i]) >> ofs) & mask;
+
+	if ((bit + size - 1) / 32 > i) {
+		u32 tmp;
+		tmp = readl(&base->word[word].data[i + 1]);
+		tmp &= mask >> (ofs ? (32 - ofs) : 0);
+		val |= tmp << (ofs ? (32 - ofs) : 0);
+	}
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(ipu_ch_param_read_field);
+
+int ipu_cpmem_set_format_rgb(struct ipu_ch_param __iomem *p,
+		const struct ipu_rgb *rgb)
+{
+	int bpp = 0, npb = 0, ro, go, bo, to;
+
+	ro = rgb->bits_per_pixel - rgb->red.length - rgb->red.offset;
+	go = rgb->bits_per_pixel - rgb->green.length - rgb->green.offset;
+	bo = rgb->bits_per_pixel - rgb->blue.length - rgb->blue.offset;
+	to = rgb->bits_per_pixel - rgb->transp.length - rgb->transp.offset;
+
+	ipu_ch_param_write_field(p, IPU_FIELD_WID0, rgb->red.length - 1);
+	ipu_ch_param_write_field(p, IPU_FIELD_OFS0, ro);
+	ipu_ch_param_write_field(p, IPU_FIELD_WID1, rgb->green.length - 1);
+	ipu_ch_param_write_field(p, IPU_FIELD_OFS1, go);
+	ipu_ch_param_write_field(p, IPU_FIELD_WID2, rgb->blue.length - 1);
+	ipu_ch_param_write_field(p, IPU_FIELD_OFS2, bo);
+
+	if (rgb->transp.length) {
+		ipu_ch_param_write_field(p, IPU_FIELD_WID3,
+				rgb->transp.length - 1);
+		ipu_ch_param_write_field(p, IPU_FIELD_OFS3, to);
+	} else {
+		ipu_ch_param_write_field(p, IPU_FIELD_WID3, 7);
+		ipu_ch_param_write_field(p, IPU_FIELD_OFS3,
+				rgb->bits_per_pixel);
+	}
+
+	switch (rgb->bits_per_pixel) {
+	case 32:
+		bpp = 0;
+		npb = 15;
+		break;
+	case 24:
+		bpp = 1;
+		npb = 19;
+		break;
+	case 16:
+		bpp = 3;
+		npb = 31;
+		break;
+	case 8:
+		bpp = 5;
+		npb = 63;
+		break;
+	default:
+		return -EINVAL;
+	}
+	ipu_ch_param_write_field(p, IPU_FIELD_BPP, bpp);
+	ipu_ch_param_write_field(p, IPU_FIELD_NPB, npb);
+	ipu_ch_param_write_field(p, IPU_FIELD_PFS, 7); /* rgb mode */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_cpmem_set_format_rgb);
+
+int ipu_cpmem_set_format_passthrough(struct ipu_ch_param __iomem *p,
+		int width)
+{
+	int bpp = 0, npb = 0;
+
+	switch (width) {
+	case 32:
+		bpp = 0;
+		npb = 15;
+		break;
+	case 24:
+		bpp = 1;
+		npb = 19;
+		break;
+	case 16:
+		bpp = 3;
+		npb = 31;
+		break;
+	case 8:
+		bpp = 5;
+		npb = 63;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ipu_ch_param_write_field(p, IPU_FIELD_BPP, bpp);
+	ipu_ch_param_write_field(p, IPU_FIELD_NPB, npb);
+	ipu_ch_param_write_field(p, IPU_FIELD_PFS, 6); /* raw mode */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_cpmem_set_format_passthrough);
+
+void ipu_cpmem_set_yuv_interleaved(struct ipu_ch_param __iomem *p,
+				   u32 pixel_format)
+{
+	switch (pixel_format) {
+	case V4L2_PIX_FMT_UYVY:
+		ipu_ch_param_write_field(p, IPU_FIELD_BPP, 3);    /* bits/pixel */
+		ipu_ch_param_write_field(p, IPU_FIELD_PFS, 0xA);  /* pix format */
+		ipu_ch_param_write_field(p, IPU_FIELD_NPB, 31);   /* burst size */
+		break;
+	case V4L2_PIX_FMT_YUYV:
+		ipu_ch_param_write_field(p, IPU_FIELD_BPP, 3);    /* bits/pixel */
+		ipu_ch_param_write_field(p, IPU_FIELD_PFS, 0x8);  /* pix format */
+		ipu_ch_param_write_field(p, IPU_FIELD_NPB, 31);   /* burst size */
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(ipu_cpmem_set_yuv_interleaved);
+
+void ipu_cpmem_set_yuv_planar_full(struct ipu_ch_param __iomem *p,
+		u32 pixel_format, int stride, int u_offset, int v_offset)
+{
+	switch (pixel_format) {
+	case V4L2_PIX_FMT_YUV420:
+		ipu_ch_param_write_field(p, IPU_FIELD_SLUV, (stride / 2) - 1);
+		ipu_ch_param_write_field(p, IPU_FIELD_UBO, u_offset / 8);
+		ipu_ch_param_write_field(p, IPU_FIELD_VBO, v_offset / 8);
+		break;
+	case V4L2_PIX_FMT_YVU420:
+		ipu_ch_param_write_field(p, IPU_FIELD_SLUV, (stride / 2) - 1);
+		ipu_ch_param_write_field(p, IPU_FIELD_UBO, v_offset / 8);
+		ipu_ch_param_write_field(p, IPU_FIELD_VBO, u_offset / 8);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(ipu_cpmem_set_yuv_planar_full);
+
+void ipu_cpmem_set_yuv_planar(struct ipu_ch_param __iomem *p, u32 pixel_format,
+		int stride, int height)
+{
+	int u_offset, v_offset;
+	int uv_stride = 0;
+
+	switch (pixel_format) {
+	case V4L2_PIX_FMT_YUV420:
+	case V4L2_PIX_FMT_YVU420:
+		uv_stride = stride / 2;
+		u_offset = stride * height;
+		v_offset = u_offset + (uv_stride * height / 2);
+		ipu_cpmem_set_yuv_planar_full(p, pixel_format, stride,
+				u_offset, v_offset);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(ipu_cpmem_set_yuv_planar);
+
+static const struct ipu_rgb def_rgb_32 = {
+	.red	= { .offset = 16, .length = 8, },
+	.green	= { .offset =  8, .length = 8, },
+	.blue	= { .offset =  0, .length = 8, },
+	.transp = { .offset = 24, .length = 8, },
+	.bits_per_pixel = 32,
+};
+
+static const struct ipu_rgb def_bgr_32 = {
+	.red	= { .offset =  0, .length = 8, },
+	.green	= { .offset =  8, .length = 8, },
+	.blue	= { .offset = 16, .length = 8, },
+	.transp = { .offset = 24, .length = 8, },
+	.bits_per_pixel = 32,
+};
+
+static const struct ipu_rgb def_rgb_24 = {
+	.red	= { .offset = 16, .length = 8, },
+	.green	= { .offset =  8, .length = 8, },
+	.blue	= { .offset =  0, .length = 8, },
+	.transp = { .offset =  0, .length = 0, },
+	.bits_per_pixel = 24,
+};
+
+static const struct ipu_rgb def_bgr_24 = {
+	.red	= { .offset =  0, .length = 8, },
+	.green	= { .offset =  8, .length = 8, },
+	.blue	= { .offset = 16, .length = 8, },
+	.transp = { .offset =  0, .length = 0, },
+	.bits_per_pixel = 24,
+};
+
+static const struct ipu_rgb def_rgb_16 = {
+	.red	= { .offset = 11, .length = 5, },
+	.green	= { .offset =  5, .length = 6, },
+	.blue	= { .offset =  0, .length = 5, },
+	.transp = { .offset =  0, .length = 0, },
+	.bits_per_pixel = 16,
+};
+
+static const struct ipu_rgb def_bgr_16 = {
+	.red	= { .offset =  0, .length = 5, },
+	.green	= { .offset =  5, .length = 6, },
+	.blue	= { .offset = 11, .length = 5, },
+	.transp = { .offset =  0, .length = 0, },
+	.bits_per_pixel = 16,
+};
+
+#define Y_OFFSET(pix, x, y)	((x) + pix->width * (y))
+#define U_OFFSET(pix, x, y)	((pix->width * pix->height) + \
+					(pix->width * (y) / 4) + (x) / 2)
+#define V_OFFSET(pix, x, y)	((pix->width * pix->height) + \
+					(pix->width * pix->height / 4) + \
+					(pix->width * (y) / 4) + (x) / 2)
+
+int ipu_cpmem_set_fmt(struct ipu_ch_param __iomem *cpmem, u32 drm_fourcc)
+{
+	switch (drm_fourcc) {
+	case DRM_FORMAT_YUV420:
+	case DRM_FORMAT_YVU420:
+		/* pix format */
+		ipu_ch_param_write_field(cpmem, IPU_FIELD_PFS, 2);
+		/* burst size */
+		ipu_ch_param_write_field(cpmem, IPU_FIELD_NPB, 63);
+		break;
+	case DRM_FORMAT_UYVY:
+		/* bits/pixel */
+		ipu_ch_param_write_field(cpmem, IPU_FIELD_BPP, 3);
+		/* pix format */
+		ipu_ch_param_write_field(cpmem, IPU_FIELD_PFS, 0xA);
+		/* burst size */
+		ipu_ch_param_write_field(cpmem, IPU_FIELD_NPB, 31);
+		break;
+	case DRM_FORMAT_YUYV:
+		/* bits/pixel */
+		ipu_ch_param_write_field(cpmem, IPU_FIELD_BPP, 3);
+		/* pix format */
+		ipu_ch_param_write_field(cpmem, IPU_FIELD_PFS, 0x8);
+		/* burst size */
+		ipu_ch_param_write_field(cpmem, IPU_FIELD_NPB, 31);
+		break;
+	case DRM_FORMAT_ABGR8888:
+	case DRM_FORMAT_XBGR8888:
+		ipu_cpmem_set_format_rgb(cpmem, &def_bgr_32);
+		break;
+	case DRM_FORMAT_ARGB8888:
+	case DRM_FORMAT_XRGB8888:
+		ipu_cpmem_set_format_rgb(cpmem, &def_rgb_32);
+		break;
+	case DRM_FORMAT_BGR888:
+		ipu_cpmem_set_format_rgb(cpmem, &def_bgr_24);
+		break;
+	case DRM_FORMAT_RGB888:
+		ipu_cpmem_set_format_rgb(cpmem, &def_rgb_24);
+		break;
+	case DRM_FORMAT_RGB565:
+		ipu_cpmem_set_format_rgb(cpmem, &def_rgb_16);
+		break;
+	case DRM_FORMAT_BGR565:
+		ipu_cpmem_set_format_rgb(cpmem, &def_bgr_16);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_cpmem_set_fmt);
+
+/*
+ * The V4L2 spec defines packed RGB formats in memory byte order, which from
+ * point of view of the IPU corresponds to little-endian words with the first
+ * component in the least significant bits.
+ * The DRM pixel formats and IPU internal representation are ordered the other
+ * way around, with the first named component ordered at the most significant
+ * bits. Further, V4L2 formats are not well defined:
+ *     http://linuxtv.org/downloads/v4l-dvb-apis/packed-rgb.html
+ * We choose the interpretation which matches GStreamer behavior.
+ */
+static int v4l2_pix_fmt_to_drm_fourcc(u32 pixelformat)
+{
+	switch (pixelformat) {
+	case V4L2_PIX_FMT_RGB565:
+		/*
+		 * Here we choose the 'corrected' interpretation of RGBP, a
+		 * little-endian 16-bit word with the red component at the most
+		 * significant bits:
+		 * g[2:0]b[4:0] r[4:0]g[5:3] <=> [16:0] R:G:B
+		 */
+		return DRM_FORMAT_RGB565;
+	case V4L2_PIX_FMT_BGR24:
+		/* B G R <=> [24:0] R:G:B */
+		return DRM_FORMAT_RGB888;
+	case V4L2_PIX_FMT_RGB24:
+		/* R G B <=> [24:0] B:G:R */
+		return DRM_FORMAT_BGR888;
+	case V4L2_PIX_FMT_BGR32:
+		/* B G R A <=> [32:0] A:B:G:R */
+		return DRM_FORMAT_XRGB8888;
+	case V4L2_PIX_FMT_RGB32:
+		/* R G B A <=> [32:0] A:B:G:R */
+		return DRM_FORMAT_XBGR8888;
+	case V4L2_PIX_FMT_UYVY:
+		return DRM_FORMAT_UYVY;
+	case V4L2_PIX_FMT_YUYV:
+		return DRM_FORMAT_YUYV;
+	case V4L2_PIX_FMT_YUV420:
+		return DRM_FORMAT_YUV420;
+	case V4L2_PIX_FMT_YVU420:
+		return DRM_FORMAT_YVU420;
+	}
+
+	return -EINVAL;
+}
+
+enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc)
+{
+	switch (drm_fourcc) {
+	case DRM_FORMAT_RGB565:
+	case DRM_FORMAT_BGR565:
+	case DRM_FORMAT_RGB888:
+	case DRM_FORMAT_BGR888:
+	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_RGBX8888:
+	case DRM_FORMAT_BGRX8888:
+	case DRM_FORMAT_ARGB8888:
+	case DRM_FORMAT_ABGR8888:
+	case DRM_FORMAT_RGBA8888:
+	case DRM_FORMAT_BGRA8888:
+		return IPUV3_COLORSPACE_RGB;
+	case DRM_FORMAT_YUYV:
+	case DRM_FORMAT_UYVY:
+	case DRM_FORMAT_YUV420:
+	case DRM_FORMAT_YVU420:
+		return IPUV3_COLORSPACE_YUV;
+	default:
+		return IPUV3_COLORSPACE_UNKNOWN;
+	}
+}
+EXPORT_SYMBOL_GPL(ipu_drm_fourcc_to_colorspace);
+
+int ipu_cpmem_set_image(struct ipu_ch_param __iomem *cpmem,
+		struct ipu_image *image)
+{
+	struct v4l2_pix_format *pix = &image->pix;
+	int y_offset, u_offset, v_offset;
+
+	pr_debug("%s: resolution: %dx%d stride: %d\n",
+			__func__, pix->width, pix->height,
+			pix->bytesperline);
+
+	ipu_cpmem_set_resolution(cpmem, image->rect.width,
+			image->rect.height);
+	ipu_cpmem_set_stride(cpmem, pix->bytesperline);
+
+	ipu_cpmem_set_fmt(cpmem, v4l2_pix_fmt_to_drm_fourcc(pix->pixelformat));
+
+	switch (pix->pixelformat) {
+	case V4L2_PIX_FMT_YUV420:
+	case V4L2_PIX_FMT_YVU420:
+		y_offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
+		u_offset = U_OFFSET(pix, image->rect.left,
+				image->rect.top) - y_offset;
+		v_offset = V_OFFSET(pix, image->rect.left,
+				image->rect.top) - y_offset;
+
+		ipu_cpmem_set_yuv_planar_full(cpmem, pix->pixelformat,
+				pix->bytesperline, u_offset, v_offset);
+		ipu_cpmem_set_buffer(cpmem, 0, image->phys + y_offset);
+		break;
+	case V4L2_PIX_FMT_UYVY:
+	case V4L2_PIX_FMT_YUYV:
+		ipu_cpmem_set_buffer(cpmem, 0, image->phys +
+				image->rect.left * 2 +
+				image->rect.top * image->pix.bytesperline);
+		break;
+	case V4L2_PIX_FMT_RGB32:
+	case V4L2_PIX_FMT_BGR32:
+		ipu_cpmem_set_buffer(cpmem, 0, image->phys +
+				image->rect.left * 4 +
+				image->rect.top * image->pix.bytesperline);
+		break;
+	case V4L2_PIX_FMT_RGB565:
+		ipu_cpmem_set_buffer(cpmem, 0, image->phys +
+				image->rect.left * 2 +
+				image->rect.top * image->pix.bytesperline);
+		break;
+	case V4L2_PIX_FMT_RGB24:
+	case V4L2_PIX_FMT_BGR24:
+		ipu_cpmem_set_buffer(cpmem, 0, image->phys +
+				image->rect.left * 3 +
+				image->rect.top * image->pix.bytesperline);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_cpmem_set_image);
+
+enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat)
+{
+	switch (pixelformat) {
+	case V4L2_PIX_FMT_YUV420:
+	case V4L2_PIX_FMT_YVU420:
+	case V4L2_PIX_FMT_UYVY:
+	case V4L2_PIX_FMT_YUYV:
+		return IPUV3_COLORSPACE_YUV;
+	case V4L2_PIX_FMT_RGB32:
+	case V4L2_PIX_FMT_BGR32:
+	case V4L2_PIX_FMT_RGB24:
+	case V4L2_PIX_FMT_BGR24:
+	case V4L2_PIX_FMT_RGB565:
+		return IPUV3_COLORSPACE_RGB;
+	default:
+		return IPUV3_COLORSPACE_UNKNOWN;
+	}
+}
+EXPORT_SYMBOL_GPL(ipu_pixelformat_to_colorspace);
+
+struct ipuv3_channel *ipu_idmac_get(struct ipu_soc *ipu, unsigned num)
+{
+	struct ipuv3_channel *channel;
+
+	dev_dbg(ipu->dev, "%s %d\n", __func__, num);
+
+	if (num > 63)
+		return ERR_PTR(-ENODEV);
+
+	mutex_lock(&ipu->channel_lock);
+
+	channel = &ipu->channel[num];
+
+	if (channel->busy) {
+		channel = ERR_PTR(-EBUSY);
+		goto out;
+	}
+
+	channel->busy = true;
+	channel->num = num;
+
+out:
+	mutex_unlock(&ipu->channel_lock);
+
+	return channel;
+}
+EXPORT_SYMBOL_GPL(ipu_idmac_get);
+
+void ipu_idmac_put(struct ipuv3_channel *channel)
+{
+	struct ipu_soc *ipu = channel->ipu;
+
+	dev_dbg(ipu->dev, "%s %d\n", __func__, channel->num);
+
+	mutex_lock(&ipu->channel_lock);
+
+	channel->busy = false;
+
+	mutex_unlock(&ipu->channel_lock);
+}
+EXPORT_SYMBOL_GPL(ipu_idmac_put);
+
+#define idma_mask(ch)			(1 << (ch & 0x1f))
+
+void ipu_idmac_set_double_buffer(struct ipuv3_channel *channel,
+		bool doublebuffer)
+{
+	struct ipu_soc *ipu = channel->ipu;
+	unsigned long flags;
+	u32 reg;
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	reg = ipu_cm_read(ipu, IPU_CHA_DB_MODE_SEL(channel->num));
+	if (doublebuffer)
+		reg |= idma_mask(channel->num);
+	else
+		reg &= ~idma_mask(channel->num);
+	ipu_cm_write(ipu, reg, IPU_CHA_DB_MODE_SEL(channel->num));
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+}
+EXPORT_SYMBOL_GPL(ipu_idmac_set_double_buffer);
+
+int ipu_module_enable(struct ipu_soc *ipu, u32 mask)
+{
+	unsigned long lock_flags;
+	u32 val;
+
+	spin_lock_irqsave(&ipu->lock, lock_flags);
+
+	val = ipu_cm_read(ipu, IPU_DISP_GEN);
+
+	if (mask & IPU_CONF_DI0_EN)
+		val |= IPU_DI0_COUNTER_RELEASE;
+	if (mask & IPU_CONF_DI1_EN)
+		val |= IPU_DI1_COUNTER_RELEASE;
+
+	ipu_cm_write(ipu, val, IPU_DISP_GEN);
+
+	val = ipu_cm_read(ipu, IPU_CONF);
+	val |= mask;
+	ipu_cm_write(ipu, val, IPU_CONF);
+
+	spin_unlock_irqrestore(&ipu->lock, lock_flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_module_enable);
+
+int ipu_module_disable(struct ipu_soc *ipu, u32 mask)
+{
+	unsigned long lock_flags;
+	u32 val;
+
+	spin_lock_irqsave(&ipu->lock, lock_flags);
+
+	val = ipu_cm_read(ipu, IPU_CONF);
+	val &= ~mask;
+	ipu_cm_write(ipu, val, IPU_CONF);
+
+	val = ipu_cm_read(ipu, IPU_DISP_GEN);
+
+	if (mask & IPU_CONF_DI0_EN)
+		val &= ~IPU_DI0_COUNTER_RELEASE;
+	if (mask & IPU_CONF_DI1_EN)
+		val &= ~IPU_DI1_COUNTER_RELEASE;
+
+	ipu_cm_write(ipu, val, IPU_DISP_GEN);
+
+	spin_unlock_irqrestore(&ipu->lock, lock_flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_module_disable);
+
+void ipu_idmac_select_buffer(struct ipuv3_channel *channel, u32 buf_num)
+{
+	struct ipu_soc *ipu = channel->ipu;
+	unsigned int chno = channel->num;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	/* Mark buffer as ready. */
+	if (buf_num == 0)
+		ipu_cm_write(ipu, idma_mask(chno), IPU_CHA_BUF0_RDY(chno));
+	else
+		ipu_cm_write(ipu, idma_mask(chno), IPU_CHA_BUF1_RDY(chno));
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+}
+EXPORT_SYMBOL_GPL(ipu_idmac_select_buffer);
+
+int ipu_idmac_enable_channel(struct ipuv3_channel *channel)
+{
+	struct ipu_soc *ipu = channel->ipu;
+	u32 val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	val = ipu_idmac_read(ipu, IDMAC_CHA_EN(channel->num));
+	val |= idma_mask(channel->num);
+	ipu_idmac_write(ipu, val, IDMAC_CHA_EN(channel->num));
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_idmac_enable_channel);
+
+int ipu_idmac_wait_busy(struct ipuv3_channel *channel, int ms)
+{
+	struct ipu_soc *ipu = channel->ipu;
+	unsigned long timeout;
+
+	timeout = jiffies + msecs_to_jiffies(ms);
+	while (ipu_idmac_read(ipu, IDMAC_CHA_BUSY(channel->num)) &
+			idma_mask(channel->num)) {
+		if (time_after(jiffies, timeout))
+			return -ETIMEDOUT;
+		cpu_relax();
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_idmac_wait_busy);
+
+int ipu_idmac_disable_channel(struct ipuv3_channel *channel)
+{
+	struct ipu_soc *ipu = channel->ipu;
+	u32 val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	/* Disable DMA channel(s) */
+	val = ipu_idmac_read(ipu, IDMAC_CHA_EN(channel->num));
+	val &= ~idma_mask(channel->num);
+	ipu_idmac_write(ipu, val, IDMAC_CHA_EN(channel->num));
+
+	/* Set channel buffers NOT to be ready */
+	ipu_cm_write(ipu, 0xf0000000, IPU_GPR); /* write one to clear */
+
+	if (ipu_cm_read(ipu, IPU_CHA_BUF0_RDY(channel->num)) &
+			idma_mask(channel->num)) {
+		ipu_cm_write(ipu, idma_mask(channel->num),
+			     IPU_CHA_BUF0_RDY(channel->num));
+	}
+
+	if (ipu_cm_read(ipu, IPU_CHA_BUF1_RDY(channel->num)) &
+			idma_mask(channel->num)) {
+		ipu_cm_write(ipu, idma_mask(channel->num),
+			     IPU_CHA_BUF1_RDY(channel->num));
+	}
+
+	ipu_cm_write(ipu, 0x0, IPU_GPR); /* write one to set */
+
+	/* Reset the double buffer */
+	val = ipu_cm_read(ipu, IPU_CHA_DB_MODE_SEL(channel->num));
+	val &= ~idma_mask(channel->num);
+	ipu_cm_write(ipu, val, IPU_CHA_DB_MODE_SEL(channel->num));
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_idmac_disable_channel);
+
+static int ipu_memory_reset(struct ipu_soc *ipu)
+{
+	unsigned long timeout;
+
+	ipu_cm_write(ipu, 0x807FFFFF, IPU_MEM_RST);
+
+	timeout = jiffies + msecs_to_jiffies(1000);
+	while (ipu_cm_read(ipu, IPU_MEM_RST) & 0x80000000) {
+		if (time_after(jiffies, timeout))
+			return -ETIME;
+		cpu_relax();
+	}
+
+	return 0;
+}
+
+struct ipu_devtype {
+	const char *name;
+	unsigned long cm_ofs;
+	unsigned long cpmem_ofs;
+	unsigned long srm_ofs;
+	unsigned long tpm_ofs;
+	unsigned long disp0_ofs;
+	unsigned long disp1_ofs;
+	unsigned long dc_tmpl_ofs;
+	unsigned long vdi_ofs;
+	enum ipuv3_type type;
+};
+
+static struct ipu_devtype ipu_type_imx51 = {
+	.name = "IPUv3EX",
+	.cm_ofs = 0x1e000000,
+	.cpmem_ofs = 0x1f000000,
+	.srm_ofs = 0x1f040000,
+	.tpm_ofs = 0x1f060000,
+	.disp0_ofs = 0x1e040000,
+	.disp1_ofs = 0x1e048000,
+	.dc_tmpl_ofs = 0x1f080000,
+	.vdi_ofs = 0x1e068000,
+	.type = IPUV3EX,
+};
+
+static struct ipu_devtype ipu_type_imx53 = {
+	.name = "IPUv3M",
+	.cm_ofs = 0x06000000,
+	.cpmem_ofs = 0x07000000,
+	.srm_ofs = 0x07040000,
+	.tpm_ofs = 0x07060000,
+	.disp0_ofs = 0x06040000,
+	.disp1_ofs = 0x06048000,
+	.dc_tmpl_ofs = 0x07080000,
+	.vdi_ofs = 0x06068000,
+	.type = IPUV3M,
+};
+
+static struct ipu_devtype ipu_type_imx6q = {
+	.name = "IPUv3H",
+	.cm_ofs = 0x00200000,
+	.cpmem_ofs = 0x00300000,
+	.srm_ofs = 0x00340000,
+	.tpm_ofs = 0x00360000,
+	.disp0_ofs = 0x00240000,
+	.disp1_ofs = 0x00248000,
+	.dc_tmpl_ofs = 0x00380000,
+	.vdi_ofs = 0x00268000,
+	.type = IPUV3H,
+};
+
+static const struct of_device_id imx_ipu_dt_ids[] = {
+	{ .compatible = "fsl,imx51-ipu", .data = &ipu_type_imx51, },
+	{ .compatible = "fsl,imx53-ipu", .data = &ipu_type_imx53, },
+	{ .compatible = "fsl,imx6q-ipu", .data = &ipu_type_imx6q, },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx_ipu_dt_ids);
+
+static int ipu_submodules_init(struct ipu_soc *ipu,
+		struct platform_device *pdev, unsigned long ipu_base,
+		struct clk *ipu_clk)
+{
+	char *unit;
+	int ret;
+	struct device *dev = &pdev->dev;
+	const struct ipu_devtype *devtype = ipu->devtype;
+
+	ret = ipu_di_init(ipu, dev, 0, ipu_base + devtype->disp0_ofs,
+			IPU_CONF_DI0_EN, ipu_clk);
+	if (ret) {
+		unit = "di0";
+		goto err_di_0;
+	}
+
+	ret = ipu_di_init(ipu, dev, 1, ipu_base + devtype->disp1_ofs,
+			IPU_CONF_DI1_EN, ipu_clk);
+	if (ret) {
+		unit = "di1";
+		goto err_di_1;
+	}
+
+	ret = ipu_dc_init(ipu, dev, ipu_base + devtype->cm_ofs +
+			IPU_CM_DC_REG_OFS, ipu_base + devtype->dc_tmpl_ofs);
+	if (ret) {
+		unit = "dc_template";
+		goto err_dc;
+	}
+
+	ret = ipu_dmfc_init(ipu, dev, ipu_base +
+			devtype->cm_ofs + IPU_CM_DMFC_REG_OFS, ipu_clk);
+	if (ret) {
+		unit = "dmfc";
+		goto err_dmfc;
+	}
+
+	ret = ipu_dp_init(ipu, dev, ipu_base + devtype->srm_ofs);
+	if (ret) {
+		unit = "dp";
+		goto err_dp;
+	}
+
+	return 0;
+
+err_dp:
+	ipu_dmfc_exit(ipu);
+err_dmfc:
+	ipu_dc_exit(ipu);
+err_dc:
+	ipu_di_exit(ipu, 1);
+err_di_1:
+	ipu_di_exit(ipu, 0);
+err_di_0:
+	dev_err(&pdev->dev, "init %s failed with %d\n", unit, ret);
+	return ret;
+}
+
+static void ipu_irq_handle(struct ipu_soc *ipu, const int *regs, int num_regs)
+{
+	unsigned long status;
+	int i, bit, irq;
+
+	for (i = 0; i < num_regs; i++) {
+
+		status = ipu_cm_read(ipu, IPU_INT_STAT(regs[i]));
+		status &= ipu_cm_read(ipu, IPU_INT_CTRL(regs[i]));
+
+		for_each_set_bit(bit, &status, 32) {
+			irq = irq_linear_revmap(ipu->domain, regs[i] * 32 + bit);
+			if (irq)
+				generic_handle_irq(irq);
+		}
+	}
+}
+
+static void ipu_irq_handler(unsigned int irq, struct irq_desc *desc)
+{
+	struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
+	const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14};
+	struct irq_chip *chip = irq_get_chip(irq);
+
+	chained_irq_enter(chip, desc);
+
+	ipu_irq_handle(ipu, int_reg, ARRAY_SIZE(int_reg));
+
+	chained_irq_exit(chip, desc);
+}
+
+static void ipu_err_irq_handler(unsigned int irq, struct irq_desc *desc)
+{
+	struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
+	const int int_reg[] = { 4, 5, 8, 9};
+	struct irq_chip *chip = irq_get_chip(irq);
+
+	chained_irq_enter(chip, desc);
+
+	ipu_irq_handle(ipu, int_reg, ARRAY_SIZE(int_reg));
+
+	chained_irq_exit(chip, desc);
+}
+
+int ipu_idmac_channel_irq(struct ipu_soc *ipu, struct ipuv3_channel *channel,
+		enum ipu_channel_irq irq_type)
+{
+	int irq = irq_linear_revmap(ipu->domain, irq_type + channel->num);
+
+	if (!irq)
+		irq = irq_create_mapping(ipu->domain, irq_type + channel->num);
+
+	return irq;
+}
+EXPORT_SYMBOL_GPL(ipu_idmac_channel_irq);
+
+static void ipu_submodules_exit(struct ipu_soc *ipu)
+{
+	ipu_dp_exit(ipu);
+	ipu_dmfc_exit(ipu);
+	ipu_dc_exit(ipu);
+	ipu_di_exit(ipu, 1);
+	ipu_di_exit(ipu, 0);
+}
+
+static int platform_remove_devices_fn(struct device *dev, void *unused)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	platform_device_unregister(pdev);
+
+	return 0;
+}
+
+static void platform_device_unregister_children(struct platform_device *pdev)
+{
+	device_for_each_child(&pdev->dev, NULL, platform_remove_devices_fn);
+}
+
+struct ipu_platform_reg {
+	struct ipu_client_platformdata pdata;
+	const char *name;
+};
+
+static const struct ipu_platform_reg client_reg[] = {
+	{
+		.pdata = {
+			.di = 0,
+			.dc = 5,
+			.dp = IPU_DP_FLOW_SYNC_BG,
+			.dma[0] = IPUV3_CHANNEL_MEM_BG_SYNC,
+			.dma[1] = IPUV3_CHANNEL_MEM_FG_SYNC,
+		},
+		.name = "imx-ipuv3-crtc",
+	}, {
+		.pdata = {
+			.di = 1,
+			.dc = 1,
+			.dp = -EINVAL,
+			.dma[0] = IPUV3_CHANNEL_MEM_DC_SYNC,
+			.dma[1] = -EINVAL,
+		},
+		.name = "imx-ipuv3-crtc",
+	},
+};
+
+static DEFINE_MUTEX(ipu_client_id_mutex);
+static int ipu_client_id;
+
+static int ipu_add_client_devices(struct ipu_soc *ipu)
+{
+	struct device *dev = ipu->dev;
+	unsigned i;
+	int id, ret;
+
+	mutex_lock(&ipu_client_id_mutex);
+	id = ipu_client_id;
+	ipu_client_id += ARRAY_SIZE(client_reg);
+	mutex_unlock(&ipu_client_id_mutex);
+
+	for (i = 0; i < ARRAY_SIZE(client_reg); i++) {
+		const struct ipu_platform_reg *reg = &client_reg[i];
+		struct platform_device *pdev;
+
+		pdev = platform_device_register_data(dev, reg->name,
+			id++, &reg->pdata, sizeof(reg->pdata));
+
+		if (IS_ERR(pdev))
+			goto err_register;
+	}
+
+	return 0;
+
+err_register:
+	platform_device_unregister_children(to_platform_device(dev));
+
+	return ret;
+}
+
+
+static int ipu_irq_init(struct ipu_soc *ipu)
+{
+	struct irq_chip_generic *gc;
+	struct irq_chip_type *ct;
+	unsigned long unused[IPU_NUM_IRQS / 32] = {
+		0x400100d0, 0xffe000fd,
+		0x400100d0, 0xffe000fd,
+		0x400100d0, 0xffe000fd,
+		0x4077ffff, 0xffe7e1fd,
+		0x23fffffe, 0x8880fff0,
+		0xf98fe7d0, 0xfff81fff,
+		0x400100d0, 0xffe000fd,
+		0x00000000,
+	};
+	int ret, i;
+
+	ipu->domain = irq_domain_add_linear(ipu->dev->of_node, IPU_NUM_IRQS,
+					    &irq_generic_chip_ops, ipu);
+	if (!ipu->domain) {
+		dev_err(ipu->dev, "failed to add irq domain\n");
+		return -ENODEV;
+	}
+
+	ret = irq_alloc_domain_generic_chips(ipu->domain, 32, 1, "IPU",
+					     handle_level_irq, 0, IRQF_VALID, 0);
+	if (ret < 0) {
+		dev_err(ipu->dev, "failed to alloc generic irq chips\n");
+		irq_domain_remove(ipu->domain);
+		return ret;
+	}
+
+	for (i = 0; i < IPU_NUM_IRQS; i += 32) {
+		gc = irq_get_domain_generic_chip(ipu->domain, i);
+		gc->reg_base = ipu->cm_reg;
+		gc->unused = unused[i / 32];
+		ct = gc->chip_types;
+		ct->chip.irq_ack = irq_gc_ack_set_bit;
+		ct->chip.irq_mask = irq_gc_mask_clr_bit;
+		ct->chip.irq_unmask = irq_gc_mask_set_bit;
+		ct->regs.ack = IPU_INT_STAT(i / 32);
+		ct->regs.mask = IPU_INT_CTRL(i / 32);
+	}
+
+	irq_set_chained_handler(ipu->irq_sync, ipu_irq_handler);
+	irq_set_handler_data(ipu->irq_sync, ipu);
+	irq_set_chained_handler(ipu->irq_err, ipu_err_irq_handler);
+	irq_set_handler_data(ipu->irq_err, ipu);
+
+	return 0;
+}
+
+static void ipu_irq_exit(struct ipu_soc *ipu)
+{
+	int i, irq;
+
+	irq_set_chained_handler(ipu->irq_err, NULL);
+	irq_set_handler_data(ipu->irq_err, NULL);
+	irq_set_chained_handler(ipu->irq_sync, NULL);
+	irq_set_handler_data(ipu->irq_sync, NULL);
+
+	/* TODO: remove irq_domain_generic_chips */
+
+	for (i = 0; i < IPU_NUM_IRQS; i++) {
+		irq = irq_linear_revmap(ipu->domain, i);
+		if (irq)
+			irq_dispose_mapping(irq);
+	}
+
+	irq_domain_remove(ipu->domain);
+}
+
+static int ipu_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *of_id =
+			of_match_device(imx_ipu_dt_ids, &pdev->dev);
+	struct ipu_soc *ipu;
+	struct resource *res;
+	unsigned long ipu_base;
+	int i, ret, irq_sync, irq_err;
+	const struct ipu_devtype *devtype;
+
+	devtype = of_id->data;
+
+	irq_sync = platform_get_irq(pdev, 0);
+	irq_err = platform_get_irq(pdev, 1);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	dev_dbg(&pdev->dev, "irq_sync: %d irq_err: %d\n",
+			irq_sync, irq_err);
+
+	if (!res || irq_sync < 0 || irq_err < 0)
+		return -ENODEV;
+
+	ipu_base = res->start;
+
+	ipu = devm_kzalloc(&pdev->dev, sizeof(*ipu), GFP_KERNEL);
+	if (!ipu)
+		return -ENODEV;
+
+	for (i = 0; i < 64; i++)
+		ipu->channel[i].ipu = ipu;
+	ipu->devtype = devtype;
+	ipu->ipu_type = devtype->type;
+
+	spin_lock_init(&ipu->lock);
+	mutex_init(&ipu->channel_lock);
+
+	dev_dbg(&pdev->dev, "cm_reg:   0x%08lx\n",
+			ipu_base + devtype->cm_ofs);
+	dev_dbg(&pdev->dev, "idmac:    0x%08lx\n",
+			ipu_base + devtype->cm_ofs + IPU_CM_IDMAC_REG_OFS);
+	dev_dbg(&pdev->dev, "cpmem:    0x%08lx\n",
+			ipu_base + devtype->cpmem_ofs);
+	dev_dbg(&pdev->dev, "disp0:    0x%08lx\n",
+			ipu_base + devtype->disp0_ofs);
+	dev_dbg(&pdev->dev, "disp1:    0x%08lx\n",
+			ipu_base + devtype->disp1_ofs);
+	dev_dbg(&pdev->dev, "srm:      0x%08lx\n",
+			ipu_base + devtype->srm_ofs);
+	dev_dbg(&pdev->dev, "tpm:      0x%08lx\n",
+			ipu_base + devtype->tpm_ofs);
+	dev_dbg(&pdev->dev, "dc:       0x%08lx\n",
+			ipu_base + devtype->cm_ofs + IPU_CM_DC_REG_OFS);
+	dev_dbg(&pdev->dev, "ic:       0x%08lx\n",
+			ipu_base + devtype->cm_ofs + IPU_CM_IC_REG_OFS);
+	dev_dbg(&pdev->dev, "dmfc:     0x%08lx\n",
+			ipu_base + devtype->cm_ofs + IPU_CM_DMFC_REG_OFS);
+	dev_dbg(&pdev->dev, "vdi:      0x%08lx\n",
+			ipu_base + devtype->vdi_ofs);
+
+	ipu->cm_reg = devm_ioremap(&pdev->dev,
+			ipu_base + devtype->cm_ofs, PAGE_SIZE);
+	ipu->idmac_reg = devm_ioremap(&pdev->dev,
+			ipu_base + devtype->cm_ofs + IPU_CM_IDMAC_REG_OFS,
+			PAGE_SIZE);
+	ipu->cpmem_base = devm_ioremap(&pdev->dev,
+			ipu_base + devtype->cpmem_ofs, PAGE_SIZE);
+
+	if (!ipu->cm_reg || !ipu->idmac_reg || !ipu->cpmem_base)
+		return -ENOMEM;
+
+	ipu->clk = devm_clk_get(&pdev->dev, "bus");
+	if (IS_ERR(ipu->clk)) {
+		ret = PTR_ERR(ipu->clk);
+		dev_err(&pdev->dev, "clk_get failed with %d", ret);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, ipu);
+
+	ret = clk_prepare_enable(ipu->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+
+	ipu->dev = &pdev->dev;
+	ipu->irq_sync = irq_sync;
+	ipu->irq_err = irq_err;
+
+	ret = ipu_irq_init(ipu);
+	if (ret)
+		goto out_failed_irq;
+
+	ret = device_reset(&pdev->dev);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to reset: %d\n", ret);
+		goto out_failed_reset;
+	}
+	ret = ipu_memory_reset(ipu);
+	if (ret)
+		goto out_failed_reset;
+
+	/* Set MCU_T to divide MCU access window into 2 */
+	ipu_cm_write(ipu, 0x00400000L | (IPU_MCU_T_DEFAULT << 18),
+			IPU_DISP_GEN);
+
+	ret = ipu_submodules_init(ipu, pdev, ipu_base, ipu->clk);
+	if (ret)
+		goto failed_submodules_init;
+
+	ret = ipu_add_client_devices(ipu);
+	if (ret) {
+		dev_err(&pdev->dev, "adding client devices failed with %d\n",
+				ret);
+		goto failed_add_clients;
+	}
+
+	dev_info(&pdev->dev, "%s probed\n", devtype->name);
+
+	return 0;
+
+failed_add_clients:
+	ipu_submodules_exit(ipu);
+failed_submodules_init:
+out_failed_reset:
+	ipu_irq_exit(ipu);
+out_failed_irq:
+	clk_disable_unprepare(ipu->clk);
+	return ret;
+}
+
+static int ipu_remove(struct platform_device *pdev)
+{
+	struct ipu_soc *ipu = platform_get_drvdata(pdev);
+
+	platform_device_unregister_children(pdev);
+	ipu_submodules_exit(ipu);
+	ipu_irq_exit(ipu);
+
+	clk_disable_unprepare(ipu->clk);
+
+	return 0;
+}
+
+static struct platform_driver imx_ipu_driver = {
+	.driver = {
+		.name = "imx-ipuv3",
+		.of_match_table = imx_ipu_dt_ids,
+	},
+	.probe = ipu_probe,
+	.remove = ipu_remove,
+};
+
+module_platform_driver(imx_ipu_driver);
+
+MODULE_ALIAS("platform:imx-ipuv3");
+MODULE_DESCRIPTION("i.MX IPU v3 driver");
+MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/ipu-v3/ipu-dc.c b/drivers/gpu/ipu-v3/ipu-dc.c
new file mode 100644
index 000000000000..9f1e5efa3acf
--- /dev/null
+++ b/drivers/gpu/ipu-v3/ipu-dc.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
+ * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include <video/imx-ipu-v3.h>
+#include "ipu-prv.h"
+
+#define DC_MAP_CONF_PTR(n)	(0x108 + ((n) & ~0x1) * 2)
+#define DC_MAP_CONF_VAL(n)	(0x144 + ((n) & ~0x1) * 2)
+
+#define DC_EVT_NF		0
+#define DC_EVT_NL		1
+#define DC_EVT_EOF		2
+#define DC_EVT_NFIELD		3
+#define DC_EVT_EOL		4
+#define DC_EVT_EOFIELD		5
+#define DC_EVT_NEW_ADDR		6
+#define DC_EVT_NEW_CHAN		7
+#define DC_EVT_NEW_DATA		8
+
+#define DC_EVT_NEW_ADDR_W_0	0
+#define DC_EVT_NEW_ADDR_W_1	1
+#define DC_EVT_NEW_CHAN_W_0	2
+#define DC_EVT_NEW_CHAN_W_1	3
+#define DC_EVT_NEW_DATA_W_0	4
+#define DC_EVT_NEW_DATA_W_1	5
+#define DC_EVT_NEW_ADDR_R_0	6
+#define DC_EVT_NEW_ADDR_R_1	7
+#define DC_EVT_NEW_CHAN_R_0	8
+#define DC_EVT_NEW_CHAN_R_1	9
+#define DC_EVT_NEW_DATA_R_0	10
+#define DC_EVT_NEW_DATA_R_1	11
+
+#define DC_WR_CH_CONF		0x0
+#define DC_WR_CH_ADDR		0x4
+#define DC_RL_CH(evt)		(8 + ((evt) & ~0x1) * 2)
+
+#define DC_GEN			0xd4
+#define DC_DISP_CONF1(disp)	(0xd8 + (disp) * 4)
+#define DC_DISP_CONF2(disp)	(0xe8 + (disp) * 4)
+#define DC_STAT			0x1c8
+
+#define WROD(lf)		(0x18 | ((lf) << 1))
+#define WRG			0x01
+#define WCLK			0xc9
+
+#define SYNC_WAVE 0
+#define NULL_WAVE (-1)
+
+#define DC_GEN_SYNC_1_6_SYNC	(2 << 1)
+#define DC_GEN_SYNC_PRIORITY_1	(1 << 7)
+
+#define DC_WR_CH_CONF_WORD_SIZE_8		(0 << 0)
+#define DC_WR_CH_CONF_WORD_SIZE_16		(1 << 0)
+#define DC_WR_CH_CONF_WORD_SIZE_24		(2 << 0)
+#define DC_WR_CH_CONF_WORD_SIZE_32		(3 << 0)
+#define DC_WR_CH_CONF_DISP_ID_PARALLEL(i)	(((i) & 0x1) << 3)
+#define DC_WR_CH_CONF_DISP_ID_SERIAL		(2 << 3)
+#define DC_WR_CH_CONF_DISP_ID_ASYNC		(3 << 4)
+#define DC_WR_CH_CONF_FIELD_MODE		(1 << 9)
+#define DC_WR_CH_CONF_PROG_TYPE_NORMAL		(4 << 5)
+#define DC_WR_CH_CONF_PROG_TYPE_MASK		(7 << 5)
+#define DC_WR_CH_CONF_PROG_DI_ID		(1 << 2)
+#define DC_WR_CH_CONF_PROG_DISP_ID(i)		(((i) & 0x1) << 3)
+
+#define IPU_DC_NUM_CHANNELS	10
+
+struct ipu_dc_priv;
+
+enum ipu_dc_map {
+	IPU_DC_MAP_RGB24,
+	IPU_DC_MAP_RGB565,
+	IPU_DC_MAP_GBR24, /* TVEv2 */
+	IPU_DC_MAP_BGR666,
+	IPU_DC_MAP_BGR24,
+};
+
+struct ipu_dc {
+	/* The display interface number assigned to this dc channel */
+	unsigned int		di;
+	void __iomem		*base;
+	struct ipu_dc_priv	*priv;
+	int			chno;
+	bool			in_use;
+};
+
+struct ipu_dc_priv {
+	void __iomem		*dc_reg;
+	void __iomem		*dc_tmpl_reg;
+	struct ipu_soc		*ipu;
+	struct device		*dev;
+	struct ipu_dc		channels[IPU_DC_NUM_CHANNELS];
+	struct mutex		mutex;
+};
+
+static void dc_link_event(struct ipu_dc *dc, int event, int addr, int priority)
+{
+	u32 reg;
+
+	reg = readl(dc->base + DC_RL_CH(event));
+	reg &= ~(0xffff << (16 * (event & 0x1)));
+	reg |= ((addr << 8) | priority) << (16 * (event & 0x1));
+	writel(reg, dc->base + DC_RL_CH(event));
+}
+
+static void dc_write_tmpl(struct ipu_dc *dc, int word, u32 opcode, u32 operand,
+		int map, int wave, int glue, int sync, int stop)
+{
+	struct ipu_dc_priv *priv = dc->priv;
+	u32 reg1, reg2;
+
+	if (opcode == WCLK) {
+		reg1 = (operand << 20) & 0xfff00000;
+		reg2 = operand >> 12 | opcode << 1 | stop << 9;
+	} else if (opcode == WRG) {
+		reg1 = sync | glue << 4 | ++wave << 11 | ((operand << 15) & 0xffff8000);
+		reg2 = operand >> 17 | opcode << 7 | stop << 9;
+	} else {
+		reg1 = sync | glue << 4 | ++wave << 11 | ++map << 15 | ((operand << 20) & 0xfff00000);
+		reg2 = operand >> 12 | opcode << 4 | stop << 9;
+	}
+	writel(reg1, priv->dc_tmpl_reg + word * 8);
+	writel(reg2, priv->dc_tmpl_reg + word * 8 + 4);
+}
+
+static int ipu_pixfmt_to_map(u32 fmt)
+{
+	switch (fmt) {
+	case V4L2_PIX_FMT_RGB24:
+		return IPU_DC_MAP_RGB24;
+	case V4L2_PIX_FMT_RGB565:
+		return IPU_DC_MAP_RGB565;
+	case IPU_PIX_FMT_GBR24:
+		return IPU_DC_MAP_GBR24;
+	case V4L2_PIX_FMT_BGR666:
+		return IPU_DC_MAP_BGR666;
+	case V4L2_PIX_FMT_BGR24:
+		return IPU_DC_MAP_BGR24;
+	default:
+		return -EINVAL;
+	}
+}
+
+int ipu_dc_init_sync(struct ipu_dc *dc, struct ipu_di *di, bool interlaced,
+		u32 pixel_fmt, u32 width)
+{
+	struct ipu_dc_priv *priv = dc->priv;
+	u32 reg = 0;
+	int map;
+
+	dc->di = ipu_di_get_num(di);
+
+	map = ipu_pixfmt_to_map(pixel_fmt);
+	if (map < 0) {
+		dev_dbg(priv->dev, "IPU_DISP: No MAP\n");
+		return map;
+	}
+
+	if (interlaced) {
+		dc_link_event(dc, DC_EVT_NL, 0, 3);
+		dc_link_event(dc, DC_EVT_EOL, 0, 2);
+		dc_link_event(dc, DC_EVT_NEW_DATA, 0, 1);
+
+		/* Init template microcode */
+		dc_write_tmpl(dc, 0, WROD(0), 0, map, SYNC_WAVE, 0, 8, 1);
+	} else {
+		if (dc->di) {
+			dc_link_event(dc, DC_EVT_NL, 2, 3);
+			dc_link_event(dc, DC_EVT_EOL, 3, 2);
+			dc_link_event(dc, DC_EVT_NEW_DATA, 1, 1);
+			/* Init template microcode */
+			dc_write_tmpl(dc, 2, WROD(0), 0, map, SYNC_WAVE, 8, 5, 1);
+			dc_write_tmpl(dc, 3, WROD(0), 0, map, SYNC_WAVE, 4, 5, 0);
+			dc_write_tmpl(dc, 4, WRG, 0, map, NULL_WAVE, 0, 0, 1);
+			dc_write_tmpl(dc, 1, WROD(0), 0, map, SYNC_WAVE, 0, 5, 1);
+		} else {
+			dc_link_event(dc, DC_EVT_NL, 5, 3);
+			dc_link_event(dc, DC_EVT_EOL, 6, 2);
+			dc_link_event(dc, DC_EVT_NEW_DATA, 8, 1);
+			/* Init template microcode */
+			dc_write_tmpl(dc, 5, WROD(0), 0, map, SYNC_WAVE, 8, 5, 1);
+			dc_write_tmpl(dc, 6, WROD(0), 0, map, SYNC_WAVE, 4, 5, 0);
+			dc_write_tmpl(dc, 7, WRG, 0, map, NULL_WAVE, 0, 0, 1);
+			dc_write_tmpl(dc, 8, WROD(0), 0, map, SYNC_WAVE, 0, 5, 1);
+		}
+	}
+	dc_link_event(dc, DC_EVT_NF, 0, 0);
+	dc_link_event(dc, DC_EVT_NFIELD, 0, 0);
+	dc_link_event(dc, DC_EVT_EOF, 0, 0);
+	dc_link_event(dc, DC_EVT_EOFIELD, 0, 0);
+	dc_link_event(dc, DC_EVT_NEW_CHAN, 0, 0);
+	dc_link_event(dc, DC_EVT_NEW_ADDR, 0, 0);
+
+	reg = readl(dc->base + DC_WR_CH_CONF);
+	if (interlaced)
+		reg |= DC_WR_CH_CONF_FIELD_MODE;
+	else
+		reg &= ~DC_WR_CH_CONF_FIELD_MODE;
+	writel(reg, dc->base + DC_WR_CH_CONF);
+
+	writel(0x0, dc->base + DC_WR_CH_ADDR);
+	writel(width, priv->dc_reg + DC_DISP_CONF2(dc->di));
+
+	ipu_module_enable(priv->ipu, IPU_CONF_DC_EN);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_dc_init_sync);
+
+void ipu_dc_enable_channel(struct ipu_dc *dc)
+{
+	int di;
+	u32 reg;
+
+	di = dc->di;
+
+	reg = readl(dc->base + DC_WR_CH_CONF);
+	reg |= DC_WR_CH_CONF_PROG_TYPE_NORMAL;
+	writel(reg, dc->base + DC_WR_CH_CONF);
+}
+EXPORT_SYMBOL_GPL(ipu_dc_enable_channel);
+
+void ipu_dc_disable_channel(struct ipu_dc *dc)
+{
+	struct ipu_dc_priv *priv = dc->priv;
+	u32 val;
+	int irq = 0, timeout = 50;
+
+	if (dc->chno == 1)
+		irq = IPU_IRQ_DC_FC_1;
+	else if (dc->chno == 5)
+		irq = IPU_IRQ_DP_SF_END;
+	else
+		return;
+
+	/* should wait for the interrupt here */
+	mdelay(50);
+
+	if (dc->di == 0)
+		val = 0x00000002;
+	else
+		val = 0x00000020;
+
+	/* Wait for DC triple buffer to empty */
+	while ((readl(priv->dc_reg + DC_STAT) & val) != val) {
+		usleep_range(2000, 20000);
+		timeout -= 2;
+		if (timeout <= 0)
+			break;
+	}
+
+	val = readl(dc->base + DC_WR_CH_CONF);
+	val &= ~DC_WR_CH_CONF_PROG_TYPE_MASK;
+	writel(val, dc->base + DC_WR_CH_CONF);
+}
+EXPORT_SYMBOL_GPL(ipu_dc_disable_channel);
+
+static void ipu_dc_map_config(struct ipu_dc_priv *priv, enum ipu_dc_map map,
+		int byte_num, int offset, int mask)
+{
+	int ptr = map * 3 + byte_num;
+	u32 reg;
+
+	reg = readl(priv->dc_reg + DC_MAP_CONF_VAL(ptr));
+	reg &= ~(0xffff << (16 * (ptr & 0x1)));
+	reg |= ((offset << 8) | mask) << (16 * (ptr & 0x1));
+	writel(reg, priv->dc_reg + DC_MAP_CONF_VAL(ptr));
+
+	reg = readl(priv->dc_reg + DC_MAP_CONF_PTR(map));
+	reg &= ~(0x1f << ((16 * (map & 0x1)) + (5 * byte_num)));
+	reg |= ptr << ((16 * (map & 0x1)) + (5 * byte_num));
+	writel(reg, priv->dc_reg + DC_MAP_CONF_PTR(map));
+}
+
+static void ipu_dc_map_clear(struct ipu_dc_priv *priv, int map)
+{
+	u32 reg = readl(priv->dc_reg + DC_MAP_CONF_PTR(map));
+
+	writel(reg & ~(0xffff << (16 * (map & 0x1))),
+		     priv->dc_reg + DC_MAP_CONF_PTR(map));
+}
+
+struct ipu_dc *ipu_dc_get(struct ipu_soc *ipu, int channel)
+{
+	struct ipu_dc_priv *priv = ipu->dc_priv;
+	struct ipu_dc *dc;
+
+	if (channel >= IPU_DC_NUM_CHANNELS)
+		return ERR_PTR(-ENODEV);
+
+	dc = &priv->channels[channel];
+
+	mutex_lock(&priv->mutex);
+
+	if (dc->in_use) {
+		mutex_unlock(&priv->mutex);
+		return ERR_PTR(-EBUSY);
+	}
+
+	dc->in_use = true;
+
+	mutex_unlock(&priv->mutex);
+
+	return dc;
+}
+EXPORT_SYMBOL_GPL(ipu_dc_get);
+
+void ipu_dc_put(struct ipu_dc *dc)
+{
+	struct ipu_dc_priv *priv = dc->priv;
+
+	mutex_lock(&priv->mutex);
+	dc->in_use = false;
+	mutex_unlock(&priv->mutex);
+}
+EXPORT_SYMBOL_GPL(ipu_dc_put);
+
+int ipu_dc_init(struct ipu_soc *ipu, struct device *dev,
+		unsigned long base, unsigned long template_base)
+{
+	struct ipu_dc_priv *priv;
+	static int channel_offsets[] = { 0, 0x1c, 0x38, 0x54, 0x58, 0x5c,
+		0x78, 0, 0x94, 0xb4};
+	int i;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	mutex_init(&priv->mutex);
+
+	priv->dev = dev;
+	priv->ipu = ipu;
+	priv->dc_reg = devm_ioremap(dev, base, PAGE_SIZE);
+	priv->dc_tmpl_reg = devm_ioremap(dev, template_base, PAGE_SIZE);
+	if (!priv->dc_reg || !priv->dc_tmpl_reg)
+		return -ENOMEM;
+
+	for (i = 0; i < IPU_DC_NUM_CHANNELS; i++) {
+		priv->channels[i].chno = i;
+		priv->channels[i].priv = priv;
+		priv->channels[i].base = priv->dc_reg + channel_offsets[i];
+	}
+
+	writel(DC_WR_CH_CONF_WORD_SIZE_24 | DC_WR_CH_CONF_DISP_ID_PARALLEL(1) |
+			DC_WR_CH_CONF_PROG_DI_ID,
+			priv->channels[1].base + DC_WR_CH_CONF);
+	writel(DC_WR_CH_CONF_WORD_SIZE_24 | DC_WR_CH_CONF_DISP_ID_PARALLEL(0),
+			priv->channels[5].base + DC_WR_CH_CONF);
+
+	writel(DC_GEN_SYNC_1_6_SYNC | DC_GEN_SYNC_PRIORITY_1, priv->dc_reg + DC_GEN);
+
+	ipu->dc_priv = priv;
+
+	dev_dbg(dev, "DC base: 0x%08lx template base: 0x%08lx\n",
+			base, template_base);
+
+	/* rgb24 */
+	ipu_dc_map_clear(priv, IPU_DC_MAP_RGB24);
+	ipu_dc_map_config(priv, IPU_DC_MAP_RGB24, 0, 7, 0xff); /* blue */
+	ipu_dc_map_config(priv, IPU_DC_MAP_RGB24, 1, 15, 0xff); /* green */
+	ipu_dc_map_config(priv, IPU_DC_MAP_RGB24, 2, 23, 0xff); /* red */
+
+	/* rgb565 */
+	ipu_dc_map_clear(priv, IPU_DC_MAP_RGB565);
+	ipu_dc_map_config(priv, IPU_DC_MAP_RGB565, 0, 4, 0xf8); /* blue */
+	ipu_dc_map_config(priv, IPU_DC_MAP_RGB565, 1, 10, 0xfc); /* green */
+	ipu_dc_map_config(priv, IPU_DC_MAP_RGB565, 2, 15, 0xf8); /* red */
+
+	/* gbr24 */
+	ipu_dc_map_clear(priv, IPU_DC_MAP_GBR24);
+	ipu_dc_map_config(priv, IPU_DC_MAP_GBR24, 2, 15, 0xff); /* green */
+	ipu_dc_map_config(priv, IPU_DC_MAP_GBR24, 1, 7, 0xff); /* blue */
+	ipu_dc_map_config(priv, IPU_DC_MAP_GBR24, 0, 23, 0xff); /* red */
+
+	/* bgr666 */
+	ipu_dc_map_clear(priv, IPU_DC_MAP_BGR666);
+	ipu_dc_map_config(priv, IPU_DC_MAP_BGR666, 0, 5, 0xfc); /* blue */
+	ipu_dc_map_config(priv, IPU_DC_MAP_BGR666, 1, 11, 0xfc); /* green */
+	ipu_dc_map_config(priv, IPU_DC_MAP_BGR666, 2, 17, 0xfc); /* red */
+
+	/* bgr24 */
+	ipu_dc_map_clear(priv, IPU_DC_MAP_BGR24);
+	ipu_dc_map_config(priv, IPU_DC_MAP_BGR24, 2, 7, 0xff); /* red */
+	ipu_dc_map_config(priv, IPU_DC_MAP_BGR24, 1, 15, 0xff); /* green */
+	ipu_dc_map_config(priv, IPU_DC_MAP_BGR24, 0, 23, 0xff); /* blue */
+
+	return 0;
+}
+
+void ipu_dc_exit(struct ipu_soc *ipu)
+{
+}
diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c
new file mode 100644
index 000000000000..42e60b447ae7
--- /dev/null
+++ b/drivers/gpu/ipu-v3/ipu-di.c
@@ -0,0 +1,730 @@
+/*
+ * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
+ * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+
+#include <video/imx-ipu-v3.h>
+#include "ipu-prv.h"
+
+struct ipu_di {
+	void __iomem *base;
+	int id;
+	u32 module;
+	struct clk *clk_di;	/* display input clock */
+	struct clk *clk_ipu;	/* IPU bus clock */
+	struct clk *clk_di_pixel; /* resulting pixel clock */
+	bool inuse;
+	struct ipu_soc *ipu;
+};
+
+static DEFINE_MUTEX(di_mutex);
+
+struct di_sync_config {
+	int run_count;
+	int run_src;
+	int offset_count;
+	int offset_src;
+	int repeat_count;
+	int cnt_clr_src;
+	int cnt_polarity_gen_en;
+	int cnt_polarity_clr_src;
+	int cnt_polarity_trigger_src;
+	int cnt_up;
+	int cnt_down;
+};
+
+enum di_pins {
+	DI_PIN11 = 0,
+	DI_PIN12 = 1,
+	DI_PIN13 = 2,
+	DI_PIN14 = 3,
+	DI_PIN15 = 4,
+	DI_PIN16 = 5,
+	DI_PIN17 = 6,
+	DI_PIN_CS = 7,
+
+	DI_PIN_SER_CLK = 0,
+	DI_PIN_SER_RS = 1,
+};
+
+enum di_sync_wave {
+	DI_SYNC_NONE = 0,
+	DI_SYNC_CLK = 1,
+	DI_SYNC_INT_HSYNC = 2,
+	DI_SYNC_HSYNC = 3,
+	DI_SYNC_VSYNC = 4,
+	DI_SYNC_DE = 6,
+};
+
+#define SYNC_WAVE 0
+
+#define DI_GENERAL		0x0000
+#define DI_BS_CLKGEN0		0x0004
+#define DI_BS_CLKGEN1		0x0008
+#define DI_SW_GEN0(gen)		(0x000c + 4 * ((gen) - 1))
+#define DI_SW_GEN1(gen)		(0x0030 + 4 * ((gen) - 1))
+#define DI_STP_REP(gen)		(0x0148 + 4 * (((gen) - 1)/2))
+#define DI_SYNC_AS_GEN		0x0054
+#define DI_DW_GEN(gen)		(0x0058 + 4 * (gen))
+#define DI_DW_SET(gen, set)	(0x0088 + 4 * ((gen) + 0xc * (set)))
+#define DI_SER_CONF		0x015c
+#define DI_SSC			0x0160
+#define DI_POL			0x0164
+#define DI_AW0			0x0168
+#define DI_AW1			0x016c
+#define DI_SCR_CONF		0x0170
+#define DI_STAT			0x0174
+
+#define DI_SW_GEN0_RUN_COUNT(x)			((x) << 19)
+#define DI_SW_GEN0_RUN_SRC(x)			((x) << 16)
+#define DI_SW_GEN0_OFFSET_COUNT(x)		((x) << 3)
+#define DI_SW_GEN0_OFFSET_SRC(x)		((x) << 0)
+
+#define DI_SW_GEN1_CNT_POL_GEN_EN(x)		((x) << 29)
+#define DI_SW_GEN1_CNT_CLR_SRC(x)		((x) << 25)
+#define DI_SW_GEN1_CNT_POL_TRIGGER_SRC(x)	((x) << 12)
+#define DI_SW_GEN1_CNT_POL_CLR_SRC(x)		((x) << 9)
+#define DI_SW_GEN1_CNT_DOWN(x)			((x) << 16)
+#define DI_SW_GEN1_CNT_UP(x)			(x)
+#define DI_SW_GEN1_AUTO_RELOAD			(0x10000000)
+
+#define DI_DW_GEN_ACCESS_SIZE_OFFSET		24
+#define DI_DW_GEN_COMPONENT_SIZE_OFFSET		16
+
+#define DI_GEN_POLARITY_1			(1 << 0)
+#define DI_GEN_POLARITY_2			(1 << 1)
+#define DI_GEN_POLARITY_3			(1 << 2)
+#define DI_GEN_POLARITY_4			(1 << 3)
+#define DI_GEN_POLARITY_5			(1 << 4)
+#define DI_GEN_POLARITY_6			(1 << 5)
+#define DI_GEN_POLARITY_7			(1 << 6)
+#define DI_GEN_POLARITY_8			(1 << 7)
+#define DI_GEN_POLARITY_DISP_CLK		(1 << 17)
+#define DI_GEN_DI_CLK_EXT			(1 << 20)
+#define DI_GEN_DI_VSYNC_EXT			(1 << 21)
+
+#define DI_POL_DRDY_DATA_POLARITY		(1 << 7)
+#define DI_POL_DRDY_POLARITY_15			(1 << 4)
+
+#define DI_VSYNC_SEL_OFFSET			13
+
+static inline u32 ipu_di_read(struct ipu_di *di, unsigned offset)
+{
+	return readl(di->base + offset);
+}
+
+static inline void ipu_di_write(struct ipu_di *di, u32 value, unsigned offset)
+{
+	writel(value, di->base + offset);
+}
+
+static void ipu_di_data_wave_config(struct ipu_di *di,
+				     int wave_gen,
+				     int access_size, int component_size)
+{
+	u32 reg;
+	reg = (access_size << DI_DW_GEN_ACCESS_SIZE_OFFSET) |
+	    (component_size << DI_DW_GEN_COMPONENT_SIZE_OFFSET);
+	ipu_di_write(di, reg, DI_DW_GEN(wave_gen));
+}
+
+static void ipu_di_data_pin_config(struct ipu_di *di, int wave_gen, int di_pin,
+		int set, int up, int down)
+{
+	u32 reg;
+
+	reg = ipu_di_read(di, DI_DW_GEN(wave_gen));
+	reg &= ~(0x3 << (di_pin * 2));
+	reg |= set << (di_pin * 2);
+	ipu_di_write(di, reg, DI_DW_GEN(wave_gen));
+
+	ipu_di_write(di, (down << 16) | up, DI_DW_SET(wave_gen, set));
+}
+
+static void ipu_di_sync_config(struct ipu_di *di, struct di_sync_config *config,
+		int start, int count)
+{
+	u32 reg;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		struct di_sync_config *c = &config[i];
+		int wave_gen = start + i + 1;
+
+		if ((c->run_count >= 0x1000) || (c->offset_count >= 0x1000) ||
+				(c->repeat_count >= 0x1000) ||
+				(c->cnt_up >= 0x400) ||
+				(c->cnt_down >= 0x400)) {
+			dev_err(di->ipu->dev, "DI%d counters out of range.\n",
+					di->id);
+			return;
+		}
+
+		reg = DI_SW_GEN0_RUN_COUNT(c->run_count) |
+			DI_SW_GEN0_RUN_SRC(c->run_src) |
+			DI_SW_GEN0_OFFSET_COUNT(c->offset_count) |
+			DI_SW_GEN0_OFFSET_SRC(c->offset_src);
+		ipu_di_write(di, reg, DI_SW_GEN0(wave_gen));
+
+		reg = DI_SW_GEN1_CNT_POL_GEN_EN(c->cnt_polarity_gen_en) |
+			DI_SW_GEN1_CNT_CLR_SRC(c->cnt_clr_src) |
+			DI_SW_GEN1_CNT_POL_TRIGGER_SRC(
+					c->cnt_polarity_trigger_src) |
+			DI_SW_GEN1_CNT_POL_CLR_SRC(c->cnt_polarity_clr_src) |
+			DI_SW_GEN1_CNT_DOWN(c->cnt_down) |
+			DI_SW_GEN1_CNT_UP(c->cnt_up);
+
+		/* Enable auto reload */
+		if (c->repeat_count == 0)
+			reg |= DI_SW_GEN1_AUTO_RELOAD;
+
+		ipu_di_write(di, reg, DI_SW_GEN1(wave_gen));
+
+		reg = ipu_di_read(di, DI_STP_REP(wave_gen));
+		reg &= ~(0xffff << (16 * ((wave_gen - 1) & 0x1)));
+		reg |= c->repeat_count << (16 * ((wave_gen - 1) & 0x1));
+		ipu_di_write(di, reg, DI_STP_REP(wave_gen));
+	}
+}
+
+static void ipu_di_sync_config_interlaced(struct ipu_di *di,
+		struct ipu_di_signal_cfg *sig)
+{
+	u32 h_total = sig->width + sig->h_sync_width +
+		sig->h_start_width + sig->h_end_width;
+	u32 v_total = sig->height + sig->v_sync_width +
+		sig->v_start_width + sig->v_end_width;
+	u32 reg;
+	struct di_sync_config cfg[] = {
+		{
+			.run_count = h_total / 2 - 1,
+			.run_src = DI_SYNC_CLK,
+		}, {
+			.run_count = h_total - 11,
+			.run_src = DI_SYNC_CLK,
+			.cnt_down = 4,
+		}, {
+			.run_count = v_total * 2 - 1,
+			.run_src = DI_SYNC_INT_HSYNC,
+			.offset_count = 1,
+			.offset_src = DI_SYNC_INT_HSYNC,
+			.cnt_down = 4,
+		}, {
+			.run_count = v_total / 2 - 1,
+			.run_src = DI_SYNC_HSYNC,
+			.offset_count = sig->v_start_width,
+			.offset_src = DI_SYNC_HSYNC,
+			.repeat_count = 2,
+			.cnt_clr_src = DI_SYNC_VSYNC,
+		}, {
+			.run_src = DI_SYNC_HSYNC,
+			.repeat_count = sig->height / 2,
+			.cnt_clr_src = 4,
+		}, {
+			.run_count = v_total - 1,
+			.run_src = DI_SYNC_HSYNC,
+		}, {
+			.run_count = v_total / 2 - 1,
+			.run_src = DI_SYNC_HSYNC,
+			.offset_count = 9,
+			.offset_src = DI_SYNC_HSYNC,
+			.repeat_count = 2,
+			.cnt_clr_src = DI_SYNC_VSYNC,
+		}, {
+			.run_src = DI_SYNC_CLK,
+			.offset_count = sig->h_start_width,
+			.offset_src = DI_SYNC_CLK,
+			.repeat_count = sig->width,
+			.cnt_clr_src = 5,
+		}, {
+			.run_count = v_total - 1,
+			.run_src = DI_SYNC_INT_HSYNC,
+			.offset_count = v_total / 2,
+			.offset_src = DI_SYNC_INT_HSYNC,
+			.cnt_clr_src = DI_SYNC_HSYNC,
+			.cnt_down = 4,
+		}
+	};
+
+	ipu_di_sync_config(di, cfg, 0, ARRAY_SIZE(cfg));
+
+	/* set gentime select and tag sel */
+	reg = ipu_di_read(di, DI_SW_GEN1(9));
+	reg &= 0x1FFFFFFF;
+	reg |= (3 - 1) << 29 | 0x00008000;
+	ipu_di_write(di, reg, DI_SW_GEN1(9));
+
+	ipu_di_write(di, v_total / 2 - 1, DI_SCR_CONF);
+}
+
+static void ipu_di_sync_config_noninterlaced(struct ipu_di *di,
+		struct ipu_di_signal_cfg *sig, int div)
+{
+	u32 h_total = sig->width + sig->h_sync_width + sig->h_start_width +
+		sig->h_end_width;
+	u32 v_total = sig->height + sig->v_sync_width + sig->v_start_width +
+		sig->v_end_width;
+	struct di_sync_config cfg[] = {
+		{
+			/* 1: INT_HSYNC */
+			.run_count = h_total - 1,
+			.run_src = DI_SYNC_CLK,
+		} , {
+			/* PIN2: HSYNC */
+			.run_count = h_total - 1,
+			.run_src = DI_SYNC_CLK,
+			.offset_count = div * sig->v_to_h_sync,
+			.offset_src = DI_SYNC_CLK,
+			.cnt_polarity_gen_en = 1,
+			.cnt_polarity_trigger_src = DI_SYNC_CLK,
+			.cnt_down = sig->h_sync_width * 2,
+		} , {
+			/* PIN3: VSYNC */
+			.run_count = v_total - 1,
+			.run_src = DI_SYNC_INT_HSYNC,
+			.cnt_polarity_gen_en = 1,
+			.cnt_polarity_trigger_src = DI_SYNC_INT_HSYNC,
+			.cnt_down = sig->v_sync_width * 2,
+		} , {
+			/* 4: Line Active */
+			.run_src = DI_SYNC_HSYNC,
+			.offset_count = sig->v_sync_width + sig->v_start_width,
+			.offset_src = DI_SYNC_HSYNC,
+			.repeat_count = sig->height,
+			.cnt_clr_src = DI_SYNC_VSYNC,
+		} , {
+			/* 5: Pixel Active, referenced by DC */
+			.run_src = DI_SYNC_CLK,
+			.offset_count = sig->h_sync_width + sig->h_start_width,
+			.offset_src = DI_SYNC_CLK,
+			.repeat_count = sig->width,
+			.cnt_clr_src = 5, /* Line Active */
+		} , {
+			/* unused */
+		} , {
+			/* unused */
+		} , {
+			/* unused */
+		} , {
+			/* unused */
+		},
+	};
+	/* can't use #7 and #8 for line active and pixel active counters */
+	struct di_sync_config cfg_vga[] = {
+		{
+			/* 1: INT_HSYNC */
+			.run_count = h_total - 1,
+			.run_src = DI_SYNC_CLK,
+		} , {
+			/* 2: VSYNC */
+			.run_count = v_total - 1,
+			.run_src = DI_SYNC_INT_HSYNC,
+		} , {
+			/* 3: Line Active */
+			.run_src = DI_SYNC_INT_HSYNC,
+			.offset_count = sig->v_sync_width + sig->v_start_width,
+			.offset_src = DI_SYNC_INT_HSYNC,
+			.repeat_count = sig->height,
+			.cnt_clr_src = 3 /* VSYNC */,
+		} , {
+			/* PIN4: HSYNC for VGA via TVEv2 on TQ MBa53 */
+			.run_count = h_total - 1,
+			.run_src = DI_SYNC_CLK,
+			.offset_count = div * sig->v_to_h_sync + 18, /* magic value from Freescale TVE driver */
+			.offset_src = DI_SYNC_CLK,
+			.cnt_polarity_gen_en = 1,
+			.cnt_polarity_trigger_src = DI_SYNC_CLK,
+			.cnt_down = sig->h_sync_width * 2,
+		} , {
+			/* 5: Pixel Active signal to DC */
+			.run_src = DI_SYNC_CLK,
+			.offset_count = sig->h_sync_width + sig->h_start_width,
+			.offset_src = DI_SYNC_CLK,
+			.repeat_count = sig->width,
+			.cnt_clr_src = 4, /* Line Active */
+		} , {
+			/* PIN6: VSYNC for VGA via TVEv2 on TQ MBa53 */
+			.run_count = v_total - 1,
+			.run_src = DI_SYNC_INT_HSYNC,
+			.offset_count = 1, /* magic value from Freescale TVE driver */
+			.offset_src = DI_SYNC_INT_HSYNC,
+			.cnt_polarity_gen_en = 1,
+			.cnt_polarity_trigger_src = DI_SYNC_INT_HSYNC,
+			.cnt_down = sig->v_sync_width * 2,
+		} , {
+			/* PIN4: HSYNC for VGA via TVEv2 on i.MX53-QSB */
+			.run_count = h_total - 1,
+			.run_src = DI_SYNC_CLK,
+			.offset_count = div * sig->v_to_h_sync + 18, /* magic value from Freescale TVE driver */
+			.offset_src = DI_SYNC_CLK,
+			.cnt_polarity_gen_en = 1,
+			.cnt_polarity_trigger_src = DI_SYNC_CLK,
+			.cnt_down = sig->h_sync_width * 2,
+		} , {
+			/* PIN6: VSYNC for VGA via TVEv2 on i.MX53-QSB */
+			.run_count = v_total - 1,
+			.run_src = DI_SYNC_INT_HSYNC,
+			.offset_count = 1, /* magic value from Freescale TVE driver */
+			.offset_src = DI_SYNC_INT_HSYNC,
+			.cnt_polarity_gen_en = 1,
+			.cnt_polarity_trigger_src = DI_SYNC_INT_HSYNC,
+			.cnt_down = sig->v_sync_width * 2,
+		} , {
+			/* unused */
+		},
+	};
+
+	ipu_di_write(di, v_total - 1, DI_SCR_CONF);
+	if (sig->hsync_pin == 2 && sig->vsync_pin == 3)
+		ipu_di_sync_config(di, cfg, 0, ARRAY_SIZE(cfg));
+	else
+		ipu_di_sync_config(di, cfg_vga, 0, ARRAY_SIZE(cfg_vga));
+}
+
+static void ipu_di_config_clock(struct ipu_di *di,
+	const struct ipu_di_signal_cfg *sig)
+{
+	struct clk *clk;
+	unsigned clkgen0;
+	uint32_t val;
+
+	if (sig->clkflags & IPU_DI_CLKMODE_EXT) {
+		/*
+		 * CLKMODE_EXT means we must use the DI clock: this is
+		 * needed for things like LVDS which needs to feed the
+		 * DI and LDB with the same pixel clock.
+		 */
+		clk = di->clk_di;
+
+		if (sig->clkflags & IPU_DI_CLKMODE_SYNC) {
+			/*
+			 * CLKMODE_SYNC means that we want the DI to be
+			 * clocked at the same rate as the parent clock.
+			 * This is needed (eg) for LDB which needs to be
+			 * fed with the same pixel clock.  We assume that
+			 * the LDB clock has already been set correctly.
+			 */
+			clkgen0 = 1 << 4;
+		} else {
+			/*
+			 * We can use the divider.  We should really have
+			 * a flag here indicating whether the bridge can
+			 * cope with a fractional divider or not.  For the
+			 * time being, let's go for simplicitly and
+			 * reliability.
+			 */
+			unsigned long in_rate;
+			unsigned div;
+
+			clk_set_rate(clk, sig->pixelclock);
+
+			in_rate = clk_get_rate(clk);
+			div = (in_rate + sig->pixelclock / 2) / sig->pixelclock;
+			if (div == 0)
+				div = 1;
+
+			clkgen0 = div << 4;
+		}
+	} else {
+		/*
+		 * For other interfaces, we can arbitarily select between
+		 * the DI specific clock and the internal IPU clock.  See
+		 * DI_GENERAL bit 20.  We select the IPU clock if it can
+		 * give us a clock rate within 1% of the requested frequency,
+		 * otherwise we use the DI clock.
+		 */
+		unsigned long rate, clkrate;
+		unsigned div, error;
+
+		clkrate = clk_get_rate(di->clk_ipu);
+		div = (clkrate + sig->pixelclock / 2) / sig->pixelclock;
+		rate = clkrate / div;
+
+		error = rate / (sig->pixelclock / 1000);
+
+		dev_dbg(di->ipu->dev, "  IPU clock can give %lu with divider %u, error %d.%u%%\n",
+			rate, div, (signed)(error - 1000) / 10, error % 10);
+
+		/* Allow a 1% error */
+		if (error < 1010 && error >= 990) {
+			clk = di->clk_ipu;
+
+			clkgen0 = div << 4;
+		} else {
+			unsigned long in_rate;
+			unsigned div;
+
+			clk = di->clk_di;
+
+			clk_set_rate(clk, sig->pixelclock);
+
+			in_rate = clk_get_rate(clk);
+			div = (in_rate + sig->pixelclock / 2) / sig->pixelclock;
+			if (div == 0)
+				div = 1;
+
+			clkgen0 = div << 4;
+		}
+	}
+
+	di->clk_di_pixel = clk;
+
+	/* Set the divider */
+	ipu_di_write(di, clkgen0, DI_BS_CLKGEN0);
+
+	/*
+	 * Set the high/low periods.  Bits 24:16 give us the falling edge,
+	 * and bits 8:0 give the rising edge.  LSB is fraction, and is
+	 * based on the divider above.  We want a 50% duty cycle, so set
+	 * the falling edge to be half the divider.
+	 */
+	ipu_di_write(di, (clkgen0 >> 4) << 16, DI_BS_CLKGEN1);
+
+	/* Finally select the input clock */
+	val = ipu_di_read(di, DI_GENERAL) & ~DI_GEN_DI_CLK_EXT;
+	if (clk == di->clk_di)
+		val |= DI_GEN_DI_CLK_EXT;
+	ipu_di_write(di, val, DI_GENERAL);
+
+	dev_dbg(di->ipu->dev, "Want %luHz IPU %luHz DI %luHz using %s, %luHz\n",
+		sig->pixelclock,
+		clk_get_rate(di->clk_ipu),
+		clk_get_rate(di->clk_di),
+		clk == di->clk_di ? "DI" : "IPU",
+		clk_get_rate(di->clk_di_pixel) / (clkgen0 >> 4));
+}
+
+int ipu_di_init_sync_panel(struct ipu_di *di, struct ipu_di_signal_cfg *sig)
+{
+	u32 reg;
+	u32 di_gen, vsync_cnt;
+	u32 div;
+	u32 h_total, v_total;
+
+	dev_dbg(di->ipu->dev, "disp %d: panel size = %d x %d\n",
+		di->id, sig->width, sig->height);
+
+	if ((sig->v_sync_width == 0) || (sig->h_sync_width == 0))
+		return -EINVAL;
+
+	h_total = sig->width + sig->h_sync_width + sig->h_start_width +
+		sig->h_end_width;
+	v_total = sig->height + sig->v_sync_width + sig->v_start_width +
+		sig->v_end_width;
+
+	dev_dbg(di->ipu->dev, "Clocks: IPU %luHz DI %luHz Needed %luHz\n",
+		clk_get_rate(di->clk_ipu),
+		clk_get_rate(di->clk_di),
+		sig->pixelclock);
+
+	mutex_lock(&di_mutex);
+
+	ipu_di_config_clock(di, sig);
+
+	div = ipu_di_read(di, DI_BS_CLKGEN0) & 0xfff;
+	div = div / 16;		/* Now divider is integer portion */
+
+	/* Setup pixel clock timing */
+	/* Down time is half of period */
+	ipu_di_write(di, (div << 16), DI_BS_CLKGEN1);
+
+	ipu_di_data_wave_config(di, SYNC_WAVE, div - 1, div - 1);
+	ipu_di_data_pin_config(di, SYNC_WAVE, DI_PIN15, 3, 0, div * 2);
+
+	di_gen = ipu_di_read(di, DI_GENERAL) & DI_GEN_DI_CLK_EXT;
+	di_gen |= DI_GEN_DI_VSYNC_EXT;
+
+	if (sig->interlaced) {
+		ipu_di_sync_config_interlaced(di, sig);
+
+		/* set y_sel = 1 */
+		di_gen |= 0x10000000;
+		di_gen |= DI_GEN_POLARITY_5;
+		di_gen |= DI_GEN_POLARITY_8;
+
+		vsync_cnt = 7;
+
+		if (sig->Hsync_pol)
+			di_gen |= DI_GEN_POLARITY_3;
+		if (sig->Vsync_pol)
+			di_gen |= DI_GEN_POLARITY_2;
+	} else {
+		ipu_di_sync_config_noninterlaced(di, sig, div);
+
+		vsync_cnt = 3;
+		if (di->id == 1)
+			/*
+			 * TODO: change only for TVEv2, parallel display
+			 * uses pin 2 / 3
+			 */
+			if (!(sig->hsync_pin == 2 && sig->vsync_pin == 3))
+				vsync_cnt = 6;
+
+		if (sig->Hsync_pol) {
+			if (sig->hsync_pin == 2)
+				di_gen |= DI_GEN_POLARITY_2;
+			else if (sig->hsync_pin == 4)
+				di_gen |= DI_GEN_POLARITY_4;
+			else if (sig->hsync_pin == 7)
+				di_gen |= DI_GEN_POLARITY_7;
+		}
+		if (sig->Vsync_pol) {
+			if (sig->vsync_pin == 3)
+				di_gen |= DI_GEN_POLARITY_3;
+			else if (sig->vsync_pin == 6)
+				di_gen |= DI_GEN_POLARITY_6;
+			else if (sig->vsync_pin == 8)
+				di_gen |= DI_GEN_POLARITY_8;
+		}
+	}
+
+	if (!sig->clk_pol)
+		di_gen |= DI_GEN_POLARITY_DISP_CLK;
+
+	ipu_di_write(di, di_gen, DI_GENERAL);
+
+	ipu_di_write(di, (--vsync_cnt << DI_VSYNC_SEL_OFFSET) | 0x00000002,
+		     DI_SYNC_AS_GEN);
+
+	reg = ipu_di_read(di, DI_POL);
+	reg &= ~(DI_POL_DRDY_DATA_POLARITY | DI_POL_DRDY_POLARITY_15);
+
+	if (sig->enable_pol)
+		reg |= DI_POL_DRDY_POLARITY_15;
+	if (sig->data_pol)
+		reg |= DI_POL_DRDY_DATA_POLARITY;
+
+	ipu_di_write(di, reg, DI_POL);
+
+	mutex_unlock(&di_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_di_init_sync_panel);
+
+int ipu_di_enable(struct ipu_di *di)
+{
+	int ret;
+
+	WARN_ON(IS_ERR(di->clk_di_pixel));
+
+	ret = clk_prepare_enable(di->clk_di_pixel);
+	if (ret)
+		return ret;
+
+	ipu_module_enable(di->ipu, di->module);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_di_enable);
+
+int ipu_di_disable(struct ipu_di *di)
+{
+	WARN_ON(IS_ERR(di->clk_di_pixel));
+
+	ipu_module_disable(di->ipu, di->module);
+
+	clk_disable_unprepare(di->clk_di_pixel);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_di_disable);
+
+int ipu_di_get_num(struct ipu_di *di)
+{
+	return di->id;
+}
+EXPORT_SYMBOL_GPL(ipu_di_get_num);
+
+static DEFINE_MUTEX(ipu_di_lock);
+
+struct ipu_di *ipu_di_get(struct ipu_soc *ipu, int disp)
+{
+	struct ipu_di *di;
+
+	if (disp > 1)
+		return ERR_PTR(-EINVAL);
+
+	di = ipu->di_priv[disp];
+
+	mutex_lock(&ipu_di_lock);
+
+	if (di->inuse) {
+		di = ERR_PTR(-EBUSY);
+		goto out;
+	}
+
+	di->inuse = true;
+out:
+	mutex_unlock(&ipu_di_lock);
+
+	return di;
+}
+EXPORT_SYMBOL_GPL(ipu_di_get);
+
+void ipu_di_put(struct ipu_di *di)
+{
+	mutex_lock(&ipu_di_lock);
+
+	di->inuse = false;
+
+	mutex_unlock(&ipu_di_lock);
+}
+EXPORT_SYMBOL_GPL(ipu_di_put);
+
+int ipu_di_init(struct ipu_soc *ipu, struct device *dev, int id,
+		unsigned long base,
+		u32 module, struct clk *clk_ipu)
+{
+	struct ipu_di *di;
+
+	if (id > 1)
+		return -ENODEV;
+
+	di = devm_kzalloc(dev, sizeof(*di), GFP_KERNEL);
+	if (!di)
+		return -ENOMEM;
+
+	ipu->di_priv[id] = di;
+
+	di->clk_di = devm_clk_get(dev, id ? "di1" : "di0");
+	if (IS_ERR(di->clk_di))
+		return PTR_ERR(di->clk_di);
+
+	di->module = module;
+	di->id = id;
+	di->clk_ipu = clk_ipu;
+	di->base = devm_ioremap(dev, base, PAGE_SIZE);
+	if (!di->base)
+		return -ENOMEM;
+
+	ipu_di_write(di, 0x10, DI_BS_CLKGEN0);
+
+	dev_dbg(dev, "DI%d base: 0x%08lx remapped to %p\n",
+			id, base, di->base);
+	di->inuse = false;
+	di->ipu = ipu;
+
+	return 0;
+}
+
+void ipu_di_exit(struct ipu_soc *ipu, int id)
+{
+}
diff --git a/drivers/gpu/ipu-v3/ipu-dmfc.c b/drivers/gpu/ipu-v3/ipu-dmfc.c
new file mode 100644
index 000000000000..e1493ab36ca2
--- /dev/null
+++ b/drivers/gpu/ipu-v3/ipu-dmfc.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
+ * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+
+#include <video/imx-ipu-v3.h>
+#include "ipu-prv.h"
+
+#define DMFC_RD_CHAN		0x0000
+#define DMFC_WR_CHAN		0x0004
+#define DMFC_WR_CHAN_DEF	0x0008
+#define DMFC_DP_CHAN		0x000c
+#define DMFC_DP_CHAN_DEF	0x0010
+#define DMFC_GENERAL1		0x0014
+#define DMFC_GENERAL2		0x0018
+#define DMFC_IC_CTRL		0x001c
+#define DMFC_STAT		0x0020
+
+#define DMFC_WR_CHAN_1_28		0
+#define DMFC_WR_CHAN_2_41		8
+#define DMFC_WR_CHAN_1C_42		16
+#define DMFC_WR_CHAN_2C_43		24
+
+#define DMFC_DP_CHAN_5B_23		0
+#define DMFC_DP_CHAN_5F_27		8
+#define DMFC_DP_CHAN_6B_24		16
+#define DMFC_DP_CHAN_6F_29		24
+
+#define DMFC_FIFO_SIZE_64		(3 << 3)
+#define DMFC_FIFO_SIZE_128		(2 << 3)
+#define DMFC_FIFO_SIZE_256		(1 << 3)
+#define DMFC_FIFO_SIZE_512		(0 << 3)
+
+#define DMFC_SEGMENT(x)			((x & 0x7) << 0)
+#define DMFC_BURSTSIZE_128		(0 << 6)
+#define DMFC_BURSTSIZE_64		(1 << 6)
+#define DMFC_BURSTSIZE_32		(2 << 6)
+#define DMFC_BURSTSIZE_16		(3 << 6)
+
+struct dmfc_channel_data {
+	int		ipu_channel;
+	unsigned long	channel_reg;
+	unsigned long	shift;
+	unsigned	eot_shift;
+	unsigned	max_fifo_lines;
+};
+
+static const struct dmfc_channel_data dmfcdata[] = {
+	{
+		.ipu_channel	= IPUV3_CHANNEL_MEM_BG_SYNC,
+		.channel_reg	= DMFC_DP_CHAN,
+		.shift		= DMFC_DP_CHAN_5B_23,
+		.eot_shift	= 20,
+		.max_fifo_lines	= 3,
+	}, {
+		.ipu_channel	= 24,
+		.channel_reg	= DMFC_DP_CHAN,
+		.shift		= DMFC_DP_CHAN_6B_24,
+		.eot_shift	= 22,
+		.max_fifo_lines	= 1,
+	}, {
+		.ipu_channel	= IPUV3_CHANNEL_MEM_FG_SYNC,
+		.channel_reg	= DMFC_DP_CHAN,
+		.shift		= DMFC_DP_CHAN_5F_27,
+		.eot_shift	= 21,
+		.max_fifo_lines	= 2,
+	}, {
+		.ipu_channel	= IPUV3_CHANNEL_MEM_DC_SYNC,
+		.channel_reg	= DMFC_WR_CHAN,
+		.shift		= DMFC_WR_CHAN_1_28,
+		.eot_shift	= 16,
+		.max_fifo_lines	= 2,
+	}, {
+		.ipu_channel	= 29,
+		.channel_reg	= DMFC_DP_CHAN,
+		.shift		= DMFC_DP_CHAN_6F_29,
+		.eot_shift	= 23,
+		.max_fifo_lines	= 1,
+	},
+};
+
+#define DMFC_NUM_CHANNELS	ARRAY_SIZE(dmfcdata)
+
+struct ipu_dmfc_priv;
+
+struct dmfc_channel {
+	unsigned			slots;
+	unsigned			slotmask;
+	unsigned			segment;
+	int				burstsize;
+	struct ipu_soc			*ipu;
+	struct ipu_dmfc_priv		*priv;
+	const struct dmfc_channel_data	*data;
+};
+
+struct ipu_dmfc_priv {
+	struct ipu_soc *ipu;
+	struct device *dev;
+	struct dmfc_channel channels[DMFC_NUM_CHANNELS];
+	struct mutex mutex;
+	unsigned long bandwidth_per_slot;
+	void __iomem *base;
+	int use_count;
+};
+
+int ipu_dmfc_enable_channel(struct dmfc_channel *dmfc)
+{
+	struct ipu_dmfc_priv *priv = dmfc->priv;
+	mutex_lock(&priv->mutex);
+
+	if (!priv->use_count)
+		ipu_module_enable(priv->ipu, IPU_CONF_DMFC_EN);
+
+	priv->use_count++;
+
+	mutex_unlock(&priv->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_dmfc_enable_channel);
+
+void ipu_dmfc_disable_channel(struct dmfc_channel *dmfc)
+{
+	struct ipu_dmfc_priv *priv = dmfc->priv;
+
+	mutex_lock(&priv->mutex);
+
+	priv->use_count--;
+
+	if (!priv->use_count)
+		ipu_module_disable(priv->ipu, IPU_CONF_DMFC_EN);
+
+	if (priv->use_count < 0)
+		priv->use_count = 0;
+
+	mutex_unlock(&priv->mutex);
+}
+EXPORT_SYMBOL_GPL(ipu_dmfc_disable_channel);
+
+static int ipu_dmfc_setup_channel(struct dmfc_channel *dmfc, int slots,
+		int segment, int burstsize)
+{
+	struct ipu_dmfc_priv *priv = dmfc->priv;
+	u32 val, field;
+
+	dev_dbg(priv->dev,
+			"dmfc: using %d slots starting from segment %d for IPU channel %d\n",
+			slots, segment, dmfc->data->ipu_channel);
+
+	switch (slots) {
+	case 1:
+		field = DMFC_FIFO_SIZE_64;
+		break;
+	case 2:
+		field = DMFC_FIFO_SIZE_128;
+		break;
+	case 4:
+		field = DMFC_FIFO_SIZE_256;
+		break;
+	case 8:
+		field = DMFC_FIFO_SIZE_512;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (burstsize) {
+	case 16:
+		field |= DMFC_BURSTSIZE_16;
+		break;
+	case 32:
+		field |= DMFC_BURSTSIZE_32;
+		break;
+	case 64:
+		field |= DMFC_BURSTSIZE_64;
+		break;
+	case 128:
+		field |= DMFC_BURSTSIZE_128;
+		break;
+	}
+
+	field |= DMFC_SEGMENT(segment);
+
+	val = readl(priv->base + dmfc->data->channel_reg);
+
+	val &= ~(0xff << dmfc->data->shift);
+	val |= field << dmfc->data->shift;
+
+	writel(val, priv->base + dmfc->data->channel_reg);
+
+	dmfc->slots = slots;
+	dmfc->segment = segment;
+	dmfc->burstsize = burstsize;
+	dmfc->slotmask = ((1 << slots) - 1) << segment;
+
+	return 0;
+}
+
+static int dmfc_bandwidth_to_slots(struct ipu_dmfc_priv *priv,
+		unsigned long bandwidth)
+{
+	int slots = 1;
+
+	while (slots * priv->bandwidth_per_slot < bandwidth)
+		slots *= 2;
+
+	return slots;
+}
+
+static int dmfc_find_slots(struct ipu_dmfc_priv *priv, int slots)
+{
+	unsigned slotmask_need, slotmask_used = 0;
+	int i, segment = 0;
+
+	slotmask_need = (1 << slots) - 1;
+
+	for (i = 0; i < DMFC_NUM_CHANNELS; i++)
+		slotmask_used |= priv->channels[i].slotmask;
+
+	while (slotmask_need <= 0xff) {
+		if (!(slotmask_used & slotmask_need))
+			return segment;
+
+		slotmask_need <<= 1;
+		segment++;
+	}
+
+	return -EBUSY;
+}
+
+void ipu_dmfc_free_bandwidth(struct dmfc_channel *dmfc)
+{
+	struct ipu_dmfc_priv *priv = dmfc->priv;
+	int i;
+
+	dev_dbg(priv->dev, "dmfc: freeing %d slots starting from segment %d\n",
+			dmfc->slots, dmfc->segment);
+
+	mutex_lock(&priv->mutex);
+
+	if (!dmfc->slots)
+		goto out;
+
+	dmfc->slotmask = 0;
+	dmfc->slots = 0;
+	dmfc->segment = 0;
+
+	for (i = 0; i < DMFC_NUM_CHANNELS; i++)
+		priv->channels[i].slotmask = 0;
+
+	for (i = 0; i < DMFC_NUM_CHANNELS; i++) {
+		if (priv->channels[i].slots > 0) {
+			priv->channels[i].segment =
+				dmfc_find_slots(priv, priv->channels[i].slots);
+			priv->channels[i].slotmask =
+				((1 << priv->channels[i].slots) - 1) <<
+				priv->channels[i].segment;
+		}
+	}
+
+	for (i = 0; i < DMFC_NUM_CHANNELS; i++) {
+		if (priv->channels[i].slots > 0)
+			ipu_dmfc_setup_channel(&priv->channels[i],
+					priv->channels[i].slots,
+					priv->channels[i].segment,
+					priv->channels[i].burstsize);
+	}
+out:
+	mutex_unlock(&priv->mutex);
+}
+EXPORT_SYMBOL_GPL(ipu_dmfc_free_bandwidth);
+
+int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc,
+		unsigned long bandwidth_pixel_per_second, int burstsize)
+{
+	struct ipu_dmfc_priv *priv = dmfc->priv;
+	int slots = dmfc_bandwidth_to_slots(priv, bandwidth_pixel_per_second);
+	int segment = -1, ret = 0;
+
+	dev_dbg(priv->dev, "dmfc: trying to allocate %ldMpixel/s for IPU channel %d\n",
+			bandwidth_pixel_per_second / 1000000,
+			dmfc->data->ipu_channel);
+
+	ipu_dmfc_free_bandwidth(dmfc);
+
+	mutex_lock(&priv->mutex);
+
+	if (slots > 8) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* For the MEM_BG channel, first try to allocate twice the slots */
+	if (dmfc->data->ipu_channel == IPUV3_CHANNEL_MEM_BG_SYNC)
+		segment = dmfc_find_slots(priv, slots * 2);
+	else if (slots < 2)
+		/* Always allocate at least 128*4 bytes (2 slots) */
+		slots = 2;
+
+	if (segment >= 0)
+		slots *= 2;
+	else
+		segment = dmfc_find_slots(priv, slots);
+	if (segment < 0) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	ipu_dmfc_setup_channel(dmfc, slots, segment, burstsize);
+
+out:
+	mutex_unlock(&priv->mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ipu_dmfc_alloc_bandwidth);
+
+int ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width)
+{
+	struct ipu_dmfc_priv *priv = dmfc->priv;
+	u32 dmfc_gen1;
+
+	dmfc_gen1 = readl(priv->base + DMFC_GENERAL1);
+
+	if ((dmfc->slots * 64 * 4) / width > dmfc->data->max_fifo_lines)
+		dmfc_gen1 |= 1 << dmfc->data->eot_shift;
+	else
+		dmfc_gen1 &= ~(1 << dmfc->data->eot_shift);
+
+	writel(dmfc_gen1, priv->base + DMFC_GENERAL1);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_dmfc_init_channel);
+
+struct dmfc_channel *ipu_dmfc_get(struct ipu_soc *ipu, int ipu_channel)
+{
+	struct ipu_dmfc_priv *priv = ipu->dmfc_priv;
+	int i;
+
+	for (i = 0; i < DMFC_NUM_CHANNELS; i++)
+		if (dmfcdata[i].ipu_channel == ipu_channel)
+			return &priv->channels[i];
+	return ERR_PTR(-ENODEV);
+}
+EXPORT_SYMBOL_GPL(ipu_dmfc_get);
+
+void ipu_dmfc_put(struct dmfc_channel *dmfc)
+{
+	ipu_dmfc_free_bandwidth(dmfc);
+}
+EXPORT_SYMBOL_GPL(ipu_dmfc_put);
+
+int ipu_dmfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base,
+		struct clk *ipu_clk)
+{
+	struct ipu_dmfc_priv *priv;
+	int i;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base = devm_ioremap(dev, base, PAGE_SIZE);
+	if (!priv->base)
+		return -ENOMEM;
+
+	priv->dev = dev;
+	priv->ipu = ipu;
+	mutex_init(&priv->mutex);
+
+	ipu->dmfc_priv = priv;
+
+	for (i = 0; i < DMFC_NUM_CHANNELS; i++) {
+		priv->channels[i].priv = priv;
+		priv->channels[i].ipu = ipu;
+		priv->channels[i].data = &dmfcdata[i];
+	}
+
+	writel(0x0, priv->base + DMFC_WR_CHAN);
+	writel(0x0, priv->base + DMFC_DP_CHAN);
+
+	/*
+	 * We have a total bandwidth of clkrate * 4pixel divided
+	 * into 8 slots.
+	 */
+	priv->bandwidth_per_slot = clk_get_rate(ipu_clk) * 4 / 8;
+
+	dev_dbg(dev, "dmfc: 8 slots with %ldMpixel/s bandwidth each\n",
+			priv->bandwidth_per_slot / 1000000);
+
+	writel(0x202020f6, priv->base + DMFC_WR_CHAN_DEF);
+	writel(0x2020f6f6, priv->base + DMFC_DP_CHAN_DEF);
+	writel(0x00000003, priv->base + DMFC_GENERAL1);
+
+	return 0;
+}
+
+void ipu_dmfc_exit(struct ipu_soc *ipu)
+{
+}
diff --git a/drivers/gpu/ipu-v3/ipu-dp.c b/drivers/gpu/ipu-v3/ipu-dp.c
new file mode 100644
index 000000000000..e17fa3f7c4b6
--- /dev/null
+++ b/drivers/gpu/ipu-v3/ipu-dp.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
+ * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/err.h>
+
+#include <video/imx-ipu-v3.h>
+#include "ipu-prv.h"
+
+#define DP_SYNC 0
+#define DP_ASYNC0 0x60
+#define DP_ASYNC1 0xBC
+
+#define DP_COM_CONF		0x0
+#define DP_GRAPH_WIND_CTRL	0x0004
+#define DP_FG_POS		0x0008
+#define DP_CSC_A_0		0x0044
+#define DP_CSC_A_1		0x0048
+#define DP_CSC_A_2		0x004C
+#define DP_CSC_A_3		0x0050
+#define DP_CSC_0		0x0054
+#define DP_CSC_1		0x0058
+
+#define DP_COM_CONF_FG_EN		(1 << 0)
+#define DP_COM_CONF_GWSEL		(1 << 1)
+#define DP_COM_CONF_GWAM		(1 << 2)
+#define DP_COM_CONF_GWCKE		(1 << 3)
+#define DP_COM_CONF_CSC_DEF_MASK	(3 << 8)
+#define DP_COM_CONF_CSC_DEF_OFFSET	8
+#define DP_COM_CONF_CSC_DEF_FG		(3 << 8)
+#define DP_COM_CONF_CSC_DEF_BG		(2 << 8)
+#define DP_COM_CONF_CSC_DEF_BOTH	(1 << 8)
+
+#define IPUV3_NUM_FLOWS		3
+
+struct ipu_dp_priv;
+
+struct ipu_dp {
+	u32 flow;
+	bool in_use;
+	bool foreground;
+	enum ipu_color_space in_cs;
+};
+
+struct ipu_flow {
+	struct ipu_dp foreground;
+	struct ipu_dp background;
+	enum ipu_color_space out_cs;
+	void __iomem *base;
+	struct ipu_dp_priv *priv;
+};
+
+struct ipu_dp_priv {
+	struct ipu_soc *ipu;
+	struct device *dev;
+	void __iomem *base;
+	struct ipu_flow flow[IPUV3_NUM_FLOWS];
+	struct mutex mutex;
+	int use_count;
+};
+
+static u32 ipu_dp_flow_base[] = {DP_SYNC, DP_ASYNC0, DP_ASYNC1};
+
+static inline struct ipu_flow *to_flow(struct ipu_dp *dp)
+{
+	if (dp->foreground)
+		return container_of(dp, struct ipu_flow, foreground);
+	else
+		return container_of(dp, struct ipu_flow, background);
+}
+
+int ipu_dp_set_global_alpha(struct ipu_dp *dp, bool enable,
+		u8 alpha, bool bg_chan)
+{
+	struct ipu_flow *flow = to_flow(dp);
+	struct ipu_dp_priv *priv = flow->priv;
+	u32 reg;
+
+	mutex_lock(&priv->mutex);
+
+	reg = readl(flow->base + DP_COM_CONF);
+	if (bg_chan)
+		reg &= ~DP_COM_CONF_GWSEL;
+	else
+		reg |= DP_COM_CONF_GWSEL;
+	writel(reg, flow->base + DP_COM_CONF);
+
+	if (enable) {
+		reg = readl(flow->base + DP_GRAPH_WIND_CTRL) & 0x00FFFFFFL;
+		writel(reg | ((u32) alpha << 24),
+			     flow->base + DP_GRAPH_WIND_CTRL);
+
+		reg = readl(flow->base + DP_COM_CONF);
+		writel(reg | DP_COM_CONF_GWAM, flow->base + DP_COM_CONF);
+	} else {
+		reg = readl(flow->base + DP_COM_CONF);
+		writel(reg & ~DP_COM_CONF_GWAM, flow->base + DP_COM_CONF);
+	}
+
+	ipu_srm_dp_sync_update(priv->ipu);
+
+	mutex_unlock(&priv->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_dp_set_global_alpha);
+
+int ipu_dp_set_window_pos(struct ipu_dp *dp, u16 x_pos, u16 y_pos)
+{
+	struct ipu_flow *flow = to_flow(dp);
+	struct ipu_dp_priv *priv = flow->priv;
+
+	writel((x_pos << 16) | y_pos, flow->base + DP_FG_POS);
+
+	ipu_srm_dp_sync_update(priv->ipu);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_dp_set_window_pos);
+
+static void ipu_dp_csc_init(struct ipu_flow *flow,
+		enum ipu_color_space in,
+		enum ipu_color_space out,
+		u32 place)
+{
+	u32 reg;
+
+	reg = readl(flow->base + DP_COM_CONF);
+	reg &= ~DP_COM_CONF_CSC_DEF_MASK;
+
+	if (in == out) {
+		writel(reg, flow->base + DP_COM_CONF);
+		return;
+	}
+
+	if (in == IPUV3_COLORSPACE_RGB && out == IPUV3_COLORSPACE_YUV) {
+		writel(0x099 | (0x12d << 16), flow->base + DP_CSC_A_0);
+		writel(0x03a | (0x3a9 << 16), flow->base + DP_CSC_A_1);
+		writel(0x356 | (0x100 << 16), flow->base + DP_CSC_A_2);
+		writel(0x100 | (0x329 << 16), flow->base + DP_CSC_A_3);
+		writel(0x3d6 | (0x0000 << 16) | (2 << 30),
+				flow->base + DP_CSC_0);
+		writel(0x200 | (2 << 14) | (0x200 << 16) | (2 << 30),
+				flow->base + DP_CSC_1);
+	} else {
+		writel(0x095 | (0x000 << 16), flow->base + DP_CSC_A_0);
+		writel(0x0cc | (0x095 << 16), flow->base + DP_CSC_A_1);
+		writel(0x3ce | (0x398 << 16), flow->base + DP_CSC_A_2);
+		writel(0x095 | (0x0ff << 16), flow->base + DP_CSC_A_3);
+		writel(0x000 | (0x3e42 << 16) | (1 << 30),
+				flow->base + DP_CSC_0);
+		writel(0x10a | (1 << 14) | (0x3dd6 << 16) | (1 << 30),
+				flow->base + DP_CSC_1);
+	}
+
+	reg |= place;
+
+	writel(reg, flow->base + DP_COM_CONF);
+}
+
+int ipu_dp_setup_channel(struct ipu_dp *dp,
+		enum ipu_color_space in,
+		enum ipu_color_space out)
+{
+	struct ipu_flow *flow = to_flow(dp);
+	struct ipu_dp_priv *priv = flow->priv;
+
+	mutex_lock(&priv->mutex);
+
+	dp->in_cs = in;
+
+	if (!dp->foreground)
+		flow->out_cs = out;
+
+	if (flow->foreground.in_cs == flow->background.in_cs) {
+		/*
+		 * foreground and background are of same colorspace, put
+		 * colorspace converter after combining unit.
+		 */
+		ipu_dp_csc_init(flow, flow->foreground.in_cs, flow->out_cs,
+				DP_COM_CONF_CSC_DEF_BOTH);
+	} else {
+		if (flow->foreground.in_cs == flow->out_cs)
+			/*
+			 * foreground identical to output, apply color
+			 * conversion on background
+			 */
+			ipu_dp_csc_init(flow, flow->background.in_cs,
+					flow->out_cs, DP_COM_CONF_CSC_DEF_BG);
+		else
+			ipu_dp_csc_init(flow, flow->foreground.in_cs,
+					flow->out_cs, DP_COM_CONF_CSC_DEF_FG);
+	}
+
+	ipu_srm_dp_sync_update(priv->ipu);
+
+	mutex_unlock(&priv->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_dp_setup_channel);
+
+int ipu_dp_enable_channel(struct ipu_dp *dp)
+{
+	struct ipu_flow *flow = to_flow(dp);
+	struct ipu_dp_priv *priv = flow->priv;
+
+	mutex_lock(&priv->mutex);
+
+	if (!priv->use_count)
+		ipu_module_enable(priv->ipu, IPU_CONF_DP_EN);
+
+	priv->use_count++;
+
+	if (dp->foreground) {
+		u32 reg;
+
+		reg = readl(flow->base + DP_COM_CONF);
+		reg |= DP_COM_CONF_FG_EN;
+		writel(reg, flow->base + DP_COM_CONF);
+
+		ipu_srm_dp_sync_update(priv->ipu);
+	}
+
+	mutex_unlock(&priv->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_dp_enable_channel);
+
+void ipu_dp_disable_channel(struct ipu_dp *dp)
+{
+	struct ipu_flow *flow = to_flow(dp);
+	struct ipu_dp_priv *priv = flow->priv;
+
+	mutex_lock(&priv->mutex);
+
+	priv->use_count--;
+
+	if (dp->foreground) {
+		u32 reg, csc;
+
+		reg = readl(flow->base + DP_COM_CONF);
+		csc = reg & DP_COM_CONF_CSC_DEF_MASK;
+		if (csc == DP_COM_CONF_CSC_DEF_FG)
+			reg &= ~DP_COM_CONF_CSC_DEF_MASK;
+
+		reg &= ~DP_COM_CONF_FG_EN;
+		writel(reg, flow->base + DP_COM_CONF);
+
+		writel(0, flow->base + DP_FG_POS);
+		ipu_srm_dp_sync_update(priv->ipu);
+	}
+
+	if (!priv->use_count)
+		ipu_module_disable(priv->ipu, IPU_CONF_DP_EN);
+
+	if (priv->use_count < 0)
+		priv->use_count = 0;
+
+	mutex_unlock(&priv->mutex);
+}
+EXPORT_SYMBOL_GPL(ipu_dp_disable_channel);
+
+struct ipu_dp *ipu_dp_get(struct ipu_soc *ipu, unsigned int flow)
+{
+	struct ipu_dp_priv *priv = ipu->dp_priv;
+	struct ipu_dp *dp;
+
+	if ((flow >> 1) >= IPUV3_NUM_FLOWS)
+		return ERR_PTR(-EINVAL);
+
+	if (flow & 1)
+		dp = &priv->flow[flow >> 1].foreground;
+	else
+		dp = &priv->flow[flow >> 1].background;
+
+	if (dp->in_use)
+		return ERR_PTR(-EBUSY);
+
+	dp->in_use = true;
+
+	return dp;
+}
+EXPORT_SYMBOL_GPL(ipu_dp_get);
+
+void ipu_dp_put(struct ipu_dp *dp)
+{
+	dp->in_use = false;
+}
+EXPORT_SYMBOL_GPL(ipu_dp_put);
+
+int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base)
+{
+	struct ipu_dp_priv *priv;
+	int i;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	priv->dev = dev;
+	priv->ipu = ipu;
+
+	ipu->dp_priv = priv;
+
+	priv->base = devm_ioremap(dev, base, PAGE_SIZE);
+	if (!priv->base)
+		return -ENOMEM;
+
+	mutex_init(&priv->mutex);
+
+	for (i = 0; i < IPUV3_NUM_FLOWS; i++) {
+		priv->flow[i].foreground.foreground = true;
+		priv->flow[i].base = priv->base + ipu_dp_flow_base[i];
+		priv->flow[i].priv = priv;
+	}
+
+	return 0;
+}
+
+void ipu_dp_exit(struct ipu_soc *ipu)
+{
+}
diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h
new file mode 100644
index 000000000000..5cb075fdd48c
--- /dev/null
+++ b/drivers/gpu/ipu-v3/ipu-prv.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
+ * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+#ifndef __IPU_PRV_H__
+#define __IPU_PRV_H__
+
+struct ipu_soc;
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+
+#include <video/imx-ipu-v3.h>
+
+#define IPUV3_CHANNEL_CSI0			 0
+#define IPUV3_CHANNEL_CSI1			 1
+#define IPUV3_CHANNEL_CSI2			 2
+#define IPUV3_CHANNEL_CSI3			 3
+#define IPUV3_CHANNEL_MEM_BG_SYNC		23
+#define IPUV3_CHANNEL_MEM_FG_SYNC		27
+#define IPUV3_CHANNEL_MEM_DC_SYNC		28
+#define IPUV3_CHANNEL_MEM_FG_SYNC_ALPHA		31
+#define IPUV3_CHANNEL_MEM_DC_ASYNC		41
+#define IPUV3_CHANNEL_ROT_ENC_MEM		45
+#define IPUV3_CHANNEL_ROT_VF_MEM		46
+#define IPUV3_CHANNEL_ROT_PP_MEM		47
+#define IPUV3_CHANNEL_ROT_ENC_MEM_OUT		48
+#define IPUV3_CHANNEL_ROT_VF_MEM_OUT		49
+#define IPUV3_CHANNEL_ROT_PP_MEM_OUT		50
+#define IPUV3_CHANNEL_MEM_BG_SYNC_ALPHA		51
+
+#define IPU_MCU_T_DEFAULT	8
+#define IPU_CM_IDMAC_REG_OFS	0x00008000
+#define IPU_CM_IC_REG_OFS	0x00020000
+#define IPU_CM_IRT_REG_OFS	0x00028000
+#define IPU_CM_CSI0_REG_OFS	0x00030000
+#define IPU_CM_CSI1_REG_OFS	0x00038000
+#define IPU_CM_SMFC_REG_OFS	0x00050000
+#define IPU_CM_DC_REG_OFS	0x00058000
+#define IPU_CM_DMFC_REG_OFS	0x00060000
+
+/* Register addresses */
+/* IPU Common registers */
+#define IPU_CM_REG(offset)	(offset)
+
+#define IPU_CONF			IPU_CM_REG(0)
+
+#define IPU_SRM_PRI1			IPU_CM_REG(0x00a0)
+#define IPU_SRM_PRI2			IPU_CM_REG(0x00a4)
+#define IPU_FS_PROC_FLOW1		IPU_CM_REG(0x00a8)
+#define IPU_FS_PROC_FLOW2		IPU_CM_REG(0x00ac)
+#define IPU_FS_PROC_FLOW3		IPU_CM_REG(0x00b0)
+#define IPU_FS_DISP_FLOW1		IPU_CM_REG(0x00b4)
+#define IPU_FS_DISP_FLOW2		IPU_CM_REG(0x00b8)
+#define IPU_SKIP			IPU_CM_REG(0x00bc)
+#define IPU_DISP_ALT_CONF		IPU_CM_REG(0x00c0)
+#define IPU_DISP_GEN			IPU_CM_REG(0x00c4)
+#define IPU_DISP_ALT1			IPU_CM_REG(0x00c8)
+#define IPU_DISP_ALT2			IPU_CM_REG(0x00cc)
+#define IPU_DISP_ALT3			IPU_CM_REG(0x00d0)
+#define IPU_DISP_ALT4			IPU_CM_REG(0x00d4)
+#define IPU_SNOOP			IPU_CM_REG(0x00d8)
+#define IPU_MEM_RST			IPU_CM_REG(0x00dc)
+#define IPU_PM				IPU_CM_REG(0x00e0)
+#define IPU_GPR				IPU_CM_REG(0x00e4)
+#define IPU_CHA_DB_MODE_SEL(ch)		IPU_CM_REG(0x0150 + 4 * ((ch) / 32))
+#define IPU_ALT_CHA_DB_MODE_SEL(ch)	IPU_CM_REG(0x0168 + 4 * ((ch) / 32))
+#define IPU_CHA_CUR_BUF(ch)		IPU_CM_REG(0x023C + 4 * ((ch) / 32))
+#define IPU_ALT_CUR_BUF0		IPU_CM_REG(0x0244)
+#define IPU_ALT_CUR_BUF1		IPU_CM_REG(0x0248)
+#define IPU_SRM_STAT			IPU_CM_REG(0x024C)
+#define IPU_PROC_TASK_STAT		IPU_CM_REG(0x0250)
+#define IPU_DISP_TASK_STAT		IPU_CM_REG(0x0254)
+#define IPU_CHA_BUF0_RDY(ch)		IPU_CM_REG(0x0268 + 4 * ((ch) / 32))
+#define IPU_CHA_BUF1_RDY(ch)		IPU_CM_REG(0x0270 + 4 * ((ch) / 32))
+#define IPU_ALT_CHA_BUF0_RDY(ch)	IPU_CM_REG(0x0278 + 4 * ((ch) / 32))
+#define IPU_ALT_CHA_BUF1_RDY(ch)	IPU_CM_REG(0x0280 + 4 * ((ch) / 32))
+
+#define IPU_INT_CTRL(n)		IPU_CM_REG(0x003C + 4 * (n))
+#define IPU_INT_STAT(n)		IPU_CM_REG(0x0200 + 4 * (n))
+
+#define IPU_DI0_COUNTER_RELEASE			(1 << 24)
+#define IPU_DI1_COUNTER_RELEASE			(1 << 25)
+
+#define IPU_IDMAC_REG(offset)	(offset)
+
+#define IDMAC_CONF			IPU_IDMAC_REG(0x0000)
+#define IDMAC_CHA_EN(ch)		IPU_IDMAC_REG(0x0004 + 4 * ((ch) / 32))
+#define IDMAC_SEP_ALPHA			IPU_IDMAC_REG(0x000c)
+#define IDMAC_ALT_SEP_ALPHA		IPU_IDMAC_REG(0x0010)
+#define IDMAC_CHA_PRI(ch)		IPU_IDMAC_REG(0x0014 + 4 * ((ch) / 32))
+#define IDMAC_WM_EN(ch)			IPU_IDMAC_REG(0x001c + 4 * ((ch) / 32))
+#define IDMAC_CH_LOCK_EN_1		IPU_IDMAC_REG(0x0024)
+#define IDMAC_CH_LOCK_EN_2		IPU_IDMAC_REG(0x0028)
+#define IDMAC_SUB_ADDR_0		IPU_IDMAC_REG(0x002c)
+#define IDMAC_SUB_ADDR_1		IPU_IDMAC_REG(0x0030)
+#define IDMAC_SUB_ADDR_2		IPU_IDMAC_REG(0x0034)
+#define IDMAC_BAND_EN(ch)		IPU_IDMAC_REG(0x0040 + 4 * ((ch) / 32))
+#define IDMAC_CHA_BUSY(ch)		IPU_IDMAC_REG(0x0100 + 4 * ((ch) / 32))
+
+#define IPU_NUM_IRQS	(32 * 15)
+
+enum ipu_modules {
+	IPU_CONF_CSI0_EN		= (1 << 0),
+	IPU_CONF_CSI1_EN		= (1 << 1),
+	IPU_CONF_IC_EN			= (1 << 2),
+	IPU_CONF_ROT_EN			= (1 << 3),
+	IPU_CONF_ISP_EN			= (1 << 4),
+	IPU_CONF_DP_EN			= (1 << 5),
+	IPU_CONF_DI0_EN			= (1 << 6),
+	IPU_CONF_DI1_EN			= (1 << 7),
+	IPU_CONF_SMFC_EN		= (1 << 8),
+	IPU_CONF_DC_EN			= (1 << 9),
+	IPU_CONF_DMFC_EN		= (1 << 10),
+
+	IPU_CONF_VDI_EN			= (1 << 12),
+
+	IPU_CONF_IDMAC_DIS		= (1 << 22),
+
+	IPU_CONF_IC_DMFC_SEL		= (1 << 25),
+	IPU_CONF_IC_DMFC_SYNC		= (1 << 26),
+	IPU_CONF_VDI_DMFC_SYNC		= (1 << 27),
+
+	IPU_CONF_CSI0_DATA_SOURCE	= (1 << 28),
+	IPU_CONF_CSI1_DATA_SOURCE	= (1 << 29),
+	IPU_CONF_IC_INPUT		= (1 << 30),
+	IPU_CONF_CSI_SEL		= (1 << 31),
+};
+
+struct ipuv3_channel {
+	unsigned int num;
+
+	bool enabled;
+	bool busy;
+
+	struct ipu_soc *ipu;
+};
+
+struct ipu_dc_priv;
+struct ipu_dmfc_priv;
+struct ipu_di;
+struct ipu_devtype;
+
+struct ipu_soc {
+	struct device		*dev;
+	const struct ipu_devtype	*devtype;
+	enum ipuv3_type		ipu_type;
+	spinlock_t		lock;
+	struct mutex		channel_lock;
+
+	void __iomem		*cm_reg;
+	void __iomem		*idmac_reg;
+	struct ipu_ch_param __iomem	*cpmem_base;
+
+	int			usecount;
+
+	struct clk		*clk;
+
+	struct ipuv3_channel	channel[64];
+
+	int			irq_sync;
+	int			irq_err;
+	struct irq_domain	*domain;
+
+	struct ipu_dc_priv	*dc_priv;
+	struct ipu_dp_priv	*dp_priv;
+	struct ipu_dmfc_priv	*dmfc_priv;
+	struct ipu_di		*di_priv[2];
+};
+
+void ipu_srm_dp_sync_update(struct ipu_soc *ipu);
+
+int ipu_module_enable(struct ipu_soc *ipu, u32 mask);
+int ipu_module_disable(struct ipu_soc *ipu, u32 mask);
+
+int ipu_di_init(struct ipu_soc *ipu, struct device *dev, int id,
+		unsigned long base, u32 module, struct clk *ipu_clk);
+void ipu_di_exit(struct ipu_soc *ipu, int id);
+
+int ipu_dmfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base,
+		struct clk *ipu_clk);
+void ipu_dmfc_exit(struct ipu_soc *ipu);
+
+int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base);
+void ipu_dp_exit(struct ipu_soc *ipu);
+
+int ipu_dc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base,
+		unsigned long template_base);
+void ipu_dc_exit(struct ipu_soc *ipu);
+
+int ipu_cpmem_init(struct ipu_soc *ipu, struct device *dev, unsigned long base);
+void ipu_cpmem_exit(struct ipu_soc *ipu);
+
+#endif				/* __IPU_PRV_H__ */
diff --git a/drivers/staging/imx-drm/Kconfig b/drivers/staging/imx-drm/Kconfig
index c6e8ba7b3e4e..82fb758a29bc 100644
--- a/drivers/staging/imx-drm/Kconfig
+++ b/drivers/staging/imx-drm/Kconfig
@@ -39,19 +39,10 @@ config DRM_IMX_LDB
 	  Choose this to enable the internal LVDS Display Bridge (LDB)
 	  found on i.MX53 and i.MX6 processors.
 
-config DRM_IMX_IPUV3_CORE
-	tristate "IPUv3 core support"
-	depends on DRM_IMX
-	depends on RESET_CONTROLLER
-	help
-	  Choose this if you have a i.MX5/6 system and want
-	  to use the IPU. This option only enables IPU base
-	  support.
-
 config DRM_IMX_IPUV3
 	tristate "DRM Support for i.MX IPUv3"
 	depends on DRM_IMX
-	depends on DRM_IMX_IPUV3_CORE
+	depends on IMX_IPUV3_CORE
 	help
 	  Choose this if you have a i.MX5 or i.MX6 processor.
 
diff --git a/drivers/staging/imx-drm/Makefile b/drivers/staging/imx-drm/Makefile
index 129e3a3f59f1..582c438d8cbd 100644
--- a/drivers/staging/imx-drm/Makefile
+++ b/drivers/staging/imx-drm/Makefile
@@ -6,7 +6,6 @@ obj-$(CONFIG_DRM_IMX) += imxdrm.o
 obj-$(CONFIG_DRM_IMX_PARALLEL_DISPLAY) += parallel-display.o
 obj-$(CONFIG_DRM_IMX_TVE) += imx-tve.o
 obj-$(CONFIG_DRM_IMX_LDB) += imx-ldb.o
-obj-$(CONFIG_DRM_IMX_IPUV3_CORE) += ipu-v3/
 
 imx-ipuv3-crtc-objs  := ipuv3-crtc.o ipuv3-plane.o
 obj-$(CONFIG_DRM_IMX_IPUV3)	+= imx-ipuv3-crtc.o
diff --git a/drivers/staging/imx-drm/imx-hdmi.c b/drivers/staging/imx-drm/imx-hdmi.c
index d47dedd2cdb4..461f224d26ab 100644
--- a/drivers/staging/imx-drm/imx-hdmi.c
+++ b/drivers/staging/imx-drm/imx-hdmi.c
@@ -27,8 +27,8 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_encoder_slave.h>
+#include <video/imx-ipu-v3.h>
 
-#include "ipu-v3/imx-ipu-v3.h"
 #include "imx-hdmi.h"
 #include "imx-drm.h"
 
diff --git a/drivers/staging/imx-drm/imx-tve.c b/drivers/staging/imx-drm/imx-tve.c
index 575533f4fd64..5bcb40516656 100644
--- a/drivers/staging/imx-drm/imx-tve.c
+++ b/drivers/staging/imx-drm/imx-tve.c
@@ -30,8 +30,8 @@
 #include <drm/drmP.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
+#include <video/imx-ipu-v3.h>
 
-#include "ipu-v3/imx-ipu-v3.h"
 #include "imx-drm.h"
 
 #define TVE_COM_CONF_REG	0x00
diff --git a/drivers/staging/imx-drm/ipu-v3/Makefile b/drivers/staging/imx-drm/ipu-v3/Makefile
deleted file mode 100644
index 28ed72e98a96..000000000000
--- a/drivers/staging/imx-drm/ipu-v3/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_DRM_IMX_IPUV3_CORE) += imx-ipu-v3.o
-
-imx-ipu-v3-objs := ipu-common.o ipu-dc.o ipu-di.o ipu-dp.o ipu-dmfc.o
diff --git a/drivers/staging/imx-drm/ipu-v3/imx-ipu-v3.h b/drivers/staging/imx-drm/ipu-v3/imx-ipu-v3.h
deleted file mode 100644
index c4d14ead5837..000000000000
--- a/drivers/staging/imx-drm/ipu-v3/imx-ipu-v3.h
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
- * Copyright 2005-2009 Freescale Semiconductor, Inc.
- *
- * The code contained herein is licensed under the GNU Lesser General
- * Public License.  You may obtain a copy of the GNU Lesser General
- * Public License Version 2.1 or later at the following locations:
- *
- * http://www.opensource.org/licenses/lgpl-license.html
- * http://www.gnu.org/copyleft/lgpl.html
- */
-
-#ifndef __DRM_IPU_H__
-#define __DRM_IPU_H__
-
-#include <linux/types.h>
-#include <linux/videodev2.h>
-#include <linux/bitmap.h>
-#include <linux/fb.h>
-
-struct ipu_soc;
-
-enum ipuv3_type {
-	IPUV3EX,
-	IPUV3M,
-	IPUV3H,
-};
-
-#define IPU_PIX_FMT_GBR24	v4l2_fourcc('G', 'B', 'R', '3')
-
-/*
- * Bitfield of Display Interface signal polarities.
- */
-struct ipu_di_signal_cfg {
-	unsigned datamask_en:1;
-	unsigned interlaced:1;
-	unsigned odd_field_first:1;
-	unsigned clksel_en:1;
-	unsigned clkidle_en:1;
-	unsigned data_pol:1;	/* true = inverted */
-	unsigned clk_pol:1;	/* true = rising edge */
-	unsigned enable_pol:1;
-	unsigned Hsync_pol:1;	/* true = active high */
-	unsigned Vsync_pol:1;
-
-	u16 width;
-	u16 height;
-	u32 pixel_fmt;
-	u16 h_start_width;
-	u16 h_sync_width;
-	u16 h_end_width;
-	u16 v_start_width;
-	u16 v_sync_width;
-	u16 v_end_width;
-	u32 v_to_h_sync;
-	unsigned long pixelclock;
-#define IPU_DI_CLKMODE_SYNC	(1 << 0)
-#define IPU_DI_CLKMODE_EXT	(1 << 1)
-	unsigned long clkflags;
-
-	u8 hsync_pin;
-	u8 vsync_pin;
-};
-
-enum ipu_color_space {
-	IPUV3_COLORSPACE_RGB,
-	IPUV3_COLORSPACE_YUV,
-	IPUV3_COLORSPACE_UNKNOWN,
-};
-
-struct ipuv3_channel;
-
-enum ipu_channel_irq {
-	IPU_IRQ_EOF = 0,
-	IPU_IRQ_NFACK = 64,
-	IPU_IRQ_NFB4EOF = 128,
-	IPU_IRQ_EOS = 192,
-};
-
-int ipu_idmac_channel_irq(struct ipu_soc *ipu, struct ipuv3_channel *channel,
-		enum ipu_channel_irq irq);
-
-#define IPU_IRQ_DP_SF_START		(448 + 2)
-#define IPU_IRQ_DP_SF_END		(448 + 3)
-#define IPU_IRQ_BG_SF_END		IPU_IRQ_DP_SF_END,
-#define IPU_IRQ_DC_FC_0			(448 + 8)
-#define IPU_IRQ_DC_FC_1			(448 + 9)
-#define IPU_IRQ_DC_FC_2			(448 + 10)
-#define IPU_IRQ_DC_FC_3			(448 + 11)
-#define IPU_IRQ_DC_FC_4			(448 + 12)
-#define IPU_IRQ_DC_FC_6			(448 + 13)
-#define IPU_IRQ_VSYNC_PRE_0		(448 + 14)
-#define IPU_IRQ_VSYNC_PRE_1		(448 + 15)
-
-/*
- * IPU Image DMA Controller (idmac) functions
- */
-struct ipuv3_channel *ipu_idmac_get(struct ipu_soc *ipu, unsigned channel);
-void ipu_idmac_put(struct ipuv3_channel *);
-
-int ipu_idmac_enable_channel(struct ipuv3_channel *channel);
-int ipu_idmac_disable_channel(struct ipuv3_channel *channel);
-int ipu_idmac_wait_busy(struct ipuv3_channel *channel, int ms);
-
-void ipu_idmac_set_double_buffer(struct ipuv3_channel *channel,
-		bool doublebuffer);
-void ipu_idmac_select_buffer(struct ipuv3_channel *channel, u32 buf_num);
-
-/*
- * IPU Display Controller (dc) functions
- */
-struct ipu_dc;
-struct ipu_di;
-struct ipu_dc *ipu_dc_get(struct ipu_soc *ipu, int channel);
-void ipu_dc_put(struct ipu_dc *dc);
-int ipu_dc_init_sync(struct ipu_dc *dc, struct ipu_di *di, bool interlaced,
-		u32 pixel_fmt, u32 width);
-void ipu_dc_enable_channel(struct ipu_dc *dc);
-void ipu_dc_disable_channel(struct ipu_dc *dc);
-
-/*
- * IPU Display Interface (di) functions
- */
-struct ipu_di *ipu_di_get(struct ipu_soc *ipu, int disp);
-void ipu_di_put(struct ipu_di *);
-int ipu_di_disable(struct ipu_di *);
-int ipu_di_enable(struct ipu_di *);
-int ipu_di_get_num(struct ipu_di *);
-int ipu_di_init_sync_panel(struct ipu_di *, struct ipu_di_signal_cfg *sig);
-
-/*
- * IPU Display Multi FIFO Controller (dmfc) functions
- */
-struct dmfc_channel;
-int ipu_dmfc_enable_channel(struct dmfc_channel *dmfc);
-void ipu_dmfc_disable_channel(struct dmfc_channel *dmfc);
-int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc,
-		unsigned long bandwidth_mbs, int burstsize);
-void ipu_dmfc_free_bandwidth(struct dmfc_channel *dmfc);
-int ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width);
-struct dmfc_channel *ipu_dmfc_get(struct ipu_soc *ipu, int ipuv3_channel);
-void ipu_dmfc_put(struct dmfc_channel *dmfc);
-
-/*
- * IPU Display Processor (dp) functions
- */
-#define IPU_DP_FLOW_SYNC_BG	0
-#define IPU_DP_FLOW_SYNC_FG	1
-#define IPU_DP_FLOW_ASYNC0_BG	2
-#define IPU_DP_FLOW_ASYNC0_FG	3
-#define IPU_DP_FLOW_ASYNC1_BG	4
-#define IPU_DP_FLOW_ASYNC1_FG	5
-
-struct ipu_dp *ipu_dp_get(struct ipu_soc *ipu, unsigned int flow);
-void ipu_dp_put(struct ipu_dp *);
-int ipu_dp_enable_channel(struct ipu_dp *dp);
-void ipu_dp_disable_channel(struct ipu_dp *dp);
-int ipu_dp_setup_channel(struct ipu_dp *dp,
-		enum ipu_color_space in, enum ipu_color_space out);
-int ipu_dp_set_window_pos(struct ipu_dp *, u16 x_pos, u16 y_pos);
-int ipu_dp_set_global_alpha(struct ipu_dp *dp, bool enable, u8 alpha,
-		bool bg_chan);
-
-#define IPU_CPMEM_WORD(word, ofs, size) ((((word) * 160 + (ofs)) << 8) | (size))
-
-#define IPU_FIELD_UBO		IPU_CPMEM_WORD(0, 46, 22)
-#define IPU_FIELD_VBO		IPU_CPMEM_WORD(0, 68, 22)
-#define IPU_FIELD_IOX		IPU_CPMEM_WORD(0, 90, 4)
-#define IPU_FIELD_RDRW		IPU_CPMEM_WORD(0, 94, 1)
-#define IPU_FIELD_SO		IPU_CPMEM_WORD(0, 113, 1)
-#define IPU_FIELD_SLY		IPU_CPMEM_WORD(1, 102, 14)
-#define IPU_FIELD_SLUV		IPU_CPMEM_WORD(1, 128, 14)
-
-#define IPU_FIELD_XV		IPU_CPMEM_WORD(0, 0, 10)
-#define IPU_FIELD_YV		IPU_CPMEM_WORD(0, 10, 9)
-#define IPU_FIELD_XB		IPU_CPMEM_WORD(0, 19, 13)
-#define IPU_FIELD_YB		IPU_CPMEM_WORD(0, 32, 12)
-#define IPU_FIELD_NSB_B		IPU_CPMEM_WORD(0, 44, 1)
-#define IPU_FIELD_CF		IPU_CPMEM_WORD(0, 45, 1)
-#define IPU_FIELD_SX		IPU_CPMEM_WORD(0, 46, 12)
-#define IPU_FIELD_SY		IPU_CPMEM_WORD(0, 58, 11)
-#define IPU_FIELD_NS		IPU_CPMEM_WORD(0, 69, 10)
-#define IPU_FIELD_SDX		IPU_CPMEM_WORD(0, 79, 7)
-#define IPU_FIELD_SM		IPU_CPMEM_WORD(0, 86, 10)
-#define IPU_FIELD_SCC		IPU_CPMEM_WORD(0, 96, 1)
-#define IPU_FIELD_SCE		IPU_CPMEM_WORD(0, 97, 1)
-#define IPU_FIELD_SDY		IPU_CPMEM_WORD(0, 98, 7)
-#define IPU_FIELD_SDRX		IPU_CPMEM_WORD(0, 105, 1)
-#define IPU_FIELD_SDRY		IPU_CPMEM_WORD(0, 106, 1)
-#define IPU_FIELD_BPP		IPU_CPMEM_WORD(0, 107, 3)
-#define IPU_FIELD_DEC_SEL	IPU_CPMEM_WORD(0, 110, 2)
-#define IPU_FIELD_DIM		IPU_CPMEM_WORD(0, 112, 1)
-#define IPU_FIELD_BNDM		IPU_CPMEM_WORD(0, 114, 3)
-#define IPU_FIELD_BM		IPU_CPMEM_WORD(0, 117, 2)
-#define IPU_FIELD_ROT		IPU_CPMEM_WORD(0, 119, 1)
-#define IPU_FIELD_HF		IPU_CPMEM_WORD(0, 120, 1)
-#define IPU_FIELD_VF		IPU_CPMEM_WORD(0, 121, 1)
-#define IPU_FIELD_THE		IPU_CPMEM_WORD(0, 122, 1)
-#define IPU_FIELD_CAP		IPU_CPMEM_WORD(0, 123, 1)
-#define IPU_FIELD_CAE		IPU_CPMEM_WORD(0, 124, 1)
-#define IPU_FIELD_FW		IPU_CPMEM_WORD(0, 125, 13)
-#define IPU_FIELD_FH		IPU_CPMEM_WORD(0, 138, 12)
-#define IPU_FIELD_EBA0		IPU_CPMEM_WORD(1, 0, 29)
-#define IPU_FIELD_EBA1		IPU_CPMEM_WORD(1, 29, 29)
-#define IPU_FIELD_ILO		IPU_CPMEM_WORD(1, 58, 20)
-#define IPU_FIELD_NPB		IPU_CPMEM_WORD(1, 78, 7)
-#define IPU_FIELD_PFS		IPU_CPMEM_WORD(1, 85, 4)
-#define IPU_FIELD_ALU		IPU_CPMEM_WORD(1, 89, 1)
-#define IPU_FIELD_ALBM		IPU_CPMEM_WORD(1, 90, 3)
-#define IPU_FIELD_ID		IPU_CPMEM_WORD(1, 93, 2)
-#define IPU_FIELD_TH		IPU_CPMEM_WORD(1, 95, 7)
-#define IPU_FIELD_SL		IPU_CPMEM_WORD(1, 102, 14)
-#define IPU_FIELD_WID0		IPU_CPMEM_WORD(1, 116, 3)
-#define IPU_FIELD_WID1		IPU_CPMEM_WORD(1, 119, 3)
-#define IPU_FIELD_WID2		IPU_CPMEM_WORD(1, 122, 3)
-#define IPU_FIELD_WID3		IPU_CPMEM_WORD(1, 125, 3)
-#define IPU_FIELD_OFS0		IPU_CPMEM_WORD(1, 128, 5)
-#define IPU_FIELD_OFS1		IPU_CPMEM_WORD(1, 133, 5)
-#define IPU_FIELD_OFS2		IPU_CPMEM_WORD(1, 138, 5)
-#define IPU_FIELD_OFS3		IPU_CPMEM_WORD(1, 143, 5)
-#define IPU_FIELD_SXYS		IPU_CPMEM_WORD(1, 148, 1)
-#define IPU_FIELD_CRE		IPU_CPMEM_WORD(1, 149, 1)
-#define IPU_FIELD_DEC_SEL2	IPU_CPMEM_WORD(1, 150, 1)
-
-struct ipu_cpmem_word {
-	u32 data[5];
-	u32 res[3];
-};
-
-struct ipu_ch_param {
-	struct ipu_cpmem_word word[2];
-};
-
-void ipu_ch_param_write_field(struct ipu_ch_param __iomem *base, u32 wbs, u32 v);
-u32 ipu_ch_param_read_field(struct ipu_ch_param __iomem *base, u32 wbs);
-struct ipu_ch_param __iomem *ipu_get_cpmem(struct ipuv3_channel *channel);
-void ipu_ch_param_dump(struct ipu_ch_param __iomem *p);
-
-static inline void ipu_ch_param_zero(struct ipu_ch_param __iomem *p)
-{
-	int i;
-	void __iomem *base = p;
-
-	for (i = 0; i < sizeof(*p) / sizeof(u32); i++)
-		writel(0, base + i * sizeof(u32));
-}
-
-static inline void ipu_cpmem_set_buffer(struct ipu_ch_param __iomem *p,
-		int bufnum, dma_addr_t buf)
-{
-	if (bufnum)
-		ipu_ch_param_write_field(p, IPU_FIELD_EBA1, buf >> 3);
-	else
-		ipu_ch_param_write_field(p, IPU_FIELD_EBA0, buf >> 3);
-}
-
-static inline void ipu_cpmem_set_resolution(struct ipu_ch_param __iomem *p,
-		int xres, int yres)
-{
-	ipu_ch_param_write_field(p, IPU_FIELD_FW, xres - 1);
-	ipu_ch_param_write_field(p, IPU_FIELD_FH, yres - 1);
-}
-
-static inline void ipu_cpmem_set_stride(struct ipu_ch_param __iomem *p,
-		int stride)
-{
-	ipu_ch_param_write_field(p, IPU_FIELD_SLY, stride - 1);
-}
-
-void ipu_cpmem_set_high_priority(struct ipuv3_channel *channel);
-
-struct ipu_rgb {
-	struct fb_bitfield	red;
-	struct fb_bitfield	green;
-	struct fb_bitfield	blue;
-	struct fb_bitfield	transp;
-	int			bits_per_pixel;
-};
-
-struct ipu_image {
-	struct v4l2_pix_format pix;
-	struct v4l2_rect rect;
-	dma_addr_t phys;
-};
-
-int ipu_cpmem_set_format_passthrough(struct ipu_ch_param __iomem *p,
-		int width);
-
-int ipu_cpmem_set_format_rgb(struct ipu_ch_param __iomem *,
-		const struct ipu_rgb *rgb);
-
-static inline void ipu_cpmem_interlaced_scan(struct ipu_ch_param *p,
-		int stride)
-{
-	ipu_ch_param_write_field(p, IPU_FIELD_SO, 1);
-	ipu_ch_param_write_field(p, IPU_FIELD_ILO, stride / 8);
-	ipu_ch_param_write_field(p, IPU_FIELD_SLY, (stride * 2) - 1);
-};
-
-void ipu_cpmem_set_yuv_planar(struct ipu_ch_param __iomem *p, u32 pixel_format,
-			int stride, int height);
-void ipu_cpmem_set_yuv_interleaved(struct ipu_ch_param __iomem *p,
-				   u32 pixel_format);
-void ipu_cpmem_set_yuv_planar_full(struct ipu_ch_param __iomem *p,
-		u32 pixel_format, int stride, int u_offset, int v_offset);
-int ipu_cpmem_set_fmt(struct ipu_ch_param __iomem *cpmem, u32 pixelformat);
-int ipu_cpmem_set_image(struct ipu_ch_param __iomem *cpmem,
-		struct ipu_image *image);
-
-enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc);
-enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat);
-
-static inline void ipu_cpmem_set_burstsize(struct ipu_ch_param __iomem *p,
-		int burstsize)
-{
-	ipu_ch_param_write_field(p, IPU_FIELD_NPB, burstsize - 1);
-};
-
-struct ipu_client_platformdata {
-	int di;
-	int dc;
-	int dp;
-	int dmfc;
-	int dma[2];
-};
-
-#endif /* __DRM_IPU_H__ */
diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-common.c b/drivers/staging/imx-drm/ipu-v3/ipu-common.c
deleted file mode 100644
index ca85d3d70ae3..000000000000
--- a/drivers/staging/imx-drm/ipu-v3/ipu-common.c
+++ /dev/null
@@ -1,1261 +0,0 @@
-/*
- * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
- * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- */
-#include <linux/module.h>
-#include <linux/export.h>
-#include <linux/types.h>
-#include <linux/reset.h>
-#include <linux/platform_device.h>
-#include <linux/err.h>
-#include <linux/spinlock.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/clk.h>
-#include <linux/list.h>
-#include <linux/irq.h>
-#include <linux/irqchip/chained_irq.h>
-#include <linux/irqdomain.h>
-#include <linux/of_device.h>
-
-#include <drm/drm_fourcc.h>
-
-#include "imx-ipu-v3.h"
-#include "ipu-prv.h"
-
-static inline u32 ipu_cm_read(struct ipu_soc *ipu, unsigned offset)
-{
-	return readl(ipu->cm_reg + offset);
-}
-
-static inline void ipu_cm_write(struct ipu_soc *ipu, u32 value, unsigned offset)
-{
-	writel(value, ipu->cm_reg + offset);
-}
-
-static inline u32 ipu_idmac_read(struct ipu_soc *ipu, unsigned offset)
-{
-	return readl(ipu->idmac_reg + offset);
-}
-
-static inline void ipu_idmac_write(struct ipu_soc *ipu, u32 value,
-		unsigned offset)
-{
-	writel(value, ipu->idmac_reg + offset);
-}
-
-void ipu_srm_dp_sync_update(struct ipu_soc *ipu)
-{
-	u32 val;
-
-	val = ipu_cm_read(ipu, IPU_SRM_PRI2);
-	val |= 0x8;
-	ipu_cm_write(ipu, val, IPU_SRM_PRI2);
-}
-EXPORT_SYMBOL_GPL(ipu_srm_dp_sync_update);
-
-struct ipu_ch_param __iomem *ipu_get_cpmem(struct ipuv3_channel *channel)
-{
-	struct ipu_soc *ipu = channel->ipu;
-
-	return ipu->cpmem_base + channel->num;
-}
-EXPORT_SYMBOL_GPL(ipu_get_cpmem);
-
-void ipu_cpmem_set_high_priority(struct ipuv3_channel *channel)
-{
-	struct ipu_soc *ipu = channel->ipu;
-	struct ipu_ch_param __iomem *p = ipu_get_cpmem(channel);
-	u32 val;
-
-	if (ipu->ipu_type == IPUV3EX)
-		ipu_ch_param_write_field(p, IPU_FIELD_ID, 1);
-
-	val = ipu_idmac_read(ipu, IDMAC_CHA_PRI(channel->num));
-	val |= 1 << (channel->num % 32);
-	ipu_idmac_write(ipu, val, IDMAC_CHA_PRI(channel->num));
-};
-EXPORT_SYMBOL_GPL(ipu_cpmem_set_high_priority);
-
-void ipu_ch_param_write_field(struct ipu_ch_param __iomem *base, u32 wbs, u32 v)
-{
-	u32 bit = (wbs >> 8) % 160;
-	u32 size = wbs & 0xff;
-	u32 word = (wbs >> 8) / 160;
-	u32 i = bit / 32;
-	u32 ofs = bit % 32;
-	u32 mask = (1 << size) - 1;
-	u32 val;
-
-	pr_debug("%s %d %d %d\n", __func__, word, bit , size);
-
-	val = readl(&base->word[word].data[i]);
-	val &= ~(mask << ofs);
-	val |= v << ofs;
-	writel(val, &base->word[word].data[i]);
-
-	if ((bit + size - 1) / 32 > i) {
-		val = readl(&base->word[word].data[i + 1]);
-		val &= ~(mask >> (ofs ? (32 - ofs) : 0));
-		val |= v >> (ofs ? (32 - ofs) : 0);
-		writel(val, &base->word[word].data[i + 1]);
-	}
-}
-EXPORT_SYMBOL_GPL(ipu_ch_param_write_field);
-
-u32 ipu_ch_param_read_field(struct ipu_ch_param __iomem *base, u32 wbs)
-{
-	u32 bit = (wbs >> 8) % 160;
-	u32 size = wbs & 0xff;
-	u32 word = (wbs >> 8) / 160;
-	u32 i = bit / 32;
-	u32 ofs = bit % 32;
-	u32 mask = (1 << size) - 1;
-	u32 val = 0;
-
-	pr_debug("%s %d %d %d\n", __func__, word, bit , size);
-
-	val = (readl(&base->word[word].data[i]) >> ofs) & mask;
-
-	if ((bit + size - 1) / 32 > i) {
-		u32 tmp;
-		tmp = readl(&base->word[word].data[i + 1]);
-		tmp &= mask >> (ofs ? (32 - ofs) : 0);
-		val |= tmp << (ofs ? (32 - ofs) : 0);
-	}
-
-	return val;
-}
-EXPORT_SYMBOL_GPL(ipu_ch_param_read_field);
-
-int ipu_cpmem_set_format_rgb(struct ipu_ch_param __iomem *p,
-		const struct ipu_rgb *rgb)
-{
-	int bpp = 0, npb = 0, ro, go, bo, to;
-
-	ro = rgb->bits_per_pixel - rgb->red.length - rgb->red.offset;
-	go = rgb->bits_per_pixel - rgb->green.length - rgb->green.offset;
-	bo = rgb->bits_per_pixel - rgb->blue.length - rgb->blue.offset;
-	to = rgb->bits_per_pixel - rgb->transp.length - rgb->transp.offset;
-
-	ipu_ch_param_write_field(p, IPU_FIELD_WID0, rgb->red.length - 1);
-	ipu_ch_param_write_field(p, IPU_FIELD_OFS0, ro);
-	ipu_ch_param_write_field(p, IPU_FIELD_WID1, rgb->green.length - 1);
-	ipu_ch_param_write_field(p, IPU_FIELD_OFS1, go);
-	ipu_ch_param_write_field(p, IPU_FIELD_WID2, rgb->blue.length - 1);
-	ipu_ch_param_write_field(p, IPU_FIELD_OFS2, bo);
-
-	if (rgb->transp.length) {
-		ipu_ch_param_write_field(p, IPU_FIELD_WID3,
-				rgb->transp.length - 1);
-		ipu_ch_param_write_field(p, IPU_FIELD_OFS3, to);
-	} else {
-		ipu_ch_param_write_field(p, IPU_FIELD_WID3, 7);
-		ipu_ch_param_write_field(p, IPU_FIELD_OFS3,
-				rgb->bits_per_pixel);
-	}
-
-	switch (rgb->bits_per_pixel) {
-	case 32:
-		bpp = 0;
-		npb = 15;
-		break;
-	case 24:
-		bpp = 1;
-		npb = 19;
-		break;
-	case 16:
-		bpp = 3;
-		npb = 31;
-		break;
-	case 8:
-		bpp = 5;
-		npb = 63;
-		break;
-	default:
-		return -EINVAL;
-	}
-	ipu_ch_param_write_field(p, IPU_FIELD_BPP, bpp);
-	ipu_ch_param_write_field(p, IPU_FIELD_NPB, npb);
-	ipu_ch_param_write_field(p, IPU_FIELD_PFS, 7); /* rgb mode */
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_cpmem_set_format_rgb);
-
-int ipu_cpmem_set_format_passthrough(struct ipu_ch_param __iomem *p,
-		int width)
-{
-	int bpp = 0, npb = 0;
-
-	switch (width) {
-	case 32:
-		bpp = 0;
-		npb = 15;
-		break;
-	case 24:
-		bpp = 1;
-		npb = 19;
-		break;
-	case 16:
-		bpp = 3;
-		npb = 31;
-		break;
-	case 8:
-		bpp = 5;
-		npb = 63;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	ipu_ch_param_write_field(p, IPU_FIELD_BPP, bpp);
-	ipu_ch_param_write_field(p, IPU_FIELD_NPB, npb);
-	ipu_ch_param_write_field(p, IPU_FIELD_PFS, 6); /* raw mode */
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_cpmem_set_format_passthrough);
-
-void ipu_cpmem_set_yuv_interleaved(struct ipu_ch_param __iomem *p,
-				   u32 pixel_format)
-{
-	switch (pixel_format) {
-	case V4L2_PIX_FMT_UYVY:
-		ipu_ch_param_write_field(p, IPU_FIELD_BPP, 3);    /* bits/pixel */
-		ipu_ch_param_write_field(p, IPU_FIELD_PFS, 0xA);  /* pix format */
-		ipu_ch_param_write_field(p, IPU_FIELD_NPB, 31);   /* burst size */
-		break;
-	case V4L2_PIX_FMT_YUYV:
-		ipu_ch_param_write_field(p, IPU_FIELD_BPP, 3);    /* bits/pixel */
-		ipu_ch_param_write_field(p, IPU_FIELD_PFS, 0x8);  /* pix format */
-		ipu_ch_param_write_field(p, IPU_FIELD_NPB, 31);   /* burst size */
-		break;
-	}
-}
-EXPORT_SYMBOL_GPL(ipu_cpmem_set_yuv_interleaved);
-
-void ipu_cpmem_set_yuv_planar_full(struct ipu_ch_param __iomem *p,
-		u32 pixel_format, int stride, int u_offset, int v_offset)
-{
-	switch (pixel_format) {
-	case V4L2_PIX_FMT_YUV420:
-		ipu_ch_param_write_field(p, IPU_FIELD_SLUV, (stride / 2) - 1);
-		ipu_ch_param_write_field(p, IPU_FIELD_UBO, u_offset / 8);
-		ipu_ch_param_write_field(p, IPU_FIELD_VBO, v_offset / 8);
-		break;
-	case V4L2_PIX_FMT_YVU420:
-		ipu_ch_param_write_field(p, IPU_FIELD_SLUV, (stride / 2) - 1);
-		ipu_ch_param_write_field(p, IPU_FIELD_UBO, v_offset / 8);
-		ipu_ch_param_write_field(p, IPU_FIELD_VBO, u_offset / 8);
-		break;
-	}
-}
-EXPORT_SYMBOL_GPL(ipu_cpmem_set_yuv_planar_full);
-
-void ipu_cpmem_set_yuv_planar(struct ipu_ch_param __iomem *p, u32 pixel_format,
-		int stride, int height)
-{
-	int u_offset, v_offset;
-	int uv_stride = 0;
-
-	switch (pixel_format) {
-	case V4L2_PIX_FMT_YUV420:
-	case V4L2_PIX_FMT_YVU420:
-		uv_stride = stride / 2;
-		u_offset = stride * height;
-		v_offset = u_offset + (uv_stride * height / 2);
-		ipu_cpmem_set_yuv_planar_full(p, pixel_format, stride,
-				u_offset, v_offset);
-		break;
-	}
-}
-EXPORT_SYMBOL_GPL(ipu_cpmem_set_yuv_planar);
-
-static const struct ipu_rgb def_rgb_32 = {
-	.red	= { .offset = 16, .length = 8, },
-	.green	= { .offset =  8, .length = 8, },
-	.blue	= { .offset =  0, .length = 8, },
-	.transp = { .offset = 24, .length = 8, },
-	.bits_per_pixel = 32,
-};
-
-static const struct ipu_rgb def_bgr_32 = {
-	.red	= { .offset =  0, .length = 8, },
-	.green	= { .offset =  8, .length = 8, },
-	.blue	= { .offset = 16, .length = 8, },
-	.transp = { .offset = 24, .length = 8, },
-	.bits_per_pixel = 32,
-};
-
-static const struct ipu_rgb def_rgb_24 = {
-	.red	= { .offset = 16, .length = 8, },
-	.green	= { .offset =  8, .length = 8, },
-	.blue	= { .offset =  0, .length = 8, },
-	.transp = { .offset =  0, .length = 0, },
-	.bits_per_pixel = 24,
-};
-
-static const struct ipu_rgb def_bgr_24 = {
-	.red	= { .offset =  0, .length = 8, },
-	.green	= { .offset =  8, .length = 8, },
-	.blue	= { .offset = 16, .length = 8, },
-	.transp = { .offset =  0, .length = 0, },
-	.bits_per_pixel = 24,
-};
-
-static const struct ipu_rgb def_rgb_16 = {
-	.red	= { .offset = 11, .length = 5, },
-	.green	= { .offset =  5, .length = 6, },
-	.blue	= { .offset =  0, .length = 5, },
-	.transp = { .offset =  0, .length = 0, },
-	.bits_per_pixel = 16,
-};
-
-static const struct ipu_rgb def_bgr_16 = {
-	.red	= { .offset =  0, .length = 5, },
-	.green	= { .offset =  5, .length = 6, },
-	.blue	= { .offset = 11, .length = 5, },
-	.transp = { .offset =  0, .length = 0, },
-	.bits_per_pixel = 16,
-};
-
-#define Y_OFFSET(pix, x, y)	((x) + pix->width * (y))
-#define U_OFFSET(pix, x, y)	((pix->width * pix->height) + \
-					(pix->width * (y) / 4) + (x) / 2)
-#define V_OFFSET(pix, x, y)	((pix->width * pix->height) + \
-					(pix->width * pix->height / 4) + \
-					(pix->width * (y) / 4) + (x) / 2)
-
-int ipu_cpmem_set_fmt(struct ipu_ch_param __iomem *cpmem, u32 drm_fourcc)
-{
-	switch (drm_fourcc) {
-	case DRM_FORMAT_YUV420:
-	case DRM_FORMAT_YVU420:
-		/* pix format */
-		ipu_ch_param_write_field(cpmem, IPU_FIELD_PFS, 2);
-		/* burst size */
-		ipu_ch_param_write_field(cpmem, IPU_FIELD_NPB, 63);
-		break;
-	case DRM_FORMAT_UYVY:
-		/* bits/pixel */
-		ipu_ch_param_write_field(cpmem, IPU_FIELD_BPP, 3);
-		/* pix format */
-		ipu_ch_param_write_field(cpmem, IPU_FIELD_PFS, 0xA);
-		/* burst size */
-		ipu_ch_param_write_field(cpmem, IPU_FIELD_NPB, 31);
-		break;
-	case DRM_FORMAT_YUYV:
-		/* bits/pixel */
-		ipu_ch_param_write_field(cpmem, IPU_FIELD_BPP, 3);
-		/* pix format */
-		ipu_ch_param_write_field(cpmem, IPU_FIELD_PFS, 0x8);
-		/* burst size */
-		ipu_ch_param_write_field(cpmem, IPU_FIELD_NPB, 31);
-		break;
-	case DRM_FORMAT_ABGR8888:
-	case DRM_FORMAT_XBGR8888:
-		ipu_cpmem_set_format_rgb(cpmem, &def_bgr_32);
-		break;
-	case DRM_FORMAT_ARGB8888:
-	case DRM_FORMAT_XRGB8888:
-		ipu_cpmem_set_format_rgb(cpmem, &def_rgb_32);
-		break;
-	case DRM_FORMAT_BGR888:
-		ipu_cpmem_set_format_rgb(cpmem, &def_bgr_24);
-		break;
-	case DRM_FORMAT_RGB888:
-		ipu_cpmem_set_format_rgb(cpmem, &def_rgb_24);
-		break;
-	case DRM_FORMAT_RGB565:
-		ipu_cpmem_set_format_rgb(cpmem, &def_rgb_16);
-		break;
-	case DRM_FORMAT_BGR565:
-		ipu_cpmem_set_format_rgb(cpmem, &def_bgr_16);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_cpmem_set_fmt);
-
-/*
- * The V4L2 spec defines packed RGB formats in memory byte order, which from
- * point of view of the IPU corresponds to little-endian words with the first
- * component in the least significant bits.
- * The DRM pixel formats and IPU internal representation are ordered the other
- * way around, with the first named component ordered at the most significant
- * bits. Further, V4L2 formats are not well defined:
- *     http://linuxtv.org/downloads/v4l-dvb-apis/packed-rgb.html
- * We choose the interpretation which matches GStreamer behavior.
- */
-static int v4l2_pix_fmt_to_drm_fourcc(u32 pixelformat)
-{
-	switch (pixelformat) {
-	case V4L2_PIX_FMT_RGB565:
-		/*
-		 * Here we choose the 'corrected' interpretation of RGBP, a
-		 * little-endian 16-bit word with the red component at the most
-		 * significant bits:
-		 * g[2:0]b[4:0] r[4:0]g[5:3] <=> [16:0] R:G:B
-		 */
-		return DRM_FORMAT_RGB565;
-	case V4L2_PIX_FMT_BGR24:
-		/* B G R <=> [24:0] R:G:B */
-		return DRM_FORMAT_RGB888;
-	case V4L2_PIX_FMT_RGB24:
-		/* R G B <=> [24:0] B:G:R */
-		return DRM_FORMAT_BGR888;
-	case V4L2_PIX_FMT_BGR32:
-		/* B G R A <=> [32:0] A:B:G:R */
-		return DRM_FORMAT_XRGB8888;
-	case V4L2_PIX_FMT_RGB32:
-		/* R G B A <=> [32:0] A:B:G:R */
-		return DRM_FORMAT_XBGR8888;
-	case V4L2_PIX_FMT_UYVY:
-		return DRM_FORMAT_UYVY;
-	case V4L2_PIX_FMT_YUYV:
-		return DRM_FORMAT_YUYV;
-	case V4L2_PIX_FMT_YUV420:
-		return DRM_FORMAT_YUV420;
-	case V4L2_PIX_FMT_YVU420:
-		return DRM_FORMAT_YVU420;
-	}
-
-	return -EINVAL;
-}
-
-enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc)
-{
-	switch (drm_fourcc) {
-	case DRM_FORMAT_RGB565:
-	case DRM_FORMAT_BGR565:
-	case DRM_FORMAT_RGB888:
-	case DRM_FORMAT_BGR888:
-	case DRM_FORMAT_XRGB8888:
-	case DRM_FORMAT_XBGR8888:
-	case DRM_FORMAT_RGBX8888:
-	case DRM_FORMAT_BGRX8888:
-	case DRM_FORMAT_ARGB8888:
-	case DRM_FORMAT_ABGR8888:
-	case DRM_FORMAT_RGBA8888:
-	case DRM_FORMAT_BGRA8888:
-		return IPUV3_COLORSPACE_RGB;
-	case DRM_FORMAT_YUYV:
-	case DRM_FORMAT_UYVY:
-	case DRM_FORMAT_YUV420:
-	case DRM_FORMAT_YVU420:
-		return IPUV3_COLORSPACE_YUV;
-	default:
-		return IPUV3_COLORSPACE_UNKNOWN;
-	}
-}
-EXPORT_SYMBOL_GPL(ipu_drm_fourcc_to_colorspace);
-
-int ipu_cpmem_set_image(struct ipu_ch_param __iomem *cpmem,
-		struct ipu_image *image)
-{
-	struct v4l2_pix_format *pix = &image->pix;
-	int y_offset, u_offset, v_offset;
-
-	pr_debug("%s: resolution: %dx%d stride: %d\n",
-			__func__, pix->width, pix->height,
-			pix->bytesperline);
-
-	ipu_cpmem_set_resolution(cpmem, image->rect.width,
-			image->rect.height);
-	ipu_cpmem_set_stride(cpmem, pix->bytesperline);
-
-	ipu_cpmem_set_fmt(cpmem, v4l2_pix_fmt_to_drm_fourcc(pix->pixelformat));
-
-	switch (pix->pixelformat) {
-	case V4L2_PIX_FMT_YUV420:
-	case V4L2_PIX_FMT_YVU420:
-		y_offset = Y_OFFSET(pix, image->rect.left, image->rect.top);
-		u_offset = U_OFFSET(pix, image->rect.left,
-				image->rect.top) - y_offset;
-		v_offset = V_OFFSET(pix, image->rect.left,
-				image->rect.top) - y_offset;
-
-		ipu_cpmem_set_yuv_planar_full(cpmem, pix->pixelformat,
-				pix->bytesperline, u_offset, v_offset);
-		ipu_cpmem_set_buffer(cpmem, 0, image->phys + y_offset);
-		break;
-	case V4L2_PIX_FMT_UYVY:
-	case V4L2_PIX_FMT_YUYV:
-		ipu_cpmem_set_buffer(cpmem, 0, image->phys +
-				image->rect.left * 2 +
-				image->rect.top * image->pix.bytesperline);
-		break;
-	case V4L2_PIX_FMT_RGB32:
-	case V4L2_PIX_FMT_BGR32:
-		ipu_cpmem_set_buffer(cpmem, 0, image->phys +
-				image->rect.left * 4 +
-				image->rect.top * image->pix.bytesperline);
-		break;
-	case V4L2_PIX_FMT_RGB565:
-		ipu_cpmem_set_buffer(cpmem, 0, image->phys +
-				image->rect.left * 2 +
-				image->rect.top * image->pix.bytesperline);
-		break;
-	case V4L2_PIX_FMT_RGB24:
-	case V4L2_PIX_FMT_BGR24:
-		ipu_cpmem_set_buffer(cpmem, 0, image->phys +
-				image->rect.left * 3 +
-				image->rect.top * image->pix.bytesperline);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_cpmem_set_image);
-
-enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat)
-{
-	switch (pixelformat) {
-	case V4L2_PIX_FMT_YUV420:
-	case V4L2_PIX_FMT_YVU420:
-	case V4L2_PIX_FMT_UYVY:
-	case V4L2_PIX_FMT_YUYV:
-		return IPUV3_COLORSPACE_YUV;
-	case V4L2_PIX_FMT_RGB32:
-	case V4L2_PIX_FMT_BGR32:
-	case V4L2_PIX_FMT_RGB24:
-	case V4L2_PIX_FMT_BGR24:
-	case V4L2_PIX_FMT_RGB565:
-		return IPUV3_COLORSPACE_RGB;
-	default:
-		return IPUV3_COLORSPACE_UNKNOWN;
-	}
-}
-EXPORT_SYMBOL_GPL(ipu_pixelformat_to_colorspace);
-
-struct ipuv3_channel *ipu_idmac_get(struct ipu_soc *ipu, unsigned num)
-{
-	struct ipuv3_channel *channel;
-
-	dev_dbg(ipu->dev, "%s %d\n", __func__, num);
-
-	if (num > 63)
-		return ERR_PTR(-ENODEV);
-
-	mutex_lock(&ipu->channel_lock);
-
-	channel = &ipu->channel[num];
-
-	if (channel->busy) {
-		channel = ERR_PTR(-EBUSY);
-		goto out;
-	}
-
-	channel->busy = true;
-	channel->num = num;
-
-out:
-	mutex_unlock(&ipu->channel_lock);
-
-	return channel;
-}
-EXPORT_SYMBOL_GPL(ipu_idmac_get);
-
-void ipu_idmac_put(struct ipuv3_channel *channel)
-{
-	struct ipu_soc *ipu = channel->ipu;
-
-	dev_dbg(ipu->dev, "%s %d\n", __func__, channel->num);
-
-	mutex_lock(&ipu->channel_lock);
-
-	channel->busy = false;
-
-	mutex_unlock(&ipu->channel_lock);
-}
-EXPORT_SYMBOL_GPL(ipu_idmac_put);
-
-#define idma_mask(ch)			(1 << (ch & 0x1f))
-
-void ipu_idmac_set_double_buffer(struct ipuv3_channel *channel,
-		bool doublebuffer)
-{
-	struct ipu_soc *ipu = channel->ipu;
-	unsigned long flags;
-	u32 reg;
-
-	spin_lock_irqsave(&ipu->lock, flags);
-
-	reg = ipu_cm_read(ipu, IPU_CHA_DB_MODE_SEL(channel->num));
-	if (doublebuffer)
-		reg |= idma_mask(channel->num);
-	else
-		reg &= ~idma_mask(channel->num);
-	ipu_cm_write(ipu, reg, IPU_CHA_DB_MODE_SEL(channel->num));
-
-	spin_unlock_irqrestore(&ipu->lock, flags);
-}
-EXPORT_SYMBOL_GPL(ipu_idmac_set_double_buffer);
-
-int ipu_module_enable(struct ipu_soc *ipu, u32 mask)
-{
-	unsigned long lock_flags;
-	u32 val;
-
-	spin_lock_irqsave(&ipu->lock, lock_flags);
-
-	val = ipu_cm_read(ipu, IPU_DISP_GEN);
-
-	if (mask & IPU_CONF_DI0_EN)
-		val |= IPU_DI0_COUNTER_RELEASE;
-	if (mask & IPU_CONF_DI1_EN)
-		val |= IPU_DI1_COUNTER_RELEASE;
-
-	ipu_cm_write(ipu, val, IPU_DISP_GEN);
-
-	val = ipu_cm_read(ipu, IPU_CONF);
-	val |= mask;
-	ipu_cm_write(ipu, val, IPU_CONF);
-
-	spin_unlock_irqrestore(&ipu->lock, lock_flags);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_module_enable);
-
-int ipu_module_disable(struct ipu_soc *ipu, u32 mask)
-{
-	unsigned long lock_flags;
-	u32 val;
-
-	spin_lock_irqsave(&ipu->lock, lock_flags);
-
-	val = ipu_cm_read(ipu, IPU_CONF);
-	val &= ~mask;
-	ipu_cm_write(ipu, val, IPU_CONF);
-
-	val = ipu_cm_read(ipu, IPU_DISP_GEN);
-
-	if (mask & IPU_CONF_DI0_EN)
-		val &= ~IPU_DI0_COUNTER_RELEASE;
-	if (mask & IPU_CONF_DI1_EN)
-		val &= ~IPU_DI1_COUNTER_RELEASE;
-
-	ipu_cm_write(ipu, val, IPU_DISP_GEN);
-
-	spin_unlock_irqrestore(&ipu->lock, lock_flags);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_module_disable);
-
-void ipu_idmac_select_buffer(struct ipuv3_channel *channel, u32 buf_num)
-{
-	struct ipu_soc *ipu = channel->ipu;
-	unsigned int chno = channel->num;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ipu->lock, flags);
-
-	/* Mark buffer as ready. */
-	if (buf_num == 0)
-		ipu_cm_write(ipu, idma_mask(chno), IPU_CHA_BUF0_RDY(chno));
-	else
-		ipu_cm_write(ipu, idma_mask(chno), IPU_CHA_BUF1_RDY(chno));
-
-	spin_unlock_irqrestore(&ipu->lock, flags);
-}
-EXPORT_SYMBOL_GPL(ipu_idmac_select_buffer);
-
-int ipu_idmac_enable_channel(struct ipuv3_channel *channel)
-{
-	struct ipu_soc *ipu = channel->ipu;
-	u32 val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ipu->lock, flags);
-
-	val = ipu_idmac_read(ipu, IDMAC_CHA_EN(channel->num));
-	val |= idma_mask(channel->num);
-	ipu_idmac_write(ipu, val, IDMAC_CHA_EN(channel->num));
-
-	spin_unlock_irqrestore(&ipu->lock, flags);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_idmac_enable_channel);
-
-int ipu_idmac_wait_busy(struct ipuv3_channel *channel, int ms)
-{
-	struct ipu_soc *ipu = channel->ipu;
-	unsigned long timeout;
-
-	timeout = jiffies + msecs_to_jiffies(ms);
-	while (ipu_idmac_read(ipu, IDMAC_CHA_BUSY(channel->num)) &
-			idma_mask(channel->num)) {
-		if (time_after(jiffies, timeout))
-			return -ETIMEDOUT;
-		cpu_relax();
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_idmac_wait_busy);
-
-int ipu_idmac_disable_channel(struct ipuv3_channel *channel)
-{
-	struct ipu_soc *ipu = channel->ipu;
-	u32 val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ipu->lock, flags);
-
-	/* Disable DMA channel(s) */
-	val = ipu_idmac_read(ipu, IDMAC_CHA_EN(channel->num));
-	val &= ~idma_mask(channel->num);
-	ipu_idmac_write(ipu, val, IDMAC_CHA_EN(channel->num));
-
-	/* Set channel buffers NOT to be ready */
-	ipu_cm_write(ipu, 0xf0000000, IPU_GPR); /* write one to clear */
-
-	if (ipu_cm_read(ipu, IPU_CHA_BUF0_RDY(channel->num)) &
-			idma_mask(channel->num)) {
-		ipu_cm_write(ipu, idma_mask(channel->num),
-			     IPU_CHA_BUF0_RDY(channel->num));
-	}
-
-	if (ipu_cm_read(ipu, IPU_CHA_BUF1_RDY(channel->num)) &
-			idma_mask(channel->num)) {
-		ipu_cm_write(ipu, idma_mask(channel->num),
-			     IPU_CHA_BUF1_RDY(channel->num));
-	}
-
-	ipu_cm_write(ipu, 0x0, IPU_GPR); /* write one to set */
-
-	/* Reset the double buffer */
-	val = ipu_cm_read(ipu, IPU_CHA_DB_MODE_SEL(channel->num));
-	val &= ~idma_mask(channel->num);
-	ipu_cm_write(ipu, val, IPU_CHA_DB_MODE_SEL(channel->num));
-
-	spin_unlock_irqrestore(&ipu->lock, flags);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_idmac_disable_channel);
-
-static int ipu_memory_reset(struct ipu_soc *ipu)
-{
-	unsigned long timeout;
-
-	ipu_cm_write(ipu, 0x807FFFFF, IPU_MEM_RST);
-
-	timeout = jiffies + msecs_to_jiffies(1000);
-	while (ipu_cm_read(ipu, IPU_MEM_RST) & 0x80000000) {
-		if (time_after(jiffies, timeout))
-			return -ETIME;
-		cpu_relax();
-	}
-
-	return 0;
-}
-
-struct ipu_devtype {
-	const char *name;
-	unsigned long cm_ofs;
-	unsigned long cpmem_ofs;
-	unsigned long srm_ofs;
-	unsigned long tpm_ofs;
-	unsigned long disp0_ofs;
-	unsigned long disp1_ofs;
-	unsigned long dc_tmpl_ofs;
-	unsigned long vdi_ofs;
-	enum ipuv3_type type;
-};
-
-static struct ipu_devtype ipu_type_imx51 = {
-	.name = "IPUv3EX",
-	.cm_ofs = 0x1e000000,
-	.cpmem_ofs = 0x1f000000,
-	.srm_ofs = 0x1f040000,
-	.tpm_ofs = 0x1f060000,
-	.disp0_ofs = 0x1e040000,
-	.disp1_ofs = 0x1e048000,
-	.dc_tmpl_ofs = 0x1f080000,
-	.vdi_ofs = 0x1e068000,
-	.type = IPUV3EX,
-};
-
-static struct ipu_devtype ipu_type_imx53 = {
-	.name = "IPUv3M",
-	.cm_ofs = 0x06000000,
-	.cpmem_ofs = 0x07000000,
-	.srm_ofs = 0x07040000,
-	.tpm_ofs = 0x07060000,
-	.disp0_ofs = 0x06040000,
-	.disp1_ofs = 0x06048000,
-	.dc_tmpl_ofs = 0x07080000,
-	.vdi_ofs = 0x06068000,
-	.type = IPUV3M,
-};
-
-static struct ipu_devtype ipu_type_imx6q = {
-	.name = "IPUv3H",
-	.cm_ofs = 0x00200000,
-	.cpmem_ofs = 0x00300000,
-	.srm_ofs = 0x00340000,
-	.tpm_ofs = 0x00360000,
-	.disp0_ofs = 0x00240000,
-	.disp1_ofs = 0x00248000,
-	.dc_tmpl_ofs = 0x00380000,
-	.vdi_ofs = 0x00268000,
-	.type = IPUV3H,
-};
-
-static const struct of_device_id imx_ipu_dt_ids[] = {
-	{ .compatible = "fsl,imx51-ipu", .data = &ipu_type_imx51, },
-	{ .compatible = "fsl,imx53-ipu", .data = &ipu_type_imx53, },
-	{ .compatible = "fsl,imx6q-ipu", .data = &ipu_type_imx6q, },
-	{ /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, imx_ipu_dt_ids);
-
-static int ipu_submodules_init(struct ipu_soc *ipu,
-		struct platform_device *pdev, unsigned long ipu_base,
-		struct clk *ipu_clk)
-{
-	char *unit;
-	int ret;
-	struct device *dev = &pdev->dev;
-	const struct ipu_devtype *devtype = ipu->devtype;
-
-	ret = ipu_di_init(ipu, dev, 0, ipu_base + devtype->disp0_ofs,
-			IPU_CONF_DI0_EN, ipu_clk);
-	if (ret) {
-		unit = "di0";
-		goto err_di_0;
-	}
-
-	ret = ipu_di_init(ipu, dev, 1, ipu_base + devtype->disp1_ofs,
-			IPU_CONF_DI1_EN, ipu_clk);
-	if (ret) {
-		unit = "di1";
-		goto err_di_1;
-	}
-
-	ret = ipu_dc_init(ipu, dev, ipu_base + devtype->cm_ofs +
-			IPU_CM_DC_REG_OFS, ipu_base + devtype->dc_tmpl_ofs);
-	if (ret) {
-		unit = "dc_template";
-		goto err_dc;
-	}
-
-	ret = ipu_dmfc_init(ipu, dev, ipu_base +
-			devtype->cm_ofs + IPU_CM_DMFC_REG_OFS, ipu_clk);
-	if (ret) {
-		unit = "dmfc";
-		goto err_dmfc;
-	}
-
-	ret = ipu_dp_init(ipu, dev, ipu_base + devtype->srm_ofs);
-	if (ret) {
-		unit = "dp";
-		goto err_dp;
-	}
-
-	return 0;
-
-err_dp:
-	ipu_dmfc_exit(ipu);
-err_dmfc:
-	ipu_dc_exit(ipu);
-err_dc:
-	ipu_di_exit(ipu, 1);
-err_di_1:
-	ipu_di_exit(ipu, 0);
-err_di_0:
-	dev_err(&pdev->dev, "init %s failed with %d\n", unit, ret);
-	return ret;
-}
-
-static void ipu_irq_handle(struct ipu_soc *ipu, const int *regs, int num_regs)
-{
-	unsigned long status;
-	int i, bit, irq;
-
-	for (i = 0; i < num_regs; i++) {
-
-		status = ipu_cm_read(ipu, IPU_INT_STAT(regs[i]));
-		status &= ipu_cm_read(ipu, IPU_INT_CTRL(regs[i]));
-
-		for_each_set_bit(bit, &status, 32) {
-			irq = irq_linear_revmap(ipu->domain, regs[i] * 32 + bit);
-			if (irq)
-				generic_handle_irq(irq);
-		}
-	}
-}
-
-static void ipu_irq_handler(unsigned int irq, struct irq_desc *desc)
-{
-	struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
-	const int int_reg[] = { 0, 1, 2, 3, 10, 11, 12, 13, 14};
-	struct irq_chip *chip = irq_get_chip(irq);
-
-	chained_irq_enter(chip, desc);
-
-	ipu_irq_handle(ipu, int_reg, ARRAY_SIZE(int_reg));
-
-	chained_irq_exit(chip, desc);
-}
-
-static void ipu_err_irq_handler(unsigned int irq, struct irq_desc *desc)
-{
-	struct ipu_soc *ipu = irq_desc_get_handler_data(desc);
-	const int int_reg[] = { 4, 5, 8, 9};
-	struct irq_chip *chip = irq_get_chip(irq);
-
-	chained_irq_enter(chip, desc);
-
-	ipu_irq_handle(ipu, int_reg, ARRAY_SIZE(int_reg));
-
-	chained_irq_exit(chip, desc);
-}
-
-int ipu_idmac_channel_irq(struct ipu_soc *ipu, struct ipuv3_channel *channel,
-		enum ipu_channel_irq irq_type)
-{
-	int irq = irq_linear_revmap(ipu->domain, irq_type + channel->num);
-
-	if (!irq)
-		irq = irq_create_mapping(ipu->domain, irq_type + channel->num);
-
-	return irq;
-}
-EXPORT_SYMBOL_GPL(ipu_idmac_channel_irq);
-
-static void ipu_submodules_exit(struct ipu_soc *ipu)
-{
-	ipu_dp_exit(ipu);
-	ipu_dmfc_exit(ipu);
-	ipu_dc_exit(ipu);
-	ipu_di_exit(ipu, 1);
-	ipu_di_exit(ipu, 0);
-}
-
-static int platform_remove_devices_fn(struct device *dev, void *unused)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-
-	platform_device_unregister(pdev);
-
-	return 0;
-}
-
-static void platform_device_unregister_children(struct platform_device *pdev)
-{
-	device_for_each_child(&pdev->dev, NULL, platform_remove_devices_fn);
-}
-
-struct ipu_platform_reg {
-	struct ipu_client_platformdata pdata;
-	const char *name;
-};
-
-static const struct ipu_platform_reg client_reg[] = {
-	{
-		.pdata = {
-			.di = 0,
-			.dc = 5,
-			.dp = IPU_DP_FLOW_SYNC_BG,
-			.dma[0] = IPUV3_CHANNEL_MEM_BG_SYNC,
-			.dma[1] = IPUV3_CHANNEL_MEM_FG_SYNC,
-		},
-		.name = "imx-ipuv3-crtc",
-	}, {
-		.pdata = {
-			.di = 1,
-			.dc = 1,
-			.dp = -EINVAL,
-			.dma[0] = IPUV3_CHANNEL_MEM_DC_SYNC,
-			.dma[1] = -EINVAL,
-		},
-		.name = "imx-ipuv3-crtc",
-	},
-};
-
-static DEFINE_MUTEX(ipu_client_id_mutex);
-static int ipu_client_id;
-
-static int ipu_add_client_devices(struct ipu_soc *ipu)
-{
-	struct device *dev = ipu->dev;
-	unsigned i;
-	int id, ret;
-
-	mutex_lock(&ipu_client_id_mutex);
-	id = ipu_client_id;
-	ipu_client_id += ARRAY_SIZE(client_reg);
-	mutex_unlock(&ipu_client_id_mutex);
-
-	for (i = 0; i < ARRAY_SIZE(client_reg); i++) {
-		const struct ipu_platform_reg *reg = &client_reg[i];
-		struct platform_device *pdev;
-
-		pdev = platform_device_register_data(dev, reg->name,
-			id++, &reg->pdata, sizeof(reg->pdata));
-
-		if (IS_ERR(pdev))
-			goto err_register;
-	}
-
-	return 0;
-
-err_register:
-	platform_device_unregister_children(to_platform_device(dev));
-
-	return ret;
-}
-
-
-static int ipu_irq_init(struct ipu_soc *ipu)
-{
-	struct irq_chip_generic *gc;
-	struct irq_chip_type *ct;
-	unsigned long unused[IPU_NUM_IRQS / 32] = {
-		0x400100d0, 0xffe000fd,
-		0x400100d0, 0xffe000fd,
-		0x400100d0, 0xffe000fd,
-		0x4077ffff, 0xffe7e1fd,
-		0x23fffffe, 0x8880fff0,
-		0xf98fe7d0, 0xfff81fff,
-		0x400100d0, 0xffe000fd,
-		0x00000000,
-	};
-	int ret, i;
-
-	ipu->domain = irq_domain_add_linear(ipu->dev->of_node, IPU_NUM_IRQS,
-					    &irq_generic_chip_ops, ipu);
-	if (!ipu->domain) {
-		dev_err(ipu->dev, "failed to add irq domain\n");
-		return -ENODEV;
-	}
-
-	ret = irq_alloc_domain_generic_chips(ipu->domain, 32, 1, "IPU",
-					     handle_level_irq, 0, IRQF_VALID, 0);
-	if (ret < 0) {
-		dev_err(ipu->dev, "failed to alloc generic irq chips\n");
-		irq_domain_remove(ipu->domain);
-		return ret;
-	}
-
-	for (i = 0; i < IPU_NUM_IRQS; i += 32) {
-		gc = irq_get_domain_generic_chip(ipu->domain, i);
-		gc->reg_base = ipu->cm_reg;
-		gc->unused = unused[i / 32];
-		ct = gc->chip_types;
-		ct->chip.irq_ack = irq_gc_ack_set_bit;
-		ct->chip.irq_mask = irq_gc_mask_clr_bit;
-		ct->chip.irq_unmask = irq_gc_mask_set_bit;
-		ct->regs.ack = IPU_INT_STAT(i / 32);
-		ct->regs.mask = IPU_INT_CTRL(i / 32);
-	}
-
-	irq_set_chained_handler(ipu->irq_sync, ipu_irq_handler);
-	irq_set_handler_data(ipu->irq_sync, ipu);
-	irq_set_chained_handler(ipu->irq_err, ipu_err_irq_handler);
-	irq_set_handler_data(ipu->irq_err, ipu);
-
-	return 0;
-}
-
-static void ipu_irq_exit(struct ipu_soc *ipu)
-{
-	int i, irq;
-
-	irq_set_chained_handler(ipu->irq_err, NULL);
-	irq_set_handler_data(ipu->irq_err, NULL);
-	irq_set_chained_handler(ipu->irq_sync, NULL);
-	irq_set_handler_data(ipu->irq_sync, NULL);
-
-	/* TODO: remove irq_domain_generic_chips */
-
-	for (i = 0; i < IPU_NUM_IRQS; i++) {
-		irq = irq_linear_revmap(ipu->domain, i);
-		if (irq)
-			irq_dispose_mapping(irq);
-	}
-
-	irq_domain_remove(ipu->domain);
-}
-
-static int ipu_probe(struct platform_device *pdev)
-{
-	const struct of_device_id *of_id =
-			of_match_device(imx_ipu_dt_ids, &pdev->dev);
-	struct ipu_soc *ipu;
-	struct resource *res;
-	unsigned long ipu_base;
-	int i, ret, irq_sync, irq_err;
-	const struct ipu_devtype *devtype;
-
-	devtype = of_id->data;
-
-	irq_sync = platform_get_irq(pdev, 0);
-	irq_err = platform_get_irq(pdev, 1);
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-	dev_dbg(&pdev->dev, "irq_sync: %d irq_err: %d\n",
-			irq_sync, irq_err);
-
-	if (!res || irq_sync < 0 || irq_err < 0)
-		return -ENODEV;
-
-	ipu_base = res->start;
-
-	ipu = devm_kzalloc(&pdev->dev, sizeof(*ipu), GFP_KERNEL);
-	if (!ipu)
-		return -ENODEV;
-
-	for (i = 0; i < 64; i++)
-		ipu->channel[i].ipu = ipu;
-	ipu->devtype = devtype;
-	ipu->ipu_type = devtype->type;
-
-	spin_lock_init(&ipu->lock);
-	mutex_init(&ipu->channel_lock);
-
-	dev_dbg(&pdev->dev, "cm_reg:   0x%08lx\n",
-			ipu_base + devtype->cm_ofs);
-	dev_dbg(&pdev->dev, "idmac:    0x%08lx\n",
-			ipu_base + devtype->cm_ofs + IPU_CM_IDMAC_REG_OFS);
-	dev_dbg(&pdev->dev, "cpmem:    0x%08lx\n",
-			ipu_base + devtype->cpmem_ofs);
-	dev_dbg(&pdev->dev, "disp0:    0x%08lx\n",
-			ipu_base + devtype->disp0_ofs);
-	dev_dbg(&pdev->dev, "disp1:    0x%08lx\n",
-			ipu_base + devtype->disp1_ofs);
-	dev_dbg(&pdev->dev, "srm:      0x%08lx\n",
-			ipu_base + devtype->srm_ofs);
-	dev_dbg(&pdev->dev, "tpm:      0x%08lx\n",
-			ipu_base + devtype->tpm_ofs);
-	dev_dbg(&pdev->dev, "dc:       0x%08lx\n",
-			ipu_base + devtype->cm_ofs + IPU_CM_DC_REG_OFS);
-	dev_dbg(&pdev->dev, "ic:       0x%08lx\n",
-			ipu_base + devtype->cm_ofs + IPU_CM_IC_REG_OFS);
-	dev_dbg(&pdev->dev, "dmfc:     0x%08lx\n",
-			ipu_base + devtype->cm_ofs + IPU_CM_DMFC_REG_OFS);
-	dev_dbg(&pdev->dev, "vdi:      0x%08lx\n",
-			ipu_base + devtype->vdi_ofs);
-
-	ipu->cm_reg = devm_ioremap(&pdev->dev,
-			ipu_base + devtype->cm_ofs, PAGE_SIZE);
-	ipu->idmac_reg = devm_ioremap(&pdev->dev,
-			ipu_base + devtype->cm_ofs + IPU_CM_IDMAC_REG_OFS,
-			PAGE_SIZE);
-	ipu->cpmem_base = devm_ioremap(&pdev->dev,
-			ipu_base + devtype->cpmem_ofs, PAGE_SIZE);
-
-	if (!ipu->cm_reg || !ipu->idmac_reg || !ipu->cpmem_base)
-		return -ENOMEM;
-
-	ipu->clk = devm_clk_get(&pdev->dev, "bus");
-	if (IS_ERR(ipu->clk)) {
-		ret = PTR_ERR(ipu->clk);
-		dev_err(&pdev->dev, "clk_get failed with %d", ret);
-		return ret;
-	}
-
-	platform_set_drvdata(pdev, ipu);
-
-	ret = clk_prepare_enable(ipu->clk);
-	if (ret) {
-		dev_err(&pdev->dev, "clk_prepare_enable failed: %d\n", ret);
-		return ret;
-	}
-
-	ipu->dev = &pdev->dev;
-	ipu->irq_sync = irq_sync;
-	ipu->irq_err = irq_err;
-
-	ret = ipu_irq_init(ipu);
-	if (ret)
-		goto out_failed_irq;
-
-	ret = device_reset(&pdev->dev);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to reset: %d\n", ret);
-		goto out_failed_reset;
-	}
-	ret = ipu_memory_reset(ipu);
-	if (ret)
-		goto out_failed_reset;
-
-	/* Set MCU_T to divide MCU access window into 2 */
-	ipu_cm_write(ipu, 0x00400000L | (IPU_MCU_T_DEFAULT << 18),
-			IPU_DISP_GEN);
-
-	ret = ipu_submodules_init(ipu, pdev, ipu_base, ipu->clk);
-	if (ret)
-		goto failed_submodules_init;
-
-	ret = ipu_add_client_devices(ipu);
-	if (ret) {
-		dev_err(&pdev->dev, "adding client devices failed with %d\n",
-				ret);
-		goto failed_add_clients;
-	}
-
-	dev_info(&pdev->dev, "%s probed\n", devtype->name);
-
-	return 0;
-
-failed_add_clients:
-	ipu_submodules_exit(ipu);
-failed_submodules_init:
-out_failed_reset:
-	ipu_irq_exit(ipu);
-out_failed_irq:
-	clk_disable_unprepare(ipu->clk);
-	return ret;
-}
-
-static int ipu_remove(struct platform_device *pdev)
-{
-	struct ipu_soc *ipu = platform_get_drvdata(pdev);
-
-	platform_device_unregister_children(pdev);
-	ipu_submodules_exit(ipu);
-	ipu_irq_exit(ipu);
-
-	clk_disable_unprepare(ipu->clk);
-
-	return 0;
-}
-
-static struct platform_driver imx_ipu_driver = {
-	.driver = {
-		.name = "imx-ipuv3",
-		.of_match_table = imx_ipu_dt_ids,
-	},
-	.probe = ipu_probe,
-	.remove = ipu_remove,
-};
-
-module_platform_driver(imx_ipu_driver);
-
-MODULE_ALIAS("platform:imx-ipuv3");
-MODULE_DESCRIPTION("i.MX IPU v3 driver");
-MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-dc.c b/drivers/staging/imx-drm/ipu-v3/ipu-dc.c
deleted file mode 100644
index d5de8bb5c803..000000000000
--- a/drivers/staging/imx-drm/ipu-v3/ipu-dc.c
+++ /dev/null
@@ -1,412 +0,0 @@
-/*
- * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
- * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- */
-
-#include <linux/export.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-
-#include "../imx-drm.h"
-#include "imx-ipu-v3.h"
-#include "ipu-prv.h"
-
-#define DC_MAP_CONF_PTR(n)	(0x108 + ((n) & ~0x1) * 2)
-#define DC_MAP_CONF_VAL(n)	(0x144 + ((n) & ~0x1) * 2)
-
-#define DC_EVT_NF		0
-#define DC_EVT_NL		1
-#define DC_EVT_EOF		2
-#define DC_EVT_NFIELD		3
-#define DC_EVT_EOL		4
-#define DC_EVT_EOFIELD		5
-#define DC_EVT_NEW_ADDR		6
-#define DC_EVT_NEW_CHAN		7
-#define DC_EVT_NEW_DATA		8
-
-#define DC_EVT_NEW_ADDR_W_0	0
-#define DC_EVT_NEW_ADDR_W_1	1
-#define DC_EVT_NEW_CHAN_W_0	2
-#define DC_EVT_NEW_CHAN_W_1	3
-#define DC_EVT_NEW_DATA_W_0	4
-#define DC_EVT_NEW_DATA_W_1	5
-#define DC_EVT_NEW_ADDR_R_0	6
-#define DC_EVT_NEW_ADDR_R_1	7
-#define DC_EVT_NEW_CHAN_R_0	8
-#define DC_EVT_NEW_CHAN_R_1	9
-#define DC_EVT_NEW_DATA_R_0	10
-#define DC_EVT_NEW_DATA_R_1	11
-
-#define DC_WR_CH_CONF		0x0
-#define DC_WR_CH_ADDR		0x4
-#define DC_RL_CH(evt)		(8 + ((evt) & ~0x1) * 2)
-
-#define DC_GEN			0xd4
-#define DC_DISP_CONF1(disp)	(0xd8 + (disp) * 4)
-#define DC_DISP_CONF2(disp)	(0xe8 + (disp) * 4)
-#define DC_STAT			0x1c8
-
-#define WROD(lf)		(0x18 | ((lf) << 1))
-#define WRG			0x01
-#define WCLK			0xc9
-
-#define SYNC_WAVE 0
-#define NULL_WAVE (-1)
-
-#define DC_GEN_SYNC_1_6_SYNC	(2 << 1)
-#define DC_GEN_SYNC_PRIORITY_1	(1 << 7)
-
-#define DC_WR_CH_CONF_WORD_SIZE_8		(0 << 0)
-#define DC_WR_CH_CONF_WORD_SIZE_16		(1 << 0)
-#define DC_WR_CH_CONF_WORD_SIZE_24		(2 << 0)
-#define DC_WR_CH_CONF_WORD_SIZE_32		(3 << 0)
-#define DC_WR_CH_CONF_DISP_ID_PARALLEL(i)	(((i) & 0x1) << 3)
-#define DC_WR_CH_CONF_DISP_ID_SERIAL		(2 << 3)
-#define DC_WR_CH_CONF_DISP_ID_ASYNC		(3 << 4)
-#define DC_WR_CH_CONF_FIELD_MODE		(1 << 9)
-#define DC_WR_CH_CONF_PROG_TYPE_NORMAL		(4 << 5)
-#define DC_WR_CH_CONF_PROG_TYPE_MASK		(7 << 5)
-#define DC_WR_CH_CONF_PROG_DI_ID		(1 << 2)
-#define DC_WR_CH_CONF_PROG_DISP_ID(i)		(((i) & 0x1) << 3)
-
-#define IPU_DC_NUM_CHANNELS	10
-
-struct ipu_dc_priv;
-
-enum ipu_dc_map {
-	IPU_DC_MAP_RGB24,
-	IPU_DC_MAP_RGB565,
-	IPU_DC_MAP_GBR24, /* TVEv2 */
-	IPU_DC_MAP_BGR666,
-	IPU_DC_MAP_BGR24,
-};
-
-struct ipu_dc {
-	/* The display interface number assigned to this dc channel */
-	unsigned int		di;
-	void __iomem		*base;
-	struct ipu_dc_priv	*priv;
-	int			chno;
-	bool			in_use;
-};
-
-struct ipu_dc_priv {
-	void __iomem		*dc_reg;
-	void __iomem		*dc_tmpl_reg;
-	struct ipu_soc		*ipu;
-	struct device		*dev;
-	struct ipu_dc		channels[IPU_DC_NUM_CHANNELS];
-	struct mutex		mutex;
-};
-
-static void dc_link_event(struct ipu_dc *dc, int event, int addr, int priority)
-{
-	u32 reg;
-
-	reg = readl(dc->base + DC_RL_CH(event));
-	reg &= ~(0xffff << (16 * (event & 0x1)));
-	reg |= ((addr << 8) | priority) << (16 * (event & 0x1));
-	writel(reg, dc->base + DC_RL_CH(event));
-}
-
-static void dc_write_tmpl(struct ipu_dc *dc, int word, u32 opcode, u32 operand,
-		int map, int wave, int glue, int sync, int stop)
-{
-	struct ipu_dc_priv *priv = dc->priv;
-	u32 reg1, reg2;
-
-	if (opcode == WCLK) {
-		reg1 = (operand << 20) & 0xfff00000;
-		reg2 = operand >> 12 | opcode << 1 | stop << 9;
-	} else if (opcode == WRG) {
-		reg1 = sync | glue << 4 | ++wave << 11 | ((operand << 15) & 0xffff8000);
-		reg2 = operand >> 17 | opcode << 7 | stop << 9;
-	} else {
-		reg1 = sync | glue << 4 | ++wave << 11 | ++map << 15 | ((operand << 20) & 0xfff00000);
-		reg2 = operand >> 12 | opcode << 4 | stop << 9;
-	}
-	writel(reg1, priv->dc_tmpl_reg + word * 8);
-	writel(reg2, priv->dc_tmpl_reg + word * 8 + 4);
-}
-
-static int ipu_pixfmt_to_map(u32 fmt)
-{
-	switch (fmt) {
-	case V4L2_PIX_FMT_RGB24:
-		return IPU_DC_MAP_RGB24;
-	case V4L2_PIX_FMT_RGB565:
-		return IPU_DC_MAP_RGB565;
-	case IPU_PIX_FMT_GBR24:
-		return IPU_DC_MAP_GBR24;
-	case V4L2_PIX_FMT_BGR666:
-		return IPU_DC_MAP_BGR666;
-	case V4L2_PIX_FMT_BGR24:
-		return IPU_DC_MAP_BGR24;
-	default:
-		return -EINVAL;
-	}
-}
-
-int ipu_dc_init_sync(struct ipu_dc *dc, struct ipu_di *di, bool interlaced,
-		u32 pixel_fmt, u32 width)
-{
-	struct ipu_dc_priv *priv = dc->priv;
-	u32 reg = 0;
-	int map;
-
-	dc->di = ipu_di_get_num(di);
-
-	map = ipu_pixfmt_to_map(pixel_fmt);
-	if (map < 0) {
-		dev_dbg(priv->dev, "IPU_DISP: No MAP\n");
-		return map;
-	}
-
-	if (interlaced) {
-		dc_link_event(dc, DC_EVT_NL, 0, 3);
-		dc_link_event(dc, DC_EVT_EOL, 0, 2);
-		dc_link_event(dc, DC_EVT_NEW_DATA, 0, 1);
-
-		/* Init template microcode */
-		dc_write_tmpl(dc, 0, WROD(0), 0, map, SYNC_WAVE, 0, 8, 1);
-	} else {
-		if (dc->di) {
-			dc_link_event(dc, DC_EVT_NL, 2, 3);
-			dc_link_event(dc, DC_EVT_EOL, 3, 2);
-			dc_link_event(dc, DC_EVT_NEW_DATA, 1, 1);
-			/* Init template microcode */
-			dc_write_tmpl(dc, 2, WROD(0), 0, map, SYNC_WAVE, 8, 5, 1);
-			dc_write_tmpl(dc, 3, WROD(0), 0, map, SYNC_WAVE, 4, 5, 0);
-			dc_write_tmpl(dc, 4, WRG, 0, map, NULL_WAVE, 0, 0, 1);
-			dc_write_tmpl(dc, 1, WROD(0), 0, map, SYNC_WAVE, 0, 5, 1);
-		} else {
-			dc_link_event(dc, DC_EVT_NL, 5, 3);
-			dc_link_event(dc, DC_EVT_EOL, 6, 2);
-			dc_link_event(dc, DC_EVT_NEW_DATA, 8, 1);
-			/* Init template microcode */
-			dc_write_tmpl(dc, 5, WROD(0), 0, map, SYNC_WAVE, 8, 5, 1);
-			dc_write_tmpl(dc, 6, WROD(0), 0, map, SYNC_WAVE, 4, 5, 0);
-			dc_write_tmpl(dc, 7, WRG, 0, map, NULL_WAVE, 0, 0, 1);
-			dc_write_tmpl(dc, 8, WROD(0), 0, map, SYNC_WAVE, 0, 5, 1);
-		}
-	}
-	dc_link_event(dc, DC_EVT_NF, 0, 0);
-	dc_link_event(dc, DC_EVT_NFIELD, 0, 0);
-	dc_link_event(dc, DC_EVT_EOF, 0, 0);
-	dc_link_event(dc, DC_EVT_EOFIELD, 0, 0);
-	dc_link_event(dc, DC_EVT_NEW_CHAN, 0, 0);
-	dc_link_event(dc, DC_EVT_NEW_ADDR, 0, 0);
-
-	reg = readl(dc->base + DC_WR_CH_CONF);
-	if (interlaced)
-		reg |= DC_WR_CH_CONF_FIELD_MODE;
-	else
-		reg &= ~DC_WR_CH_CONF_FIELD_MODE;
-	writel(reg, dc->base + DC_WR_CH_CONF);
-
-	writel(0x0, dc->base + DC_WR_CH_ADDR);
-	writel(width, priv->dc_reg + DC_DISP_CONF2(dc->di));
-
-	ipu_module_enable(priv->ipu, IPU_CONF_DC_EN);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_dc_init_sync);
-
-void ipu_dc_enable_channel(struct ipu_dc *dc)
-{
-	int di;
-	u32 reg;
-
-	di = dc->di;
-
-	reg = readl(dc->base + DC_WR_CH_CONF);
-	reg |= DC_WR_CH_CONF_PROG_TYPE_NORMAL;
-	writel(reg, dc->base + DC_WR_CH_CONF);
-}
-EXPORT_SYMBOL_GPL(ipu_dc_enable_channel);
-
-void ipu_dc_disable_channel(struct ipu_dc *dc)
-{
-	struct ipu_dc_priv *priv = dc->priv;
-	u32 val;
-	int irq = 0, timeout = 50;
-
-	if (dc->chno == 1)
-		irq = IPU_IRQ_DC_FC_1;
-	else if (dc->chno == 5)
-		irq = IPU_IRQ_DP_SF_END;
-	else
-		return;
-
-	/* should wait for the interrupt here */
-	mdelay(50);
-
-	if (dc->di == 0)
-		val = 0x00000002;
-	else
-		val = 0x00000020;
-
-	/* Wait for DC triple buffer to empty */
-	while ((readl(priv->dc_reg + DC_STAT) & val) != val) {
-		usleep_range(2000, 20000);
-		timeout -= 2;
-		if (timeout <= 0)
-			break;
-	}
-
-	val = readl(dc->base + DC_WR_CH_CONF);
-	val &= ~DC_WR_CH_CONF_PROG_TYPE_MASK;
-	writel(val, dc->base + DC_WR_CH_CONF);
-}
-EXPORT_SYMBOL_GPL(ipu_dc_disable_channel);
-
-static void ipu_dc_map_config(struct ipu_dc_priv *priv, enum ipu_dc_map map,
-		int byte_num, int offset, int mask)
-{
-	int ptr = map * 3 + byte_num;
-	u32 reg;
-
-	reg = readl(priv->dc_reg + DC_MAP_CONF_VAL(ptr));
-	reg &= ~(0xffff << (16 * (ptr & 0x1)));
-	reg |= ((offset << 8) | mask) << (16 * (ptr & 0x1));
-	writel(reg, priv->dc_reg + DC_MAP_CONF_VAL(ptr));
-
-	reg = readl(priv->dc_reg + DC_MAP_CONF_PTR(map));
-	reg &= ~(0x1f << ((16 * (map & 0x1)) + (5 * byte_num)));
-	reg |= ptr << ((16 * (map & 0x1)) + (5 * byte_num));
-	writel(reg, priv->dc_reg + DC_MAP_CONF_PTR(map));
-}
-
-static void ipu_dc_map_clear(struct ipu_dc_priv *priv, int map)
-{
-	u32 reg = readl(priv->dc_reg + DC_MAP_CONF_PTR(map));
-
-	writel(reg & ~(0xffff << (16 * (map & 0x1))),
-		     priv->dc_reg + DC_MAP_CONF_PTR(map));
-}
-
-struct ipu_dc *ipu_dc_get(struct ipu_soc *ipu, int channel)
-{
-	struct ipu_dc_priv *priv = ipu->dc_priv;
-	struct ipu_dc *dc;
-
-	if (channel >= IPU_DC_NUM_CHANNELS)
-		return ERR_PTR(-ENODEV);
-
-	dc = &priv->channels[channel];
-
-	mutex_lock(&priv->mutex);
-
-	if (dc->in_use) {
-		mutex_unlock(&priv->mutex);
-		return ERR_PTR(-EBUSY);
-	}
-
-	dc->in_use = true;
-
-	mutex_unlock(&priv->mutex);
-
-	return dc;
-}
-EXPORT_SYMBOL_GPL(ipu_dc_get);
-
-void ipu_dc_put(struct ipu_dc *dc)
-{
-	struct ipu_dc_priv *priv = dc->priv;
-
-	mutex_lock(&priv->mutex);
-	dc->in_use = false;
-	mutex_unlock(&priv->mutex);
-}
-EXPORT_SYMBOL_GPL(ipu_dc_put);
-
-int ipu_dc_init(struct ipu_soc *ipu, struct device *dev,
-		unsigned long base, unsigned long template_base)
-{
-	struct ipu_dc_priv *priv;
-	static int channel_offsets[] = { 0, 0x1c, 0x38, 0x54, 0x58, 0x5c,
-		0x78, 0, 0x94, 0xb4};
-	int i;
-
-	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	mutex_init(&priv->mutex);
-
-	priv->dev = dev;
-	priv->ipu = ipu;
-	priv->dc_reg = devm_ioremap(dev, base, PAGE_SIZE);
-	priv->dc_tmpl_reg = devm_ioremap(dev, template_base, PAGE_SIZE);
-	if (!priv->dc_reg || !priv->dc_tmpl_reg)
-		return -ENOMEM;
-
-	for (i = 0; i < IPU_DC_NUM_CHANNELS; i++) {
-		priv->channels[i].chno = i;
-		priv->channels[i].priv = priv;
-		priv->channels[i].base = priv->dc_reg + channel_offsets[i];
-	}
-
-	writel(DC_WR_CH_CONF_WORD_SIZE_24 | DC_WR_CH_CONF_DISP_ID_PARALLEL(1) |
-			DC_WR_CH_CONF_PROG_DI_ID,
-			priv->channels[1].base + DC_WR_CH_CONF);
-	writel(DC_WR_CH_CONF_WORD_SIZE_24 | DC_WR_CH_CONF_DISP_ID_PARALLEL(0),
-			priv->channels[5].base + DC_WR_CH_CONF);
-
-	writel(DC_GEN_SYNC_1_6_SYNC | DC_GEN_SYNC_PRIORITY_1, priv->dc_reg + DC_GEN);
-
-	ipu->dc_priv = priv;
-
-	dev_dbg(dev, "DC base: 0x%08lx template base: 0x%08lx\n",
-			base, template_base);
-
-	/* rgb24 */
-	ipu_dc_map_clear(priv, IPU_DC_MAP_RGB24);
-	ipu_dc_map_config(priv, IPU_DC_MAP_RGB24, 0, 7, 0xff); /* blue */
-	ipu_dc_map_config(priv, IPU_DC_MAP_RGB24, 1, 15, 0xff); /* green */
-	ipu_dc_map_config(priv, IPU_DC_MAP_RGB24, 2, 23, 0xff); /* red */
-
-	/* rgb565 */
-	ipu_dc_map_clear(priv, IPU_DC_MAP_RGB565);
-	ipu_dc_map_config(priv, IPU_DC_MAP_RGB565, 0, 4, 0xf8); /* blue */
-	ipu_dc_map_config(priv, IPU_DC_MAP_RGB565, 1, 10, 0xfc); /* green */
-	ipu_dc_map_config(priv, IPU_DC_MAP_RGB565, 2, 15, 0xf8); /* red */
-
-	/* gbr24 */
-	ipu_dc_map_clear(priv, IPU_DC_MAP_GBR24);
-	ipu_dc_map_config(priv, IPU_DC_MAP_GBR24, 2, 15, 0xff); /* green */
-	ipu_dc_map_config(priv, IPU_DC_MAP_GBR24, 1, 7, 0xff); /* blue */
-	ipu_dc_map_config(priv, IPU_DC_MAP_GBR24, 0, 23, 0xff); /* red */
-
-	/* bgr666 */
-	ipu_dc_map_clear(priv, IPU_DC_MAP_BGR666);
-	ipu_dc_map_config(priv, IPU_DC_MAP_BGR666, 0, 5, 0xfc); /* blue */
-	ipu_dc_map_config(priv, IPU_DC_MAP_BGR666, 1, 11, 0xfc); /* green */
-	ipu_dc_map_config(priv, IPU_DC_MAP_BGR666, 2, 17, 0xfc); /* red */
-
-	/* bgr24 */
-	ipu_dc_map_clear(priv, IPU_DC_MAP_BGR24);
-	ipu_dc_map_config(priv, IPU_DC_MAP_BGR24, 2, 7, 0xff); /* red */
-	ipu_dc_map_config(priv, IPU_DC_MAP_BGR24, 1, 15, 0xff); /* green */
-	ipu_dc_map_config(priv, IPU_DC_MAP_BGR24, 0, 23, 0xff); /* blue */
-
-	return 0;
-}
-
-void ipu_dc_exit(struct ipu_soc *ipu)
-{
-}
diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-di.c b/drivers/staging/imx-drm/ipu-v3/ipu-di.c
deleted file mode 100644
index 82a9ebad697c..000000000000
--- a/drivers/staging/imx-drm/ipu-v3/ipu-di.c
+++ /dev/null
@@ -1,730 +0,0 @@
-/*
- * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
- * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- */
-#include <linux/export.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/io.h>
-#include <linux/err.h>
-#include <linux/platform_device.h>
-
-#include "imx-ipu-v3.h"
-#include "ipu-prv.h"
-
-struct ipu_di {
-	void __iomem *base;
-	int id;
-	u32 module;
-	struct clk *clk_di;	/* display input clock */
-	struct clk *clk_ipu;	/* IPU bus clock */
-	struct clk *clk_di_pixel; /* resulting pixel clock */
-	bool inuse;
-	struct ipu_soc *ipu;
-};
-
-static DEFINE_MUTEX(di_mutex);
-
-struct di_sync_config {
-	int run_count;
-	int run_src;
-	int offset_count;
-	int offset_src;
-	int repeat_count;
-	int cnt_clr_src;
-	int cnt_polarity_gen_en;
-	int cnt_polarity_clr_src;
-	int cnt_polarity_trigger_src;
-	int cnt_up;
-	int cnt_down;
-};
-
-enum di_pins {
-	DI_PIN11 = 0,
-	DI_PIN12 = 1,
-	DI_PIN13 = 2,
-	DI_PIN14 = 3,
-	DI_PIN15 = 4,
-	DI_PIN16 = 5,
-	DI_PIN17 = 6,
-	DI_PIN_CS = 7,
-
-	DI_PIN_SER_CLK = 0,
-	DI_PIN_SER_RS = 1,
-};
-
-enum di_sync_wave {
-	DI_SYNC_NONE = 0,
-	DI_SYNC_CLK = 1,
-	DI_SYNC_INT_HSYNC = 2,
-	DI_SYNC_HSYNC = 3,
-	DI_SYNC_VSYNC = 4,
-	DI_SYNC_DE = 6,
-};
-
-#define SYNC_WAVE 0
-
-#define DI_GENERAL		0x0000
-#define DI_BS_CLKGEN0		0x0004
-#define DI_BS_CLKGEN1		0x0008
-#define DI_SW_GEN0(gen)		(0x000c + 4 * ((gen) - 1))
-#define DI_SW_GEN1(gen)		(0x0030 + 4 * ((gen) - 1))
-#define DI_STP_REP(gen)		(0x0148 + 4 * (((gen) - 1)/2))
-#define DI_SYNC_AS_GEN		0x0054
-#define DI_DW_GEN(gen)		(0x0058 + 4 * (gen))
-#define DI_DW_SET(gen, set)	(0x0088 + 4 * ((gen) + 0xc * (set)))
-#define DI_SER_CONF		0x015c
-#define DI_SSC			0x0160
-#define DI_POL			0x0164
-#define DI_AW0			0x0168
-#define DI_AW1			0x016c
-#define DI_SCR_CONF		0x0170
-#define DI_STAT			0x0174
-
-#define DI_SW_GEN0_RUN_COUNT(x)			((x) << 19)
-#define DI_SW_GEN0_RUN_SRC(x)			((x) << 16)
-#define DI_SW_GEN0_OFFSET_COUNT(x)		((x) << 3)
-#define DI_SW_GEN0_OFFSET_SRC(x)		((x) << 0)
-
-#define DI_SW_GEN1_CNT_POL_GEN_EN(x)		((x) << 29)
-#define DI_SW_GEN1_CNT_CLR_SRC(x)		((x) << 25)
-#define DI_SW_GEN1_CNT_POL_TRIGGER_SRC(x)	((x) << 12)
-#define DI_SW_GEN1_CNT_POL_CLR_SRC(x)		((x) << 9)
-#define DI_SW_GEN1_CNT_DOWN(x)			((x) << 16)
-#define DI_SW_GEN1_CNT_UP(x)			(x)
-#define DI_SW_GEN1_AUTO_RELOAD			(0x10000000)
-
-#define DI_DW_GEN_ACCESS_SIZE_OFFSET		24
-#define DI_DW_GEN_COMPONENT_SIZE_OFFSET		16
-
-#define DI_GEN_POLARITY_1			(1 << 0)
-#define DI_GEN_POLARITY_2			(1 << 1)
-#define DI_GEN_POLARITY_3			(1 << 2)
-#define DI_GEN_POLARITY_4			(1 << 3)
-#define DI_GEN_POLARITY_5			(1 << 4)
-#define DI_GEN_POLARITY_6			(1 << 5)
-#define DI_GEN_POLARITY_7			(1 << 6)
-#define DI_GEN_POLARITY_8			(1 << 7)
-#define DI_GEN_POLARITY_DISP_CLK		(1 << 17)
-#define DI_GEN_DI_CLK_EXT			(1 << 20)
-#define DI_GEN_DI_VSYNC_EXT			(1 << 21)
-
-#define DI_POL_DRDY_DATA_POLARITY		(1 << 7)
-#define DI_POL_DRDY_POLARITY_15			(1 << 4)
-
-#define DI_VSYNC_SEL_OFFSET			13
-
-static inline u32 ipu_di_read(struct ipu_di *di, unsigned offset)
-{
-	return readl(di->base + offset);
-}
-
-static inline void ipu_di_write(struct ipu_di *di, u32 value, unsigned offset)
-{
-	writel(value, di->base + offset);
-}
-
-static void ipu_di_data_wave_config(struct ipu_di *di,
-				     int wave_gen,
-				     int access_size, int component_size)
-{
-	u32 reg;
-	reg = (access_size << DI_DW_GEN_ACCESS_SIZE_OFFSET) |
-	    (component_size << DI_DW_GEN_COMPONENT_SIZE_OFFSET);
-	ipu_di_write(di, reg, DI_DW_GEN(wave_gen));
-}
-
-static void ipu_di_data_pin_config(struct ipu_di *di, int wave_gen, int di_pin,
-		int set, int up, int down)
-{
-	u32 reg;
-
-	reg = ipu_di_read(di, DI_DW_GEN(wave_gen));
-	reg &= ~(0x3 << (di_pin * 2));
-	reg |= set << (di_pin * 2);
-	ipu_di_write(di, reg, DI_DW_GEN(wave_gen));
-
-	ipu_di_write(di, (down << 16) | up, DI_DW_SET(wave_gen, set));
-}
-
-static void ipu_di_sync_config(struct ipu_di *di, struct di_sync_config *config,
-		int start, int count)
-{
-	u32 reg;
-	int i;
-
-	for (i = 0; i < count; i++) {
-		struct di_sync_config *c = &config[i];
-		int wave_gen = start + i + 1;
-
-		if ((c->run_count >= 0x1000) || (c->offset_count >= 0x1000) ||
-				(c->repeat_count >= 0x1000) ||
-				(c->cnt_up >= 0x400) ||
-				(c->cnt_down >= 0x400)) {
-			dev_err(di->ipu->dev, "DI%d counters out of range.\n",
-					di->id);
-			return;
-		}
-
-		reg = DI_SW_GEN0_RUN_COUNT(c->run_count) |
-			DI_SW_GEN0_RUN_SRC(c->run_src) |
-			DI_SW_GEN0_OFFSET_COUNT(c->offset_count) |
-			DI_SW_GEN0_OFFSET_SRC(c->offset_src);
-		ipu_di_write(di, reg, DI_SW_GEN0(wave_gen));
-
-		reg = DI_SW_GEN1_CNT_POL_GEN_EN(c->cnt_polarity_gen_en) |
-			DI_SW_GEN1_CNT_CLR_SRC(c->cnt_clr_src) |
-			DI_SW_GEN1_CNT_POL_TRIGGER_SRC(
-					c->cnt_polarity_trigger_src) |
-			DI_SW_GEN1_CNT_POL_CLR_SRC(c->cnt_polarity_clr_src) |
-			DI_SW_GEN1_CNT_DOWN(c->cnt_down) |
-			DI_SW_GEN1_CNT_UP(c->cnt_up);
-
-		/* Enable auto reload */
-		if (c->repeat_count == 0)
-			reg |= DI_SW_GEN1_AUTO_RELOAD;
-
-		ipu_di_write(di, reg, DI_SW_GEN1(wave_gen));
-
-		reg = ipu_di_read(di, DI_STP_REP(wave_gen));
-		reg &= ~(0xffff << (16 * ((wave_gen - 1) & 0x1)));
-		reg |= c->repeat_count << (16 * ((wave_gen - 1) & 0x1));
-		ipu_di_write(di, reg, DI_STP_REP(wave_gen));
-	}
-}
-
-static void ipu_di_sync_config_interlaced(struct ipu_di *di,
-		struct ipu_di_signal_cfg *sig)
-{
-	u32 h_total = sig->width + sig->h_sync_width +
-		sig->h_start_width + sig->h_end_width;
-	u32 v_total = sig->height + sig->v_sync_width +
-		sig->v_start_width + sig->v_end_width;
-	u32 reg;
-	struct di_sync_config cfg[] = {
-		{
-			.run_count = h_total / 2 - 1,
-			.run_src = DI_SYNC_CLK,
-		}, {
-			.run_count = h_total - 11,
-			.run_src = DI_SYNC_CLK,
-			.cnt_down = 4,
-		}, {
-			.run_count = v_total * 2 - 1,
-			.run_src = DI_SYNC_INT_HSYNC,
-			.offset_count = 1,
-			.offset_src = DI_SYNC_INT_HSYNC,
-			.cnt_down = 4,
-		}, {
-			.run_count = v_total / 2 - 1,
-			.run_src = DI_SYNC_HSYNC,
-			.offset_count = sig->v_start_width,
-			.offset_src = DI_SYNC_HSYNC,
-			.repeat_count = 2,
-			.cnt_clr_src = DI_SYNC_VSYNC,
-		}, {
-			.run_src = DI_SYNC_HSYNC,
-			.repeat_count = sig->height / 2,
-			.cnt_clr_src = 4,
-		}, {
-			.run_count = v_total - 1,
-			.run_src = DI_SYNC_HSYNC,
-		}, {
-			.run_count = v_total / 2 - 1,
-			.run_src = DI_SYNC_HSYNC,
-			.offset_count = 9,
-			.offset_src = DI_SYNC_HSYNC,
-			.repeat_count = 2,
-			.cnt_clr_src = DI_SYNC_VSYNC,
-		}, {
-			.run_src = DI_SYNC_CLK,
-			.offset_count = sig->h_start_width,
-			.offset_src = DI_SYNC_CLK,
-			.repeat_count = sig->width,
-			.cnt_clr_src = 5,
-		}, {
-			.run_count = v_total - 1,
-			.run_src = DI_SYNC_INT_HSYNC,
-			.offset_count = v_total / 2,
-			.offset_src = DI_SYNC_INT_HSYNC,
-			.cnt_clr_src = DI_SYNC_HSYNC,
-			.cnt_down = 4,
-		}
-	};
-
-	ipu_di_sync_config(di, cfg, 0, ARRAY_SIZE(cfg));
-
-	/* set gentime select and tag sel */
-	reg = ipu_di_read(di, DI_SW_GEN1(9));
-	reg &= 0x1FFFFFFF;
-	reg |= (3 - 1) << 29 | 0x00008000;
-	ipu_di_write(di, reg, DI_SW_GEN1(9));
-
-	ipu_di_write(di, v_total / 2 - 1, DI_SCR_CONF);
-}
-
-static void ipu_di_sync_config_noninterlaced(struct ipu_di *di,
-		struct ipu_di_signal_cfg *sig, int div)
-{
-	u32 h_total = sig->width + sig->h_sync_width + sig->h_start_width +
-		sig->h_end_width;
-	u32 v_total = sig->height + sig->v_sync_width + sig->v_start_width +
-		sig->v_end_width;
-	struct di_sync_config cfg[] = {
-		{
-			/* 1: INT_HSYNC */
-			.run_count = h_total - 1,
-			.run_src = DI_SYNC_CLK,
-		} , {
-			/* PIN2: HSYNC */
-			.run_count = h_total - 1,
-			.run_src = DI_SYNC_CLK,
-			.offset_count = div * sig->v_to_h_sync,
-			.offset_src = DI_SYNC_CLK,
-			.cnt_polarity_gen_en = 1,
-			.cnt_polarity_trigger_src = DI_SYNC_CLK,
-			.cnt_down = sig->h_sync_width * 2,
-		} , {
-			/* PIN3: VSYNC */
-			.run_count = v_total - 1,
-			.run_src = DI_SYNC_INT_HSYNC,
-			.cnt_polarity_gen_en = 1,
-			.cnt_polarity_trigger_src = DI_SYNC_INT_HSYNC,
-			.cnt_down = sig->v_sync_width * 2,
-		} , {
-			/* 4: Line Active */
-			.run_src = DI_SYNC_HSYNC,
-			.offset_count = sig->v_sync_width + sig->v_start_width,
-			.offset_src = DI_SYNC_HSYNC,
-			.repeat_count = sig->height,
-			.cnt_clr_src = DI_SYNC_VSYNC,
-		} , {
-			/* 5: Pixel Active, referenced by DC */
-			.run_src = DI_SYNC_CLK,
-			.offset_count = sig->h_sync_width + sig->h_start_width,
-			.offset_src = DI_SYNC_CLK,
-			.repeat_count = sig->width,
-			.cnt_clr_src = 5, /* Line Active */
-		} , {
-			/* unused */
-		} , {
-			/* unused */
-		} , {
-			/* unused */
-		} , {
-			/* unused */
-		},
-	};
-	/* can't use #7 and #8 for line active and pixel active counters */
-	struct di_sync_config cfg_vga[] = {
-		{
-			/* 1: INT_HSYNC */
-			.run_count = h_total - 1,
-			.run_src = DI_SYNC_CLK,
-		} , {
-			/* 2: VSYNC */
-			.run_count = v_total - 1,
-			.run_src = DI_SYNC_INT_HSYNC,
-		} , {
-			/* 3: Line Active */
-			.run_src = DI_SYNC_INT_HSYNC,
-			.offset_count = sig->v_sync_width + sig->v_start_width,
-			.offset_src = DI_SYNC_INT_HSYNC,
-			.repeat_count = sig->height,
-			.cnt_clr_src = 3 /* VSYNC */,
-		} , {
-			/* PIN4: HSYNC for VGA via TVEv2 on TQ MBa53 */
-			.run_count = h_total - 1,
-			.run_src = DI_SYNC_CLK,
-			.offset_count = div * sig->v_to_h_sync + 18, /* magic value from Freescale TVE driver */
-			.offset_src = DI_SYNC_CLK,
-			.cnt_polarity_gen_en = 1,
-			.cnt_polarity_trigger_src = DI_SYNC_CLK,
-			.cnt_down = sig->h_sync_width * 2,
-		} , {
-			/* 5: Pixel Active signal to DC */
-			.run_src = DI_SYNC_CLK,
-			.offset_count = sig->h_sync_width + sig->h_start_width,
-			.offset_src = DI_SYNC_CLK,
-			.repeat_count = sig->width,
-			.cnt_clr_src = 4, /* Line Active */
-		} , {
-			/* PIN6: VSYNC for VGA via TVEv2 on TQ MBa53 */
-			.run_count = v_total - 1,
-			.run_src = DI_SYNC_INT_HSYNC,
-			.offset_count = 1, /* magic value from Freescale TVE driver */
-			.offset_src = DI_SYNC_INT_HSYNC,
-			.cnt_polarity_gen_en = 1,
-			.cnt_polarity_trigger_src = DI_SYNC_INT_HSYNC,
-			.cnt_down = sig->v_sync_width * 2,
-		} , {
-			/* PIN4: HSYNC for VGA via TVEv2 on i.MX53-QSB */
-			.run_count = h_total - 1,
-			.run_src = DI_SYNC_CLK,
-			.offset_count = div * sig->v_to_h_sync + 18, /* magic value from Freescale TVE driver */
-			.offset_src = DI_SYNC_CLK,
-			.cnt_polarity_gen_en = 1,
-			.cnt_polarity_trigger_src = DI_SYNC_CLK,
-			.cnt_down = sig->h_sync_width * 2,
-		} , {
-			/* PIN6: VSYNC for VGA via TVEv2 on i.MX53-QSB */
-			.run_count = v_total - 1,
-			.run_src = DI_SYNC_INT_HSYNC,
-			.offset_count = 1, /* magic value from Freescale TVE driver */
-			.offset_src = DI_SYNC_INT_HSYNC,
-			.cnt_polarity_gen_en = 1,
-			.cnt_polarity_trigger_src = DI_SYNC_INT_HSYNC,
-			.cnt_down = sig->v_sync_width * 2,
-		} , {
-			/* unused */
-		},
-	};
-
-	ipu_di_write(di, v_total - 1, DI_SCR_CONF);
-	if (sig->hsync_pin == 2 && sig->vsync_pin == 3)
-		ipu_di_sync_config(di, cfg, 0, ARRAY_SIZE(cfg));
-	else
-		ipu_di_sync_config(di, cfg_vga, 0, ARRAY_SIZE(cfg_vga));
-}
-
-static void ipu_di_config_clock(struct ipu_di *di,
-	const struct ipu_di_signal_cfg *sig)
-{
-	struct clk *clk;
-	unsigned clkgen0;
-	uint32_t val;
-
-	if (sig->clkflags & IPU_DI_CLKMODE_EXT) {
-		/*
-		 * CLKMODE_EXT means we must use the DI clock: this is
-		 * needed for things like LVDS which needs to feed the
-		 * DI and LDB with the same pixel clock.
-		 */
-		clk = di->clk_di;
-
-		if (sig->clkflags & IPU_DI_CLKMODE_SYNC) {
-			/*
-			 * CLKMODE_SYNC means that we want the DI to be
-			 * clocked at the same rate as the parent clock.
-			 * This is needed (eg) for LDB which needs to be
-			 * fed with the same pixel clock.  We assume that
-			 * the LDB clock has already been set correctly.
-			 */
-			clkgen0 = 1 << 4;
-		} else {
-			/*
-			 * We can use the divider.  We should really have
-			 * a flag here indicating whether the bridge can
-			 * cope with a fractional divider or not.  For the
-			 * time being, let's go for simplicitly and
-			 * reliability.
-			 */
-			unsigned long in_rate;
-			unsigned div;
-
-			clk_set_rate(clk, sig->pixelclock);
-
-			in_rate = clk_get_rate(clk);
-			div = (in_rate + sig->pixelclock / 2) / sig->pixelclock;
-			if (div == 0)
-				div = 1;
-
-			clkgen0 = div << 4;
-		}
-	} else {
-		/*
-		 * For other interfaces, we can arbitarily select between
-		 * the DI specific clock and the internal IPU clock.  See
-		 * DI_GENERAL bit 20.  We select the IPU clock if it can
-		 * give us a clock rate within 1% of the requested frequency,
-		 * otherwise we use the DI clock.
-		 */
-		unsigned long rate, clkrate;
-		unsigned div, error;
-
-		clkrate = clk_get_rate(di->clk_ipu);
-		div = (clkrate + sig->pixelclock / 2) / sig->pixelclock;
-		rate = clkrate / div;
-
-		error = rate / (sig->pixelclock / 1000);
-
-		dev_dbg(di->ipu->dev, "  IPU clock can give %lu with divider %u, error %d.%u%%\n",
-			rate, div, (signed)(error - 1000) / 10, error % 10);
-
-		/* Allow a 1% error */
-		if (error < 1010 && error >= 990) {
-			clk = di->clk_ipu;
-
-			clkgen0 = div << 4;
-		} else {
-			unsigned long in_rate;
-			unsigned div;
-
-			clk = di->clk_di;
-
-			clk_set_rate(clk, sig->pixelclock);
-
-			in_rate = clk_get_rate(clk);
-			div = (in_rate + sig->pixelclock / 2) / sig->pixelclock;
-			if (div == 0)
-				div = 1;
-
-			clkgen0 = div << 4;
-		}
-	}
-
-	di->clk_di_pixel = clk;
-
-	/* Set the divider */
-	ipu_di_write(di, clkgen0, DI_BS_CLKGEN0);
-
-	/*
-	 * Set the high/low periods.  Bits 24:16 give us the falling edge,
-	 * and bits 8:0 give the rising edge.  LSB is fraction, and is
-	 * based on the divider above.  We want a 50% duty cycle, so set
-	 * the falling edge to be half the divider.
-	 */
-	ipu_di_write(di, (clkgen0 >> 4) << 16, DI_BS_CLKGEN1);
-
-	/* Finally select the input clock */
-	val = ipu_di_read(di, DI_GENERAL) & ~DI_GEN_DI_CLK_EXT;
-	if (clk == di->clk_di)
-		val |= DI_GEN_DI_CLK_EXT;
-	ipu_di_write(di, val, DI_GENERAL);
-
-	dev_dbg(di->ipu->dev, "Want %luHz IPU %luHz DI %luHz using %s, %luHz\n",
-		sig->pixelclock,
-		clk_get_rate(di->clk_ipu),
-		clk_get_rate(di->clk_di),
-		clk == di->clk_di ? "DI" : "IPU",
-		clk_get_rate(di->clk_di_pixel) / (clkgen0 >> 4));
-}
-
-int ipu_di_init_sync_panel(struct ipu_di *di, struct ipu_di_signal_cfg *sig)
-{
-	u32 reg;
-	u32 di_gen, vsync_cnt;
-	u32 div;
-	u32 h_total, v_total;
-
-	dev_dbg(di->ipu->dev, "disp %d: panel size = %d x %d\n",
-		di->id, sig->width, sig->height);
-
-	if ((sig->v_sync_width == 0) || (sig->h_sync_width == 0))
-		return -EINVAL;
-
-	h_total = sig->width + sig->h_sync_width + sig->h_start_width +
-		sig->h_end_width;
-	v_total = sig->height + sig->v_sync_width + sig->v_start_width +
-		sig->v_end_width;
-
-	dev_dbg(di->ipu->dev, "Clocks: IPU %luHz DI %luHz Needed %luHz\n",
-		clk_get_rate(di->clk_ipu),
-		clk_get_rate(di->clk_di),
-		sig->pixelclock);
-
-	mutex_lock(&di_mutex);
-
-	ipu_di_config_clock(di, sig);
-
-	div = ipu_di_read(di, DI_BS_CLKGEN0) & 0xfff;
-	div = div / 16;		/* Now divider is integer portion */
-
-	/* Setup pixel clock timing */
-	/* Down time is half of period */
-	ipu_di_write(di, (div << 16), DI_BS_CLKGEN1);
-
-	ipu_di_data_wave_config(di, SYNC_WAVE, div - 1, div - 1);
-	ipu_di_data_pin_config(di, SYNC_WAVE, DI_PIN15, 3, 0, div * 2);
-
-	di_gen = ipu_di_read(di, DI_GENERAL) & DI_GEN_DI_CLK_EXT;
-	di_gen |= DI_GEN_DI_VSYNC_EXT;
-
-	if (sig->interlaced) {
-		ipu_di_sync_config_interlaced(di, sig);
-
-		/* set y_sel = 1 */
-		di_gen |= 0x10000000;
-		di_gen |= DI_GEN_POLARITY_5;
-		di_gen |= DI_GEN_POLARITY_8;
-
-		vsync_cnt = 7;
-
-		if (sig->Hsync_pol)
-			di_gen |= DI_GEN_POLARITY_3;
-		if (sig->Vsync_pol)
-			di_gen |= DI_GEN_POLARITY_2;
-	} else {
-		ipu_di_sync_config_noninterlaced(di, sig, div);
-
-		vsync_cnt = 3;
-		if (di->id == 1)
-			/*
-			 * TODO: change only for TVEv2, parallel display
-			 * uses pin 2 / 3
-			 */
-			if (!(sig->hsync_pin == 2 && sig->vsync_pin == 3))
-				vsync_cnt = 6;
-
-		if (sig->Hsync_pol) {
-			if (sig->hsync_pin == 2)
-				di_gen |= DI_GEN_POLARITY_2;
-			else if (sig->hsync_pin == 4)
-				di_gen |= DI_GEN_POLARITY_4;
-			else if (sig->hsync_pin == 7)
-				di_gen |= DI_GEN_POLARITY_7;
-		}
-		if (sig->Vsync_pol) {
-			if (sig->vsync_pin == 3)
-				di_gen |= DI_GEN_POLARITY_3;
-			else if (sig->vsync_pin == 6)
-				di_gen |= DI_GEN_POLARITY_6;
-			else if (sig->vsync_pin == 8)
-				di_gen |= DI_GEN_POLARITY_8;
-		}
-	}
-
-	if (!sig->clk_pol)
-		di_gen |= DI_GEN_POLARITY_DISP_CLK;
-
-	ipu_di_write(di, di_gen, DI_GENERAL);
-
-	ipu_di_write(di, (--vsync_cnt << DI_VSYNC_SEL_OFFSET) | 0x00000002,
-		     DI_SYNC_AS_GEN);
-
-	reg = ipu_di_read(di, DI_POL);
-	reg &= ~(DI_POL_DRDY_DATA_POLARITY | DI_POL_DRDY_POLARITY_15);
-
-	if (sig->enable_pol)
-		reg |= DI_POL_DRDY_POLARITY_15;
-	if (sig->data_pol)
-		reg |= DI_POL_DRDY_DATA_POLARITY;
-
-	ipu_di_write(di, reg, DI_POL);
-
-	mutex_unlock(&di_mutex);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_di_init_sync_panel);
-
-int ipu_di_enable(struct ipu_di *di)
-{
-	int ret;
-
-	WARN_ON(IS_ERR(di->clk_di_pixel));
-
-	ret = clk_prepare_enable(di->clk_di_pixel);
-	if (ret)
-		return ret;
-
-	ipu_module_enable(di->ipu, di->module);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_di_enable);
-
-int ipu_di_disable(struct ipu_di *di)
-{
-	WARN_ON(IS_ERR(di->clk_di_pixel));
-
-	ipu_module_disable(di->ipu, di->module);
-
-	clk_disable_unprepare(di->clk_di_pixel);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_di_disable);
-
-int ipu_di_get_num(struct ipu_di *di)
-{
-	return di->id;
-}
-EXPORT_SYMBOL_GPL(ipu_di_get_num);
-
-static DEFINE_MUTEX(ipu_di_lock);
-
-struct ipu_di *ipu_di_get(struct ipu_soc *ipu, int disp)
-{
-	struct ipu_di *di;
-
-	if (disp > 1)
-		return ERR_PTR(-EINVAL);
-
-	di = ipu->di_priv[disp];
-
-	mutex_lock(&ipu_di_lock);
-
-	if (di->inuse) {
-		di = ERR_PTR(-EBUSY);
-		goto out;
-	}
-
-	di->inuse = true;
-out:
-	mutex_unlock(&ipu_di_lock);
-
-	return di;
-}
-EXPORT_SYMBOL_GPL(ipu_di_get);
-
-void ipu_di_put(struct ipu_di *di)
-{
-	mutex_lock(&ipu_di_lock);
-
-	di->inuse = false;
-
-	mutex_unlock(&ipu_di_lock);
-}
-EXPORT_SYMBOL_GPL(ipu_di_put);
-
-int ipu_di_init(struct ipu_soc *ipu, struct device *dev, int id,
-		unsigned long base,
-		u32 module, struct clk *clk_ipu)
-{
-	struct ipu_di *di;
-
-	if (id > 1)
-		return -ENODEV;
-
-	di = devm_kzalloc(dev, sizeof(*di), GFP_KERNEL);
-	if (!di)
-		return -ENOMEM;
-
-	ipu->di_priv[id] = di;
-
-	di->clk_di = devm_clk_get(dev, id ? "di1" : "di0");
-	if (IS_ERR(di->clk_di))
-		return PTR_ERR(di->clk_di);
-
-	di->module = module;
-	di->id = id;
-	di->clk_ipu = clk_ipu;
-	di->base = devm_ioremap(dev, base, PAGE_SIZE);
-	if (!di->base)
-		return -ENOMEM;
-
-	ipu_di_write(di, 0x10, DI_BS_CLKGEN0);
-
-	dev_dbg(dev, "DI%d base: 0x%08lx remapped to %p\n",
-			id, base, di->base);
-	di->inuse = false;
-	di->ipu = ipu;
-
-	return 0;
-}
-
-void ipu_di_exit(struct ipu_soc *ipu, int id)
-{
-}
diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c b/drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c
deleted file mode 100644
index 45213017fa4b..000000000000
--- a/drivers/staging/imx-drm/ipu-v3/ipu-dmfc.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
- * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- */
-#include <linux/export.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/io.h>
-
-#include "imx-ipu-v3.h"
-#include "ipu-prv.h"
-
-#define DMFC_RD_CHAN		0x0000
-#define DMFC_WR_CHAN		0x0004
-#define DMFC_WR_CHAN_DEF	0x0008
-#define DMFC_DP_CHAN		0x000c
-#define DMFC_DP_CHAN_DEF	0x0010
-#define DMFC_GENERAL1		0x0014
-#define DMFC_GENERAL2		0x0018
-#define DMFC_IC_CTRL		0x001c
-#define DMFC_STAT		0x0020
-
-#define DMFC_WR_CHAN_1_28		0
-#define DMFC_WR_CHAN_2_41		8
-#define DMFC_WR_CHAN_1C_42		16
-#define DMFC_WR_CHAN_2C_43		24
-
-#define DMFC_DP_CHAN_5B_23		0
-#define DMFC_DP_CHAN_5F_27		8
-#define DMFC_DP_CHAN_6B_24		16
-#define DMFC_DP_CHAN_6F_29		24
-
-#define DMFC_FIFO_SIZE_64		(3 << 3)
-#define DMFC_FIFO_SIZE_128		(2 << 3)
-#define DMFC_FIFO_SIZE_256		(1 << 3)
-#define DMFC_FIFO_SIZE_512		(0 << 3)
-
-#define DMFC_SEGMENT(x)			((x & 0x7) << 0)
-#define DMFC_BURSTSIZE_128		(0 << 6)
-#define DMFC_BURSTSIZE_64		(1 << 6)
-#define DMFC_BURSTSIZE_32		(2 << 6)
-#define DMFC_BURSTSIZE_16		(3 << 6)
-
-struct dmfc_channel_data {
-	int		ipu_channel;
-	unsigned long	channel_reg;
-	unsigned long	shift;
-	unsigned	eot_shift;
-	unsigned	max_fifo_lines;
-};
-
-static const struct dmfc_channel_data dmfcdata[] = {
-	{
-		.ipu_channel	= IPUV3_CHANNEL_MEM_BG_SYNC,
-		.channel_reg	= DMFC_DP_CHAN,
-		.shift		= DMFC_DP_CHAN_5B_23,
-		.eot_shift	= 20,
-		.max_fifo_lines	= 3,
-	}, {
-		.ipu_channel	= 24,
-		.channel_reg	= DMFC_DP_CHAN,
-		.shift		= DMFC_DP_CHAN_6B_24,
-		.eot_shift	= 22,
-		.max_fifo_lines	= 1,
-	}, {
-		.ipu_channel	= IPUV3_CHANNEL_MEM_FG_SYNC,
-		.channel_reg	= DMFC_DP_CHAN,
-		.shift		= DMFC_DP_CHAN_5F_27,
-		.eot_shift	= 21,
-		.max_fifo_lines	= 2,
-	}, {
-		.ipu_channel	= IPUV3_CHANNEL_MEM_DC_SYNC,
-		.channel_reg	= DMFC_WR_CHAN,
-		.shift		= DMFC_WR_CHAN_1_28,
-		.eot_shift	= 16,
-		.max_fifo_lines	= 2,
-	}, {
-		.ipu_channel	= 29,
-		.channel_reg	= DMFC_DP_CHAN,
-		.shift		= DMFC_DP_CHAN_6F_29,
-		.eot_shift	= 23,
-		.max_fifo_lines	= 1,
-	},
-};
-
-#define DMFC_NUM_CHANNELS	ARRAY_SIZE(dmfcdata)
-
-struct ipu_dmfc_priv;
-
-struct dmfc_channel {
-	unsigned			slots;
-	unsigned			slotmask;
-	unsigned			segment;
-	int				burstsize;
-	struct ipu_soc			*ipu;
-	struct ipu_dmfc_priv		*priv;
-	const struct dmfc_channel_data	*data;
-};
-
-struct ipu_dmfc_priv {
-	struct ipu_soc *ipu;
-	struct device *dev;
-	struct dmfc_channel channels[DMFC_NUM_CHANNELS];
-	struct mutex mutex;
-	unsigned long bandwidth_per_slot;
-	void __iomem *base;
-	int use_count;
-};
-
-int ipu_dmfc_enable_channel(struct dmfc_channel *dmfc)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-	mutex_lock(&priv->mutex);
-
-	if (!priv->use_count)
-		ipu_module_enable(priv->ipu, IPU_CONF_DMFC_EN);
-
-	priv->use_count++;
-
-	mutex_unlock(&priv->mutex);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_dmfc_enable_channel);
-
-void ipu_dmfc_disable_channel(struct dmfc_channel *dmfc)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-
-	mutex_lock(&priv->mutex);
-
-	priv->use_count--;
-
-	if (!priv->use_count)
-		ipu_module_disable(priv->ipu, IPU_CONF_DMFC_EN);
-
-	if (priv->use_count < 0)
-		priv->use_count = 0;
-
-	mutex_unlock(&priv->mutex);
-}
-EXPORT_SYMBOL_GPL(ipu_dmfc_disable_channel);
-
-static int ipu_dmfc_setup_channel(struct dmfc_channel *dmfc, int slots,
-		int segment, int burstsize)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-	u32 val, field;
-
-	dev_dbg(priv->dev,
-			"dmfc: using %d slots starting from segment %d for IPU channel %d\n",
-			slots, segment, dmfc->data->ipu_channel);
-
-	switch (slots) {
-	case 1:
-		field = DMFC_FIFO_SIZE_64;
-		break;
-	case 2:
-		field = DMFC_FIFO_SIZE_128;
-		break;
-	case 4:
-		field = DMFC_FIFO_SIZE_256;
-		break;
-	case 8:
-		field = DMFC_FIFO_SIZE_512;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	switch (burstsize) {
-	case 16:
-		field |= DMFC_BURSTSIZE_16;
-		break;
-	case 32:
-		field |= DMFC_BURSTSIZE_32;
-		break;
-	case 64:
-		field |= DMFC_BURSTSIZE_64;
-		break;
-	case 128:
-		field |= DMFC_BURSTSIZE_128;
-		break;
-	}
-
-	field |= DMFC_SEGMENT(segment);
-
-	val = readl(priv->base + dmfc->data->channel_reg);
-
-	val &= ~(0xff << dmfc->data->shift);
-	val |= field << dmfc->data->shift;
-
-	writel(val, priv->base + dmfc->data->channel_reg);
-
-	dmfc->slots = slots;
-	dmfc->segment = segment;
-	dmfc->burstsize = burstsize;
-	dmfc->slotmask = ((1 << slots) - 1) << segment;
-
-	return 0;
-}
-
-static int dmfc_bandwidth_to_slots(struct ipu_dmfc_priv *priv,
-		unsigned long bandwidth)
-{
-	int slots = 1;
-
-	while (slots * priv->bandwidth_per_slot < bandwidth)
-		slots *= 2;
-
-	return slots;
-}
-
-static int dmfc_find_slots(struct ipu_dmfc_priv *priv, int slots)
-{
-	unsigned slotmask_need, slotmask_used = 0;
-	int i, segment = 0;
-
-	slotmask_need = (1 << slots) - 1;
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++)
-		slotmask_used |= priv->channels[i].slotmask;
-
-	while (slotmask_need <= 0xff) {
-		if (!(slotmask_used & slotmask_need))
-			return segment;
-
-		slotmask_need <<= 1;
-		segment++;
-	}
-
-	return -EBUSY;
-}
-
-void ipu_dmfc_free_bandwidth(struct dmfc_channel *dmfc)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-	int i;
-
-	dev_dbg(priv->dev, "dmfc: freeing %d slots starting from segment %d\n",
-			dmfc->slots, dmfc->segment);
-
-	mutex_lock(&priv->mutex);
-
-	if (!dmfc->slots)
-		goto out;
-
-	dmfc->slotmask = 0;
-	dmfc->slots = 0;
-	dmfc->segment = 0;
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++)
-		priv->channels[i].slotmask = 0;
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++) {
-		if (priv->channels[i].slots > 0) {
-			priv->channels[i].segment =
-				dmfc_find_slots(priv, priv->channels[i].slots);
-			priv->channels[i].slotmask =
-				((1 << priv->channels[i].slots) - 1) <<
-				priv->channels[i].segment;
-		}
-	}
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++) {
-		if (priv->channels[i].slots > 0)
-			ipu_dmfc_setup_channel(&priv->channels[i],
-					priv->channels[i].slots,
-					priv->channels[i].segment,
-					priv->channels[i].burstsize);
-	}
-out:
-	mutex_unlock(&priv->mutex);
-}
-EXPORT_SYMBOL_GPL(ipu_dmfc_free_bandwidth);
-
-int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc,
-		unsigned long bandwidth_pixel_per_second, int burstsize)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-	int slots = dmfc_bandwidth_to_slots(priv, bandwidth_pixel_per_second);
-	int segment = -1, ret = 0;
-
-	dev_dbg(priv->dev, "dmfc: trying to allocate %ldMpixel/s for IPU channel %d\n",
-			bandwidth_pixel_per_second / 1000000,
-			dmfc->data->ipu_channel);
-
-	ipu_dmfc_free_bandwidth(dmfc);
-
-	mutex_lock(&priv->mutex);
-
-	if (slots > 8) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	/* For the MEM_BG channel, first try to allocate twice the slots */
-	if (dmfc->data->ipu_channel == IPUV3_CHANNEL_MEM_BG_SYNC)
-		segment = dmfc_find_slots(priv, slots * 2);
-	else if (slots < 2)
-		/* Always allocate at least 128*4 bytes (2 slots) */
-		slots = 2;
-
-	if (segment >= 0)
-		slots *= 2;
-	else
-		segment = dmfc_find_slots(priv, slots);
-	if (segment < 0) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	ipu_dmfc_setup_channel(dmfc, slots, segment, burstsize);
-
-out:
-	mutex_unlock(&priv->mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(ipu_dmfc_alloc_bandwidth);
-
-int ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-	u32 dmfc_gen1;
-
-	dmfc_gen1 = readl(priv->base + DMFC_GENERAL1);
-
-	if ((dmfc->slots * 64 * 4) / width > dmfc->data->max_fifo_lines)
-		dmfc_gen1 |= 1 << dmfc->data->eot_shift;
-	else
-		dmfc_gen1 &= ~(1 << dmfc->data->eot_shift);
-
-	writel(dmfc_gen1, priv->base + DMFC_GENERAL1);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_dmfc_init_channel);
-
-struct dmfc_channel *ipu_dmfc_get(struct ipu_soc *ipu, int ipu_channel)
-{
-	struct ipu_dmfc_priv *priv = ipu->dmfc_priv;
-	int i;
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++)
-		if (dmfcdata[i].ipu_channel == ipu_channel)
-			return &priv->channels[i];
-	return ERR_PTR(-ENODEV);
-}
-EXPORT_SYMBOL_GPL(ipu_dmfc_get);
-
-void ipu_dmfc_put(struct dmfc_channel *dmfc)
-{
-	ipu_dmfc_free_bandwidth(dmfc);
-}
-EXPORT_SYMBOL_GPL(ipu_dmfc_put);
-
-int ipu_dmfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base,
-		struct clk *ipu_clk)
-{
-	struct ipu_dmfc_priv *priv;
-	int i;
-
-	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	priv->base = devm_ioremap(dev, base, PAGE_SIZE);
-	if (!priv->base)
-		return -ENOMEM;
-
-	priv->dev = dev;
-	priv->ipu = ipu;
-	mutex_init(&priv->mutex);
-
-	ipu->dmfc_priv = priv;
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++) {
-		priv->channels[i].priv = priv;
-		priv->channels[i].ipu = ipu;
-		priv->channels[i].data = &dmfcdata[i];
-	}
-
-	writel(0x0, priv->base + DMFC_WR_CHAN);
-	writel(0x0, priv->base + DMFC_DP_CHAN);
-
-	/*
-	 * We have a total bandwidth of clkrate * 4pixel divided
-	 * into 8 slots.
-	 */
-	priv->bandwidth_per_slot = clk_get_rate(ipu_clk) * 4 / 8;
-
-	dev_dbg(dev, "dmfc: 8 slots with %ldMpixel/s bandwidth each\n",
-			priv->bandwidth_per_slot / 1000000);
-
-	writel(0x202020f6, priv->base + DMFC_WR_CHAN_DEF);
-	writel(0x2020f6f6, priv->base + DMFC_DP_CHAN_DEF);
-	writel(0x00000003, priv->base + DMFC_GENERAL1);
-
-	return 0;
-}
-
-void ipu_dmfc_exit(struct ipu_soc *ipu)
-{
-}
diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-dp.c b/drivers/staging/imx-drm/ipu-v3/ipu-dp.c
deleted file mode 100644
index 58f87c8d7c07..000000000000
--- a/drivers/staging/imx-drm/ipu-v3/ipu-dp.c
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
- * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- */
-#include <linux/export.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/io.h>
-#include <linux/err.h>
-
-#include "imx-ipu-v3.h"
-#include "ipu-prv.h"
-
-#define DP_SYNC 0
-#define DP_ASYNC0 0x60
-#define DP_ASYNC1 0xBC
-
-#define DP_COM_CONF		0x0
-#define DP_GRAPH_WIND_CTRL	0x0004
-#define DP_FG_POS		0x0008
-#define DP_CSC_A_0		0x0044
-#define DP_CSC_A_1		0x0048
-#define DP_CSC_A_2		0x004C
-#define DP_CSC_A_3		0x0050
-#define DP_CSC_0		0x0054
-#define DP_CSC_1		0x0058
-
-#define DP_COM_CONF_FG_EN		(1 << 0)
-#define DP_COM_CONF_GWSEL		(1 << 1)
-#define DP_COM_CONF_GWAM		(1 << 2)
-#define DP_COM_CONF_GWCKE		(1 << 3)
-#define DP_COM_CONF_CSC_DEF_MASK	(3 << 8)
-#define DP_COM_CONF_CSC_DEF_OFFSET	8
-#define DP_COM_CONF_CSC_DEF_FG		(3 << 8)
-#define DP_COM_CONF_CSC_DEF_BG		(2 << 8)
-#define DP_COM_CONF_CSC_DEF_BOTH	(1 << 8)
-
-#define IPUV3_NUM_FLOWS		3
-
-struct ipu_dp_priv;
-
-struct ipu_dp {
-	u32 flow;
-	bool in_use;
-	bool foreground;
-	enum ipu_color_space in_cs;
-};
-
-struct ipu_flow {
-	struct ipu_dp foreground;
-	struct ipu_dp background;
-	enum ipu_color_space out_cs;
-	void __iomem *base;
-	struct ipu_dp_priv *priv;
-};
-
-struct ipu_dp_priv {
-	struct ipu_soc *ipu;
-	struct device *dev;
-	void __iomem *base;
-	struct ipu_flow flow[IPUV3_NUM_FLOWS];
-	struct mutex mutex;
-	int use_count;
-};
-
-static u32 ipu_dp_flow_base[] = {DP_SYNC, DP_ASYNC0, DP_ASYNC1};
-
-static inline struct ipu_flow *to_flow(struct ipu_dp *dp)
-{
-	if (dp->foreground)
-		return container_of(dp, struct ipu_flow, foreground);
-	else
-		return container_of(dp, struct ipu_flow, background);
-}
-
-int ipu_dp_set_global_alpha(struct ipu_dp *dp, bool enable,
-		u8 alpha, bool bg_chan)
-{
-	struct ipu_flow *flow = to_flow(dp);
-	struct ipu_dp_priv *priv = flow->priv;
-	u32 reg;
-
-	mutex_lock(&priv->mutex);
-
-	reg = readl(flow->base + DP_COM_CONF);
-	if (bg_chan)
-		reg &= ~DP_COM_CONF_GWSEL;
-	else
-		reg |= DP_COM_CONF_GWSEL;
-	writel(reg, flow->base + DP_COM_CONF);
-
-	if (enable) {
-		reg = readl(flow->base + DP_GRAPH_WIND_CTRL) & 0x00FFFFFFL;
-		writel(reg | ((u32) alpha << 24),
-			     flow->base + DP_GRAPH_WIND_CTRL);
-
-		reg = readl(flow->base + DP_COM_CONF);
-		writel(reg | DP_COM_CONF_GWAM, flow->base + DP_COM_CONF);
-	} else {
-		reg = readl(flow->base + DP_COM_CONF);
-		writel(reg & ~DP_COM_CONF_GWAM, flow->base + DP_COM_CONF);
-	}
-
-	ipu_srm_dp_sync_update(priv->ipu);
-
-	mutex_unlock(&priv->mutex);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_dp_set_global_alpha);
-
-int ipu_dp_set_window_pos(struct ipu_dp *dp, u16 x_pos, u16 y_pos)
-{
-	struct ipu_flow *flow = to_flow(dp);
-	struct ipu_dp_priv *priv = flow->priv;
-
-	writel((x_pos << 16) | y_pos, flow->base + DP_FG_POS);
-
-	ipu_srm_dp_sync_update(priv->ipu);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_dp_set_window_pos);
-
-static void ipu_dp_csc_init(struct ipu_flow *flow,
-		enum ipu_color_space in,
-		enum ipu_color_space out,
-		u32 place)
-{
-	u32 reg;
-
-	reg = readl(flow->base + DP_COM_CONF);
-	reg &= ~DP_COM_CONF_CSC_DEF_MASK;
-
-	if (in == out) {
-		writel(reg, flow->base + DP_COM_CONF);
-		return;
-	}
-
-	if (in == IPUV3_COLORSPACE_RGB && out == IPUV3_COLORSPACE_YUV) {
-		writel(0x099 | (0x12d << 16), flow->base + DP_CSC_A_0);
-		writel(0x03a | (0x3a9 << 16), flow->base + DP_CSC_A_1);
-		writel(0x356 | (0x100 << 16), flow->base + DP_CSC_A_2);
-		writel(0x100 | (0x329 << 16), flow->base + DP_CSC_A_3);
-		writel(0x3d6 | (0x0000 << 16) | (2 << 30),
-				flow->base + DP_CSC_0);
-		writel(0x200 | (2 << 14) | (0x200 << 16) | (2 << 30),
-				flow->base + DP_CSC_1);
-	} else {
-		writel(0x095 | (0x000 << 16), flow->base + DP_CSC_A_0);
-		writel(0x0cc | (0x095 << 16), flow->base + DP_CSC_A_1);
-		writel(0x3ce | (0x398 << 16), flow->base + DP_CSC_A_2);
-		writel(0x095 | (0x0ff << 16), flow->base + DP_CSC_A_3);
-		writel(0x000 | (0x3e42 << 16) | (1 << 30),
-				flow->base + DP_CSC_0);
-		writel(0x10a | (1 << 14) | (0x3dd6 << 16) | (1 << 30),
-				flow->base + DP_CSC_1);
-	}
-
-	reg |= place;
-
-	writel(reg, flow->base + DP_COM_CONF);
-}
-
-int ipu_dp_setup_channel(struct ipu_dp *dp,
-		enum ipu_color_space in,
-		enum ipu_color_space out)
-{
-	struct ipu_flow *flow = to_flow(dp);
-	struct ipu_dp_priv *priv = flow->priv;
-
-	mutex_lock(&priv->mutex);
-
-	dp->in_cs = in;
-
-	if (!dp->foreground)
-		flow->out_cs = out;
-
-	if (flow->foreground.in_cs == flow->background.in_cs) {
-		/*
-		 * foreground and background are of same colorspace, put
-		 * colorspace converter after combining unit.
-		 */
-		ipu_dp_csc_init(flow, flow->foreground.in_cs, flow->out_cs,
-				DP_COM_CONF_CSC_DEF_BOTH);
-	} else {
-		if (flow->foreground.in_cs == flow->out_cs)
-			/*
-			 * foreground identical to output, apply color
-			 * conversion on background
-			 */
-			ipu_dp_csc_init(flow, flow->background.in_cs,
-					flow->out_cs, DP_COM_CONF_CSC_DEF_BG);
-		else
-			ipu_dp_csc_init(flow, flow->foreground.in_cs,
-					flow->out_cs, DP_COM_CONF_CSC_DEF_FG);
-	}
-
-	ipu_srm_dp_sync_update(priv->ipu);
-
-	mutex_unlock(&priv->mutex);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_dp_setup_channel);
-
-int ipu_dp_enable_channel(struct ipu_dp *dp)
-{
-	struct ipu_flow *flow = to_flow(dp);
-	struct ipu_dp_priv *priv = flow->priv;
-
-	mutex_lock(&priv->mutex);
-
-	if (!priv->use_count)
-		ipu_module_enable(priv->ipu, IPU_CONF_DP_EN);
-
-	priv->use_count++;
-
-	if (dp->foreground) {
-		u32 reg;
-
-		reg = readl(flow->base + DP_COM_CONF);
-		reg |= DP_COM_CONF_FG_EN;
-		writel(reg, flow->base + DP_COM_CONF);
-
-		ipu_srm_dp_sync_update(priv->ipu);
-	}
-
-	mutex_unlock(&priv->mutex);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ipu_dp_enable_channel);
-
-void ipu_dp_disable_channel(struct ipu_dp *dp)
-{
-	struct ipu_flow *flow = to_flow(dp);
-	struct ipu_dp_priv *priv = flow->priv;
-
-	mutex_lock(&priv->mutex);
-
-	priv->use_count--;
-
-	if (dp->foreground) {
-		u32 reg, csc;
-
-		reg = readl(flow->base + DP_COM_CONF);
-		csc = reg & DP_COM_CONF_CSC_DEF_MASK;
-		if (csc == DP_COM_CONF_CSC_DEF_FG)
-			reg &= ~DP_COM_CONF_CSC_DEF_MASK;
-
-		reg &= ~DP_COM_CONF_FG_EN;
-		writel(reg, flow->base + DP_COM_CONF);
-
-		writel(0, flow->base + DP_FG_POS);
-		ipu_srm_dp_sync_update(priv->ipu);
-	}
-
-	if (!priv->use_count)
-		ipu_module_disable(priv->ipu, IPU_CONF_DP_EN);
-
-	if (priv->use_count < 0)
-		priv->use_count = 0;
-
-	mutex_unlock(&priv->mutex);
-}
-EXPORT_SYMBOL_GPL(ipu_dp_disable_channel);
-
-struct ipu_dp *ipu_dp_get(struct ipu_soc *ipu, unsigned int flow)
-{
-	struct ipu_dp_priv *priv = ipu->dp_priv;
-	struct ipu_dp *dp;
-
-	if ((flow >> 1) >= IPUV3_NUM_FLOWS)
-		return ERR_PTR(-EINVAL);
-
-	if (flow & 1)
-		dp = &priv->flow[flow >> 1].foreground;
-	else
-		dp = &priv->flow[flow >> 1].background;
-
-	if (dp->in_use)
-		return ERR_PTR(-EBUSY);
-
-	dp->in_use = true;
-
-	return dp;
-}
-EXPORT_SYMBOL_GPL(ipu_dp_get);
-
-void ipu_dp_put(struct ipu_dp *dp)
-{
-	dp->in_use = false;
-}
-EXPORT_SYMBOL_GPL(ipu_dp_put);
-
-int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base)
-{
-	struct ipu_dp_priv *priv;
-	int i;
-
-	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-	priv->dev = dev;
-	priv->ipu = ipu;
-
-	ipu->dp_priv = priv;
-
-	priv->base = devm_ioremap(dev, base, PAGE_SIZE);
-	if (!priv->base)
-		return -ENOMEM;
-
-	mutex_init(&priv->mutex);
-
-	for (i = 0; i < IPUV3_NUM_FLOWS; i++) {
-		priv->flow[i].foreground.foreground = true;
-		priv->flow[i].base = priv->base + ipu_dp_flow_base[i];
-		priv->flow[i].priv = priv;
-	}
-
-	return 0;
-}
-
-void ipu_dp_exit(struct ipu_soc *ipu)
-{
-}
diff --git a/drivers/staging/imx-drm/ipu-v3/ipu-prv.h b/drivers/staging/imx-drm/ipu-v3/ipu-prv.h
deleted file mode 100644
index 4df00501adc2..000000000000
--- a/drivers/staging/imx-drm/ipu-v3/ipu-prv.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Copyright (c) 2010 Sascha Hauer <s.hauer@pengutronix.de>
- * Copyright (C) 2005-2009 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- */
-#ifndef __IPU_PRV_H__
-#define __IPU_PRV_H__
-
-struct ipu_soc;
-
-#include <linux/types.h>
-#include <linux/device.h>
-#include <linux/clk.h>
-#include <linux/platform_device.h>
-
-#include "imx-ipu-v3.h"
-
-#define IPUV3_CHANNEL_CSI0			 0
-#define IPUV3_CHANNEL_CSI1			 1
-#define IPUV3_CHANNEL_CSI2			 2
-#define IPUV3_CHANNEL_CSI3			 3
-#define IPUV3_CHANNEL_MEM_BG_SYNC		23
-#define IPUV3_CHANNEL_MEM_FG_SYNC		27
-#define IPUV3_CHANNEL_MEM_DC_SYNC		28
-#define IPUV3_CHANNEL_MEM_FG_SYNC_ALPHA		31
-#define IPUV3_CHANNEL_MEM_DC_ASYNC		41
-#define IPUV3_CHANNEL_ROT_ENC_MEM		45
-#define IPUV3_CHANNEL_ROT_VF_MEM		46
-#define IPUV3_CHANNEL_ROT_PP_MEM		47
-#define IPUV3_CHANNEL_ROT_ENC_MEM_OUT		48
-#define IPUV3_CHANNEL_ROT_VF_MEM_OUT		49
-#define IPUV3_CHANNEL_ROT_PP_MEM_OUT		50
-#define IPUV3_CHANNEL_MEM_BG_SYNC_ALPHA		51
-
-#define IPU_MCU_T_DEFAULT	8
-#define IPU_CM_IDMAC_REG_OFS	0x00008000
-#define IPU_CM_IC_REG_OFS	0x00020000
-#define IPU_CM_IRT_REG_OFS	0x00028000
-#define IPU_CM_CSI0_REG_OFS	0x00030000
-#define IPU_CM_CSI1_REG_OFS	0x00038000
-#define IPU_CM_SMFC_REG_OFS	0x00050000
-#define IPU_CM_DC_REG_OFS	0x00058000
-#define IPU_CM_DMFC_REG_OFS	0x00060000
-
-/* Register addresses */
-/* IPU Common registers */
-#define IPU_CM_REG(offset)	(offset)
-
-#define IPU_CONF			IPU_CM_REG(0)
-
-#define IPU_SRM_PRI1			IPU_CM_REG(0x00a0)
-#define IPU_SRM_PRI2			IPU_CM_REG(0x00a4)
-#define IPU_FS_PROC_FLOW1		IPU_CM_REG(0x00a8)
-#define IPU_FS_PROC_FLOW2		IPU_CM_REG(0x00ac)
-#define IPU_FS_PROC_FLOW3		IPU_CM_REG(0x00b0)
-#define IPU_FS_DISP_FLOW1		IPU_CM_REG(0x00b4)
-#define IPU_FS_DISP_FLOW2		IPU_CM_REG(0x00b8)
-#define IPU_SKIP			IPU_CM_REG(0x00bc)
-#define IPU_DISP_ALT_CONF		IPU_CM_REG(0x00c0)
-#define IPU_DISP_GEN			IPU_CM_REG(0x00c4)
-#define IPU_DISP_ALT1			IPU_CM_REG(0x00c8)
-#define IPU_DISP_ALT2			IPU_CM_REG(0x00cc)
-#define IPU_DISP_ALT3			IPU_CM_REG(0x00d0)
-#define IPU_DISP_ALT4			IPU_CM_REG(0x00d4)
-#define IPU_SNOOP			IPU_CM_REG(0x00d8)
-#define IPU_MEM_RST			IPU_CM_REG(0x00dc)
-#define IPU_PM				IPU_CM_REG(0x00e0)
-#define IPU_GPR				IPU_CM_REG(0x00e4)
-#define IPU_CHA_DB_MODE_SEL(ch)		IPU_CM_REG(0x0150 + 4 * ((ch) / 32))
-#define IPU_ALT_CHA_DB_MODE_SEL(ch)	IPU_CM_REG(0x0168 + 4 * ((ch) / 32))
-#define IPU_CHA_CUR_BUF(ch)		IPU_CM_REG(0x023C + 4 * ((ch) / 32))
-#define IPU_ALT_CUR_BUF0		IPU_CM_REG(0x0244)
-#define IPU_ALT_CUR_BUF1		IPU_CM_REG(0x0248)
-#define IPU_SRM_STAT			IPU_CM_REG(0x024C)
-#define IPU_PROC_TASK_STAT		IPU_CM_REG(0x0250)
-#define IPU_DISP_TASK_STAT		IPU_CM_REG(0x0254)
-#define IPU_CHA_BUF0_RDY(ch)		IPU_CM_REG(0x0268 + 4 * ((ch) / 32))
-#define IPU_CHA_BUF1_RDY(ch)		IPU_CM_REG(0x0270 + 4 * ((ch) / 32))
-#define IPU_ALT_CHA_BUF0_RDY(ch)	IPU_CM_REG(0x0278 + 4 * ((ch) / 32))
-#define IPU_ALT_CHA_BUF1_RDY(ch)	IPU_CM_REG(0x0280 + 4 * ((ch) / 32))
-
-#define IPU_INT_CTRL(n)		IPU_CM_REG(0x003C + 4 * (n))
-#define IPU_INT_STAT(n)		IPU_CM_REG(0x0200 + 4 * (n))
-
-#define IPU_DI0_COUNTER_RELEASE			(1 << 24)
-#define IPU_DI1_COUNTER_RELEASE			(1 << 25)
-
-#define IPU_IDMAC_REG(offset)	(offset)
-
-#define IDMAC_CONF			IPU_IDMAC_REG(0x0000)
-#define IDMAC_CHA_EN(ch)		IPU_IDMAC_REG(0x0004 + 4 * ((ch) / 32))
-#define IDMAC_SEP_ALPHA			IPU_IDMAC_REG(0x000c)
-#define IDMAC_ALT_SEP_ALPHA		IPU_IDMAC_REG(0x0010)
-#define IDMAC_CHA_PRI(ch)		IPU_IDMAC_REG(0x0014 + 4 * ((ch) / 32))
-#define IDMAC_WM_EN(ch)			IPU_IDMAC_REG(0x001c + 4 * ((ch) / 32))
-#define IDMAC_CH_LOCK_EN_1		IPU_IDMAC_REG(0x0024)
-#define IDMAC_CH_LOCK_EN_2		IPU_IDMAC_REG(0x0028)
-#define IDMAC_SUB_ADDR_0		IPU_IDMAC_REG(0x002c)
-#define IDMAC_SUB_ADDR_1		IPU_IDMAC_REG(0x0030)
-#define IDMAC_SUB_ADDR_2		IPU_IDMAC_REG(0x0034)
-#define IDMAC_BAND_EN(ch)		IPU_IDMAC_REG(0x0040 + 4 * ((ch) / 32))
-#define IDMAC_CHA_BUSY(ch)		IPU_IDMAC_REG(0x0100 + 4 * ((ch) / 32))
-
-#define IPU_NUM_IRQS	(32 * 15)
-
-enum ipu_modules {
-	IPU_CONF_CSI0_EN		= (1 << 0),
-	IPU_CONF_CSI1_EN		= (1 << 1),
-	IPU_CONF_IC_EN			= (1 << 2),
-	IPU_CONF_ROT_EN			= (1 << 3),
-	IPU_CONF_ISP_EN			= (1 << 4),
-	IPU_CONF_DP_EN			= (1 << 5),
-	IPU_CONF_DI0_EN			= (1 << 6),
-	IPU_CONF_DI1_EN			= (1 << 7),
-	IPU_CONF_SMFC_EN		= (1 << 8),
-	IPU_CONF_DC_EN			= (1 << 9),
-	IPU_CONF_DMFC_EN		= (1 << 10),
-
-	IPU_CONF_VDI_EN			= (1 << 12),
-
-	IPU_CONF_IDMAC_DIS		= (1 << 22),
-
-	IPU_CONF_IC_DMFC_SEL		= (1 << 25),
-	IPU_CONF_IC_DMFC_SYNC		= (1 << 26),
-	IPU_CONF_VDI_DMFC_SYNC		= (1 << 27),
-
-	IPU_CONF_CSI0_DATA_SOURCE	= (1 << 28),
-	IPU_CONF_CSI1_DATA_SOURCE	= (1 << 29),
-	IPU_CONF_IC_INPUT		= (1 << 30),
-	IPU_CONF_CSI_SEL		= (1 << 31),
-};
-
-struct ipuv3_channel {
-	unsigned int num;
-
-	bool enabled;
-	bool busy;
-
-	struct ipu_soc *ipu;
-};
-
-struct ipu_dc_priv;
-struct ipu_dmfc_priv;
-struct ipu_di;
-struct ipu_devtype;
-
-struct ipu_soc {
-	struct device		*dev;
-	const struct ipu_devtype	*devtype;
-	enum ipuv3_type		ipu_type;
-	spinlock_t		lock;
-	struct mutex		channel_lock;
-
-	void __iomem		*cm_reg;
-	void __iomem		*idmac_reg;
-	struct ipu_ch_param __iomem	*cpmem_base;
-
-	int			usecount;
-
-	struct clk		*clk;
-
-	struct ipuv3_channel	channel[64];
-
-	int			irq_sync;
-	int			irq_err;
-	struct irq_domain	*domain;
-
-	struct ipu_dc_priv	*dc_priv;
-	struct ipu_dp_priv	*dp_priv;
-	struct ipu_dmfc_priv	*dmfc_priv;
-	struct ipu_di		*di_priv[2];
-};
-
-void ipu_srm_dp_sync_update(struct ipu_soc *ipu);
-
-int ipu_module_enable(struct ipu_soc *ipu, u32 mask);
-int ipu_module_disable(struct ipu_soc *ipu, u32 mask);
-
-int ipu_di_init(struct ipu_soc *ipu, struct device *dev, int id,
-		unsigned long base, u32 module, struct clk *ipu_clk);
-void ipu_di_exit(struct ipu_soc *ipu, int id);
-
-int ipu_dmfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base,
-		struct clk *ipu_clk);
-void ipu_dmfc_exit(struct ipu_soc *ipu);
-
-int ipu_dp_init(struct ipu_soc *ipu, struct device *dev, unsigned long base);
-void ipu_dp_exit(struct ipu_soc *ipu);
-
-int ipu_dc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base,
-		unsigned long template_base);
-void ipu_dc_exit(struct ipu_soc *ipu);
-
-int ipu_cpmem_init(struct ipu_soc *ipu, struct device *dev, unsigned long base);
-void ipu_cpmem_exit(struct ipu_soc *ipu);
-
-#endif				/* __IPU_PRV_H__ */
diff --git a/drivers/staging/imx-drm/ipuv3-crtc.c b/drivers/staging/imx-drm/ipuv3-crtc.c
index c48f640db006..d6913d2e6f77 100644
--- a/drivers/staging/imx-drm/ipuv3-crtc.c
+++ b/drivers/staging/imx-drm/ipuv3-crtc.c
@@ -30,7 +30,7 @@
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 
-#include "ipu-v3/imx-ipu-v3.h"
+#include <video/imx-ipu-v3.h>
 #include "imx-drm.h"
 #include "ipuv3-plane.h"
 
diff --git a/drivers/staging/imx-drm/ipuv3-plane.c b/drivers/staging/imx-drm/ipuv3-plane.c
index 27a8d735dae0..02b4486435a0 100644
--- a/drivers/staging/imx-drm/ipuv3-plane.c
+++ b/drivers/staging/imx-drm/ipuv3-plane.c
@@ -17,7 +17,7 @@
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 
-#include "ipu-v3/imx-ipu-v3.h"
+#include "video/imx-ipu-v3.h"
 #include "ipuv3-plane.h"
 
 #define to_ipu_plane(x)	container_of(x, struct ipu_plane, base)
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index c7b4f0f927b1..8bf495ffb020 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -20,6 +20,7 @@ source "drivers/char/agp/Kconfig"
 source "drivers/gpu/vga/Kconfig"
 
 source "drivers/gpu/host1x/Kconfig"
+source "drivers/gpu/ipu-v3/Kconfig"
 
 menu "Direct Rendering Manager"
 source "drivers/gpu/drm/Kconfig"
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
new file mode 100644
index 000000000000..c4d14ead5837
--- /dev/null
+++ b/include/video/imx-ipu-v3.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2005-2009 Freescale Semiconductor, Inc.
+ *
+ * The code contained herein is licensed under the GNU Lesser General
+ * Public License.  You may obtain a copy of the GNU Lesser General
+ * Public License Version 2.1 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/lgpl-license.html
+ * http://www.gnu.org/copyleft/lgpl.html
+ */
+
+#ifndef __DRM_IPU_H__
+#define __DRM_IPU_H__
+
+#include <linux/types.h>
+#include <linux/videodev2.h>
+#include <linux/bitmap.h>
+#include <linux/fb.h>
+
+struct ipu_soc;
+
+enum ipuv3_type {
+	IPUV3EX,
+	IPUV3M,
+	IPUV3H,
+};
+
+#define IPU_PIX_FMT_GBR24	v4l2_fourcc('G', 'B', 'R', '3')
+
+/*
+ * Bitfield of Display Interface signal polarities.
+ */
+struct ipu_di_signal_cfg {
+	unsigned datamask_en:1;
+	unsigned interlaced:1;
+	unsigned odd_field_first:1;
+	unsigned clksel_en:1;
+	unsigned clkidle_en:1;
+	unsigned data_pol:1;	/* true = inverted */
+	unsigned clk_pol:1;	/* true = rising edge */
+	unsigned enable_pol:1;
+	unsigned Hsync_pol:1;	/* true = active high */
+	unsigned Vsync_pol:1;
+
+	u16 width;
+	u16 height;
+	u32 pixel_fmt;
+	u16 h_start_width;
+	u16 h_sync_width;
+	u16 h_end_width;
+	u16 v_start_width;
+	u16 v_sync_width;
+	u16 v_end_width;
+	u32 v_to_h_sync;
+	unsigned long pixelclock;
+#define IPU_DI_CLKMODE_SYNC	(1 << 0)
+#define IPU_DI_CLKMODE_EXT	(1 << 1)
+	unsigned long clkflags;
+
+	u8 hsync_pin;
+	u8 vsync_pin;
+};
+
+enum ipu_color_space {
+	IPUV3_COLORSPACE_RGB,
+	IPUV3_COLORSPACE_YUV,
+	IPUV3_COLORSPACE_UNKNOWN,
+};
+
+struct ipuv3_channel;
+
+enum ipu_channel_irq {
+	IPU_IRQ_EOF = 0,
+	IPU_IRQ_NFACK = 64,
+	IPU_IRQ_NFB4EOF = 128,
+	IPU_IRQ_EOS = 192,
+};
+
+int ipu_idmac_channel_irq(struct ipu_soc *ipu, struct ipuv3_channel *channel,
+		enum ipu_channel_irq irq);
+
+#define IPU_IRQ_DP_SF_START		(448 + 2)
+#define IPU_IRQ_DP_SF_END		(448 + 3)
+#define IPU_IRQ_BG_SF_END		IPU_IRQ_DP_SF_END,
+#define IPU_IRQ_DC_FC_0			(448 + 8)
+#define IPU_IRQ_DC_FC_1			(448 + 9)
+#define IPU_IRQ_DC_FC_2			(448 + 10)
+#define IPU_IRQ_DC_FC_3			(448 + 11)
+#define IPU_IRQ_DC_FC_4			(448 + 12)
+#define IPU_IRQ_DC_FC_6			(448 + 13)
+#define IPU_IRQ_VSYNC_PRE_0		(448 + 14)
+#define IPU_IRQ_VSYNC_PRE_1		(448 + 15)
+
+/*
+ * IPU Image DMA Controller (idmac) functions
+ */
+struct ipuv3_channel *ipu_idmac_get(struct ipu_soc *ipu, unsigned channel);
+void ipu_idmac_put(struct ipuv3_channel *);
+
+int ipu_idmac_enable_channel(struct ipuv3_channel *channel);
+int ipu_idmac_disable_channel(struct ipuv3_channel *channel);
+int ipu_idmac_wait_busy(struct ipuv3_channel *channel, int ms);
+
+void ipu_idmac_set_double_buffer(struct ipuv3_channel *channel,
+		bool doublebuffer);
+void ipu_idmac_select_buffer(struct ipuv3_channel *channel, u32 buf_num);
+
+/*
+ * IPU Display Controller (dc) functions
+ */
+struct ipu_dc;
+struct ipu_di;
+struct ipu_dc *ipu_dc_get(struct ipu_soc *ipu, int channel);
+void ipu_dc_put(struct ipu_dc *dc);
+int ipu_dc_init_sync(struct ipu_dc *dc, struct ipu_di *di, bool interlaced,
+		u32 pixel_fmt, u32 width);
+void ipu_dc_enable_channel(struct ipu_dc *dc);
+void ipu_dc_disable_channel(struct ipu_dc *dc);
+
+/*
+ * IPU Display Interface (di) functions
+ */
+struct ipu_di *ipu_di_get(struct ipu_soc *ipu, int disp);
+void ipu_di_put(struct ipu_di *);
+int ipu_di_disable(struct ipu_di *);
+int ipu_di_enable(struct ipu_di *);
+int ipu_di_get_num(struct ipu_di *);
+int ipu_di_init_sync_panel(struct ipu_di *, struct ipu_di_signal_cfg *sig);
+
+/*
+ * IPU Display Multi FIFO Controller (dmfc) functions
+ */
+struct dmfc_channel;
+int ipu_dmfc_enable_channel(struct dmfc_channel *dmfc);
+void ipu_dmfc_disable_channel(struct dmfc_channel *dmfc);
+int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc,
+		unsigned long bandwidth_mbs, int burstsize);
+void ipu_dmfc_free_bandwidth(struct dmfc_channel *dmfc);
+int ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width);
+struct dmfc_channel *ipu_dmfc_get(struct ipu_soc *ipu, int ipuv3_channel);
+void ipu_dmfc_put(struct dmfc_channel *dmfc);
+
+/*
+ * IPU Display Processor (dp) functions
+ */
+#define IPU_DP_FLOW_SYNC_BG	0
+#define IPU_DP_FLOW_SYNC_FG	1
+#define IPU_DP_FLOW_ASYNC0_BG	2
+#define IPU_DP_FLOW_ASYNC0_FG	3
+#define IPU_DP_FLOW_ASYNC1_BG	4
+#define IPU_DP_FLOW_ASYNC1_FG	5
+
+struct ipu_dp *ipu_dp_get(struct ipu_soc *ipu, unsigned int flow);
+void ipu_dp_put(struct ipu_dp *);
+int ipu_dp_enable_channel(struct ipu_dp *dp);
+void ipu_dp_disable_channel(struct ipu_dp *dp);
+int ipu_dp_setup_channel(struct ipu_dp *dp,
+		enum ipu_color_space in, enum ipu_color_space out);
+int ipu_dp_set_window_pos(struct ipu_dp *, u16 x_pos, u16 y_pos);
+int ipu_dp_set_global_alpha(struct ipu_dp *dp, bool enable, u8 alpha,
+		bool bg_chan);
+
+#define IPU_CPMEM_WORD(word, ofs, size) ((((word) * 160 + (ofs)) << 8) | (size))
+
+#define IPU_FIELD_UBO		IPU_CPMEM_WORD(0, 46, 22)
+#define IPU_FIELD_VBO		IPU_CPMEM_WORD(0, 68, 22)
+#define IPU_FIELD_IOX		IPU_CPMEM_WORD(0, 90, 4)
+#define IPU_FIELD_RDRW		IPU_CPMEM_WORD(0, 94, 1)
+#define IPU_FIELD_SO		IPU_CPMEM_WORD(0, 113, 1)
+#define IPU_FIELD_SLY		IPU_CPMEM_WORD(1, 102, 14)
+#define IPU_FIELD_SLUV		IPU_CPMEM_WORD(1, 128, 14)
+
+#define IPU_FIELD_XV		IPU_CPMEM_WORD(0, 0, 10)
+#define IPU_FIELD_YV		IPU_CPMEM_WORD(0, 10, 9)
+#define IPU_FIELD_XB		IPU_CPMEM_WORD(0, 19, 13)
+#define IPU_FIELD_YB		IPU_CPMEM_WORD(0, 32, 12)
+#define IPU_FIELD_NSB_B		IPU_CPMEM_WORD(0, 44, 1)
+#define IPU_FIELD_CF		IPU_CPMEM_WORD(0, 45, 1)
+#define IPU_FIELD_SX		IPU_CPMEM_WORD(0, 46, 12)
+#define IPU_FIELD_SY		IPU_CPMEM_WORD(0, 58, 11)
+#define IPU_FIELD_NS		IPU_CPMEM_WORD(0, 69, 10)
+#define IPU_FIELD_SDX		IPU_CPMEM_WORD(0, 79, 7)
+#define IPU_FIELD_SM		IPU_CPMEM_WORD(0, 86, 10)
+#define IPU_FIELD_SCC		IPU_CPMEM_WORD(0, 96, 1)
+#define IPU_FIELD_SCE		IPU_CPMEM_WORD(0, 97, 1)
+#define IPU_FIELD_SDY		IPU_CPMEM_WORD(0, 98, 7)
+#define IPU_FIELD_SDRX		IPU_CPMEM_WORD(0, 105, 1)
+#define IPU_FIELD_SDRY		IPU_CPMEM_WORD(0, 106, 1)
+#define IPU_FIELD_BPP		IPU_CPMEM_WORD(0, 107, 3)
+#define IPU_FIELD_DEC_SEL	IPU_CPMEM_WORD(0, 110, 2)
+#define IPU_FIELD_DIM		IPU_CPMEM_WORD(0, 112, 1)
+#define IPU_FIELD_BNDM		IPU_CPMEM_WORD(0, 114, 3)
+#define IPU_FIELD_BM		IPU_CPMEM_WORD(0, 117, 2)
+#define IPU_FIELD_ROT		IPU_CPMEM_WORD(0, 119, 1)
+#define IPU_FIELD_HF		IPU_CPMEM_WORD(0, 120, 1)
+#define IPU_FIELD_VF		IPU_CPMEM_WORD(0, 121, 1)
+#define IPU_FIELD_THE		IPU_CPMEM_WORD(0, 122, 1)
+#define IPU_FIELD_CAP		IPU_CPMEM_WORD(0, 123, 1)
+#define IPU_FIELD_CAE		IPU_CPMEM_WORD(0, 124, 1)
+#define IPU_FIELD_FW		IPU_CPMEM_WORD(0, 125, 13)
+#define IPU_FIELD_FH		IPU_CPMEM_WORD(0, 138, 12)
+#define IPU_FIELD_EBA0		IPU_CPMEM_WORD(1, 0, 29)
+#define IPU_FIELD_EBA1		IPU_CPMEM_WORD(1, 29, 29)
+#define IPU_FIELD_ILO		IPU_CPMEM_WORD(1, 58, 20)
+#define IPU_FIELD_NPB		IPU_CPMEM_WORD(1, 78, 7)
+#define IPU_FIELD_PFS		IPU_CPMEM_WORD(1, 85, 4)
+#define IPU_FIELD_ALU		IPU_CPMEM_WORD(1, 89, 1)
+#define IPU_FIELD_ALBM		IPU_CPMEM_WORD(1, 90, 3)
+#define IPU_FIELD_ID		IPU_CPMEM_WORD(1, 93, 2)
+#define IPU_FIELD_TH		IPU_CPMEM_WORD(1, 95, 7)
+#define IPU_FIELD_SL		IPU_CPMEM_WORD(1, 102, 14)
+#define IPU_FIELD_WID0		IPU_CPMEM_WORD(1, 116, 3)
+#define IPU_FIELD_WID1		IPU_CPMEM_WORD(1, 119, 3)
+#define IPU_FIELD_WID2		IPU_CPMEM_WORD(1, 122, 3)
+#define IPU_FIELD_WID3		IPU_CPMEM_WORD(1, 125, 3)
+#define IPU_FIELD_OFS0		IPU_CPMEM_WORD(1, 128, 5)
+#define IPU_FIELD_OFS1		IPU_CPMEM_WORD(1, 133, 5)
+#define IPU_FIELD_OFS2		IPU_CPMEM_WORD(1, 138, 5)
+#define IPU_FIELD_OFS3		IPU_CPMEM_WORD(1, 143, 5)
+#define IPU_FIELD_SXYS		IPU_CPMEM_WORD(1, 148, 1)
+#define IPU_FIELD_CRE		IPU_CPMEM_WORD(1, 149, 1)
+#define IPU_FIELD_DEC_SEL2	IPU_CPMEM_WORD(1, 150, 1)
+
+struct ipu_cpmem_word {
+	u32 data[5];
+	u32 res[3];
+};
+
+struct ipu_ch_param {
+	struct ipu_cpmem_word word[2];
+};
+
+void ipu_ch_param_write_field(struct ipu_ch_param __iomem *base, u32 wbs, u32 v);
+u32 ipu_ch_param_read_field(struct ipu_ch_param __iomem *base, u32 wbs);
+struct ipu_ch_param __iomem *ipu_get_cpmem(struct ipuv3_channel *channel);
+void ipu_ch_param_dump(struct ipu_ch_param __iomem *p);
+
+static inline void ipu_ch_param_zero(struct ipu_ch_param __iomem *p)
+{
+	int i;
+	void __iomem *base = p;
+
+	for (i = 0; i < sizeof(*p) / sizeof(u32); i++)
+		writel(0, base + i * sizeof(u32));
+}
+
+static inline void ipu_cpmem_set_buffer(struct ipu_ch_param __iomem *p,
+		int bufnum, dma_addr_t buf)
+{
+	if (bufnum)
+		ipu_ch_param_write_field(p, IPU_FIELD_EBA1, buf >> 3);
+	else
+		ipu_ch_param_write_field(p, IPU_FIELD_EBA0, buf >> 3);
+}
+
+static inline void ipu_cpmem_set_resolution(struct ipu_ch_param __iomem *p,
+		int xres, int yres)
+{
+	ipu_ch_param_write_field(p, IPU_FIELD_FW, xres - 1);
+	ipu_ch_param_write_field(p, IPU_FIELD_FH, yres - 1);
+}
+
+static inline void ipu_cpmem_set_stride(struct ipu_ch_param __iomem *p,
+		int stride)
+{
+	ipu_ch_param_write_field(p, IPU_FIELD_SLY, stride - 1);
+}
+
+void ipu_cpmem_set_high_priority(struct ipuv3_channel *channel);
+
+struct ipu_rgb {
+	struct fb_bitfield	red;
+	struct fb_bitfield	green;
+	struct fb_bitfield	blue;
+	struct fb_bitfield	transp;
+	int			bits_per_pixel;
+};
+
+struct ipu_image {
+	struct v4l2_pix_format pix;
+	struct v4l2_rect rect;
+	dma_addr_t phys;
+};
+
+int ipu_cpmem_set_format_passthrough(struct ipu_ch_param __iomem *p,
+		int width);
+
+int ipu_cpmem_set_format_rgb(struct ipu_ch_param __iomem *,
+		const struct ipu_rgb *rgb);
+
+static inline void ipu_cpmem_interlaced_scan(struct ipu_ch_param *p,
+		int stride)
+{
+	ipu_ch_param_write_field(p, IPU_FIELD_SO, 1);
+	ipu_ch_param_write_field(p, IPU_FIELD_ILO, stride / 8);
+	ipu_ch_param_write_field(p, IPU_FIELD_SLY, (stride * 2) - 1);
+};
+
+void ipu_cpmem_set_yuv_planar(struct ipu_ch_param __iomem *p, u32 pixel_format,
+			int stride, int height);
+void ipu_cpmem_set_yuv_interleaved(struct ipu_ch_param __iomem *p,
+				   u32 pixel_format);
+void ipu_cpmem_set_yuv_planar_full(struct ipu_ch_param __iomem *p,
+		u32 pixel_format, int stride, int u_offset, int v_offset);
+int ipu_cpmem_set_fmt(struct ipu_ch_param __iomem *cpmem, u32 pixelformat);
+int ipu_cpmem_set_image(struct ipu_ch_param __iomem *cpmem,
+		struct ipu_image *image);
+
+enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc);
+enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat);
+
+static inline void ipu_cpmem_set_burstsize(struct ipu_ch_param __iomem *p,
+		int burstsize)
+{
+	ipu_ch_param_write_field(p, IPU_FIELD_NPB, burstsize - 1);
+};
+
+struct ipu_client_platformdata {
+	int di;
+	int dc;
+	int dp;
+	int dmfc;
+	int dma[2];
+};
+
+#endif /* __DRM_IPU_H__ */
-- 
cgit v1.2.3-71-gd317


From 35de925ffaa67971e073d3ebf1e0600be0d0d3f1 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 9 May 2012 16:59:01 +0200
Subject: gpu: ipu-v3: Add SMFC code

The Sensor Multi Fifo Controller (SMFC) is used as a buffer between
the two CSIs (writing simultaneously) and up to four IDMAC channels.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/Makefile     |  2 +-
 drivers/gpu/ipu-v3/ipu-common.c | 10 +++++
 drivers/gpu/ipu-v3/ipu-prv.h    |  6 +++
 drivers/gpu/ipu-v3/ipu-smfc.c   | 97 +++++++++++++++++++++++++++++++++++++++++
 include/video/imx-ipu-v3.h      |  6 +++
 5 files changed, 120 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/ipu-v3/ipu-smfc.c

(limited to 'include')

diff --git a/drivers/gpu/ipu-v3/Makefile b/drivers/gpu/ipu-v3/Makefile
index d21cc37d0498..1887972b4ac2 100644
--- a/drivers/gpu/ipu-v3/Makefile
+++ b/drivers/gpu/ipu-v3/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_IMX_IPUV3_CORE) += imx-ipu-v3.o
 
-imx-ipu-v3-objs := ipu-common.o ipu-dc.o ipu-di.o ipu-dp.o ipu-dmfc.o
+imx-ipu-v3-objs := ipu-common.o ipu-dc.o ipu-di.o ipu-dp.o ipu-dmfc.o ipu-smfc.o
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index 7e1f614e7fbc..2aecc474c478 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -874,8 +874,17 @@ static int ipu_submodules_init(struct ipu_soc *ipu,
 		goto err_dp;
 	}
 
+	ret = ipu_smfc_init(ipu, dev, ipu_base +
+			devtype->cm_ofs + IPU_CM_SMFC_REG_OFS);
+	if (ret) {
+		unit = "smfc";
+		goto err_smfc;
+	}
+
 	return 0;
 
+err_smfc:
+	ipu_dp_exit(ipu);
 err_dp:
 	ipu_dmfc_exit(ipu);
 err_dmfc:
@@ -947,6 +956,7 @@ EXPORT_SYMBOL_GPL(ipu_idmac_channel_irq);
 
 static void ipu_submodules_exit(struct ipu_soc *ipu)
 {
+	ipu_smfc_exit(ipu);
 	ipu_dp_exit(ipu);
 	ipu_dmfc_exit(ipu);
 	ipu_dc_exit(ipu);
diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h
index 5cb075fdd48c..acf181183f0b 100644
--- a/drivers/gpu/ipu-v3/ipu-prv.h
+++ b/drivers/gpu/ipu-v3/ipu-prv.h
@@ -151,6 +151,8 @@ struct ipuv3_channel {
 struct ipu_dc_priv;
 struct ipu_dmfc_priv;
 struct ipu_di;
+struct ipu_smfc_priv;
+
 struct ipu_devtype;
 
 struct ipu_soc {
@@ -178,6 +180,7 @@ struct ipu_soc {
 	struct ipu_dp_priv	*dp_priv;
 	struct ipu_dmfc_priv	*dmfc_priv;
 	struct ipu_di		*di_priv[2];
+	struct ipu_smfc_priv	*smfc_priv;
 };
 
 void ipu_srm_dp_sync_update(struct ipu_soc *ipu);
@@ -203,4 +206,7 @@ void ipu_dc_exit(struct ipu_soc *ipu);
 int ipu_cpmem_init(struct ipu_soc *ipu, struct device *dev, unsigned long base);
 void ipu_cpmem_exit(struct ipu_soc *ipu);
 
+int ipu_smfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base);
+void ipu_smfc_exit(struct ipu_soc *ipu);
+
 #endif				/* __IPU_PRV_H__ */
diff --git a/drivers/gpu/ipu-v3/ipu-smfc.c b/drivers/gpu/ipu-v3/ipu-smfc.c
new file mode 100644
index 000000000000..e4f85ad286fc
--- /dev/null
+++ b/drivers/gpu/ipu-v3/ipu-smfc.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2008-2010 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+#define DEBUG
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <video/imx-ipu-v3.h>
+
+#include "ipu-prv.h"
+
+struct ipu_smfc_priv {
+	void __iomem *base;
+	spinlock_t lock;
+};
+
+/*SMFC Registers */
+#define SMFC_MAP	0x0000
+#define SMFC_WMC	0x0004
+#define SMFC_BS		0x0008
+
+int ipu_smfc_set_burstsize(struct ipu_soc *ipu, int channel, int burstsize)
+{
+	struct ipu_smfc_priv *smfc = ipu->smfc_priv;
+	unsigned long flags;
+	u32 val, shift;
+
+	spin_lock_irqsave(&smfc->lock, flags);
+
+	shift = channel * 4;
+	val = readl(smfc->base + SMFC_BS);
+	val &= ~(0xf << shift);
+	val |= burstsize << shift;
+	writel(val, smfc->base + SMFC_BS);
+
+	spin_unlock_irqrestore(&smfc->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_smfc_set_burstsize);
+
+int ipu_smfc_map_channel(struct ipu_soc *ipu, int channel, int csi_id, int mipi_id)
+{
+	struct ipu_smfc_priv *smfc = ipu->smfc_priv;
+	unsigned long flags;
+	u32 val, shift;
+
+	spin_lock_irqsave(&smfc->lock, flags);
+
+	shift = channel * 3;
+	val = readl(smfc->base + SMFC_MAP);
+	val &= ~(0x7 << shift);
+	val |= ((csi_id << 2) | mipi_id) << shift;
+	writel(val, smfc->base + SMFC_MAP);
+
+	spin_unlock_irqrestore(&smfc->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ipu_smfc_map_channel);
+
+int ipu_smfc_init(struct ipu_soc *ipu, struct device *dev,
+		  unsigned long base)
+{
+	struct ipu_smfc_priv *smfc;
+
+	smfc = devm_kzalloc(dev, sizeof(*smfc), GFP_KERNEL);
+	if (!smfc)
+		return -ENOMEM;
+
+	ipu->smfc_priv = smfc;
+	spin_lock_init(&smfc->lock);
+
+	smfc->base = devm_ioremap(dev, base, PAGE_SIZE);
+	if (!smfc->base)
+		return -ENOMEM;
+
+	pr_debug("%s: ioremap 0x%08lx -> %p\n", __func__, base, smfc->base);
+
+	return 0;
+}
+
+void ipu_smfc_exit(struct ipu_soc *ipu)
+{
+}
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index c4d14ead5837..e639387eee3f 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -160,6 +160,12 @@ int ipu_dp_set_window_pos(struct ipu_dp *, u16 x_pos, u16 y_pos);
 int ipu_dp_set_global_alpha(struct ipu_dp *dp, bool enable, u8 alpha,
 		bool bg_chan);
 
+/*
+ * IPU Sensor Multiple FIFO Controller (SMFC) functions
+ */
+int ipu_smfc_map_channel(struct ipu_soc *ipu, int channel, int csi_id, int mipi_id);
+int ipu_smfc_set_burstsize(struct ipu_soc *ipu, int channel, int burstsize);
+
 #define IPU_CPMEM_WORD(word, ofs, size) ((((word) * 160 + (ofs)) << 8) | (size))
 
 #define IPU_FIELD_UBO		IPU_CPMEM_WORD(0, 46, 22)
-- 
cgit v1.2.3-71-gd317


From e90460970fde8bdccf2147e899cb9953943e16d2 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 16 May 2012 17:28:29 +0200
Subject: gpu: ipu-v3: Add ipu_idmac_get_current_buffer function

This function returns the currently active buffer (0 or 1)
of a double buffered IDMAC channel. It is to be used by the
CSI driver.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/ipu-common.c | 9 +++++++++
 include/video/imx-ipu-v3.h      | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index 2aecc474c478..9459f4091a27 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -661,6 +661,15 @@ int ipu_module_disable(struct ipu_soc *ipu, u32 mask)
 }
 EXPORT_SYMBOL_GPL(ipu_module_disable);
 
+int ipu_idmac_get_current_buffer(struct ipuv3_channel *channel)
+{
+	struct ipu_soc *ipu = channel->ipu;
+	unsigned int chno = channel->num;
+
+	return (ipu_cm_read(ipu, IPU_CHA_CUR_BUF(chno)) & idma_mask(chno)) ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(ipu_idmac_get_current_buffer);
+
 void ipu_idmac_select_buffer(struct ipuv3_channel *channel, u32 buf_num)
 {
 	struct ipu_soc *ipu = channel->ipu;
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index e639387eee3f..f3714301d740 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -103,6 +103,7 @@ int ipu_idmac_wait_busy(struct ipuv3_channel *channel, int ms);
 
 void ipu_idmac_set_double_buffer(struct ipuv3_channel *channel,
 		bool doublebuffer);
+int ipu_idmac_get_current_buffer(struct ipuv3_channel *channel);
 void ipu_idmac_select_buffer(struct ipuv3_channel *channel, u32 buf_num);
 
 /*
-- 
cgit v1.2.3-71-gd317


From 3f5a8a946d860b9022a23bf1ed5ab76f6fdd7e6e Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Tue, 22 May 2012 17:08:48 +0200
Subject: gpu: ipu-v3: Add CSI and SMFC module enable wrappers

IPU_CONF_..._EN bits are implementation details, not to be made public.
Add wrappers around ipu_module_enable/disable, so the CSI V4L2 driver
can enable/disable the CSI and SMFC modules.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/ipu-common.c | 24 ++++++++++++++++++++++++
 include/video/imx-ipu-v3.h      |  8 ++++++++
 2 files changed, 32 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index 9459f4091a27..1442b9e1cf8d 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -661,6 +661,30 @@ int ipu_module_disable(struct ipu_soc *ipu, u32 mask)
 }
 EXPORT_SYMBOL_GPL(ipu_module_disable);
 
+int ipu_csi_enable(struct ipu_soc *ipu, int csi)
+{
+	return ipu_module_enable(ipu, csi ? IPU_CONF_CSI1_EN : IPU_CONF_CSI0_EN);
+}
+EXPORT_SYMBOL_GPL(ipu_csi_enable);
+
+int ipu_csi_disable(struct ipu_soc *ipu, int csi)
+{
+	return ipu_module_disable(ipu, csi ? IPU_CONF_CSI1_EN : IPU_CONF_CSI0_EN);
+}
+EXPORT_SYMBOL_GPL(ipu_csi_disable);
+
+int ipu_smfc_enable(struct ipu_soc *ipu)
+{
+	return ipu_module_enable(ipu, IPU_CONF_SMFC_EN);
+}
+EXPORT_SYMBOL_GPL(ipu_smfc_enable);
+
+int ipu_smfc_disable(struct ipu_soc *ipu)
+{
+	return ipu_module_disable(ipu, IPU_CONF_SMFC_EN);
+}
+EXPORT_SYMBOL_GPL(ipu_smfc_disable);
+
 int ipu_idmac_get_current_buffer(struct ipuv3_channel *channel)
 {
 	struct ipu_soc *ipu = channel->ipu;
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index f3714301d740..fe6053e6ae6d 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -161,9 +161,17 @@ int ipu_dp_set_window_pos(struct ipu_dp *, u16 x_pos, u16 y_pos);
 int ipu_dp_set_global_alpha(struct ipu_dp *dp, bool enable, u8 alpha,
 		bool bg_chan);
 
+/*
+ * IPU CMOS Sensor Interface (csi) functions
+ */
+int ipu_csi_enable(struct ipu_soc *ipu, int csi);
+int ipu_csi_disable(struct ipu_soc *ipu, int csi);
+
 /*
  * IPU Sensor Multiple FIFO Controller (SMFC) functions
  */
+int ipu_smfc_enable(struct ipu_soc *ipu);
+int ipu_smfc_disable(struct ipu_soc *ipu);
 int ipu_smfc_map_channel(struct ipu_soc *ipu, int channel, int csi_id, int mipi_id);
 int ipu_smfc_set_burstsize(struct ipu_soc *ipu, int channel, int burstsize);
 
-- 
cgit v1.2.3-71-gd317


From d6ca8ca7ec555bdd3372687d0d775c837a09ff6e Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 23 May 2012 17:08:19 +0200
Subject: gpu: ipu-v3: Register the CSI modules

This patch registers the two CSI platform devices per IPU.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/ipu-common.c | 37 ++++++++++++++++++++++++++++++++-----
 include/video/imx-ipu-v3.h      |  1 +
 2 files changed, 33 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index 1442b9e1cf8d..719788ce7d9f 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -1014,6 +1014,7 @@ static void platform_device_unregister_children(struct platform_device *pdev)
 struct ipu_platform_reg {
 	struct ipu_client_platformdata pdata;
 	const char *name;
+	int reg_offset;
 };
 
 static const struct ipu_platform_reg client_reg[] = {
@@ -1035,13 +1036,29 @@ static const struct ipu_platform_reg client_reg[] = {
 			.dma[1] = -EINVAL,
 		},
 		.name = "imx-ipuv3-crtc",
+	}, {
+		.pdata = {
+			.csi = 0,
+			.dma[0] = IPUV3_CHANNEL_CSI0,
+			.dma[1] = -EINVAL,
+		},
+		.reg_offset = IPU_CM_CSI0_REG_OFS,
+		.name = "imx-ipuv3-camera",
+	}, {
+		.pdata = {
+			.csi = 1,
+			.dma[0] = IPUV3_CHANNEL_CSI1,
+			.dma[1] = -EINVAL,
+		},
+		.reg_offset = IPU_CM_CSI1_REG_OFS,
+		.name = "imx-ipuv3-camera",
 	},
 };
 
 static DEFINE_MUTEX(ipu_client_id_mutex);
 static int ipu_client_id;
 
-static int ipu_add_client_devices(struct ipu_soc *ipu)
+static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
 {
 	struct device *dev = ipu->dev;
 	unsigned i;
@@ -1055,9 +1072,19 @@ static int ipu_add_client_devices(struct ipu_soc *ipu)
 	for (i = 0; i < ARRAY_SIZE(client_reg); i++) {
 		const struct ipu_platform_reg *reg = &client_reg[i];
 		struct platform_device *pdev;
-
-		pdev = platform_device_register_data(dev, reg->name,
-			id++, &reg->pdata, sizeof(reg->pdata));
+		struct resource res;
+
+		if (reg->reg_offset) {
+			memset(&res, 0, sizeof(res));
+			res.flags = IORESOURCE_MEM;
+			res.start = ipu_base + ipu->devtype->cm_ofs + reg->reg_offset;
+			res.end = res.start + PAGE_SIZE - 1;
+			pdev = platform_device_register_resndata(dev, reg->name,
+				id++, &res, 1, &reg->pdata, sizeof(reg->pdata));
+		} else {
+			pdev = platform_device_register_data(dev, reg->name,
+				id++, &reg->pdata, sizeof(reg->pdata));
+		}
 
 		if (IS_ERR(pdev))
 			goto err_register;
@@ -1253,7 +1280,7 @@ static int ipu_probe(struct platform_device *pdev)
 	if (ret)
 		goto failed_submodules_init;
 
-	ret = ipu_add_client_devices(ipu);
+	ret = ipu_add_client_devices(ipu, ipu_base);
 	if (ret) {
 		dev_err(&pdev->dev, "adding client devices failed with %d\n",
 				ret);
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index fe6053e6ae6d..61d6d25caf95 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -331,6 +331,7 @@ static inline void ipu_cpmem_set_burstsize(struct ipu_ch_param __iomem *p,
 };
 
 struct ipu_client_platformdata {
+	int csi;
 	int di;
 	int dc;
 	int dp;
-- 
cgit v1.2.3-71-gd317


From 4f4cf5ad6fc1b16dc8dc9d750bb80b35eba5e98d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 May 2014 10:34:49 -0400
Subject: SUNRPC: Move congestion window constants to header file

I would like to use one of the RPC client's congestion algorithm
constants in transport-specific code.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xprt.h |  6 ++++++
 net/sunrpc/xprt.c           | 28 +++++++++-------------------
 2 files changed, 15 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3e5efb2b236e..5903d2c0ab4d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -24,6 +24,12 @@
 #define RPC_MAX_SLOT_TABLE_LIMIT	(65536U)
 #define RPC_MAX_SLOT_TABLE	RPC_MAX_SLOT_TABLE_LIMIT
 
+#define RPC_CWNDSHIFT		(8U)
+#define RPC_CWNDSCALE		(1U << RPC_CWNDSHIFT)
+#define RPC_INITCWND		RPC_CWNDSCALE
+#define RPC_MAXCWND(xprt)	((xprt)->max_reqs << RPC_CWNDSHIFT)
+#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
+
 /*
  * This describes a timeout strategy
  */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d173f79947c6..2d1d5a643b95 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -71,24 +71,6 @@ static void	 xprt_destroy(struct rpc_xprt *xprt);
 static DEFINE_SPINLOCK(xprt_list_lock);
 static LIST_HEAD(xprt_list);
 
-/*
- * The transport code maintains an estimate on the maximum number of out-
- * standing RPC requests, using a smoothed version of the congestion
- * avoidance implemented in 44BSD. This is basically the Van Jacobson
- * congestion algorithm: If a retransmit occurs, the congestion window is
- * halved; otherwise, it is incremented by 1/cwnd when
- *
- *	-	a reply is received and
- *	-	a full number of requests are outstanding and
- *	-	the congestion window hasn't been updated recently.
- */
-#define RPC_CWNDSHIFT		(8U)
-#define RPC_CWNDSCALE		(1U << RPC_CWNDSHIFT)
-#define RPC_INITCWND		RPC_CWNDSCALE
-#define RPC_MAXCWND(xprt)	((xprt)->max_reqs << RPC_CWNDSHIFT)
-
-#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
-
 /**
  * xprt_register_transport - register a transport implementation
  * @transport: transport to register
@@ -446,7 +428,15 @@ EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
  * @task: recently completed RPC request used to adjust window
  * @result: result code of completed RPC request
  *
- * We use a time-smoothed congestion estimator to avoid heavy oscillation.
+ * The transport code maintains an estimate on the maximum number of out-
+ * standing RPC requests, using a smoothed version of the congestion
+ * avoidance implemented in 44BSD. This is basically the Van Jacobson
+ * congestion algorithm: If a retransmit occurs, the congestion window is
+ * halved; otherwise, it is incremented by 1/cwnd when
+ *
+ *	-	a reply is received and
+ *	-	a full number of requests are outstanding and
+ *	-	the congestion window hasn't been updated recently.
  */
 void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result)
 {
-- 
cgit v1.2.3-71-gd317


From 11f0431be2f99c574a65c6dfc0ca205511500f29 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Tue, 3 Jun 2014 10:30:28 -0400
Subject: dm: remove symbol export for dm_set_device_limits

There is no need for code other than DM core to use dm_set_device_limits
so remove its EXPORT_SYMBOL_GPL.  Also, cleanup a couple whitespace nits.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-table.c         | 5 ++---
 drivers/md/dm.c               | 1 -
 include/linux/device-mapper.h | 8 +-------
 3 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 50601ec7017a..5f59f1e3e5b1 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -465,8 +465,8 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
 }
 EXPORT_SYMBOL(dm_get_device);
 
-int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
-			 sector_t start, sector_t len, void *data)
+static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+				sector_t start, sector_t len, void *data)
 {
 	struct queue_limits *limits = data;
 	struct block_device *bdev = dev->bdev;
@@ -499,7 +499,6 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 					  (unsigned int) (PAGE_SIZE >> 9));
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
 /*
  * Decrement a device's use count and remove it if necessary.
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3234a753a80d..bf1a1eaad9a9 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1498,7 +1498,6 @@ static int dm_merge_bvec(struct request_queue *q,
 	 * just one page.
 	 */
 	else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
-
 		max_size = 0;
 
 out:
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 0adca299f238..e1707de043ae 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -115,12 +115,6 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
 
 void dm_error(const char *message);
 
-/*
- * Combine device limits.
- */
-int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
-			 sector_t start, sector_t len, void *data);
-
 struct dm_dev {
 	struct block_device *bdev;
 	fmode_t mode;
@@ -132,7 +126,7 @@ struct dm_dev {
  * are opened/closed correctly.
  */
 int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
-						 struct dm_dev **result);
+		  struct dm_dev **result);
 void dm_put_device(struct dm_target *ti, struct dm_dev *d);
 
 /*
-- 
cgit v1.2.3-71-gd317


From b14903e10a06347234b387f7364f86aa07252d9f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 4 Jun 2014 16:46:00 +0200
Subject: regulator: add regulator_can_change_voltage stub

When CONFIG_REGULATOR is not set, we cannot call
regulator_can_change_voltage() from a device driver, which results
in a build error like

video/fbdev/omap2/dss/hdmi5.c: In function 'hdmi_init_regulator':
video/fbdev/omap2/dss/hdmi5.c:149:2: error: implicit declaration of function 'regulator_can_change_voltage' [-Werror=implicit-function-declaration]

even for drivers that don't require the regulator API normally.
Such a use was recently added in the omap2+ hdmi driver.

This avoids the problem by adding a static inline function
stub in the API header, as we have for most of the other
regulator functions as well.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Mark Brown <broonie@kernel.org>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/consumer.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index e530681bea70..d60b92a7fc25 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -391,6 +391,11 @@ static inline void regulator_bulk_free(int num_consumers,
 {
 }
 
+static inline int regulator_can_change_voltage(struct regulator *regulator)
+{
+	return 0;
+}
+
 static inline int regulator_set_voltage(struct regulator *regulator,
 					int min_uV, int max_uV)
 {
-- 
cgit v1.2.3-71-gd317


From 8385fd841468868e0b37a722530d75b0e8bfc5a8 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Wed, 4 Jun 2014 10:00:16 -0700
Subject: IB/core: Fix sparse warnings about redeclared functions

Fix a few functions that are declared with __attribute_const__ in the
ib_verbs.h header file but defined without it in verbs.c.  This gets rid
of the following sparse warnings:

    drivers/infiniband/core/verbs.c:51:5: error: symbol 'ib_rate_to_mult' redeclared with different type (originally declared at include/rdma/ib_verbs.h:469) - different modifiers
    drivers/infiniband/core/verbs.c:68:14: error: symbol 'mult_to_ib_rate' redeclared with different type (originally declared at include/rdma/ib_verbs.h:607) - different modifiers
    drivers/infiniband/core/verbs.c:85:5: error: symbol 'ib_rate_to_mbps' redeclared with different type (originally declared at include/rdma/ib_verbs.h:476) - different modifiers
    drivers/infiniband/core/verbs.c:111:1: error: symbol 'rdma_node_get_transport' redeclared with different type (originally declared at include/rdma/ib_verbs.h:84) - different modifiers

Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/core/verbs.c |  8 ++++----
 include/rdma/ib_verbs.h         | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 92525f855d82..c2b89cc5dbca 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -48,7 +48,7 @@
 
 #include "core_priv.h"
 
-int ib_rate_to_mult(enum ib_rate rate)
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
 {
 	switch (rate) {
 	case IB_RATE_2_5_GBPS: return  1;
@@ -65,7 +65,7 @@ int ib_rate_to_mult(enum ib_rate rate)
 }
 EXPORT_SYMBOL(ib_rate_to_mult);
 
-enum ib_rate mult_to_ib_rate(int mult)
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
 {
 	switch (mult) {
 	case 1:  return IB_RATE_2_5_GBPS;
@@ -82,7 +82,7 @@ enum ib_rate mult_to_ib_rate(int mult)
 }
 EXPORT_SYMBOL(mult_to_ib_rate);
 
-int ib_rate_to_mbps(enum ib_rate rate)
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
 {
 	switch (rate) {
 	case IB_RATE_2_5_GBPS: return 2500;
@@ -107,7 +107,7 @@ int ib_rate_to_mbps(enum ib_rate rate)
 }
 EXPORT_SYMBOL(ib_rate_to_mbps);
 
-enum rdma_transport_type
+__attribute_const__ enum rdma_transport_type
 rdma_node_get_transport(enum rdma_node_type node_type)
 {
 	switch (node_type) {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index acd825182977..e9da1bc89d25 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -80,8 +80,8 @@ enum rdma_transport_type {
 	RDMA_TRANSPORT_USNIC_UDP
 };
 
-enum rdma_transport_type
-rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__;
+__attribute_const__ enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type);
 
 enum rdma_link_layer {
 	IB_LINK_LAYER_UNSPECIFIED,
@@ -466,14 +466,14 @@ enum ib_rate {
  * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
  * @rate: rate to convert.
  */
-int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
 
 /**
  * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
  * For example, IB_RATE_2_5_GBPS will be converted to 2500.
  * @rate: rate to convert.
  */
-int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__;
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
 
 enum ib_mr_create_flags {
 	IB_MR_SIGNATURE_EN = 1,
@@ -604,7 +604,7 @@ struct ib_mr_status {
  * enum.
  * @mult: multiple to convert.
  */
-enum ib_rate mult_to_ib_rate(int mult) __attribute_const__;
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
 
 struct ib_ah_attr {
 	struct ib_global_route	grh;
-- 
cgit v1.2.3-71-gd317


From beba4bb096201ceec0e8cfb7ce3172a53015bdaf Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 4 Jun 2014 14:29:33 -0400
Subject: tracing: Add __get_dynamic_array_len() macro for trace events

If a trace event uses a dynamic array for something other than a string
then there's currently no way the TP_printk() can figure out what size
it is. A __get_dynamic_array_len() is required to know the length.

This also simplifies the __get_bitmask() macro which required it as well,
but instead just hardcoded it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/ftrace.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 9b7a989dcbcc..0fd06fef9fac 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -203,6 +203,10 @@
 #define __get_dynamic_array(field)	\
 		((void *)__entry + (__entry->__data_loc_##field & 0xffff))
 
+#undef __get_dynamic_array_len
+#define __get_dynamic_array_len(field)	\
+		((__entry->__data_loc_##field >> 16) & 0xffff)
+
 #undef __get_str
 #define __get_str(field) (char *)__get_dynamic_array(field)
 
@@ -211,7 +215,7 @@
 	({								\
 		void *__bitmask = __get_dynamic_array(field);		\
 		unsigned int __bitmask_size;				\
-		__bitmask_size = (__entry->__data_loc_##field >> 16) & 0xffff; \
+		__bitmask_size = __get_dynamic_array_len(field);	\
 		ftrace_print_bitmask_seq(p, __bitmask, __bitmask_size);	\
 	})
 
@@ -636,6 +640,7 @@ static inline void ftrace_test_probe_##call(void)			\
 #undef __print_symbolic
 #undef __print_hex
 #undef __get_dynamic_array
+#undef __get_dynamic_array_len
 #undef __get_str
 #undef __get_bitmask
 
@@ -700,6 +705,10 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
 #define __get_dynamic_array(field)	\
 		((void *)__entry + (__entry->__data_loc_##field & 0xffff))
 
+#undef __get_dynamic_array_len
+#define __get_dynamic_array_len(field)	\
+		((__entry->__data_loc_##field >> 16) & 0xffff)
+
 #undef __get_str
 #define __get_str(field) (char *)__get_dynamic_array(field)
 
-- 
cgit v1.2.3-71-gd317


From a2deb8b1e7a403fd63e08f87eb95217dd983e0d3 Mon Sep 17 00:00:00 2001
From: "Andrew J. Bennieston" <andrew.bennieston@citrix.com>
Date: Wed, 4 Jun 2014 10:30:46 +0100
Subject: xen-net{back, front}: Document multi-queue feature in netif.h

Document the multi-queue feature in terms of XenStore keys to be written
by the backend and by the frontend.

Signed-off-by: Andrew J. Bennieston <andrew.bennieston@citrix.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/xen/interface/io/netif.h | 53 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

(limited to 'include')

diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index c50061db6098..70054cc0708d 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -50,6 +50,59 @@
  * node as before.
  */
 
+/*
+ * Multiple transmit and receive queues:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vif, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues", set to the number they wish to use, which
+ * must be greater than zero, and no more than the value reported by the backend
+ * in "multi-queue-max-queues".
+ *
+ * Queues replicate the shared rings and event channels.
+ * "feature-split-event-channels" may optionally be used when using
+ * multiple queues, but is not mandatory.
+ *
+ * Each queue consists of one shared ring pair, i.e. there must be the same
+ * number of tx and rx rings.
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys,
+ * instead writing those keys under sub-keys having the name "queue-N" where
+ * N is the integer ID of the queue for which those keys belong. Queues
+ * are indexed from zero. For example, a frontend with two queues and split
+ * event channels must write the following set of queue-related keys:
+ *
+ * /local/domain/1/device/vif/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vif/0/queue-0 = ""
+ * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>"
+ * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>"
+ * /local/domain/1/device/vif/0/queue-1 = ""
+ * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>"
+ * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1"
+ * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>"
+ * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>"
+ *
+ * If there is any inconsistency in the XenStore data, the backend may
+ * choose not to connect any queues, instead treating the request as an
+ * error. This includes scenarios where more (or fewer) queues were
+ * requested than the frontend provided details for.
+ *
+ * Mapping of packets to queues is considered to be a function of the
+ * transmitting system (backend or frontend) and is not negotiated
+ * between the two. Guests are free to transmit packets on any queue
+ * they choose, provided it has been set up correctly. Guests must be
+ * prepared to receive packets on any queue they have requested be set up.
+ */
+
 /*
  * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
  * offload off or on. If it is missing then the feature is assumed to be on.
-- 
cgit v1.2.3-71-gd317


From 4f71d0cb76339a10fd445b0b281acc45c71b6271 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 4 Jun 2014 16:02:28 +1000
Subject: drm/dp: add a hw mutex around the transfer functions. (v2)

This should avoid races between connector probing and HPD
irqs in the future, currently mode_config.mutex blocks this
possibility.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_dp_helper.c      | 21 ++++++++++++++-------
 drivers/gpu/drm/i915/intel_dp.c      | 10 +++++-----
 drivers/gpu/drm/radeon/atombios_dp.c |  5 +++--
 drivers/gpu/drm/radeon/radeon_i2c.c  |  2 +-
 drivers/gpu/drm/tegra/dpaux.c        |  4 ++--
 include/drm/drm_dp_helper.h          |  7 ++++---
 6 files changed, 29 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 494219ccdf96..08e33b8b13a4 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -216,7 +216,7 @@ i2c_dp_aux_prepare_bus(struct i2c_adapter *adapter)
  *
  * IMPORTANT:
  * This interface is deprecated, please switch to the new dp aux helpers and
- * drm_dp_aux_register_i2c_bus().
+ * drm_dp_aux_register().
  */
 int
 i2c_dp_aux_add_bus(struct i2c_adapter *adapter)
@@ -382,7 +382,10 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 	 * transactions.
 	 */
 	for (retry = 0; retry < 7; retry++) {
+
+		mutex_lock(&aux->hw_mutex);
 		err = aux->transfer(aux, &msg);
+		mutex_unlock(&aux->hw_mutex);
 		if (err < 0) {
 			if (err == -EBUSY)
 				continue;
@@ -596,7 +599,9 @@ static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
 	 * before giving up the AUX transaction.
 	 */
 	for (retry = 0; retry < 7; retry++) {
+		mutex_lock(&aux->hw_mutex);
 		err = aux->transfer(aux, msg);
+		mutex_unlock(&aux->hw_mutex);
 		if (err < 0) {
 			if (err == -EBUSY)
 				continue;
@@ -729,13 +734,15 @@ static const struct i2c_algorithm drm_dp_i2c_algo = {
 };
 
 /**
- * drm_dp_aux_register_i2c_bus() - register an I2C adapter for I2C-over-AUX
+ * drm_dp_aux_register() - initialise and register aux channel
  * @aux: DisplayPort AUX channel
  *
  * Returns 0 on success or a negative error code on failure.
  */
-int drm_dp_aux_register_i2c_bus(struct drm_dp_aux *aux)
+int drm_dp_aux_register(struct drm_dp_aux *aux)
 {
+	mutex_init(&aux->hw_mutex);
+
 	aux->ddc.algo = &drm_dp_i2c_algo;
 	aux->ddc.algo_data = aux;
 	aux->ddc.retries = 3;
@@ -750,14 +757,14 @@ int drm_dp_aux_register_i2c_bus(struct drm_dp_aux *aux)
 
 	return i2c_add_adapter(&aux->ddc);
 }
-EXPORT_SYMBOL(drm_dp_aux_register_i2c_bus);
+EXPORT_SYMBOL(drm_dp_aux_register);
 
 /**
- * drm_dp_aux_unregister_i2c_bus() - unregister an I2C-over-AUX adapter
+ * drm_dp_aux_unregister() - unregister an AUX adapter
  * @aux: DisplayPort AUX channel
  */
-void drm_dp_aux_unregister_i2c_bus(struct drm_dp_aux *aux)
+void drm_dp_aux_unregister(struct drm_dp_aux *aux)
 {
 	i2c_del_adapter(&aux->ddc);
 }
-EXPORT_SYMBOL(drm_dp_aux_unregister_i2c_bus);
+EXPORT_SYMBOL(drm_dp_aux_unregister);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index c4d883921282..d01bb430b5bc 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -702,9 +702,9 @@ intel_dp_aux_init(struct intel_dp *intel_dp, struct intel_connector *connector)
 	DRM_DEBUG_KMS("registering %s bus for %s\n", name,
 		      connector->base.kdev->kobj.name);
 
-	ret = drm_dp_aux_register_i2c_bus(&intel_dp->aux);
+	ret = drm_dp_aux_register(&intel_dp->aux);
 	if (ret < 0) {
-		DRM_ERROR("drm_dp_aux_register_i2c_bus() for %s failed (%d)\n",
+		DRM_ERROR("drm_dp_aux_register() for %s failed (%d)\n",
 			  name, ret);
 		return;
 	}
@@ -714,7 +714,7 @@ intel_dp_aux_init(struct intel_dp *intel_dp, struct intel_connector *connector)
 				intel_dp->aux.ddc.dev.kobj.name);
 	if (ret < 0) {
 		DRM_ERROR("sysfs_create_link() for %s failed (%d)\n", name, ret);
-		drm_dp_aux_unregister_i2c_bus(&intel_dp->aux);
+		drm_dp_aux_unregister(&intel_dp->aux);
 	}
 }
 
@@ -3662,7 +3662,7 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
-	drm_dp_aux_unregister_i2c_bus(&intel_dp->aux);
+	drm_dp_aux_unregister(&intel_dp->aux);
 	drm_encoder_cleanup(encoder);
 	if (is_edp(intel_dp)) {
 		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
@@ -4244,7 +4244,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 	intel_dp->psr_setup_done = false;
 
 	if (!intel_edp_init_connector(intel_dp, intel_connector, &power_seq)) {
-		drm_dp_aux_unregister_i2c_bus(&intel_dp->aux);
+		drm_dp_aux_unregister(&intel_dp->aux);
 		if (is_edp(intel_dp)) {
 			cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
 			mutex_lock(&dev->mode_config.connection_mutex);
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 225f6c66effa..a54c44181a0f 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -222,11 +222,12 @@ void radeon_dp_aux_init(struct radeon_connector *radeon_connector)
 	radeon_connector->ddc_bus->rec.hpd = radeon_connector->hpd.hpd;
 	radeon_connector->ddc_bus->aux.dev = radeon_connector->base.kdev;
 	radeon_connector->ddc_bus->aux.transfer = radeon_dp_aux_transfer;
-	ret = drm_dp_aux_register_i2c_bus(&radeon_connector->ddc_bus->aux);
+
+	ret = drm_dp_aux_register(&radeon_connector->ddc_bus->aux);
 	if (!ret)
 		radeon_connector->ddc_bus->has_aux = true;
 
-	WARN(ret, "drm_dp_aux_register_i2c_bus() failed with error %d\n", ret);
+	WARN(ret, "drm_dp_aux_register() failed with error %d\n", ret);
 }
 
 /***** general DP utility functions *****/
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 427ee4d6d0b5..add622008407 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -988,7 +988,7 @@ void radeon_i2c_destroy(struct radeon_i2c_chan *i2c)
 		return;
 	i2c_del_adapter(&i2c->adapter);
 	if (i2c->has_aux)
-		drm_dp_aux_unregister_i2c_bus(&i2c->aux);
+		drm_dp_aux_unregister(&i2c->aux);
 	kfree(i2c);
 }
 
diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index 005c19bd92df..2b725ba7facc 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -332,7 +332,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	dpaux->aux.transfer = tegra_dpaux_transfer;
 	dpaux->aux.dev = &pdev->dev;
 
-	err = drm_dp_aux_register_i2c_bus(&dpaux->aux);
+	err = drm_dp_aux_register(&dpaux->aux);
 	if (err < 0)
 		return err;
 
@@ -355,7 +355,7 @@ static int tegra_dpaux_remove(struct platform_device *pdev)
 {
 	struct tegra_dpaux *dpaux = platform_get_drvdata(pdev);
 
-	drm_dp_aux_unregister_i2c_bus(&dpaux->aux);
+	drm_dp_aux_unregister(&dpaux->aux);
 
 	mutex_lock(&dpaux_lock);
 	list_del(&dpaux->list);
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index 488b41635dec..a21568bf1514 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -514,6 +514,7 @@ struct drm_dp_aux_msg {
  * @name: user-visible name of this AUX channel and the I2C-over-AUX adapter
  * @ddc: I2C adapter that can be used for I2C-over-AUX communication
  * @dev: pointer to struct device that is the parent for this AUX channel
+ * @hw_mutex: internal mutex used for locking transfers
  * @transfer: transfers a message representing a single AUX transaction
  *
  * The .dev field should be set to a pointer to the device that implements
@@ -546,7 +547,7 @@ struct drm_dp_aux {
 	const char *name;
 	struct i2c_adapter ddc;
 	struct device *dev;
-
+	struct mutex hw_mutex;
 	ssize_t (*transfer)(struct drm_dp_aux *aux,
 			    struct drm_dp_aux_msg *msg);
 };
@@ -605,7 +606,7 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link);
 int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link);
 int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link);
 
-int drm_dp_aux_register_i2c_bus(struct drm_dp_aux *aux);
-void drm_dp_aux_unregister_i2c_bus(struct drm_dp_aux *aux);
+int drm_dp_aux_register(struct drm_dp_aux *aux);
+void drm_dp_aux_unregister(struct drm_dp_aux *aux);
 
 #endif /* _DRM_DP_HELPER_H_ */
-- 
cgit v1.2.3-71-gd317


From 51fd371bbaf94018a1223b4e2cf20b9880fd92d4 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Tue, 19 Nov 2013 12:10:12 -0500
Subject: drm: convert crtc and connection_mutex to ww_mutex (v5)

For atomic, it will be quite necessary to not need to care so much
about locking order.  And 'state' gives us a convenient place to stash a
ww_ctx for any sort of update that needs to grab multiple crtc locks.

Because we will want to eventually make locking even more fine grained
(giving locks to planes, connectors, etc), split out drm_modeset_lock
and drm_modeset_acquire_ctx to track acquired locks.

Atomic will use this to keep track of which locks have been acquired
in a transaction.

v1: original
v2: remove a few things not needed until atomic, for now
v3: update for v3 of connection_mutex patch..
v4: squash in docbook
v5: doc tweaks/fixes

Signed-off-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 Documentation/DocBook/drm.tmpl        |   6 +
 drivers/gpu/drm/Makefile              |   3 +-
 drivers/gpu/drm/drm_crtc.c            |  85 +++++++++---
 drivers/gpu/drm/drm_crtc_helper.c     |   3 +-
 drivers/gpu/drm/drm_fb_helper.c       |   4 +
 drivers/gpu/drm/drm_modeset_lock.c    | 247 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/drm_plane_helper.c    |   2 +-
 drivers/gpu/drm/i915/intel_crt.c      |   5 +-
 drivers/gpu/drm/i915/intel_display.c  |  56 +++++---
 drivers/gpu/drm/i915/intel_dp.c       |  14 +-
 drivers/gpu/drm/i915/intel_drv.h      |   6 +-
 drivers/gpu/drm/i915/intel_opregion.c |   4 +-
 drivers/gpu/drm/i915/intel_overlay.c  |   4 +-
 drivers/gpu/drm/i915/intel_panel.c    |   8 +-
 drivers/gpu/drm/i915/intel_sprite.c   |   2 +-
 drivers/gpu/drm/i915/intel_tv.c       |   5 +-
 drivers/gpu/drm/omapdrm/omap_crtc.c   |  10 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c   |   8 +-
 include/drm/drmP.h                    |   5 -
 include/drm/drm_crtc.h                |  15 ++-
 include/drm/drm_modeset_lock.h        | 126 +++++++++++++++++
 21 files changed, 531 insertions(+), 87 deletions(-)
 create mode 100644 drivers/gpu/drm/drm_modeset_lock.c
 create mode 100644 include/drm/drm_modeset_lock.h

(limited to 'include')

diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 00f1c25e53df..efef63717ef6 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -1791,6 +1791,12 @@ void intel_crt_init(struct drm_device *dev)
       <title>KMS API Functions</title>
 !Edrivers/gpu/drm/drm_crtc.c
     </sect2>
+    <sect2>
+      <title>KMS Locking</title>
+!Pdrivers/gpu/drm/drm_modeset_lock.c kms locking
+!Iinclude/drm/drm_modeset_lock.h
+!Edrivers/gpu/drm/drm_modeset_lock.c
+    </sect2>
   </sect1>
 
   <!-- Internals: kms helper functions -->
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 863db8415c22..dd2ba4269740 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -13,7 +13,8 @@ drm-y       :=	drm_auth.o drm_buffer.o drm_bufs.o drm_cache.o \
 		drm_crtc.o drm_modes.o drm_edid.o \
 		drm_info.o drm_debugfs.o drm_encoder_slave.o \
 		drm_trace_points.o drm_global.o drm_prime.o \
-		drm_rect.o drm_vma_manager.o drm_flip_work.o
+		drm_rect.o drm_vma_manager.o drm_flip_work.o \
+		drm_modeset_lock.o
 
 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 drm-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_gem_cma_helper.o
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index f3b98d4b6f46..43735f38cd17 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -37,6 +37,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_lock.h>
 
 #include "drm_crtc_internal.h"
 
@@ -50,14 +51,42 @@
  */
 void drm_modeset_lock_all(struct drm_device *dev)
 {
-	struct drm_crtc *crtc;
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_modeset_acquire_ctx *ctx;
+	int ret;
 
-	mutex_lock(&dev->mode_config.mutex);
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (WARN_ON(!ctx))
+		return;
 
-	mutex_lock(&dev->mode_config.connection_mutex);
+	mutex_lock(&config->mutex);
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		mutex_lock_nest_lock(&crtc->mutex, &dev->mode_config.mutex);
+	drm_modeset_acquire_init(ctx, 0);
+
+retry:
+	ret = drm_modeset_lock(&config->connection_mutex, ctx);
+	if (ret)
+		goto fail;
+	ret = drm_modeset_lock_all_crtcs(dev, ctx);
+	if (ret)
+		goto fail;
+
+	WARN_ON(config->acquire_ctx);
+
+	/* now we hold the locks, so now that it is safe, stash the
+	 * ctx for drm_modeset_unlock_all():
+	 */
+	config->acquire_ctx = ctx;
+
+	drm_warn_on_modeset_not_all_locked(dev);
+
+	return;
+
+fail:
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(ctx);
+		goto retry;
+	}
 }
 EXPORT_SYMBOL(drm_modeset_lock_all);
 
@@ -69,12 +98,17 @@ EXPORT_SYMBOL(drm_modeset_lock_all);
  */
 void drm_modeset_unlock_all(struct drm_device *dev)
 {
-	struct drm_crtc *crtc;
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		mutex_unlock(&crtc->mutex);
+	if (WARN_ON(!ctx))
+		return;
+
+	config->acquire_ctx = NULL;
+	drm_modeset_drop_locks(ctx);
+	drm_modeset_acquire_fini(ctx);
 
-	mutex_unlock(&dev->mode_config.connection_mutex);
+	kfree(ctx);
 
 	mutex_unlock(&dev->mode_config.mutex);
 }
@@ -95,9 +129,9 @@ void drm_warn_on_modeset_not_all_locked(struct drm_device *dev)
 		return;
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		WARN_ON(!mutex_is_locked(&crtc->mutex));
+		WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
 
-	WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
 }
 EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked);
@@ -671,6 +705,8 @@ void drm_framebuffer_remove(struct drm_framebuffer *fb)
 }
 EXPORT_SYMBOL(drm_framebuffer_remove);
 
+DEFINE_WW_CLASS(crtc_ww_class);
+
 /**
  * drm_crtc_init_with_planes - Initialise a new CRTC object with
  *    specified primary and cursor planes.
@@ -690,6 +726,7 @@ int drm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc,
 			      void *cursor,
 			      const struct drm_crtc_funcs *funcs)
 {
+	struct drm_mode_config *config = &dev->mode_config;
 	int ret;
 
 	crtc->dev = dev;
@@ -697,8 +734,9 @@ int drm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc,
 	crtc->invert_dimensions = false;
 
 	drm_modeset_lock_all(dev);
-	mutex_init(&crtc->mutex);
-	mutex_lock_nest_lock(&crtc->mutex, &dev->mode_config.mutex);
+	drm_modeset_lock_init(&crtc->mutex);
+	/* dropped by _unlock_all(): */
+	drm_modeset_lock(&crtc->mutex, config->acquire_ctx);
 
 	ret = drm_mode_object_get(dev, &crtc->base, DRM_MODE_OBJECT_CRTC);
 	if (ret)
@@ -706,8 +744,8 @@ int drm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc,
 
 	crtc->base.properties = &crtc->properties;
 
-	list_add_tail(&crtc->head, &dev->mode_config.crtc_list);
-	dev->mode_config.num_crtc++;
+	list_add_tail(&crtc->head, &config->crtc_list);
+	config->num_crtc++;
 
 	crtc->primary = primary;
 	if (primary)
@@ -735,6 +773,8 @@ void drm_crtc_cleanup(struct drm_crtc *crtc)
 	kfree(crtc->gamma_store);
 	crtc->gamma_store = NULL;
 
+	drm_modeset_lock_fini(&crtc->mutex);
+
 	drm_mode_object_put(dev, &crtc->base);
 	list_del(&crtc->head);
 	dev->mode_config.num_crtc--;
@@ -1798,7 +1838,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 	DRM_DEBUG_KMS("[CONNECTOR:%d:?]\n", out_resp->connector_id);
 
 	mutex_lock(&dev->mode_config.mutex);
-	mutex_lock(&dev->mode_config.connection_mutex);
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 
 	connector = drm_connector_find(dev, out_resp->connector_id);
 	if (!connector) {
@@ -1897,7 +1937,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 	out_resp->count_encoders = encoders_count;
 
 out:
-	mutex_unlock(&dev->mode_config.connection_mutex);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 	mutex_unlock(&dev->mode_config.mutex);
 
 	return ret;
@@ -2481,7 +2521,7 @@ static int drm_mode_cursor_common(struct drm_device *dev,
 		return -ENOENT;
 	}
 
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 	if (req->flags & DRM_MODE_CURSOR_BO) {
 		if (!crtc->funcs->cursor_set && !crtc->funcs->cursor_set2) {
 			ret = -ENXIO;
@@ -2505,7 +2545,7 @@ static int drm_mode_cursor_common(struct drm_device *dev,
 		}
 	}
 out:
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 
 	return ret;
 
@@ -4198,7 +4238,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 	if (!crtc)
 		return -ENOENT;
 
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 	if (crtc->primary->fb == NULL) {
 		/* The framebuffer is currently unbound, presumably
 		 * due to a hotplug event, that userspace has not
@@ -4282,7 +4322,7 @@ out:
 		drm_framebuffer_unreference(fb);
 	if (old_fb)
 		drm_framebuffer_unreference(old_fb);
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 
 	return ret;
 }
@@ -4647,7 +4687,7 @@ EXPORT_SYMBOL(drm_format_vert_chroma_subsampling);
 void drm_mode_config_init(struct drm_device *dev)
 {
 	mutex_init(&dev->mode_config.mutex);
-	mutex_init(&dev->mode_config.connection_mutex);
+	drm_modeset_lock_init(&dev->mode_config.connection_mutex);
 	mutex_init(&dev->mode_config.idr_mutex);
 	mutex_init(&dev->mode_config.fb_lock);
 	INIT_LIST_HEAD(&dev->mode_config.fb_list);
@@ -4747,5 +4787,6 @@ void drm_mode_config_cleanup(struct drm_device *dev)
 	}
 
 	idr_destroy(&dev->mode_config.crtc_idr);
+	drm_modeset_lock_fini(&dev->mode_config.connection_mutex);
 }
 EXPORT_SYMBOL(drm_mode_config_cleanup);
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index b55d27c872f6..eb1c062e04b2 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -89,8 +89,7 @@ bool drm_helper_encoder_in_use(struct drm_encoder *encoder)
 	struct drm_device *dev = encoder->dev;
 
 	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
-	WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
-
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
 		if (connector->encoder == encoder)
 			return true;
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 1ddc17464250..43329cee299f 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -331,6 +331,10 @@ static bool drm_fb_helper_force_kernel_mode(void)
 		if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 			continue;
 
+		/* NOTE: we use lockless flag below to avoid grabbing other
+		 * modeset locks.  So just trylock the underlying mutex
+		 * directly:
+		 */
 		if (!mutex_trylock(&dev->mode_config.mutex)) {
 			error = true;
 			continue;
diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
new file mode 100644
index 000000000000..7c2497dea1e9
--- /dev/null
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2014 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_modeset_lock.h>
+
+/**
+ * DOC: kms locking
+ *
+ * As KMS moves toward more fine grained locking, and atomic ioctl where
+ * userspace can indirectly control locking order, it becomes necessary
+ * to use ww_mutex and acquire-contexts to avoid deadlocks.  But because
+ * the locking is more distributed around the driver code, we want a bit
+ * of extra utility/tracking out of our acquire-ctx.  This is provided
+ * by drm_modeset_lock / drm_modeset_acquire_ctx.
+ *
+ * For basic principles of ww_mutex, see: Documentation/ww-mutex-design.txt
+ *
+ * The basic usage pattern is to:
+ *
+ *     drm_modeset_acquire_init(&ctx)
+ *   retry:
+ *     foreach (lock in random_ordered_set_of_locks) {
+ *       ret = drm_modeset_lock(lock, &ctx)
+ *       if (ret == -EDEADLK) {
+ *          drm_modeset_backoff(&ctx);
+ *          goto retry;
+ *       }
+ *     }
+ *
+ *     ... do stuff ...
+ *
+ *     drm_modeset_drop_locks(&ctx);
+ *     drm_modeset_acquire_fini(&ctx);
+ */
+
+
+/**
+ * drm_modeset_acquire_init - initialize acquire context
+ * @ctx: the acquire context
+ * @flags: for future
+ */
+void drm_modeset_acquire_init(struct drm_modeset_acquire_ctx *ctx,
+		uint32_t flags)
+{
+	ww_acquire_init(&ctx->ww_ctx, &crtc_ww_class);
+	INIT_LIST_HEAD(&ctx->locked);
+}
+EXPORT_SYMBOL(drm_modeset_acquire_init);
+
+/**
+ * drm_modeset_acquire_fini - cleanup acquire context
+ * @ctx: the acquire context
+ */
+void drm_modeset_acquire_fini(struct drm_modeset_acquire_ctx *ctx)
+{
+	ww_acquire_fini(&ctx->ww_ctx);
+}
+EXPORT_SYMBOL(drm_modeset_acquire_fini);
+
+/**
+ * drm_modeset_drop_locks - drop all locks
+ * @ctx: the acquire context
+ *
+ * Drop all locks currently held against this acquire context.
+ */
+void drm_modeset_drop_locks(struct drm_modeset_acquire_ctx *ctx)
+{
+	WARN_ON(ctx->contended);
+	while (!list_empty(&ctx->locked)) {
+		struct drm_modeset_lock *lock;
+
+		lock = list_first_entry(&ctx->locked,
+				struct drm_modeset_lock, head);
+
+		drm_modeset_unlock(lock);
+	}
+}
+EXPORT_SYMBOL(drm_modeset_drop_locks);
+
+static inline int modeset_lock(struct drm_modeset_lock *lock,
+		struct drm_modeset_acquire_ctx *ctx,
+		bool interruptible, bool slow)
+{
+	int ret;
+
+	WARN_ON(ctx->contended);
+
+	if (interruptible && slow) {
+		ret = ww_mutex_lock_slow_interruptible(&lock->mutex, &ctx->ww_ctx);
+	} else if (interruptible) {
+		ret = ww_mutex_lock_interruptible(&lock->mutex, &ctx->ww_ctx);
+	} else if (slow) {
+		ww_mutex_lock_slow(&lock->mutex, &ctx->ww_ctx);
+		ret = 0;
+	} else {
+		ret = ww_mutex_lock(&lock->mutex, &ctx->ww_ctx);
+	}
+	if (!ret) {
+		WARN_ON(!list_empty(&lock->head));
+		list_add(&lock->head, &ctx->locked);
+	} else if (ret == -EALREADY) {
+		/* we already hold the lock.. this is fine.  For atomic
+		 * we will need to be able to drm_modeset_lock() things
+		 * without having to keep track of what is already locked
+		 * or not.
+		 */
+		ret = 0;
+	} else if (ret == -EDEADLK) {
+		ctx->contended = lock;
+	}
+
+	return ret;
+}
+
+static int modeset_backoff(struct drm_modeset_acquire_ctx *ctx,
+		bool interruptible)
+{
+	struct drm_modeset_lock *contended = ctx->contended;
+
+	ctx->contended = NULL;
+
+	if (WARN_ON(!contended))
+		return 0;
+
+	drm_modeset_drop_locks(ctx);
+
+	return modeset_lock(contended, ctx, interruptible, true);
+}
+
+/**
+ * drm_modeset_backoff - deadlock avoidance backoff
+ * @ctx: the acquire context
+ *
+ * If deadlock is detected (ie. drm_modeset_lock() returns -EDEADLK),
+ * you must call this function to drop all currently held locks and
+ * block until the contended lock becomes available.
+ */
+void drm_modeset_backoff(struct drm_modeset_acquire_ctx *ctx)
+{
+	modeset_backoff(ctx, false);
+}
+EXPORT_SYMBOL(drm_modeset_backoff);
+
+/**
+ * drm_modeset_backoff_interruptible - deadlock avoidance backoff
+ * @ctx: the acquire context
+ *
+ * Interruptible version of drm_modeset_backoff()
+ */
+int drm_modeset_backoff_interruptible(struct drm_modeset_acquire_ctx *ctx)
+{
+	return modeset_backoff(ctx, true);
+}
+EXPORT_SYMBOL(drm_modeset_backoff_interruptible);
+
+/**
+ * drm_modeset_lock - take modeset lock
+ * @lock: lock to take
+ * @ctx: acquire ctx
+ *
+ * If ctx is not NULL, then its ww acquire context is used and the
+ * lock will be tracked by the context and can be released by calling
+ * drm_modeset_drop_locks().  If -EDEADLK is returned, this means a
+ * deadlock scenario has been detected and it is an error to attempt
+ * to take any more locks without first calling drm_modeset_backoff().
+ */
+int drm_modeset_lock(struct drm_modeset_lock *lock,
+		struct drm_modeset_acquire_ctx *ctx)
+{
+	if (ctx)
+		return modeset_lock(lock, ctx, false, false);
+
+	ww_mutex_lock(&lock->mutex, NULL);
+	return 0;
+}
+EXPORT_SYMBOL(drm_modeset_lock);
+
+/**
+ * drm_modeset_lock_interruptible - take modeset lock
+ * @lock: lock to take
+ * @ctx: acquire ctx
+ *
+ * Interruptible version of drm_modeset_lock()
+ */
+int drm_modeset_lock_interruptible(struct drm_modeset_lock *lock,
+		struct drm_modeset_acquire_ctx *ctx)
+{
+	if (ctx)
+		return modeset_lock(lock, ctx, true, false);
+
+	return ww_mutex_lock_interruptible(&lock->mutex, NULL);
+}
+EXPORT_SYMBOL(drm_modeset_lock_interruptible);
+
+/**
+ * drm_modeset_unlock - drop modeset lock
+ * @lock: lock to release
+ */
+void drm_modeset_unlock(struct drm_modeset_lock *lock)
+{
+	list_del_init(&lock->head);
+	ww_mutex_unlock(&lock->mutex);
+}
+EXPORT_SYMBOL(drm_modeset_unlock);
+
+/* Temporary.. until we have sufficiently fine grained locking, there
+ * are a couple scenarios where it is convenient to grab all crtc locks.
+ * It is planned to remove this:
+ */
+int drm_modeset_lock_all_crtcs(struct drm_device *dev,
+		struct drm_modeset_acquire_ctx *ctx)
+{
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_crtc *crtc;
+	int ret = 0;
+
+	list_for_each_entry(crtc, &config->crtc_list, head) {
+		ret = drm_modeset_lock(&crtc->mutex, ctx);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_modeset_lock_all_crtcs);
diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c
index f3e0a23abf4e..1b15643b4586 100644
--- a/drivers/gpu/drm/drm_plane_helper.c
+++ b/drivers/gpu/drm/drm_plane_helper.c
@@ -60,7 +60,7 @@ static int get_connectors_for_crtc(struct drm_crtc *crtc,
 	 * need to grab the connection_mutex here to be able to make these
 	 * checks.
 	 */
-	WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
 		if (connector->encoder && connector->encoder->crtc == crtc) {
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 1fc91df58296..5a045d3bd77e 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -630,6 +630,7 @@ intel_crt_detect(struct drm_connector *connector, bool force)
 	enum intel_display_power_domain power_domain;
 	enum drm_connector_status status;
 	struct intel_load_detect_pipe tmp;
+	struct drm_modeset_acquire_ctx ctx;
 
 	intel_runtime_pm_get(dev_priv);
 
@@ -673,12 +674,12 @@ intel_crt_detect(struct drm_connector *connector, bool force)
 	}
 
 	/* for pre-945g platforms use load detect */
-	if (intel_get_load_detect_pipe(connector, NULL, &tmp)) {
+	if (intel_get_load_detect_pipe(connector, NULL, &tmp, &ctx)) {
 		if (intel_crt_detect_ddc(connector))
 			status = connector_status_connected;
 		else
 			status = intel_crt_load_detect(crt);
-		intel_release_load_detect_pipe(connector, &tmp);
+		intel_release_load_detect_pipe(connector, &tmp, &ctx);
 	} else
 		status = connector_status_unknown;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c2976ade823f..1ce4ad4626e4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2576,7 +2576,7 @@ void intel_display_handle_reset(struct drm_device *dev)
 	for_each_crtc(dev, crtc) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
-		mutex_lock(&crtc->mutex);
+		drm_modeset_lock(&crtc->mutex, NULL);
 		/*
 		 * FIXME: Once we have proper support for primary planes (and
 		 * disabling them without disabling the entire crtc) allow again
@@ -2587,7 +2587,7 @@ void intel_display_handle_reset(struct drm_device *dev)
 							       crtc->primary->fb,
 							       crtc->x,
 							       crtc->y);
-		mutex_unlock(&crtc->mutex);
+		drm_modeset_unlock(&crtc->mutex);
 	}
 }
 
@@ -8307,7 +8307,8 @@ mode_fits_in_fbdev(struct drm_device *dev,
 
 bool intel_get_load_detect_pipe(struct drm_connector *connector,
 				struct drm_display_mode *mode,
-				struct intel_load_detect_pipe *old)
+				struct intel_load_detect_pipe *old,
+				struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_crtc *intel_crtc;
 	struct intel_encoder *intel_encoder =
@@ -8317,13 +8318,19 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 	struct drm_crtc *crtc = NULL;
 	struct drm_device *dev = encoder->dev;
 	struct drm_framebuffer *fb;
-	int i = -1;
+	struct drm_mode_config *config = &dev->mode_config;
+	int ret, i = -1;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
 		      connector->base.id, connector->name,
 		      encoder->base.id, encoder->name);
 
-	mutex_lock(&dev->mode_config.connection_mutex);
+	drm_modeset_acquire_init(ctx, 0);
+
+retry:
+	ret = drm_modeset_lock(&config->connection_mutex, ctx);
+	if (ret)
+		goto fail_unlock;
 
 	/*
 	 * Algorithm gets a little messy:
@@ -8339,7 +8346,9 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 	if (encoder->crtc) {
 		crtc = encoder->crtc;
 
-		mutex_lock(&crtc->mutex);
+		ret = drm_modeset_lock(&crtc->mutex, ctx);
+		if (ret)
+			goto fail_unlock;
 
 		old->dpms_mode = connector->dpms;
 		old->load_detect_temp = false;
@@ -8367,10 +8376,12 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 	 */
 	if (!crtc) {
 		DRM_DEBUG_KMS("no pipe available for load-detect\n");
-		goto fail_unlock_connector;
+		goto fail_unlock;
 	}
 
-	mutex_lock(&crtc->mutex);
+	ret = drm_modeset_lock(&crtc->mutex, ctx);
+	if (ret)
+		goto fail_unlock;
 	intel_encoder->new_crtc = to_intel_crtc(crtc);
 	to_intel_connector(connector)->new_encoder = intel_encoder;
 
@@ -8420,15 +8431,21 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 		intel_crtc->new_config = &intel_crtc->config;
 	else
 		intel_crtc->new_config = NULL;
-	mutex_unlock(&crtc->mutex);
-fail_unlock_connector:
-	mutex_unlock(&dev->mode_config.connection_mutex);
+fail_unlock:
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(ctx);
+		goto retry;
+	}
+
+	drm_modeset_drop_locks(ctx);
+	drm_modeset_acquire_fini(ctx);
 
 	return false;
 }
 
 void intel_release_load_detect_pipe(struct drm_connector *connector,
-				    struct intel_load_detect_pipe *old)
+				    struct intel_load_detect_pipe *old,
+				    struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_encoder *intel_encoder =
 		intel_attached_encoder(connector);
@@ -8452,8 +8469,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 			drm_framebuffer_unreference(old->release_fb);
 		}
 
-		mutex_unlock(&crtc->mutex);
-		mutex_unlock(&connector->dev->mode_config.connection_mutex);
+		goto unlock;
 		return;
 	}
 
@@ -8461,8 +8477,9 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 	if (old->dpms_mode != DRM_MODE_DPMS_ON)
 		connector->funcs->dpms(connector, old->dpms_mode);
 
-	mutex_unlock(&crtc->mutex);
-	mutex_unlock(&connector->dev->mode_config.connection_mutex);
+unlock:
+	drm_modeset_drop_locks(ctx);
+	drm_modeset_acquire_fini(ctx);
 }
 
 static int i9xx_pll_refclk(struct drm_device *dev,
@@ -10995,7 +11012,7 @@ enum pipe intel_get_pipe_from_connector(struct intel_connector *connector)
 	struct drm_encoder *encoder = connector->base.encoder;
 	struct drm_device *dev = connector->base.dev;
 
-	WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
 	if (!encoder)
 		return INVALID_PIPE;
@@ -11805,6 +11822,7 @@ static void intel_enable_pipe_a(struct drm_device *dev)
 	struct intel_connector *connector;
 	struct drm_connector *crt = NULL;
 	struct intel_load_detect_pipe load_detect_temp;
+	struct drm_modeset_acquire_ctx ctx;
 
 	/* We can't just switch on the pipe A, we need to set things up with a
 	 * proper mode and output configuration. As a gross hack, enable pipe A
@@ -11821,8 +11839,8 @@ static void intel_enable_pipe_a(struct drm_device *dev)
 	if (!crt)
 		return;
 
-	if (intel_get_load_detect_pipe(crt, NULL, &load_detect_temp))
-		intel_release_load_detect_pipe(crt, &load_detect_temp);
+	if (intel_get_load_detect_pipe(crt, NULL, &load_detect_temp, &ctx))
+		intel_release_load_detect_pipe(crt, &load_detect_temp, &ctx);
 
 
 }
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index d01bb430b5bc..2d5d9b010073 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1154,7 +1154,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
 	u32 pp;
 	u32 pp_stat_reg, pp_ctrl_reg;
 
-	WARN_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
 	if (!intel_dp->want_panel_vdd && edp_have_panel_vdd(intel_dp)) {
 		struct intel_digital_port *intel_dig_port =
@@ -1191,9 +1191,9 @@ static void edp_panel_vdd_work(struct work_struct *__work)
 						 struct intel_dp, panel_vdd_work);
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
-	mutex_lock(&dev->mode_config.connection_mutex);
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 	edp_panel_vdd_off_sync(intel_dp);
-	mutex_unlock(&dev->mode_config.connection_mutex);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 }
 
 static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync)
@@ -3666,9 +3666,9 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 	drm_encoder_cleanup(encoder);
 	if (is_edp(intel_dp)) {
 		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
-		mutex_lock(&dev->mode_config.connection_mutex);
+		drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 		edp_panel_vdd_off_sync(intel_dp);
-		mutex_unlock(&dev->mode_config.connection_mutex);
+		drm_modeset_unlock(&dev->mode_config.connection_mutex);
 	}
 	kfree(intel_dig_port);
 }
@@ -4247,9 +4247,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 		drm_dp_aux_unregister(&intel_dp->aux);
 		if (is_edp(intel_dp)) {
 			cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
-			mutex_lock(&dev->mode_config.connection_mutex);
+			drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 			edp_panel_vdd_off_sync(intel_dp);
-			mutex_unlock(&dev->mode_config.connection_mutex);
+			drm_modeset_unlock(&dev->mode_config.connection_mutex);
 		}
 		drm_sysfs_connector_remove(connector);
 		drm_connector_cleanup(connector);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index f1d5897c96cd..0de04983501e 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -748,9 +748,11 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
 			 struct intel_digital_port *dport);
 bool intel_get_load_detect_pipe(struct drm_connector *connector,
 				struct drm_display_mode *mode,
-				struct intel_load_detect_pipe *old);
+				struct intel_load_detect_pipe *old,
+				struct drm_modeset_acquire_ctx *ctx);
 void intel_release_load_detect_pipe(struct drm_connector *connector,
-				    struct intel_load_detect_pipe *old);
+				    struct intel_load_detect_pipe *old,
+				    struct drm_modeset_acquire_ctx *ctx);
 int intel_pin_and_fence_fb_obj(struct drm_device *dev,
 			       struct drm_i915_gem_object *obj,
 			       struct intel_engine_cs *pipelined);
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index b812e9d39f38..2e2c71fcc9ed 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -410,7 +410,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
 	if (bclp > 255)
 		return ASLC_BACKLIGHT_FAILED;
 
-	mutex_lock(&dev->mode_config.connection_mutex);
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 
 	/*
 	 * Update backlight on all connectors that support backlight (usually
@@ -421,7 +421,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
 		intel_panel_set_backlight(intel_connector, bclp, 255);
 	iowrite32(DIV_ROUND_UP(bclp * 100, 255) | ASLE_CBLV_VALID, &asle->cblv);
 
-	mutex_unlock(&dev->mode_config.connection_mutex);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index e97ea33e0117..0396d1312b5c 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -688,7 +688,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	u32 swidth, swidthsw, sheight, ostride;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
+	BUG_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 	BUG_ON(!overlay);
 
 	ret = intel_overlay_release_old_vid(overlay);
@@ -793,7 +793,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
 	int ret;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(!mutex_is_locked(&dev->mode_config.connection_mutex));
+	BUG_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
 	ret = intel_overlay_recover_from_interrupt(overlay);
 	if (ret != 0)
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index d4d415665475..2e1338a5d488 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -876,12 +876,12 @@ static int intel_backlight_device_update_status(struct backlight_device *bd)
 	struct intel_connector *connector = bl_get_data(bd);
 	struct drm_device *dev = connector->base.dev;
 
-	mutex_lock(&dev->mode_config.connection_mutex);
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 	DRM_DEBUG_KMS("updating intel_backlight, brightness=%d/%d\n",
 		      bd->props.brightness, bd->props.max_brightness);
 	intel_panel_set_backlight(connector, bd->props.brightness,
 				  bd->props.max_brightness);
-	mutex_unlock(&dev->mode_config.connection_mutex);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 	return 0;
 }
 
@@ -893,9 +893,9 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
 	int ret;
 
 	intel_runtime_pm_get(dev_priv);
-	mutex_lock(&dev->mode_config.connection_mutex);
+	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 	ret = intel_panel_get_backlight(connector);
-	mutex_unlock(&dev->mode_config.connection_mutex);
+	drm_modeset_unlock(&dev->mode_config.connection_mutex);
 	intel_runtime_pm_put(dev_priv);
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index d6acd6bd0bf0..1b66ddcdfb33 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -55,7 +55,7 @@ static bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl
 	int scanline, min, max, vblank_start;
 	DEFINE_WAIT(wait);
 
-	WARN_ON(!mutex_is_locked(&crtc->base.mutex));
+	WARN_ON(!drm_modeset_is_locked(&crtc->base.mutex));
 
 	vblank_start = mode->crtc_vblank_start;
 	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 25850a86d70c..67c6c9a2eb1c 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1321,10 +1321,11 @@ intel_tv_detect(struct drm_connector *connector, bool force)
 
 	if (force) {
 		struct intel_load_detect_pipe tmp;
+		struct drm_modeset_acquire_ctx ctx;
 
-		if (intel_get_load_detect_pipe(connector, &mode, &tmp)) {
+		if (intel_get_load_detect_pipe(connector, &mode, &tmp, &ctx)) {
 			type = intel_tv_detect_type(intel_tv, connector);
-			intel_release_load_detect_pipe(connector, &tmp);
+			intel_release_load_detect_pipe(connector, &tmp, &ctx);
 		} else
 			return connector_status_unknown;
 	} else
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
index e3c47a8005ff..2d28dc337cfb 100644
--- a/drivers/gpu/drm/omapdrm/omap_crtc.c
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
@@ -319,13 +319,13 @@ static void page_flip_worker(struct work_struct *work)
 	struct drm_display_mode *mode = &crtc->mode;
 	struct drm_gem_object *bo;
 
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 	omap_plane_mode_set(omap_crtc->plane, crtc, crtc->primary->fb,
 			0, 0, mode->hdisplay, mode->vdisplay,
 			crtc->x << 16, crtc->y << 16,
 			mode->hdisplay << 16, mode->vdisplay << 16,
 			vblank_cb, crtc);
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 
 	bo = omap_framebuffer_bo(crtc->primary->fb, 0);
 	drm_gem_object_unreference_unlocked(bo);
@@ -465,7 +465,7 @@ static void apply_worker(struct work_struct *work)
 	 * the callbacks and list modification all serialized
 	 * with respect to modesetting ioctls from userspace.
 	 */
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 	dispc_runtime_get();
 
 	/*
@@ -510,7 +510,7 @@ static void apply_worker(struct work_struct *work)
 
 out:
 	dispc_runtime_put();
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 }
 
 int omap_crtc_apply(struct drm_crtc *crtc,
@@ -518,7 +518,7 @@ int omap_crtc_apply(struct drm_crtc *crtc,
 {
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
 
-	WARN_ON(!mutex_is_locked(&crtc->mutex));
+	WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
 
 	/* no need to queue it again if it is already queued: */
 	if (apply->queued)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index e7199b454ca0..8f3edc4710f2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -187,7 +187,7 @@ int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 	 * can do this since the caller in the drm core doesn't check anything
 	 * which is protected by any looks.
 	 */
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 	drm_modeset_lock_all(dev_priv->dev);
 
 	/* A lot of the code assumes this */
@@ -252,7 +252,7 @@ int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 	ret = 0;
 out:
 	drm_modeset_unlock_all(dev_priv->dev);
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 
 	return ret;
 }
@@ -273,7 +273,7 @@ int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 	 * can do this since the caller in the drm core doesn't check anything
 	 * which is protected by any looks.
 	 */
-	mutex_unlock(&crtc->mutex);
+	drm_modeset_unlock(&crtc->mutex);
 	drm_modeset_lock_all(dev_priv->dev);
 
 	vmw_cursor_update_position(dev_priv, shown,
@@ -281,7 +281,7 @@ int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 				   du->cursor_y + du->hotspot_y);
 
 	drm_modeset_unlock_all(dev_priv->dev);
-	mutex_lock(&crtc->mutex);
+	drm_modeset_lock(&crtc->mutex, NULL);
 
 	return 0;
 }
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 76ccaabd0418..475ca5cf3c20 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1186,11 +1186,6 @@ static inline int drm_device_is_unplugged(struct drm_device *dev)
 	return ret;
 }
 
-static inline bool drm_modeset_is_locked(struct drm_device *dev)
-{
-	return mutex_is_locked(&dev->mode_config.mutex);
-}
-
 static inline bool drm_is_render_client(const struct drm_file *file_priv)
 {
 	return file_priv->minor->type == DRM_MINOR_RENDER;
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 6c295df7b0df..a7fac5686915 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -33,6 +33,7 @@
 #include <linux/hdmi.h>
 #include <drm/drm_mode.h>
 #include <drm/drm_fourcc.h>
+#include <drm/drm_modeset_lock.h>
 
 struct drm_device;
 struct drm_mode_set;
@@ -205,6 +206,10 @@ struct drm_property {
 	struct list_head enum_blob_list;
 };
 
+void drm_modeset_lock_all(struct drm_device *dev);
+void drm_modeset_unlock_all(struct drm_device *dev);
+void drm_warn_on_modeset_not_all_locked(struct drm_device *dev);
+
 struct drm_crtc;
 struct drm_connector;
 struct drm_encoder;
@@ -280,6 +285,7 @@ struct drm_crtc_funcs {
  * drm_crtc - central CRTC control structure
  * @dev: parent DRM device
  * @head: list management
+ * @mutex: per-CRTC locking
  * @base: base KMS object for ID tracking etc.
  * @primary: primary plane for this CRTC
  * @cursor: cursor plane for this CRTC
@@ -314,7 +320,7 @@ struct drm_crtc {
 	 * state, ...) and a write lock for everything which can be update
 	 * without a full modeset (fb, cursor data, ...)
 	 */
-	struct mutex mutex;
+	struct drm_modeset_lock mutex;
 
 	struct drm_mode_object base;
 
@@ -738,7 +744,8 @@ struct drm_mode_group {
  */
 struct drm_mode_config {
 	struct mutex mutex; /* protects configuration (mode lists etc.) */
-	struct mutex connection_mutex; /* protects connector->encoder and encoder->crtc links */
+	struct drm_modeset_lock connection_mutex; /* protects connector->encoder and encoder->crtc links */
+	struct drm_modeset_acquire_ctx *acquire_ctx; /* for legacy _lock_all() / _unlock_all() */
 	struct mutex idr_mutex; /* for IDR management */
 	struct idr crtc_idr; /* use this idr for all IDs, fb, crtc, connector, modes - just makes life easier */
 	/* this is limited to one for now */
@@ -839,10 +846,6 @@ struct drm_prop_enum_list {
 	char *name;
 };
 
-extern void drm_modeset_lock_all(struct drm_device *dev);
-extern void drm_modeset_unlock_all(struct drm_device *dev);
-extern void drm_warn_on_modeset_not_all_locked(struct drm_device *dev);
-
 extern int drm_crtc_init_with_planes(struct drm_device *dev,
 				     struct drm_crtc *crtc,
 				     struct drm_plane *primary,
diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h
new file mode 100644
index 000000000000..402aa7a6a058
--- /dev/null
+++ b/include/drm/drm_modeset_lock.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef DRM_MODESET_LOCK_H_
+#define DRM_MODESET_LOCK_H_
+
+#include <linux/ww_mutex.h>
+
+struct drm_modeset_lock;
+
+/**
+ * drm_modeset_acquire_ctx - locking context (see ww_acquire_ctx)
+ * @ww_ctx: base acquire ctx
+ * @contended: used internally for -EDEADLK handling
+ * @locked: list of held locks
+ *
+ * Each thread competing for a set of locks must use one acquire
+ * ctx.  And if any lock fxn returns -EDEADLK, it must backoff and
+ * retry.
+ */
+struct drm_modeset_acquire_ctx {
+
+	struct ww_acquire_ctx ww_ctx;
+
+	/**
+	 * Contended lock: if a lock is contended you should only call
+	 * drm_modeset_backoff() which drops locks and slow-locks the
+	 * contended lock.
+	 */
+	struct drm_modeset_lock *contended;
+
+	/**
+	 * list of held locks (drm_modeset_lock)
+	 */
+	struct list_head locked;
+};
+
+/**
+ * drm_modeset_lock - used for locking modeset resources.
+ * @mutex: resource locking
+ * @head: used to hold it's place on state->locked list when
+ *    part of an atomic update
+ *
+ * Used for locking CRTCs and other modeset resources.
+ */
+struct drm_modeset_lock {
+	/**
+	 * modeset lock
+	 */
+	struct ww_mutex mutex;
+
+	/**
+	 * Resources that are locked as part of an atomic update are added
+	 * to a list (so we know what to unlock at the end).
+	 */
+	struct list_head head;
+};
+
+extern struct ww_class crtc_ww_class;
+
+void drm_modeset_acquire_init(struct drm_modeset_acquire_ctx *ctx,
+		uint32_t flags);
+void drm_modeset_acquire_fini(struct drm_modeset_acquire_ctx *ctx);
+void drm_modeset_drop_locks(struct drm_modeset_acquire_ctx *ctx);
+void drm_modeset_backoff(struct drm_modeset_acquire_ctx *ctx);
+int drm_modeset_backoff_interruptible(struct drm_modeset_acquire_ctx *ctx);
+
+/**
+ * drm_modeset_lock_init - initialize lock
+ * @lock: lock to init
+ */
+static inline void drm_modeset_lock_init(struct drm_modeset_lock *lock)
+{
+	ww_mutex_init(&lock->mutex, &crtc_ww_class);
+	INIT_LIST_HEAD(&lock->head);
+}
+
+/**
+ * drm_modeset_lock_fini - cleanup lock
+ * @lock: lock to cleanup
+ */
+static inline void drm_modeset_lock_fini(struct drm_modeset_lock *lock)
+{
+	WARN_ON(!list_empty(&lock->head));
+}
+
+/**
+ * drm_modeset_is_locked - equivalent to mutex_is_locked()
+ * @lock: lock to check
+ */
+static inline bool drm_modeset_is_locked(struct drm_modeset_lock *lock)
+{
+	return ww_mutex_is_locked(&lock->mutex);
+}
+
+int drm_modeset_lock(struct drm_modeset_lock *lock,
+		struct drm_modeset_acquire_ctx *ctx);
+int drm_modeset_lock_interruptible(struct drm_modeset_lock *lock,
+		struct drm_modeset_acquire_ctx *ctx);
+void drm_modeset_unlock(struct drm_modeset_lock *lock);
+
+struct drm_device;
+int drm_modeset_lock_all_crtcs(struct drm_device *dev,
+		struct drm_modeset_acquire_ctx *ctx);
+
+#endif /* DRM_MODESET_LOCK_H_ */
-- 
cgit v1.2.3-71-gd317


From 5ea1f752ae04be403a3dc8ec876a60d7f5f6990a Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Fri, 30 May 2014 12:29:48 -0400
Subject: drm: add drm_fb_helper_restore_fbdev_mode_unlocked()

All drm_fb_helper_restore_fbdev_mode() call sites, save one, do the same
locking.  Simplify this into drm_fb_helper_restore_fbdev_mode_unlocked().

Signed-off-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/armada/armada_fbdev.c     |  4 +--
 drivers/gpu/drm/drm_fb_cma_helper.c       |  9 ++----
 drivers/gpu/drm/drm_fb_helper.c           | 50 ++++++++++++++++++++++---------
 drivers/gpu/drm/exynos/exynos_drm_fbdev.c |  4 +--
 drivers/gpu/drm/gma500/psb_drv.c          |  4 +--
 drivers/gpu/drm/i915/intel_fbdev.c        |  6 +---
 drivers/gpu/drm/msm/msm_drv.c             |  7 ++---
 drivers/gpu/drm/omapdrm/omap_drv.c        |  4 +--
 drivers/gpu/drm/tegra/fb.c                |  7 ++---
 include/drm/drm_fb_helper.h               |  2 +-
 10 files changed, 48 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/armada/armada_fbdev.c b/drivers/gpu/drm/armada/armada_fbdev.c
index 948cb14c561e..fd166f532ab9 100644
--- a/drivers/gpu/drm/armada/armada_fbdev.c
+++ b/drivers/gpu/drm/armada/armada_fbdev.c
@@ -181,10 +181,8 @@ void armada_fbdev_lastclose(struct drm_device *dev)
 {
 	struct armada_private *priv = dev->dev_private;
 
-	drm_modeset_lock_all(dev);
 	if (priv->fbdev)
-		drm_fb_helper_restore_fbdev_mode(priv->fbdev);
-	drm_modeset_unlock_all(dev);
+		drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev);
 }
 
 void armada_fbdev_fini(struct drm_device *dev)
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index 61b5a47ad239..f27c883be391 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -429,13 +429,8 @@ EXPORT_SYMBOL_GPL(drm_fbdev_cma_fini);
  */
 void drm_fbdev_cma_restore_mode(struct drm_fbdev_cma *fbdev_cma)
 {
-	if (fbdev_cma) {
-		struct drm_device *dev = fbdev_cma->fb_helper.dev;
-
-		drm_modeset_lock_all(dev);
-		drm_fb_helper_restore_fbdev_mode(&fbdev_cma->fb_helper);
-		drm_modeset_unlock_all(dev);
-	}
+	if (fbdev_cma)
+		drm_fb_helper_restore_fbdev_mode_unlocked(&fbdev_cma->fb_helper);
 }
 EXPORT_SYMBOL_GPL(drm_fbdev_cma_restore_mode);
 
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 43329cee299f..d5d8cea1a679 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -273,15 +273,7 @@ int drm_fb_helper_debug_leave(struct fb_info *info)
 }
 EXPORT_SYMBOL(drm_fb_helper_debug_leave);
 
-/**
- * drm_fb_helper_restore_fbdev_mode - restore fbdev configuration
- * @fb_helper: fbcon to restore
- *
- * This should be called from driver's drm ->lastclose callback
- * when implementing an fbcon on top of kms using this helper. This ensures that
- * the user isn't greeted with a black screen when e.g. X dies.
- */
-bool drm_fb_helper_restore_fbdev_mode(struct drm_fb_helper *fb_helper)
+static bool restore_fbdev_mode(struct drm_fb_helper *fb_helper)
 {
 	struct drm_device *dev = fb_helper->dev;
 	struct drm_plane *plane;
@@ -311,7 +303,40 @@ bool drm_fb_helper_restore_fbdev_mode(struct drm_fb_helper *fb_helper)
 	}
 	return error;
 }
-EXPORT_SYMBOL(drm_fb_helper_restore_fbdev_mode);
+/**
+ * drm_fb_helper_restore_fbdev_mode - restore fbdev configuration
+ * @fb_helper: fbcon to restore
+ *
+ * This should be called from driver's drm ->lastclose callback
+ * when implementing an fbcon on top of kms using this helper. This ensures that
+ * the user isn't greeted with a black screen when e.g. X dies.
+ *
+ * Use this variant if you need to bypass locking (panic), or already
+ * hold all modeset locks.  Otherwise use drm_fb_helper_restore_fbdev_mode_unlocked()
+ */
+static bool drm_fb_helper_restore_fbdev_mode(struct drm_fb_helper *fb_helper)
+{
+	return restore_fbdev_mode(fb_helper);
+}
+
+/**
+ * drm_fb_helper_restore_fbdev_mode_unlocked - restore fbdev configuration
+ * @fb_helper: fbcon to restore
+ *
+ * This should be called from driver's drm ->lastclose callback
+ * when implementing an fbcon on top of kms using this helper. This ensures that
+ * the user isn't greeted with a black screen when e.g. X dies.
+ */
+bool drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper)
+{
+	struct drm_device *dev = fb_helper->dev;
+	bool ret;
+	drm_modeset_lock_all(dev);
+	ret = restore_fbdev_mode(fb_helper);
+	drm_modeset_unlock_all(dev);
+	return ret;
+}
+EXPORT_SYMBOL(drm_fb_helper_restore_fbdev_mode_unlocked);
 
 /*
  * restore fbcon display for all kms driver's using this helper, used for sysrq
@@ -824,7 +849,6 @@ EXPORT_SYMBOL(drm_fb_helper_check_var);
 int drm_fb_helper_set_par(struct fb_info *info)
 {
 	struct drm_fb_helper *fb_helper = info->par;
-	struct drm_device *dev = fb_helper->dev;
 	struct fb_var_screeninfo *var = &info->var;
 
 	if (var->pixclock != 0) {
@@ -832,9 +856,7 @@ int drm_fb_helper_set_par(struct fb_info *info)
 		return -EINVAL;
 	}
 
-	drm_modeset_lock_all(dev);
-	drm_fb_helper_restore_fbdev_mode(fb_helper);
-	drm_modeset_unlock_all(dev);
+	drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper);
 
 	if (fb_helper->delayed_hotplug) {
 		fb_helper->delayed_hotplug = false;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index 14bbaa33b477..d771b467cf0c 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -367,7 +367,5 @@ void exynos_drm_fbdev_restore_mode(struct drm_device *dev)
 	if (!private || !private->fb_helper)
 		return;
 
-	drm_modeset_lock_all(dev);
-	drm_fb_helper_restore_fbdev_mode(private->fb_helper);
-	drm_modeset_unlock_all(dev);
+	drm_fb_helper_restore_fbdev_mode_unlocked(private->fb_helper);
 }
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 0a3101a3db19..59ea45e5c97e 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -112,11 +112,9 @@ static void psb_driver_lastclose(struct drm_device *dev)
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct psb_fbdev *fbdev = dev_priv->fbdev;
 
-	drm_modeset_lock_all(dev);
-	ret = drm_fb_helper_restore_fbdev_mode(&fbdev->psb_fb_helper);
+	ret = drm_fb_helper_restore_fbdev_mode_unlocked(&fbdev->psb_fb_helper);
 	if (ret)
 		DRM_DEBUG("failed to restore crtc mode\n");
-	drm_modeset_unlock_all(dev);
 
 	return;
 }
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index e2d416149f53..6ea2d75464da 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -687,11 +687,7 @@ void intel_fbdev_restore_mode(struct drm_device *dev)
 	if (!dev_priv->fbdev)
 		return;
 
-	drm_modeset_lock_all(dev);
-
-	ret = drm_fb_helper_restore_fbdev_mode(&dev_priv->fbdev->helper);
+	ret = drm_fb_helper_restore_fbdev_mode_unlocked(&dev_priv->fbdev->helper);
 	if (ret)
 		DRM_DEBUG("failed to restore crtc mode\n");
-
-	drm_modeset_unlock_all(dev);
 }
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index c071aacf2752..0d2562fb681e 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -386,11 +386,8 @@ static void msm_preclose(struct drm_device *dev, struct drm_file *file)
 static void msm_lastclose(struct drm_device *dev)
 {
 	struct msm_drm_private *priv = dev->dev_private;
-	if (priv->fbdev) {
-		drm_modeset_lock_all(dev);
-		drm_fb_helper_restore_fbdev_mode(priv->fbdev);
-		drm_modeset_unlock_all(dev);
-	}
+	if (priv->fbdev)
+		drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev);
 }
 
 static irqreturn_t msm_irq(int irq, void *arg)
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index c8270e4b26f3..002b9721e85a 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -588,9 +588,7 @@ static void dev_lastclose(struct drm_device *dev)
 		}
 	}
 
-	drm_modeset_lock_all(dev);
-	ret = drm_fb_helper_restore_fbdev_mode(priv->fbdev);
-	drm_modeset_unlock_all(dev);
+	ret = drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev);
 	if (ret)
 		DBG("failed to restore crtc mode");
 }
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index f7fca09d4921..9798a7080322 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -346,11 +346,8 @@ static void tegra_fbdev_free(struct tegra_fbdev *fbdev)
 
 void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev)
 {
-	if (fbdev) {
-		drm_modeset_lock_all(fbdev->base.dev);
-		drm_fb_helper_restore_fbdev_mode(&fbdev->base);
-		drm_modeset_unlock_all(fbdev->base.dev);
-	}
+	if (fbdev)
+		drm_fb_helper_restore_fbdev_mode_unlocked(&fbdev->base);
 }
 
 static void tegra_fb_output_poll_changed(struct drm_device *drm)
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 6e622f7d481d..7997246d4039 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -108,7 +108,7 @@ int drm_fb_helper_set_par(struct fb_info *info);
 int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 			    struct fb_info *info);
 
-bool drm_fb_helper_restore_fbdev_mode(struct drm_fb_helper *fb_helper);
+bool drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper);
 void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helper,
 			    uint32_t fb_width, uint32_t fb_height);
 void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch,
-- 
cgit v1.2.3-71-gd317


From af5fcba7f38f3166392f4087ab734433c84f160b Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 4 Jun 2014 17:19:48 -0700
Subject: udp: Generic functions to set checksum

Added udp_set_csum and udp6_set_csum functions to set UDP checksums
in packets. These are for simple UDP packets such as those that might
be created in UDP tunnels.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_checksum.h | 12 ++++++++++++
 include/net/udp.h          |  9 +++++++++
 net/ipv4/udp.c             | 37 +++++++++++++++++++++++++++++++++++++
 net/ipv6/ip6_checksum.c    | 38 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 96 insertions(+)

(limited to 'include')

diff --git a/include/net/ip6_checksum.h b/include/net/ip6_checksum.h
index 8ac5c21f8456..55236cb71174 100644
--- a/include/net/ip6_checksum.h
+++ b/include/net/ip6_checksum.h
@@ -82,5 +82,17 @@ static inline void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
 }
 #endif
 
+static inline __sum16 udp_v6_check(int len,
+				   const struct in6_addr *saddr,
+				   const struct in6_addr *daddr,
+				   __wsum base)
+{
+	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base);
+}
+
+void udp6_set_csum(bool nocheck, struct sk_buff *skb,
+		   const struct in6_addr *saddr,
+		   const struct in6_addr *daddr, int len);
+
 int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto);
 #endif
diff --git a/include/net/udp.h b/include/net/udp.h
index 5eb86874bcd6..2ecfc6e15609 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -147,6 +147,15 @@ static inline __wsum udp_csum(struct sk_buff *skb)
 	return csum;
 }
 
+static inline __sum16 udp_v4_check(int len, __be32 saddr,
+				   __be32 daddr, __wsum base)
+{
+	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
+}
+
+void udp_set_csum(bool nocheck, struct sk_buff *skb,
+		  __be32 saddr, __be32 daddr, int len);
+
 /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
 static inline void udp_lib_hash(struct sock *sk)
 {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e07d52b8617a..a84f6762ea9e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -762,6 +762,43 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
 }
 EXPORT_SYMBOL_GPL(udp4_hwcsum);
 
+/* Function to set UDP checksum for an IPv4 UDP packet. This is intended
+ * for the simple case like when setting the checksum for a UDP tunnel.
+ */
+void udp_set_csum(bool nocheck, struct sk_buff *skb,
+		  __be32 saddr, __be32 daddr, int len)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	if (nocheck)
+		uh->check = 0;
+	else if (skb_is_gso(skb))
+		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+	else if (skb_dst(skb) && skb_dst(skb)->dev &&
+		 (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
+	} else {
+		__wsum csum;
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		uh->check = 0;
+		csum = skb_checksum(skb, 0, len, 0);
+		uh->check = udp_v4_check(len, saddr, daddr, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+}
+EXPORT_SYMBOL(udp_set_csum);
+
 static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
 {
 	struct sock *sk = skb->sk;
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index da26224a5993..9a4d7322fb22 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -84,3 +84,41 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
 					   ip6_compute_pseudo);
 }
 EXPORT_SYMBOL(udp6_csum_init);
+
+/* Function to set UDP checksum for an IPv6 UDP packet. This is intended
+ * for the simple case like when setting the checksum for a UDP tunnel.
+ */
+void udp6_set_csum(bool nocheck, struct sk_buff *skb,
+		   const struct in6_addr *saddr,
+		   const struct in6_addr *daddr, int len)
+{
+	struct udphdr *uh = udp_hdr(skb);
+
+	if (nocheck)
+		uh->check = 0;
+	else if (skb_is_gso(skb))
+		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+	else if (skb_dst(skb) && skb_dst(skb)->dev &&
+		 (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
+	} else {
+		__wsum csum;
+
+		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
+
+		uh->check = 0;
+		csum = skb_checksum(skb, 0, len, 0);
+		uh->check = udp_v6_check(len, saddr, daddr, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+}
+EXPORT_SYMBOL(udp6_set_csum);
-- 
cgit v1.2.3-71-gd317


From 7e2b10c1e52ca37fb522be49f4be367f9311d0cd Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 4 Jun 2014 17:20:02 -0700
Subject: net: Support for multiple checksums with gso

When creating a GSO packet segment we may need to set more than
one checksum in the packet (for instance a TCP checksum and
UDP checksum for VXLAN encapsulation). To be efficient, we want
to do checksum calculation for any part of the packet at most once.

This patch adds csum_start offset to skb_gso_cb. This tracks the
starting offset for skb->csum which is initially set in skb_segment.
When a protocol needs to compute a transport checksum it calls
gso_make_checksum which computes the checksum value from the start
of transport header to csum_start and then adds in skb->csum to get
the full checksum. skb->csum and csum_start are then updated to reflect
the checksum of the resultant packet starting from the transport header.

This patch also adds a flag to skbuff, encap_hdr_csum, which is set
in *gso_segment fucntions to indicate that a tunnel protocol needs
checksum calculation

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h    | 26 +++++++++++++++++++++++++-
 net/core/skbuff.c         |  8 +++++++-
 net/ipv4/ip_tunnel_core.c |  8 ++++++++
 3 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7a9beeb1c458..d8d397acb52c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -567,7 +567,8 @@ struct sk_buff {
 	 * headers if needed
 	 */
 	__u8			encapsulation:1;
-	/* 6/8 bit hole (depending on ndisc_nodetype presence) */
+	__u8			encap_hdr_csum:1;
+	/* 5/7 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
@@ -2988,6 +2989,7 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 struct skb_gso_cb {
 	int	mac_offset;
 	int	encap_level;
+	__u16	csum_start;
 };
 #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
 
@@ -3012,6 +3014,28 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra)
 	return 0;
 }
 
+/* Compute the checksum for a gso segment. First compute the checksum value
+ * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and
+ * then add in skb->csum (checksum from csum_start to end of packet).
+ * skb->csum and csum_start are then updated to reflect the checksum of the
+ * resultant packet starting from the transport header-- the resultant checksum
+ * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo
+ * header.
+ */
+static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res)
+{
+	int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) -
+	    skb_transport_offset(skb);
+	__u16 csum;
+
+	csum = csum_fold(csum_partial(skb_transport_header(skb),
+				      plen, skb->csum));
+	skb->csum = res;
+	SKB_GSO_CB(skb)->csum_start -= plen;
+
+	return csum;
+}
+
 static inline bool skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3f6c7e8be8a4..05f4bef2ce12 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2885,7 +2885,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	if (unlikely(!proto))
 		return ERR_PTR(-EINVAL);
 
-	csum = !!can_checksum_protocol(features, proto);
+	csum = !head_skb->encap_hdr_csum &&
+	    !!can_checksum_protocol(features, proto);
+
 	__skb_push(head_skb, doffset);
 	headroom = skb_headroom(head_skb);
 	pos = skb_headlen(head_skb);
@@ -2983,6 +2985,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 			nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
 							    skb_put(nskb, len),
 							    len, 0);
+			SKB_GSO_CB(nskb)->csum_start =
+			    skb_headroom(nskb) + offset;
 			continue;
 		}
 
@@ -3052,6 +3056,8 @@ perform_csum_check:
 			nskb->csum = skb_checksum(nskb, doffset,
 						  nskb->len - doffset, 0);
 			nskb->ip_summed = CHECKSUM_NONE;
+			SKB_GSO_CB(nskb)->csum_start =
+			    skb_headroom(nskb) + doffset;
 		}
 	} while ((offset += len) < head_skb->len);
 
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 847e69cbff7e..f4c987bb7e94 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -135,6 +135,14 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
 		return skb;
 	}
 
+	/* If packet is not gso and we are resolving any partial checksum,
+	 * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
+	 * on the outer header without confusing devices that implement
+	 * NETIF_F_IP_CSUM with encapsulation.
+	 */
+	if (csum_help)
+		skb->encapsulation = 0;
+
 	if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
 		err = skb_checksum_help(skb);
 		if (unlikely(err))
-- 
cgit v1.2.3-71-gd317


From 0f4f4ffa7b7c3d29d0537a126145c9f8d8ed5dbc Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 4 Jun 2014 17:20:16 -0700
Subject: net: Add GSO support for UDP tunnels with checksum

Added a new netif feature for GSO_UDP_TUNNEL_CSUM. This indicates
that a device is capable of computing the UDP checksum in the
encapsulating header of a UDP tunnel.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |  1 +
 include/linux/skbuff.h          |  2 ++
 net/ipv4/af_inet.c              |  1 +
 net/ipv4/tcp_offload.c          |  1 +
 net/ipv4/udp.c                  | 40 +++++++++++++++++++---------------------
 net/ipv4/udp_offload.c          |  4 +++-
 net/ipv6/ip6_offload.c          |  1 +
 net/ipv6/udp_offload.c          |  4 +++-
 8 files changed, 31 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index c26d0ec2ef3a..f1338e0f9866 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -45,6 +45,7 @@ enum {
 	NETIF_F_GSO_IPIP_BIT,		/* ... IPIP tunnel with TSO */
 	NETIF_F_GSO_SIT_BIT,		/* ... SIT tunnel with TSO */
 	NETIF_F_GSO_UDP_TUNNEL_BIT,	/* ... UDP TUNNEL with TSO */
+	NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */
 	NETIF_F_GSO_MPLS_BIT,		/* ... MPLS segmentation */
 	/**/NETIF_F_GSO_LAST =		/* last bit, see GSO_MASK */
 		NETIF_F_GSO_MPLS_BIT,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d8d397acb52c..5a6d10a538f5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -345,6 +345,8 @@ enum {
 	SKB_GSO_UDP_TUNNEL = 1 << 9,
 
 	SKB_GSO_MPLS = 1 << 10,
+
+	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
 };
 
 #if BITS_PER_LONG > 32
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0e9bb08a91e4..0070ab87109b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1258,6 +1258,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_SIT |
 		       SKB_GSO_TCPV6 |
 		       SKB_GSO_UDP_TUNNEL |
+		       SKB_GSO_UDP_TUNNEL_CSUM |
 		       SKB_GSO_MPLS |
 		       0)))
 		goto out;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index d8de7b9e0720..c02f2d2e7bab 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -61,6 +61,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 			       SKB_GSO_SIT |
 			       SKB_GSO_MPLS |
 			       SKB_GSO_UDP_TUNNEL |
+			       SKB_GSO_UDP_TUNNEL_CSUM |
 			       0) ||
 			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
 			goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a84f6762ea9e..8d8c33d84c9a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2528,7 +2528,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
 	__be16 protocol = skb->protocol;
 	netdev_features_t enc_features;
-	int outer_hlen;
+	int udp_offset, outer_hlen;
+	unsigned int oldlen;
+	bool need_csum;
+
+	oldlen = (u16)~skb->len;
 
 	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
 		goto out;
@@ -2540,6 +2544,10 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 	skb->mac_len = skb_inner_network_offset(skb);
 	skb->protocol = htons(ETH_P_TEB);
 
+	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
+	if (need_csum)
+		skb->encap_hdr_csum = 1;
+
 	/* segment inner packet. */
 	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
 	segs = skb_mac_gso_segment(skb, enc_features);
@@ -2550,10 +2558,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 	}
 
 	outer_hlen = skb_tnl_header_len(skb);
+	udp_offset = outer_hlen - tnl_hlen;
 	skb = segs;
 	do {
 		struct udphdr *uh;
-		int udp_offset = outer_hlen - tnl_hlen;
+		int len;
 
 		skb_reset_inner_headers(skb);
 		skb->encapsulation = 1;
@@ -2564,31 +2573,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 		skb_reset_mac_header(skb);
 		skb_set_network_header(skb, mac_len);
 		skb_set_transport_header(skb, udp_offset);
+		len = skb->len - udp_offset;
 		uh = udp_hdr(skb);
-		uh->len = htons(skb->len - udp_offset);
-
-		/* csum segment if tunnel sets skb with csum. */
-		if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) {
-			struct iphdr *iph = ip_hdr(skb);
+		uh->len = htons(len);
 
-			uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-						       skb->len - udp_offset,
-						       IPPROTO_UDP, 0);
-			uh->check = csum_fold(skb_checksum(skb, udp_offset,
-							   skb->len - udp_offset, 0));
-			if (uh->check == 0)
-				uh->check = CSUM_MANGLED_0;
+		if (need_csum) {
+			__be32 delta = htonl(oldlen + len);
 
-		} else if (protocol == htons(ETH_P_IPV6)) {
-			struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-			u32 len = skb->len - udp_offset;
+			uh->check = ~csum_fold((__force __wsum)
+					       ((__force u32)uh->check +
+						(__force u32)delta));
+			uh->check = gso_make_checksum(skb, ~uh->check);
 
-			uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
-						     len, IPPROTO_UDP, 0);
-			uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0));
 			if (uh->check == 0)
 				uh->check = CSUM_MANGLED_0;
-			skb->ip_summed = CHECKSUM_NONE;
 		}
 
 		skb->protocol = protocol;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 88b4023ecfcf..5c23f4765af9 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -56,7 +56,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	__wsum csum;
 
 	if (skb->encapsulation &&
-	    skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) {
+	    (skb_shinfo(skb)->gso_type &
+	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
 		segs = skb_udp_tunnel_segment(skb, features);
 		goto out;
 	}
@@ -71,6 +72,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 
 		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
 				      SKB_GSO_UDP_TUNNEL |
+				      SKB_GSO_UDP_TUNNEL_CSUM |
 				      SKB_GSO_IPIP |
 				      SKB_GSO_GRE | SKB_GSO_MPLS) ||
 			     !(type & (SKB_GSO_UDP))))
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b2f091566f88..d54c5744e3db 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -100,6 +100,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_IPIP |
 		       SKB_GSO_SIT |
 		       SKB_GSO_UDP_TUNNEL |
+		       SKB_GSO_UDP_TUNNEL_CSUM |
 		       SKB_GSO_MPLS |
 		       SKB_GSO_TCPV6 |
 		       0)))
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index b261ee8b83fc..79da8b305ced 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -63,6 +63,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		if (unlikely(type & ~(SKB_GSO_UDP |
 				      SKB_GSO_DODGY |
 				      SKB_GSO_UDP_TUNNEL |
+				      SKB_GSO_UDP_TUNNEL_CSUM |
 				      SKB_GSO_GRE |
 				      SKB_GSO_IPIP |
 				      SKB_GSO_SIT |
@@ -76,7 +77,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		goto out;
 	}
 
-	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+	if (skb->encapsulation && skb_shinfo(skb)->gso_type &
+	    (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
 		segs = skb_udp_tunnel_segment(skb, features);
 	else {
 		/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
-- 
cgit v1.2.3-71-gd317


From 4749c09c37030ccdc44aecebe0f71b02a377fc14 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 4 Jun 2014 17:20:23 -0700
Subject: gre: Call gso_make_checksum

Call gso_make_checksum. This should have the benefit of using a
checksum that may have been previously computed for the packet.

This also adds NETIF_F_GSO_GRE_CSUM to differentiate devices that
offload GRE GSO with and without the GRE checksum offloaed.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |  2 ++
 include/linux/skbuff.h          |  2 ++
 include/net/gre.h               |  5 +++--
 net/ipv4/af_inet.c              |  1 +
 net/ipv4/gre_demux.c            |  3 ++-
 net/ipv4/gre_offload.c          | 10 ++++++++--
 net/ipv4/tcp_offload.c          |  1 +
 net/ipv4/udp_offload.c          |  3 ++-
 net/ipv6/ip6_offload.c          |  1 +
 net/ipv6/udp_offload.c          |  1 +
 net/mpls/mpls_gso.c             |  1 +
 11 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index f1338e0f9866..e5a589435e2b 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -42,6 +42,7 @@ enum {
 	NETIF_F_TSO6_BIT,		/* ... TCPv6 segmentation */
 	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
 	NETIF_F_GSO_GRE_BIT,		/* ... GRE with TSO */
+	NETIF_F_GSO_GRE_CSUM_BIT,	/* ... GRE with csum with TSO */
 	NETIF_F_GSO_IPIP_BIT,		/* ... IPIP tunnel with TSO */
 	NETIF_F_GSO_SIT_BIT,		/* ... SIT tunnel with TSO */
 	NETIF_F_GSO_UDP_TUNNEL_BIT,	/* ... UDP TUNNEL with TSO */
@@ -112,6 +113,7 @@ enum {
 #define NETIF_F_RXFCS		__NETIF_F(RXFCS)
 #define NETIF_F_RXALL		__NETIF_F(RXALL)
 #define NETIF_F_GSO_GRE		__NETIF_F(GSO_GRE)
+#define NETIF_F_GSO_GRE_CSUM	__NETIF_F(GSO_GRE_CSUM)
 #define NETIF_F_GSO_IPIP	__NETIF_F(GSO_IPIP)
 #define NETIF_F_GSO_SIT		__NETIF_F(GSO_SIT)
 #define NETIF_F_GSO_UDP_TUNNEL	__NETIF_F(GSO_UDP_TUNNEL)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5a6d10a538f5..c705808bef9c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -347,6 +347,8 @@ enum {
 	SKB_GSO_MPLS = 1 << 10,
 
 	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
+
+	SKB_GSO_GRE_CSUM = 1 << 12,
 };
 
 #if BITS_PER_LONG > 32
diff --git a/include/net/gre.h b/include/net/gre.h
index 70046a0b0b89..b53182018743 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -37,9 +37,10 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 		      int hdr_len);
 
 static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
-						  bool gre_csum)
+						  bool csum)
 {
-	return iptunnel_handle_offloads(skb, gre_csum, SKB_GSO_GRE);
+	return iptunnel_handle_offloads(skb, csum,
+					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
 
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0070ab87109b..d5e6836cf772 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1254,6 +1254,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
+		       SKB_GSO_GRE_CSUM |
 		       SKB_GSO_IPIP |
 		       SKB_GSO_SIT |
 		       SKB_GSO_TCPV6 |
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index fbfd829f4049..4e9619bca732 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -84,7 +84,8 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 			ptr--;
 		}
 		if (tpi->flags&TUNNEL_CSUM &&
-		    !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
+		    !(skb_shinfo(skb)->gso_type &
+		      (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
 			*ptr = 0;
 			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
 								 skb->len, 0));
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index f1d32280cb54..24deb3928b9e 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -42,6 +42,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_DODGY |
 				  SKB_GSO_TCP_ECN |
 				  SKB_GSO_GRE |
+				  SKB_GSO_GRE_CSUM |
 				  SKB_GSO_IPIP)))
 		goto out;
 
@@ -55,6 +56,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 		goto out;
 
 	csum = !!(greh->flags & GRE_CSUM);
+	if (csum)
+		skb->encap_hdr_csum = 1;
 
 	if (unlikely(!pskb_may_pull(skb, ghl)))
 		goto out;
@@ -94,10 +97,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 				}
 			}
 
-			greh = (struct gre_base_hdr *)(skb->data);
+			skb_reset_transport_header(skb);
+
+			greh = (struct gre_base_hdr *)
+			    skb_transport_header(skb);
 			pcsum = (__be32 *)(greh + 1);
 			*pcsum = 0;
-			*(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
+			*(__sum16 *)pcsum = gso_make_checksum(skb, 0);
 		}
 		__skb_push(skb, tnl_hlen - ghl);
 
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index c02f2d2e7bab..4e86c59ec7f7 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -57,6 +57,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
 			       SKB_GSO_GRE |
+			       SKB_GSO_GRE_CSUM |
 			       SKB_GSO_IPIP |
 			       SKB_GSO_SIT |
 			       SKB_GSO_MPLS |
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 5c23f4765af9..7b1840110173 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -74,7 +74,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 				      SKB_GSO_UDP_TUNNEL |
 				      SKB_GSO_UDP_TUNNEL_CSUM |
 				      SKB_GSO_IPIP |
-				      SKB_GSO_GRE | SKB_GSO_MPLS) ||
+				      SKB_GSO_GRE | SKB_GSO_GRE_CSUM |
+				      SKB_GSO_MPLS) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d54c5744e3db..65eda2a8af48 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -97,6 +97,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
+		       SKB_GSO_GRE_CSUM |
 		       SKB_GSO_IPIP |
 		       SKB_GSO_SIT |
 		       SKB_GSO_UDP_TUNNEL |
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 79da8b305ced..0ae3d98f83e0 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -65,6 +65,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 				      SKB_GSO_UDP_TUNNEL |
 				      SKB_GSO_UDP_TUNNEL_CSUM |
 				      SKB_GSO_GRE |
+				      SKB_GSO_GRE_CSUM |
 				      SKB_GSO_IPIP |
 				      SKB_GSO_SIT |
 				      SKB_GSO_MPLS) ||
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 851cd880b0c0..6b38d083e1c9 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_DODGY |
 				  SKB_GSO_TCP_ECN |
 				  SKB_GSO_GRE |
+				  SKB_GSO_GRE_CSUM |
 				  SKB_GSO_IPIP |
 				  SKB_GSO_MPLS)))
 		goto out;
-- 
cgit v1.2.3-71-gd317


From 359a0ea9875ef4f32c8425bbe1ae348e1fd2ed2a Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 4 Jun 2014 17:20:29 -0700
Subject: vxlan: Add support for UDP checksums (v4 sending, v6 zero csums)

Added VXLAN link configuration for sending UDP checksums, and allowing
TX and RX of UDP6 checksums.

Also, call common iptunnel_handle_offloads and added GSO support for
checksums.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c           | 120 +++++++++++++++++++++---------------------
 include/net/vxlan.h           |  12 ++++-
 include/uapi/linux/if_link.h  |   3 ++
 net/openvswitch/vport-vxlan.c |   2 +-
 4 files changed, 74 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index e68c8eb4ea8e..4e2caaf8b5da 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -135,7 +135,7 @@ struct vxlan_dev {
 	__u16		  port_max;
 	__u8		  tos;		/* TOS override */
 	__u8		  ttl;
-	u32		  flags;	/* VXLAN_F_* below */
+	u32		  flags;	/* VXLAN_F_* in vxlan.h */
 
 	struct work_struct sock_work;
 	struct work_struct igmp_join;
@@ -150,13 +150,6 @@ struct vxlan_dev {
 	struct hlist_head fdb_head[FDB_HASH_SIZE];
 };
 
-#define VXLAN_F_LEARN	0x01
-#define VXLAN_F_PROXY	0x02
-#define VXLAN_F_RSC	0x04
-#define VXLAN_F_L2MISS	0x08
-#define VXLAN_F_L3MISS	0x10
-#define VXLAN_F_IPV6	0x20 /* internal flag */
-
 /* salt for hash table */
 static u32 vxlan_salt __read_mostly;
 static struct workqueue_struct *vxlan_wq;
@@ -1601,18 +1594,11 @@ __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(vxlan_src_port);
 
-static int handle_offloads(struct sk_buff *skb)
+static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
+						    bool udp_csum)
 {
-	if (skb_is_gso(skb)) {
-		int err = skb_unclone(skb, GFP_ATOMIC);
-		if (unlikely(err))
-			return err;
-
-		skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
-	} else if (skb->ip_summed != CHECKSUM_PARTIAL)
-		skb->ip_summed = CHECKSUM_NONE;
-
-	return 0;
+	int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+	return iptunnel_handle_offloads(skb, udp_csum, type);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1629,10 +1615,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 	int min_headroom;
 	int err;
 
-	if (!skb->encapsulation) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk));
+	if (IS_ERR(skb))
+		return -EINVAL;
 
 	skb_scrub_packet(skb, xnet);
 
@@ -1666,27 +1651,14 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 	uh->source = src_port;
 
 	uh->len = htons(skb->len);
-	uh->check = 0;
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
 	skb_dst_set(skb, dst);
 
-	if (!skb_is_gso(skb) && !(dst->dev->features & NETIF_F_IPV6_CSUM)) {
-		__wsum csum = skb_checksum(skb, 0, skb->len, 0);
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		uh->check = csum_ipv6_magic(saddr, daddr, skb->len,
-					    IPPROTO_UDP, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_ipv6_magic(saddr, daddr,
-					     skb->len, IPPROTO_UDP, 0);
-	}
+	udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
+		      saddr, daddr, skb->len);
 
 	__skb_push(skb, sizeof(*ip6h));
 	skb_reset_network_header(skb);
@@ -1702,10 +1674,6 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 	ip6h->daddr	  = *daddr;
 	ip6h->saddr	  = *saddr;
 
-	err = handle_offloads(skb);
-	if (err)
-		return err;
-
 	ip6tunnel_xmit(skb, dev);
 	return 0;
 }
@@ -1721,10 +1689,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 	int min_headroom;
 	int err;
 
-	if (!skb->encapsulation) {
-		skb_reset_inner_headers(skb);
-		skb->encapsulation = 1;
-	}
+	skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
+	if (IS_ERR(skb))
+		return -EINVAL;
 
 	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 			+ VXLAN_HLEN + sizeof(struct iphdr)
@@ -1756,11 +1723,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 	uh->source = src_port;
 
 	uh->len = htons(skb->len);
-	uh->check = 0;
 
-	err = handle_offloads(skb);
-	if (err)
-		return err;
+	udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
+		     src, dst, skb->len);
 
 	return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
 			     tos, ttl, df, xnet);
@@ -2405,7 +2370,7 @@ static void vxlan_del_work(struct work_struct *work)
  * could be used for both IPv4 and IPv6 communications, but
  * users may set bindv6only=1.
  */
-static struct socket *create_v6_sock(struct net *net, __be16 port)
+static struct socket *create_v6_sock(struct net *net, __be16 port, u32 flags)
 {
 	struct sock *sk;
 	struct socket *sock;
@@ -2442,18 +2407,25 @@ static struct socket *create_v6_sock(struct net *net, __be16 port)
 
 	/* Disable multicast loopback */
 	inet_sk(sk)->mc_loop = 0;
+
+	if (flags & VXLAN_F_UDP_ZERO_CSUM6_TX)
+		udp_set_no_check6_tx(sk, true);
+
+	if (flags & VXLAN_F_UDP_ZERO_CSUM6_RX)
+		udp_set_no_check6_rx(sk, true);
+
 	return sock;
 }
 
 #else
 
-static struct socket *create_v6_sock(struct net *net, __be16 port)
+static struct socket *create_v6_sock(struct net *net, __be16 port, u32 flags)
 {
 		return ERR_PTR(-EPFNOSUPPORT);
 }
 #endif
 
-static struct socket *create_v4_sock(struct net *net, __be16 port)
+static struct socket *create_v4_sock(struct net *net, __be16 port, u32 flags)
 {
 	struct sock *sk;
 	struct socket *sock;
@@ -2486,18 +2458,24 @@ static struct socket *create_v4_sock(struct net *net, __be16 port)
 
 	/* Disable multicast loopback */
 	inet_sk(sk)->mc_loop = 0;
+
+	if (!(flags & VXLAN_F_UDP_CSUM))
+		sock->sk->sk_no_check_tx = 1;
+
 	return sock;
 }
 
 /* Create new listen socket if needed */
 static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
-					      vxlan_rcv_t *rcv, void *data, bool ipv6)
+					      vxlan_rcv_t *rcv, void *data,
+					      u32 flags)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	struct vxlan_sock *vs;
 	struct socket *sock;
 	struct sock *sk;
 	unsigned int h;
+	bool ipv6 = !!(flags & VXLAN_F_IPV6);
 
 	vs = kzalloc(sizeof(*vs), GFP_KERNEL);
 	if (!vs)
@@ -2509,9 +2487,9 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
 	INIT_WORK(&vs->del_work, vxlan_del_work);
 
 	if (ipv6)
-		sock = create_v6_sock(net, port);
+		sock = create_v6_sock(net, port, flags);
 	else
-		sock = create_v4_sock(net, port);
+		sock = create_v4_sock(net, port, flags);
 	if (IS_ERR(sock)) {
 		kfree(vs);
 		return ERR_CAST(sock);
@@ -2549,12 +2527,12 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
 
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 				  vxlan_rcv_t *rcv, void *data,
-				  bool no_share, bool ipv6)
+				  bool no_share, u32 flags)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	struct vxlan_sock *vs;
 
-	vs = vxlan_socket_create(net, port, rcv, data, ipv6);
+	vs = vxlan_socket_create(net, port, rcv, data, flags);
 	if (!IS_ERR(vs))
 		return vs;
 
@@ -2587,7 +2565,7 @@ static void vxlan_sock_work(struct work_struct *work)
 	__be16 port = vxlan->dst_port;
 	struct vxlan_sock *nvs;
 
-	nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags & VXLAN_F_IPV6);
+	nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags);
 	spin_lock(&vn->sock_lock);
 	if (!IS_ERR(nvs))
 		vxlan_vs_add_dev(nvs, vxlan);
@@ -2711,6 +2689,17 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 	if (data[IFLA_VXLAN_PORT])
 		vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
 
+	if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
+		vxlan->flags |= VXLAN_F_UDP_CSUM;
+
+	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
+	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
+		vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+
+	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] &&
+	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
+		vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
+
 	if (vxlan_find_vni(net, vni, vxlan->dst_port)) {
 		pr_info("duplicate VNI %u\n", vni);
 		return -EEXIST;
@@ -2774,7 +2763,10 @@ static size_t vxlan_get_size(const struct net_device *dev)
 		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_AGEING */
 		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LIMIT */
 		nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
-		nla_total_size(sizeof(__be16))+ /* IFLA_VXLAN_PORT */
+		nla_total_size(sizeof(__be16)) + /* IFLA_VXLAN_PORT */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
 		0;
 }
 
@@ -2834,7 +2826,13 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			!!(vxlan->flags & VXLAN_F_L3MISS)) ||
 	    nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) ||
 	    nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax) ||
-	    nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port))
+	    nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port) ||
+	    nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
+			!!(vxlan->flags & VXLAN_F_UDP_CSUM)) ||
+	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
+	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)))
 		goto nla_put_failure;
 
 	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 7bb4084b1bd0..12196ce661d9 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -24,9 +24,19 @@ struct vxlan_sock {
 	struct udp_offload udp_offloads;
 };
 
+#define VXLAN_F_LEARN			0x01
+#define VXLAN_F_PROXY			0x02
+#define VXLAN_F_RSC			0x04
+#define VXLAN_F_L2MISS			0x08
+#define VXLAN_F_L3MISS			0x10
+#define VXLAN_F_IPV6			0x20
+#define VXLAN_F_UDP_CSUM		0x40
+#define VXLAN_F_UDP_ZERO_CSUM6_TX	0x80
+#define VXLAN_F_UDP_ZERO_CSUM6_RX	0x100
+
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 				  vxlan_rcv_t *rcv, void *data,
-				  bool no_share, bool ipv6);
+				  bool no_share, u32 flags);
 
 void vxlan_sock_release(struct vxlan_sock *vs);
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 622e7910b8cc..b38534895db5 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -319,6 +319,9 @@ enum {
 	IFLA_VXLAN_PORT,	/* destination port */
 	IFLA_VXLAN_GROUP6,
 	IFLA_VXLAN_LOCAL6,
+	IFLA_VXLAN_UDP_CSUM,
+	IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+	IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index a93efa3f64c3..0edbd95c60e7 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -122,7 +122,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
 	vxlan_port = vxlan_vport(vport);
 	strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
 
-	vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false);
+	vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
 	if (IS_ERR(vs)) {
 		ovs_vport_free(vport);
 		return (void *)vs;
-- 
cgit v1.2.3-71-gd317


From ca05e3a7551b71891d42d637d3a1e443c6bbd781 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Sun, 1 Jun 2014 23:47:24 +0200
Subject: isdn/capi: move capi_info2str to capidrv.c

capi_info2str() is apparently meant to be of general utility. It is
actually only used in capidrv.c. So move it from capiutil.c to
capidrv.c and (obviously) stop exporting it.

And, since we're touching this, merge the two versions of this
function.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Tilman Schmidt <tilman@imap.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/capi/capidrv.c   | 195 ++++++++++++++++++++++++++++++++++++++++
 drivers/isdn/capi/capiutil.c  | 200 ------------------------------------------
 include/linux/isdn/capiutil.h |   5 --
 3 files changed, 195 insertions(+), 205 deletions(-)

(limited to 'include')

diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index cc9f1927a322..70e67f6a22ff 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c
@@ -763,6 +763,201 @@ static inline int new_bchan(capidrv_contr *card)
 }
 
 /* ------------------------------------------------------------------- */
+static char *capi_info2str(u16 reason)
+{
+#ifndef CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON
+	return "..";
+#else
+	switch (reason) {
+
+/*-- informative values (corresponding message was processed) -----*/
+	case 0x0001:
+		return "NCPI not supported by current protocol, NCPI ignored";
+	case 0x0002:
+		return "Flags not supported by current protocol, flags ignored";
+	case 0x0003:
+		return "Alert already sent by another application";
+
+/*-- error information concerning CAPI_REGISTER -----*/
+	case 0x1001:
+		return "Too many applications";
+	case 0x1002:
+		return "Logical block size too small, must be at least 128 Bytes";
+	case 0x1003:
+		return "Buffer exceeds 64 kByte";
+	case 0x1004:
+		return "Message buffer size too small, must be at least 1024 Bytes";
+	case 0x1005:
+		return "Max. number of logical connections not supported";
+	case 0x1006:
+		return "Reserved";
+	case 0x1007:
+		return "The message could not be accepted because of an internal busy condition";
+	case 0x1008:
+		return "OS resource error (no memory ?)";
+	case 0x1009:
+		return "CAPI not installed";
+	case 0x100A:
+		return "Controller does not support external equipment";
+	case 0x100B:
+		return "Controller does only support external equipment";
+
+/*-- error information concerning message exchange functions -----*/
+	case 0x1101:
+		return "Illegal application number";
+	case 0x1102:
+		return "Illegal command or subcommand or message length less than 12 bytes";
+	case 0x1103:
+		return "The message could not be accepted because of a queue full condition !! The error code does not imply that CAPI cannot receive messages directed to another controller, PLCI or NCCI";
+	case 0x1104:
+		return "Queue is empty";
+	case 0x1105:
+		return "Queue overflow, a message was lost !! This indicates a configuration error. The only recovery from this error is to perform a CAPI_RELEASE";
+	case 0x1106:
+		return "Unknown notification parameter";
+	case 0x1107:
+		return "The Message could not be accepted because of an internal busy condition";
+	case 0x1108:
+		return "OS Resource error (no memory ?)";
+	case 0x1109:
+		return "CAPI not installed";
+	case 0x110A:
+		return "Controller does not support external equipment";
+	case 0x110B:
+		return "Controller does only support external equipment";
+
+/*-- error information concerning resource / coding problems -----*/
+	case 0x2001:
+		return "Message not supported in current state";
+	case 0x2002:
+		return "Illegal Controller / PLCI / NCCI";
+	case 0x2003:
+		return "Out of PLCI";
+	case 0x2004:
+		return "Out of NCCI";
+	case 0x2005:
+		return "Out of LISTEN";
+	case 0x2006:
+		return "Out of FAX resources (protocol T.30)";
+	case 0x2007:
+		return "Illegal message parameter coding";
+
+/*-- error information concerning requested services  -----*/
+	case 0x3001:
+		return "B1 protocol not supported";
+	case 0x3002:
+		return "B2 protocol not supported";
+	case 0x3003:
+		return "B3 protocol not supported";
+	case 0x3004:
+		return "B1 protocol parameter not supported";
+	case 0x3005:
+		return "B2 protocol parameter not supported";
+	case 0x3006:
+		return "B3 protocol parameter not supported";
+	case 0x3007:
+		return "B protocol combination not supported";
+	case 0x3008:
+		return "NCPI not supported";
+	case 0x3009:
+		return "CIP Value unknown";
+	case 0x300A:
+		return "Flags not supported (reserved bits)";
+	case 0x300B:
+		return "Facility not supported";
+	case 0x300C:
+		return "Data length not supported by current protocol";
+	case 0x300D:
+		return "Reset procedure not supported by current protocol";
+
+/*-- informations about the clearing of a physical connection -----*/
+	case 0x3301:
+		return "Protocol error layer 1 (broken line or B-channel removed by signalling protocol)";
+	case 0x3302:
+		return "Protocol error layer 2";
+	case 0x3303:
+		return "Protocol error layer 3";
+	case 0x3304:
+		return "Another application got that call";
+/*-- T.30 specific reasons -----*/
+	case 0x3311:
+		return "Connecting not successful (remote station is no FAX G3 machine)";
+	case 0x3312:
+		return "Connecting not successful (training error)";
+	case 0x3313:
+		return "Disconnected before transfer (remote station does not support transfer mode, e.g. resolution)";
+	case 0x3314:
+		return "Disconnected during transfer (remote abort)";
+	case 0x3315:
+		return "Disconnected during transfer (remote procedure error, e.g. unsuccessful repetition of T.30 commands)";
+	case 0x3316:
+		return "Disconnected during transfer (local tx data underrun)";
+	case 0x3317:
+		return "Disconnected during transfer (local rx data overflow)";
+	case 0x3318:
+		return "Disconnected during transfer (local abort)";
+	case 0x3319:
+		return "Illegal parameter coding (e.g. SFF coding error)";
+
+/*-- disconnect causes from the network according to ETS 300 102-1/Q.931 -----*/
+	case 0x3481: return "Unallocated (unassigned) number";
+	case 0x3482: return "No route to specified transit network";
+	case 0x3483: return "No route to destination";
+	case 0x3486: return "Channel unacceptable";
+	case 0x3487:
+		return "Call awarded and being delivered in an established channel";
+	case 0x3490: return "Normal call clearing";
+	case 0x3491: return "User busy";
+	case 0x3492: return "No user responding";
+	case 0x3493: return "No answer from user (user alerted)";
+	case 0x3495: return "Call rejected";
+	case 0x3496: return "Number changed";
+	case 0x349A: return "Non-selected user clearing";
+	case 0x349B: return "Destination out of order";
+	case 0x349C: return "Invalid number format";
+	case 0x349D: return "Facility rejected";
+	case 0x349E: return "Response to STATUS ENQUIRY";
+	case 0x349F: return "Normal, unspecified";
+	case 0x34A2: return "No circuit / channel available";
+	case 0x34A6: return "Network out of order";
+	case 0x34A9: return "Temporary failure";
+	case 0x34AA: return "Switching equipment congestion";
+	case 0x34AB: return "Access information discarded";
+	case 0x34AC: return "Requested circuit / channel not available";
+	case 0x34AF: return "Resources unavailable, unspecified";
+	case 0x34B1: return "Quality of service unavailable";
+	case 0x34B2: return "Requested facility not subscribed";
+	case 0x34B9: return "Bearer capability not authorized";
+	case 0x34BA: return "Bearer capability not presently available";
+	case 0x34BF: return "Service or option not available, unspecified";
+	case 0x34C1: return "Bearer capability not implemented";
+	case 0x34C2: return "Channel type not implemented";
+	case 0x34C5: return "Requested facility not implemented";
+	case 0x34C6: return "Only restricted digital information bearer capability is available";
+	case 0x34CF: return "Service or option not implemented, unspecified";
+	case 0x34D1: return "Invalid call reference value";
+	case 0x34D2: return "Identified channel does not exist";
+	case 0x34D3: return "A suspended call exists, but this call identity does not";
+	case 0x34D4: return "Call identity in use";
+	case 0x34D5: return "No call suspended";
+	case 0x34D6: return "Call having the requested call identity has been cleared";
+	case 0x34D8: return "Incompatible destination";
+	case 0x34DB: return "Invalid transit network selection";
+	case 0x34DF: return "Invalid message, unspecified";
+	case 0x34E0: return "Mandatory information element is missing";
+	case 0x34E1: return "Message type non-existent or not implemented";
+	case 0x34E2: return "Message not compatible with call state or message type non-existent or not implemented";
+	case 0x34E3: return "Information element non-existent or not implemented";
+	case 0x34E4: return "Invalid information element contents";
+	case 0x34E5: return "Message not compatible with call state";
+	case 0x34E6: return "Recovery on timer expiry";
+	case 0x34EF: return "Protocol error, unspecified";
+	case 0x34FF: return "Interworking, unspecified";
+
+	default: return "No additional information";
+	}
+#endif
+}
 
 static void handle_controller(_cmsg *cmsg)
 {
diff --git a/drivers/isdn/capi/capiutil.c b/drivers/isdn/capi/capiutil.c
index d26f17033b68..6e797e502cfa 100644
--- a/drivers/isdn/capi/capiutil.c
+++ b/drivers/isdn/capi/capiutil.c
@@ -22,205 +22,6 @@
 
 /* from CAPI2.0 DDK AVM Berlin GmbH */
 
-#ifndef CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON
-char *capi_info2str(u16 reason)
-{
-	return "..";
-}
-#else
-char *capi_info2str(u16 reason)
-{
-	switch (reason) {
-
-/*-- informative values (corresponding message was processed) -----*/
-	case 0x0001:
-		return "NCPI not supported by current protocol, NCPI ignored";
-	case 0x0002:
-		return "Flags not supported by current protocol, flags ignored";
-	case 0x0003:
-		return "Alert already sent by another application";
-
-/*-- error information concerning CAPI_REGISTER -----*/
-	case 0x1001:
-		return "Too many applications";
-	case 0x1002:
-		return "Logical block size too small, must be at least 128 Bytes";
-	case 0x1003:
-		return "Buffer exceeds 64 kByte";
-	case 0x1004:
-		return "Message buffer size too small, must be at least 1024 Bytes";
-	case 0x1005:
-		return "Max. number of logical connections not supported";
-	case 0x1006:
-		return "Reserved";
-	case 0x1007:
-		return "The message could not be accepted because of an internal busy condition";
-	case 0x1008:
-		return "OS resource error (no memory ?)";
-	case 0x1009:
-		return "CAPI not installed";
-	case 0x100A:
-		return "Controller does not support external equipment";
-	case 0x100B:
-		return "Controller does only support external equipment";
-
-/*-- error information concerning message exchange functions -----*/
-	case 0x1101:
-		return "Illegal application number";
-	case 0x1102:
-		return "Illegal command or subcommand or message length less than 12 bytes";
-	case 0x1103:
-		return "The message could not be accepted because of a queue full condition !! The error code does not imply that CAPI cannot receive messages directed to another controller, PLCI or NCCI";
-	case 0x1104:
-		return "Queue is empty";
-	case 0x1105:
-		return "Queue overflow, a message was lost !! This indicates a configuration error. The only recovery from this error is to perform a CAPI_RELEASE";
-	case 0x1106:
-		return "Unknown notification parameter";
-	case 0x1107:
-		return "The Message could not be accepted because of an internal busy condition";
-	case 0x1108:
-		return "OS Resource error (no memory ?)";
-	case 0x1109:
-		return "CAPI not installed";
-	case 0x110A:
-		return "Controller does not support external equipment";
-	case 0x110B:
-		return "Controller does only support external equipment";
-
-/*-- error information concerning resource / coding problems -----*/
-	case 0x2001:
-		return "Message not supported in current state";
-	case 0x2002:
-		return "Illegal Controller / PLCI / NCCI";
-	case 0x2003:
-		return "Out of PLCI";
-	case 0x2004:
-		return "Out of NCCI";
-	case 0x2005:
-		return "Out of LISTEN";
-	case 0x2006:
-		return "Out of FAX resources (protocol T.30)";
-	case 0x2007:
-		return "Illegal message parameter coding";
-
-/*-- error information concerning requested services  -----*/
-	case 0x3001:
-		return "B1 protocol not supported";
-	case 0x3002:
-		return "B2 protocol not supported";
-	case 0x3003:
-		return "B3 protocol not supported";
-	case 0x3004:
-		return "B1 protocol parameter not supported";
-	case 0x3005:
-		return "B2 protocol parameter not supported";
-	case 0x3006:
-		return "B3 protocol parameter not supported";
-	case 0x3007:
-		return "B protocol combination not supported";
-	case 0x3008:
-		return "NCPI not supported";
-	case 0x3009:
-		return "CIP Value unknown";
-	case 0x300A:
-		return "Flags not supported (reserved bits)";
-	case 0x300B:
-		return "Facility not supported";
-	case 0x300C:
-		return "Data length not supported by current protocol";
-	case 0x300D:
-		return "Reset procedure not supported by current protocol";
-
-/*-- informations about the clearing of a physical connection -----*/
-	case 0x3301:
-		return "Protocol error layer 1 (broken line or B-channel removed by signalling protocol)";
-	case 0x3302:
-		return "Protocol error layer 2";
-	case 0x3303:
-		return "Protocol error layer 3";
-	case 0x3304:
-		return "Another application got that call";
-/*-- T.30 specific reasons -----*/
-	case 0x3311:
-		return "Connecting not successful (remote station is no FAX G3 machine)";
-	case 0x3312:
-		return "Connecting not successful (training error)";
-	case 0x3313:
-		return "Disconnected before transfer (remote station does not support transfer mode, e.g. resolution)";
-	case 0x3314:
-		return "Disconnected during transfer (remote abort)";
-	case 0x3315:
-		return "Disconnected during transfer (remote procedure error, e.g. unsuccessful repetition of T.30 commands)";
-	case 0x3316:
-		return "Disconnected during transfer (local tx data underrun)";
-	case 0x3317:
-		return "Disconnected during transfer (local rx data overflow)";
-	case 0x3318:
-		return "Disconnected during transfer (local abort)";
-	case 0x3319:
-		return "Illegal parameter coding (e.g. SFF coding error)";
-
-/*-- disconnect causes from the network according to ETS 300 102-1/Q.931 -----*/
-	case 0x3481: return "Unallocated (unassigned) number";
-	case 0x3482: return "No route to specified transit network";
-	case 0x3483: return "No route to destination";
-	case 0x3486: return "Channel unacceptable";
-	case 0x3487:
-		return "Call awarded and being delivered in an established channel";
-	case 0x3490: return "Normal call clearing";
-	case 0x3491: return "User busy";
-	case 0x3492: return "No user responding";
-	case 0x3493: return "No answer from user (user alerted)";
-	case 0x3495: return "Call rejected";
-	case 0x3496: return "Number changed";
-	case 0x349A: return "Non-selected user clearing";
-	case 0x349B: return "Destination out of order";
-	case 0x349C: return "Invalid number format";
-	case 0x349D: return "Facility rejected";
-	case 0x349E: return "Response to STATUS ENQUIRY";
-	case 0x349F: return "Normal, unspecified";
-	case 0x34A2: return "No circuit / channel available";
-	case 0x34A6: return "Network out of order";
-	case 0x34A9: return "Temporary failure";
-	case 0x34AA: return "Switching equipment congestion";
-	case 0x34AB: return "Access information discarded";
-	case 0x34AC: return "Requested circuit / channel not available";
-	case 0x34AF: return "Resources unavailable, unspecified";
-	case 0x34B1: return "Quality of service unavailable";
-	case 0x34B2: return "Requested facility not subscribed";
-	case 0x34B9: return "Bearer capability not authorized";
-	case 0x34BA: return "Bearer capability not presently available";
-	case 0x34BF: return "Service or option not available, unspecified";
-	case 0x34C1: return "Bearer capability not implemented";
-	case 0x34C2: return "Channel type not implemented";
-	case 0x34C5: return "Requested facility not implemented";
-	case 0x34C6: return "Only restricted digital information bearer capability is available";
-	case 0x34CF: return "Service or option not implemented, unspecified";
-	case 0x34D1: return "Invalid call reference value";
-	case 0x34D2: return "Identified channel does not exist";
-	case 0x34D3: return "A suspended call exists, but this call identity does not";
-	case 0x34D4: return "Call identity in use";
-	case 0x34D5: return "No call suspended";
-	case 0x34D6: return "Call having the requested call identity has been cleared";
-	case 0x34D8: return "Incompatible destination";
-	case 0x34DB: return "Invalid transit network selection";
-	case 0x34DF: return "Invalid message, unspecified";
-	case 0x34E0: return "Mandatory information element is missing";
-	case 0x34E1: return "Message type non-existent or not implemented";
-	case 0x34E2: return "Message not compatible with call state or message type non-existent or not implemented";
-	case 0x34E3: return "Information element non-existent or not implemented";
-	case 0x34E4: return "Invalid information element contents";
-	case 0x34E5: return "Message not compatible with call state";
-	case 0x34E6: return "Recovery on timer expiry";
-	case 0x34EF: return "Protocol error, unspecified";
-	case 0x34FF: return "Interworking, unspecified";
-
-	default: return "No additional information";
-	}
-}
-#endif
-
 typedef struct {
 	int typ;
 	size_t off;
@@ -1073,4 +874,3 @@ EXPORT_SYMBOL(capi_cmsg_header);
 EXPORT_SYMBOL(capi_cmd2str);
 EXPORT_SYMBOL(capi_cmsg2str);
 EXPORT_SYMBOL(capi_message2str);
-EXPORT_SYMBOL(capi_info2str);
diff --git a/include/linux/isdn/capiutil.h b/include/linux/isdn/capiutil.h
index 5a52f2c94f3f..44bd6046e6e2 100644
--- a/include/linux/isdn/capiutil.h
+++ b/include/linux/isdn/capiutil.h
@@ -164,11 +164,6 @@ unsigned capi_cmsg_header(_cmsg * cmsg, __u16 _ApplId,
 			  __u8 _Command, __u8 _Subcommand,
 			  __u16 _Messagenumber, __u32 _Controller);
 
-/*
- * capi_info2str generated a readable string for Capi2.0 reasons.
- */
-char *capi_info2str(__u16 reason);
-
 /*-----------------------------------------------------------------------*/
 
 /*
-- 
cgit v1.2.3-71-gd317


From 7daf8d54c17c6dae0c949aa37cbc6bb5604a4b2e Mon Sep 17 00:00:00 2001
From: Matt Roper <matthew.d.roper@intel.com>
Date: Thu, 29 May 2014 08:06:52 -0700
Subject: drm/plane-helper: Add drm_plane_helper_check_update() (v3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pull the parameter checking from drm_primary_helper_update() out into
its own function; drivers that provide their own setplane()
implementations rather than using the helper may still want to share
this parameter checking logic.

A few of the checks here were also updated based on suggestions by
Ville Syrjälä.

v3:
 - s/primary_helper/plane_helper/ --- this checking logic may be useful
   for other types of planes as well.
 - Fix visibility check (need to dereference visibility pointer)
v2:
 - Pass src/dest/clip rects and min/max scaling down to helper to avoid
   duplication of effort between helper and drivers (suggested by
   Ville).
 - Allow caller to specify whether the primary plane should be
   updatable while the crtc is disabled.

Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Chon Ming Lee <chon.ming.lee@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Acked-by: Dave Airlie <airlied@gmail.com>
[danvet: Include header properly and fixup declaration mismatch to
make this compile.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_plane_helper.c | 125 ++++++++++++++++++++++++++++---------
 include/drm/drm_plane_helper.h     |  22 +++++++
 2 files changed, 117 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c
index 37b9333cee49..6d133149cc74 100644
--- a/drivers/gpu/drm/drm_plane_helper.c
+++ b/drivers/gpu/drm/drm_plane_helper.c
@@ -25,6 +25,7 @@
 
 #include <linux/list.h>
 #include <drm/drmP.h>
+#include <drm/drm_plane_helper.h>
 #include <drm/drm_rect.h>
 #include <drm/drm_plane_helper.h>
 
@@ -73,6 +74,79 @@ static int get_connectors_for_crtc(struct drm_crtc *crtc,
 	return count;
 }
 
+/**
+ * drm_plane_helper_check_update() - Check plane update for validity
+ * @plane: plane object to update
+ * @crtc: owning CRTC of owning plane
+ * @fb: framebuffer to flip onto plane
+ * @src: source coordinates in 16.16 fixed point
+ * @dest: integer destination coordinates
+ * @clip: integer clipping coordinates
+ * @min_scale: minimum @src:@dest scaling factor in 16.16 fixed point
+ * @max_scale: maximum @src:@dest scaling factor in 16.16 fixed point
+ * @can_position: is it legal to position the plane such that it
+ *                doesn't cover the entire crtc?  This will generally
+ *                only be false for primary planes.
+ * @can_update_disabled: can the plane be updated while the crtc
+ *                       is disabled?
+ * @visible: output parameter indicating whether plane is still visible after
+ *           clipping
+ *
+ * Checks that a desired plane update is valid.  Drivers that provide
+ * their own plane handling rather than helper-provided implementations may
+ * still wish to call this function to avoid duplication of error checking
+ * code.
+ *
+ * RETURNS:
+ * Zero if update appears valid, error code on failure
+ */
+int drm_plane_helper_check_update(struct drm_plane *plane,
+				    struct drm_crtc *crtc,
+				    struct drm_framebuffer *fb,
+				    struct drm_rect *src,
+				    struct drm_rect *dest,
+				    const struct drm_rect *clip,
+				    int min_scale,
+				    int max_scale,
+				    bool can_position,
+				    bool can_update_disabled,
+				    bool *visible)
+{
+	int hscale, vscale;
+
+	if (!crtc->enabled && !can_update_disabled) {
+		DRM_DEBUG_KMS("Cannot update plane of a disabled CRTC.\n");
+		return -EINVAL;
+	}
+
+	/* Check scaling */
+	hscale = drm_rect_calc_hscale(src, dest, min_scale, max_scale);
+	vscale = drm_rect_calc_vscale(src, dest, min_scale, max_scale);
+	if (hscale < 0 || vscale < 0) {
+		DRM_DEBUG_KMS("Invalid scaling of plane\n");
+		return -ERANGE;
+	}
+
+	*visible = drm_rect_clip_scaled(src, dest, clip, hscale, vscale);
+	if (!*visible)
+		/*
+		 * Plane isn't visible; some drivers can handle this
+		 * so we just return success here.  Drivers that can't
+		 * (including those that use the primary plane helper's
+		 * update function) will return an error from their
+		 * update_plane handler.
+		 */
+		return 0;
+
+	if (!can_position && !drm_rect_equals(dest, clip)) {
+		DRM_DEBUG_KMS("Plane must cover entire CRTC\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_plane_helper_check_update);
+
 /**
  * drm_primary_helper_update() - Helper for primary plane update
  * @plane: plane object to update
@@ -121,51 +195,42 @@ int drm_primary_helper_update(struct drm_plane *plane, struct drm_crtc *crtc,
 		.x = src_x >> 16,
 		.y = src_y >> 16,
 	};
+	struct drm_rect src = {
+		.x1 = src_x,
+		.y1 = src_y,
+		.x2 = src_x + src_w,
+		.y2 = src_y + src_h,
+	};
 	struct drm_rect dest = {
 		.x1 = crtc_x,
 		.y1 = crtc_y,
 		.x2 = crtc_x + crtc_w,
 		.y2 = crtc_y + crtc_h,
 	};
-	struct drm_rect clip = {
+	const struct drm_rect clip = {
 		.x2 = crtc->mode.hdisplay,
 		.y2 = crtc->mode.vdisplay,
 	};
 	struct drm_connector **connector_list;
 	int num_connectors, ret;
+	bool visible;
 
-	if (!crtc->enabled) {
-		DRM_DEBUG_KMS("Cannot update primary plane of a disabled CRTC.\n");
-		return -EINVAL;
-	}
-
-	/* Disallow subpixel positioning */
-	if ((src_x | src_y | src_w | src_h) & SUBPIXEL_MASK) {
-		DRM_DEBUG_KMS("Primary plane does not support subpixel positioning\n");
-		return -EINVAL;
-	}
-
-	/* Disallow scaling */
-	src_w >>= 16;
-	src_h >>= 16;
-	if (crtc_w != src_w || crtc_h != src_h) {
-		DRM_DEBUG_KMS("Can't scale primary plane\n");
-		return -EINVAL;
-	}
-
-	/* Make sure primary plane covers entire CRTC */
-	drm_rect_intersect(&dest, &clip);
-	if (dest.x1 != 0 || dest.y1 != 0 ||
-	    dest.x2 != crtc->mode.hdisplay || dest.y2 != crtc->mode.vdisplay) {
-		DRM_DEBUG_KMS("Primary plane must cover entire CRTC\n");
-		return -EINVAL;
-	}
-
-	/* Framebuffer must be big enough to cover entire plane */
-	ret = drm_crtc_check_viewport(crtc, crtc_x, crtc_y, &crtc->mode, fb);
+	ret = drm_plane_helper_check_update(plane, crtc, fb,
+					    &src, &dest, &clip,
+					    DRM_PLANE_HELPER_NO_SCALING,
+					    DRM_PLANE_HELPER_NO_SCALING,
+					    false, false, &visible);
 	if (ret)
 		return ret;
 
+	if (!visible)
+		/*
+		 * Primary plane isn't visible.  Note that unless a driver
+		 * provides their own disable function, this will just
+		 * wind up returning -EINVAL to userspace.
+		 */
+		return plane->funcs->disable_plane(plane);
+
 	/* Find current connectors for CRTC */
 	num_connectors = get_connectors_for_crtc(crtc, NULL, 0);
 	BUG_ON(num_connectors == 0);
diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h
index c5e7ab9503c8..52e6870534b2 100644
--- a/include/drm/drm_plane_helper.h
+++ b/include/drm/drm_plane_helper.h
@@ -24,6 +24,17 @@
 #ifndef DRM_PLANE_HELPER_H
 #define DRM_PLANE_HELPER_H
 
+#include <drm/drm_rect.h>
+
+/*
+ * Drivers that don't allow primary plane scaling may pass this macro in place
+ * of the min/max scale parameters of the update checker function.
+ *
+ * Due to src being in 16.16 fixed point and dest being in integer pixels,
+ * 1<<16 represents no scaling.
+ */
+#define DRM_PLANE_HELPER_NO_SCALING (1<<16)
+
 /**
  * DOC: plane helpers
  *
@@ -31,6 +42,17 @@
  * planes.
  */
 
+extern int drm_plane_helper_check_update(struct drm_plane *plane,
+					 struct drm_crtc *crtc,
+					 struct drm_framebuffer *fb,
+					 struct drm_rect *src,
+					 struct drm_rect *dest,
+					 const struct drm_rect *clip,
+					 int min_scale,
+					 int max_scale,
+					 bool can_position,
+					 bool can_update_disabled,
+					 bool *visible);
 extern int drm_primary_helper_update(struct drm_plane *plane,
 				     struct drm_crtc *crtc,
 				     struct drm_framebuffer *fb,
-- 
cgit v1.2.3-71-gd317


From 4fc828e24cd9c385d3a44e1b499ec7fc70239d8a Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <davidlohr@hp.com>
Date: Fri, 2 May 2014 11:24:15 -0700
Subject: locking/rwsem: Support optimistic spinning

We have reached the point where our mutexes are quite fine tuned
for a number of situations. This includes the use of heuristics
and optimistic spinning, based on MCS locking techniques.

Exclusive ownership of read-write semaphores are, conceptually,
just about the same as mutexes, making them close cousins. To
this end we need to make them both perform similarly, and
right now, rwsems are simply not up to it. This was discovered
by both reverting commit 4fc3f1d6 (mm/rmap, migration: Make
rmap_walk_anon() and try_to_unmap_anon() more scalable) and
similarly, converting some other mutexes (ie: i_mmap_mutex) to
rwsems. This creates a situation where users have to choose
between a rwsem and mutex taking into account this important
performance difference. Specifically, biggest difference between
both locks is when we fail to acquire a mutex in the fastpath,
optimistic spinning comes in to play and we can avoid a large
amount of unnecessary sleeping and overhead of moving tasks in
and out of wait queue. Rwsems do not have such logic.

This patch, based on the work from Tim Chen and I, adds support
for write-side optimistic spinning when the lock is contended.
It also includes support for the recently added cancelable MCS
locking for adaptive spinning. Note that is is only applicable
to the xadd method, and the spinlock rwsem variant remains intact.

Allowing optimistic spinning before putting the writer on the wait
queue reduces wait queue contention and provided greater chance
for the rwsem to get acquired. With these changes, rwsem is on par
with mutex. The performance benefits can be seen on a number of
workloads. For instance, on a 8 socket, 80 core 64bit Westmere box,
aim7 shows the following improvements in throughput:

 +--------------+---------------------+-----------------+
 |   Workload   | throughput-increase | number of users |
 +--------------+---------------------+-----------------+
 | alltests     | 20%                 | >1000           |
 | custom       | 27%, 60%            | 10-100, >1000   |
 | high_systime | 36%, 30%            | >100, >1000     |
 | shared       | 58%, 29%            | 10-100, >1000   |
 +--------------+---------------------+-----------------+

There was also improvement on smaller systems, such as a quad-core
x86-64 laptop running a 30Gb PostgreSQL (pgbench) workload for up
to +60% in throughput for over 50 clients. Additionally, benefits
were also noticed in exim (mail server) workloads. Furthermore, no
performance regression have been seen at all.

Based-on-work-from: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
[peterz: rej fixup due to comment patches, sched/rt.h header]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alex Shi <alex.shi@linaro.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: "Paul E.McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Scott J Norton" <scott.norton@hp.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Mason <clm@fb.com>
Cc: Josef Bacik <jbacik@fusionio.com>
Link: http://lkml.kernel.org/r/1399055055.6275.15.camel@buesod1.americas.hpqcorp.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/rwsem.h       |  25 ++++-
 kernel/locking/rwsem-xadd.c | 225 ++++++++++++++++++++++++++++++++++++++------
 kernel/locking/rwsem.c      |  31 +++++-
 3 files changed, 248 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 03f3b05e8ec1..3e108f154cb6 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -16,6 +16,7 @@
 
 #include <linux/atomic.h>
 
+struct optimistic_spin_queue;
 struct rw_semaphore;
 
 #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
@@ -23,9 +24,17 @@ struct rw_semaphore;
 #else
 /* All arch specific implementations share the same struct */
 struct rw_semaphore {
-	long			count;
-	raw_spinlock_t		wait_lock;
-	struct list_head	wait_list;
+	long count;
+	raw_spinlock_t wait_lock;
+	struct list_head wait_list;
+#ifdef CONFIG_SMP
+	/*
+	 * Write owner. Used as a speculative check to see
+	 * if the owner is running on the cpu.
+	 */
+	struct task_struct *owner;
+	struct optimistic_spin_queue *osq; /* spinner MCS lock */
+#endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
 #endif
@@ -55,11 +64,21 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 # define __RWSEM_DEP_MAP_INIT(lockname)
 #endif
 
+#ifdef CONFIG_SMP
+#define __RWSEM_INITIALIZER(name)			\
+	{ RWSEM_UNLOCKED_VALUE,				\
+	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
+	  LIST_HEAD_INIT((name).wait_list),		\
+	  NULL, /* owner */				\
+	  NULL /* mcs lock */                           \
+	  __RWSEM_DEP_MAP_INIT(name) }
+#else
 #define __RWSEM_INITIALIZER(name)			\
 	{ RWSEM_UNLOCKED_VALUE,				\
 	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
 	  LIST_HEAD_INIT((name).wait_list)		\
 	  __RWSEM_DEP_MAP_INIT(name) }
+#endif
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index b4219ff87b8c..4a75278142cd 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -5,11 +5,17 @@
  *
  * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
  * and Michel Lespinasse <walken@google.com>
+ *
+ * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
+ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/export.h>
+#include <linux/sched/rt.h>
+
+#include "mcs_spinlock.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -76,6 +82,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	raw_spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
+#ifdef CONFIG_SMP
+	sem->owner = NULL;
+	sem->osq = NULL;
+#endif
 }
 
 EXPORT_SYMBOL(__init_rwsem);
@@ -190,7 +200,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
 }
 
 /*
- * wait for the read lock to be granted
+ * Wait for the read lock to be granted
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
@@ -237,64 +247,221 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	return sem;
 }
 
+static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
+{
+	if (!(count & RWSEM_ACTIVE_MASK)) {
+		/* try acquiring the write lock */
+		if (sem->count == RWSEM_WAITING_BIAS &&
+		    cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
+			    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
+			if (!list_is_singular(&sem->wait_list))
+				rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+			return true;
+		}
+	}
+	return false;
+}
+
+#ifdef CONFIG_SMP
 /*
- * wait until we successfully acquire the write lock
+ * Try to acquire write lock before the writer has been put on wait queue.
+ */
+static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
+{
+	long old, count = ACCESS_ONCE(sem->count);
+
+	while (true) {
+		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
+			return false;
+
+		old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
+		if (old == count)
+			return true;
+
+		count = old;
+	}
+}
+
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool on_cpu = true;
+
+	if (need_resched())
+		return 0;
+
+	rcu_read_lock();
+	owner = ACCESS_ONCE(sem->owner);
+	if (owner)
+		on_cpu = owner->on_cpu;
+	rcu_read_unlock();
+
+	/*
+	 * If sem->owner is not set, the rwsem owner may have
+	 * just acquired it and not set the owner yet or the rwsem
+	 * has been released.
+	 */
+	return on_cpu;
+}
+
+static inline bool owner_running(struct rw_semaphore *sem,
+				 struct task_struct *owner)
+{
+	if (sem->owner != owner)
+		return false;
+
+	/*
+	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
+	 * sem->owner still matches owner, if that fails, owner might
+	 * point to free()d memory, if it still matches, the rcu_read_lock()
+	 * ensures the memory stays valid.
+	 */
+	barrier();
+
+	return owner->on_cpu;
+}
+
+static noinline
+bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
+{
+	rcu_read_lock();
+	while (owner_running(sem, owner)) {
+		if (need_resched())
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+	rcu_read_unlock();
+
+	/*
+	 * We break out the loop above on need_resched() or when the
+	 * owner changed, which is a sign for heavy contention. Return
+	 * success only when sem->owner is NULL.
+	 */
+	return sem->owner == NULL;
+}
+
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool taken = false;
+
+	preempt_disable();
+
+	/* sem->wait_lock should not be held when doing optimistic spinning */
+	if (!rwsem_can_spin_on_owner(sem))
+		goto done;
+
+	if (!osq_lock(&sem->osq))
+		goto done;
+
+	while (true) {
+		owner = ACCESS_ONCE(sem->owner);
+		if (owner && !rwsem_spin_on_owner(sem, owner))
+			break;
+
+		/* wait_lock will be acquired if write_lock is obtained */
+		if (rwsem_try_write_lock_unqueued(sem)) {
+			taken = true;
+			break;
+		}
+
+		/*
+		 * When there's no owner, we might have preempted between the
+		 * owner acquiring the lock and setting the owner field. If
+		 * we're an RT task that will live-lock because we won't let
+		 * the owner complete.
+		 */
+		if (!owner && (need_resched() || rt_task(current)))
+			break;
+
+		/*
+		 * The cpu_relax() call is a compiler barrier which forces
+		 * everything in this loop to be re-loaded. We don't need
+		 * memory barriers as we'll eventually observe the right
+		 * values at the cost of a few extra spins.
+		 */
+		arch_mutex_cpu_relax();
+	}
+	osq_unlock(&sem->osq);
+done:
+	preempt_enable();
+	return taken;
+}
+
+#else
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	return false;
+}
+#endif
+
+/*
+ * Wait until we successfully acquire the write lock
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 {
-	long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
+	long count;
+	bool waiting = true; /* any queued threads before us */
 	struct rwsem_waiter waiter;
-	struct task_struct *tsk = current;
 
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
+	/* undo write bias from down_write operation, stop active locking */
+	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
+
+	/* do optimistic spinning and steal lock if possible */
+	if (rwsem_optimistic_spin(sem))
+		return sem;
+
+	/*
+	 * Optimistic spinning failed, proceed to the slowpath
+	 * and block until we can acquire the sem.
+	 */
+	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_WRITE;
 
 	raw_spin_lock_irq(&sem->wait_lock);
+
+	/* account for this before adding a new element to the list */
 	if (list_empty(&sem->wait_list))
-		adjustment += RWSEM_WAITING_BIAS;
+		waiting = false;
+
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we're now waiting on the lock, but no longer actively locking */
-	count = rwsem_atomic_update(adjustment, sem);
+	if (waiting) {
+		count = ACCESS_ONCE(sem->count);
 
-	/* If there were already threads queued before us and there are no
-	 * active writers, the lock must be read owned; so we try to wake
-	 * any read locks that were queued ahead of us. */
-	if (count > RWSEM_WAITING_BIAS &&
-	    adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+		/*
+		 * If there were already threads queued before us and there are no
+		 * active writers, the lock must be read owned; so we try to wake
+		 * any read locks that were queued ahead of us.
+		 */
+		if (count > RWSEM_WAITING_BIAS)
+			sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+
+	} else
+		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
 
 	/* wait until we successfully acquire the lock */
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+	set_current_state(TASK_UNINTERRUPTIBLE);
 	while (true) {
-		if (!(count & RWSEM_ACTIVE_MASK)) {
-			/* Try acquiring the write lock. */
-			count = RWSEM_ACTIVE_WRITE_BIAS;
-			if (!list_is_singular(&sem->wait_list))
-				count += RWSEM_WAITING_BIAS;
-
-			if (sem->count == RWSEM_WAITING_BIAS &&
-			    cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
-							RWSEM_WAITING_BIAS)
-				break;
-		}
-
+		if (rwsem_try_write_lock(count, sem))
+			break;
 		raw_spin_unlock_irq(&sem->wait_lock);
 
 		/* Block until there are no active lockers. */
 		do {
 			schedule();
-			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+			set_current_state(TASK_UNINTERRUPTIBLE);
 		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
 
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
+	__set_current_state(TASK_RUNNING);
 
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
-	tsk->state = TASK_RUNNING;
 
 	return sem;
 }
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index cfff1435bdfb..42f806de49d4 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -12,6 +12,27 @@
 
 #include <linux/atomic.h>
 
+#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+	sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+	sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
+
 /*
  * lock for reading
  */
@@ -48,6 +69,7 @@ void __sched down_write(struct rw_semaphore *sem)
 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(down_write);
@@ -59,8 +81,11 @@ int down_write_trylock(struct rw_semaphore *sem)
 {
 	int ret = __down_write_trylock(sem);
 
-	if (ret == 1)
+	if (ret == 1) {
 		rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
+		rwsem_set_owner(sem);
+	}
+
 	return ret;
 }
 
@@ -85,6 +110,7 @@ void up_write(struct rw_semaphore *sem)
 {
 	rwsem_release(&sem->dep_map, 1, _RET_IP_);
 
+	rwsem_clear_owner(sem);
 	__up_write(sem);
 }
 
@@ -99,6 +125,7 @@ void downgrade_write(struct rw_semaphore *sem)
 	 * lockdep: a downgraded write will live on as a write
 	 * dependency.
 	 */
+	rwsem_clear_owner(sem);
 	__downgrade_write(sem);
 }
 
@@ -122,6 +149,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
 	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(_down_write_nest_lock);
@@ -141,6 +169,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
 	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(down_write_nested);
-- 
cgit v1.2.3-71-gd317


From dbb5eafa23fdb7a00d64b04e23a3efc59d6474cb Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <davidlohr@hp.com>
Date: Mon, 19 May 2014 17:27:57 -0700
Subject: locking/rwsem: Fix warnings for CONFIG_RWSEM_GENERIC_SPINLOCK

Optimistic spinning is only used by the xadd variant
of rw-semaphores. Make sure that we use the old version
of the __RWSEM_INITIALIZER macro for systems that rely
on the spinlock one, otherwise warnings can be triggered,
such as the following reported on an arm box:

  ipc/ipcns_notifier.c:22:8: warning: excess elements in struct initializer [enabled by default]
  ipc/ipcns_notifier.c:22:8: warning: (near initialization for 'ipcns_chain.rwsem') [enabled by default]
  ipc/ipcns_notifier.c:22:8: warning: excess elements in struct initializer [enabled by default]
  ipc/ipcns_notifier.c:22:8: warning: (near initialization for 'ipcns_chain.rwsem') [enabled by default]

Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: Alex Shi <alex.shi@linaro.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Mason <clm@fb.com>
Cc: Josef Bacik <jbacik@fusionio.com>
Link: http://lkml.kernel.org/r/1400545677.6399.10.camel@buesod1.americas.hpqcorp.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/rwsem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 3e108f154cb6..8d79708146aa 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -64,7 +64,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 # define __RWSEM_DEP_MAP_INIT(lockname)
 #endif
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && !defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
 #define __RWSEM_INITIALIZER(name)			\
 	{ RWSEM_UNLOCKED_VALUE,				\
 	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
-- 
cgit v1.2.3-71-gd317


From fa93384f40deeb294fd29f2fdcadbd0ebc2dedf1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 23 May 2014 13:20:42 +0300
Subject: sched: Fix signedness bug in yield_to()

yield_to() is supposed to return -ESRCH if there is no task to
yield to, but because the type is bool that is the same as returning
true.

The only place I see which cares is kvm_vcpu_on_spin().

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Raghavendra <raghavendra.kt@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: kvm@vger.kernel.org
Link: http://lkml.kernel.org/r/20140523102042.GA7267@mwanda
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kvm_host.h | 2 +-
 include/linux/sched.h    | 2 +-
 kernel/sched/core.c      | 2 +-
 virt/kvm/kvm_main.c      | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7d21cf9f4380..3c4bcf146159 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -584,7 +584,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
-bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
+int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0f91d00efd87..6790c3b42072 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2180,7 +2180,7 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
-extern bool yield_to(struct task_struct *p, bool preempt);
+extern int yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
 /**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 321d800e4baa..afcc84234a3e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4195,7 +4195,7 @@ EXPORT_SYMBOL(yield);
  *	false (0) if we failed to boost the target.
  *	-ESRCH if there's no task to yield to.
  */
-bool __sched yield_to(struct task_struct *p, bool preempt)
+int __sched yield_to(struct task_struct *p, bool preempt)
 {
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 56baae8c2f56..86d1c457458d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1708,11 +1708,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 #endif /* !CONFIG_S390 */
 
-bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
+int kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
 	struct pid *pid;
 	struct task_struct *task = NULL;
-	bool ret = false;
+	int ret = 0;
 
 	rcu_read_lock();
 	pid = rcu_dereference(target->pid);
-- 
cgit v1.2.3-71-gd317


From 63b2ca30bdb3dbf60bc7ac5f46713c0d32308261 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Mon, 26 May 2014 18:19:37 -0400
Subject: sched: Let 'struct sched_group_power' care about CPU capacity

It is better not to think about compute capacity as being equivalent
to "CPU power".  The upcoming "power aware" scheduler work may create
confusion with the notion of energy consumption if "power" is used too
liberally.

Since struct sched_group_power is really about compute capacity of sched
groups, let's rename it to struct sched_group_capacity. Similarly sgp
becomes sgc. Related variables and functions dealing with groups are also
adjusted accordingly.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: linaro-kernel@lists.linaro.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/n/tip-5yeix833vvgf2uyj5o36hpu9@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |   2 +-
 kernel/sched/core.c   |  81 +++++++++++++++----------------
 kernel/sched/fair.c   | 131 +++++++++++++++++++++++++-------------------------
 kernel/sched/sched.h  |  16 +++---
 4 files changed, 115 insertions(+), 115 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6790c3b42072..a96f03598c61 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1013,7 +1013,7 @@ typedef const int (*sched_domain_flags_f)(void);
 struct sd_data {
 	struct sched_domain **__percpu sd;
 	struct sched_group **__percpu sg;
-	struct sched_group_power **__percpu sgp;
+	struct sched_group_capacity **__percpu sgc;
 };
 
 struct sched_domain_topology_level {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index afcc84234a3e..2e1fb0902200 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5221,14 +5221,13 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		}
 
 		/*
-		 * Even though we initialize ->power to something semi-sane,
-		 * we leave power_orig unset. This allows us to detect if
+		 * Even though we initialize ->capacity to something semi-sane,
+		 * we leave capacity_orig unset. This allows us to detect if
 		 * domain iteration is still funny without causing /0 traps.
 		 */
-		if (!group->sgp->power_orig) {
+		if (!group->sgc->capacity_orig) {
 			printk(KERN_CONT "\n");
-			printk(KERN_ERR "ERROR: domain->cpu_power not "
-					"set\n");
+			printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
 			break;
 		}
 
@@ -5250,9 +5249,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
 		printk(KERN_CONT " %s", str);
-		if (group->sgp->power != SCHED_POWER_SCALE) {
-			printk(KERN_CONT " (cpu_power = %d)",
-				group->sgp->power);
+		if (group->sgc->capacity != SCHED_POWER_SCALE) {
+			printk(KERN_CONT " (cpu_capacity = %d)",
+				group->sgc->capacity);
 		}
 
 		group = group->next;
@@ -5466,7 +5465,7 @@ static struct root_domain *alloc_rootdomain(void)
 	return rd;
 }
 
-static void free_sched_groups(struct sched_group *sg, int free_sgp)
+static void free_sched_groups(struct sched_group *sg, int free_sgc)
 {
 	struct sched_group *tmp, *first;
 
@@ -5477,8 +5476,8 @@ static void free_sched_groups(struct sched_group *sg, int free_sgp)
 	do {
 		tmp = sg->next;
 
-		if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
-			kfree(sg->sgp);
+		if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))
+			kfree(sg->sgc);
 
 		kfree(sg);
 		sg = tmp;
@@ -5496,7 +5495,7 @@ static void free_sched_domain(struct rcu_head *rcu)
 	if (sd->flags & SD_OVERLAP) {
 		free_sched_groups(sd->groups, 1);
 	} else if (atomic_dec_and_test(&sd->groups->ref)) {
-		kfree(sd->groups->sgp);
+		kfree(sd->groups->sgc);
 		kfree(sd->groups);
 	}
 	kfree(sd);
@@ -5707,17 +5706,17 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 
 		cpumask_or(covered, covered, sg_span);
 
-		sg->sgp = *per_cpu_ptr(sdd->sgp, i);
-		if (atomic_inc_return(&sg->sgp->ref) == 1)
+		sg->sgc = *per_cpu_ptr(sdd->sgc, i);
+		if (atomic_inc_return(&sg->sgc->ref) == 1)
 			build_group_mask(sd, sg);
 
 		/*
-		 * Initialize sgp->power such that even if we mess up the
+		 * Initialize sgc->capacity such that even if we mess up the
 		 * domains and no possible iteration will get us here, we won't
 		 * die on a /0 trap.
 		 */
-		sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
-		sg->sgp->power_orig = sg->sgp->power;
+		sg->sgc->capacity = SCHED_POWER_SCALE * cpumask_weight(sg_span);
+		sg->sgc->capacity_orig = sg->sgc->capacity;
 
 		/*
 		 * Make sure the first group of this domain contains the
@@ -5755,8 +5754,8 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 
 	if (sg) {
 		*sg = *per_cpu_ptr(sdd->sg, cpu);
-		(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
-		atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
+		(*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu);
+		atomic_set(&(*sg)->sgc->ref, 1); /* for claim_allocations */
 	}
 
 	return cpu;
@@ -5819,16 +5818,16 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 }
 
 /*
- * Initialize sched groups cpu_power.
+ * Initialize sched groups cpu_capacity.
  *
- * cpu_power indicates the capacity of sched group, which is used while
+ * cpu_capacity indicates the capacity of sched group, which is used while
  * distributing the load between different sched groups in a sched domain.
- * Typically cpu_power for all the groups in a sched domain will be same unless
- * there are asymmetries in the topology. If there are asymmetries, group
- * having more cpu_power will pickup more load compared to the group having
- * less cpu_power.
+ * Typically cpu_capacity for all the groups in a sched domain will be same
+ * unless there are asymmetries in the topology. If there are asymmetries,
+ * group having more cpu_capacity will pickup more load compared to the
+ * group having less cpu_capacity.
  */
-static void init_sched_groups_power(int cpu, struct sched_domain *sd)
+static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 {
 	struct sched_group *sg = sd->groups;
 
@@ -5842,8 +5841,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 	if (cpu != group_balance_cpu(sg))
 		return;
 
-	update_group_power(sd, cpu);
-	atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
+	update_group_capacity(sd, cpu);
+	atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
 }
 
 /*
@@ -5934,8 +5933,8 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
 		*per_cpu_ptr(sdd->sg, cpu) = NULL;
 
-	if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
-		*per_cpu_ptr(sdd->sgp, cpu) = NULL;
+	if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))
+		*per_cpu_ptr(sdd->sgc, cpu) = NULL;
 }
 
 #ifdef CONFIG_NUMA
@@ -6337,14 +6336,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 		if (!sdd->sg)
 			return -ENOMEM;
 
-		sdd->sgp = alloc_percpu(struct sched_group_power *);
-		if (!sdd->sgp)
+		sdd->sgc = alloc_percpu(struct sched_group_capacity *);
+		if (!sdd->sgc)
 			return -ENOMEM;
 
 		for_each_cpu(j, cpu_map) {
 			struct sched_domain *sd;
 			struct sched_group *sg;
-			struct sched_group_power *sgp;
+			struct sched_group_capacity *sgc;
 
 		       	sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
@@ -6362,12 +6361,12 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 			*per_cpu_ptr(sdd->sg, j) = sg;
 
-			sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
+			sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
-			if (!sgp)
+			if (!sgc)
 				return -ENOMEM;
 
-			*per_cpu_ptr(sdd->sgp, j) = sgp;
+			*per_cpu_ptr(sdd->sgc, j) = sgc;
 		}
 	}
 
@@ -6394,15 +6393,15 @@ static void __sdt_free(const struct cpumask *cpu_map)
 
 			if (sdd->sg)
 				kfree(*per_cpu_ptr(sdd->sg, j));
-			if (sdd->sgp)
-				kfree(*per_cpu_ptr(sdd->sgp, j));
+			if (sdd->sgc)
+				kfree(*per_cpu_ptr(sdd->sgc, j));
 		}
 		free_percpu(sdd->sd);
 		sdd->sd = NULL;
 		free_percpu(sdd->sg);
 		sdd->sg = NULL;
-		free_percpu(sdd->sgp);
-		sdd->sgp = NULL;
+		free_percpu(sdd->sgc);
+		sdd->sgc = NULL;
 	}
 }
 
@@ -6479,7 +6478,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 			claim_allocations(i, sd);
-			init_sched_groups_power(i, sd);
+			init_sched_groups_capacity(i, sd);
 		}
 	}
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e401e446e87c..36bd4d23fca8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4369,8 +4369,8 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 			avg_load += load;
 		}
 
-		/* Adjust by relative CPU power of the group */
-		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
+		/* Adjust by relative CPU capacity of the group */
+		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgc->capacity;
 
 		if (local_group) {
 			this_load = avg_load;
@@ -5532,7 +5532,7 @@ struct sg_lb_stats {
 	unsigned long group_load; /* Total load over the CPUs of the group */
 	unsigned long sum_weighted_load; /* Weighted load of group's tasks */
 	unsigned long load_per_task;
-	unsigned long group_power;
+	unsigned long group_capacity;
 	unsigned int sum_nr_running; /* Nr tasks running in the group */
 	unsigned int group_capacity_factor;
 	unsigned int idle_cpus;
@@ -5553,7 +5553,7 @@ struct sd_lb_stats {
 	struct sched_group *busiest;	/* Busiest group in this sd */
 	struct sched_group *local;	/* Local group in this sd */
 	unsigned long total_load;	/* Total load of all groups in sd */
-	unsigned long total_pwr;	/* Total power of all groups in sd */
+	unsigned long total_capacity;	/* Total capacity of all groups in sd */
 	unsigned long avg_load;	/* Average load across all groups in sd */
 
 	struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
@@ -5572,7 +5572,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
 		.busiest = NULL,
 		.local = NULL,
 		.total_load = 0UL,
-		.total_pwr = 0UL,
+		.total_capacity = 0UL,
 		.busiest_stat = {
 			.avg_load = 0UL,
 		},
@@ -5681,7 +5681,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power >>= SCHED_POWER_SHIFT;
 	}
 
-	sdg->sgp->power_orig = power;
+	sdg->sgc->capacity_orig = power;
 
 	if (sched_feat(ARCH_POWER))
 		power *= arch_scale_freq_power(sd, cpu);
@@ -5697,26 +5697,26 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power = 1;
 
 	cpu_rq(cpu)->cpu_power = power;
-	sdg->sgp->power = power;
+	sdg->sgc->capacity = power;
 }
 
-void update_group_power(struct sched_domain *sd, int cpu)
+void update_group_capacity(struct sched_domain *sd, int cpu)
 {
 	struct sched_domain *child = sd->child;
 	struct sched_group *group, *sdg = sd->groups;
-	unsigned long power, power_orig;
+	unsigned long capacity, capacity_orig;
 	unsigned long interval;
 
 	interval = msecs_to_jiffies(sd->balance_interval);
 	interval = clamp(interval, 1UL, max_load_balance_interval);
-	sdg->sgp->next_update = jiffies + interval;
+	sdg->sgc->next_update = jiffies + interval;
 
 	if (!child) {
 		update_cpu_power(sd, cpu);
 		return;
 	}
 
-	power_orig = power = 0;
+	capacity_orig = capacity = 0;
 
 	if (child->flags & SD_OVERLAP) {
 		/*
@@ -5725,31 +5725,31 @@ void update_group_power(struct sched_domain *sd, int cpu)
 		 */
 
 		for_each_cpu(cpu, sched_group_cpus(sdg)) {
-			struct sched_group_power *sgp;
+			struct sched_group_capacity *sgc;
 			struct rq *rq = cpu_rq(cpu);
 
 			/*
-			 * build_sched_domains() -> init_sched_groups_power()
+			 * build_sched_domains() -> init_sched_groups_capacity()
 			 * gets here before we've attached the domains to the
 			 * runqueues.
 			 *
 			 * Use power_of(), which is set irrespective of domains
 			 * in update_cpu_power().
 			 *
-			 * This avoids power/power_orig from being 0 and
+			 * This avoids capacity/capacity_orig from being 0 and
 			 * causing divide-by-zero issues on boot.
 			 *
-			 * Runtime updates will correct power_orig.
+			 * Runtime updates will correct capacity_orig.
 			 */
 			if (unlikely(!rq->sd)) {
-				power_orig += power_of(cpu);
-				power += power_of(cpu);
+				capacity_orig += power_of(cpu);
+				capacity += power_of(cpu);
 				continue;
 			}
 
-			sgp = rq->sd->groups->sgp;
-			power_orig += sgp->power_orig;
-			power += sgp->power;
+			sgc = rq->sd->groups->sgc;
+			capacity_orig += sgc->capacity_orig;
+			capacity += sgc->capacity;
 		}
 	} else  {
 		/*
@@ -5759,14 +5759,14 @@ void update_group_power(struct sched_domain *sd, int cpu)
 
 		group = child->groups;
 		do {
-			power_orig += group->sgp->power_orig;
-			power += group->sgp->power;
+			capacity_orig += group->sgc->capacity_orig;
+			capacity += group->sgc->capacity;
 			group = group->next;
 		} while (group != child->groups);
 	}
 
-	sdg->sgp->power_orig = power_orig;
-	sdg->sgp->power = power;
+	sdg->sgc->capacity_orig = capacity_orig;
+	sdg->sgc->capacity = capacity;
 }
 
 /*
@@ -5786,9 +5786,9 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 		return 0;
 
 	/*
-	 * If ~90% of the cpu_power is still there, we're good.
+	 * If ~90% of the cpu_capacity is still there, we're good.
 	 */
-	if (group->sgp->power * 32 > group->sgp->power_orig * 29)
+	if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29)
 		return 1;
 
 	return 0;
@@ -5825,7 +5825,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 
 static inline int sg_imbalanced(struct sched_group *group)
 {
-	return group->sgp->imbalance;
+	return group->sgc->imbalance;
 }
 
 /*
@@ -5833,22 +5833,23 @@ static inline int sg_imbalanced(struct sched_group *group)
  *
  * Avoid the issue where N*frac(smt_power) >= 1 creates 'phantom' cores by
  * first dividing out the smt factor and computing the actual number of cores
- * and limit power unit capacity with that.
+ * and limit unit capacity with that.
  */
 static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group)
 {
 	unsigned int capacity_factor, smt, cpus;
-	unsigned int power, power_orig;
+	unsigned int capacity, capacity_orig;
 
-	power = group->sgp->power;
-	power_orig = group->sgp->power_orig;
+	capacity = group->sgc->capacity;
+	capacity_orig = group->sgc->capacity_orig;
 	cpus = group->group_weight;
 
-	/* smt := ceil(cpus / power), assumes: 1 < smt_power < 2 */
-	smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, power_orig);
+	/* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */
+	smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, capacity_orig);
 	capacity_factor = cpus / smt; /* cores */
 
-	capacity_factor = min_t(unsigned, capacity_factor, DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE));
+	capacity_factor = min_t(unsigned,
+		capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_POWER_SCALE));
 	if (!capacity_factor)
 		capacity_factor = fix_small_capacity(env->sd, group);
 
@@ -5892,9 +5893,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 			sgs->idle_cpus++;
 	}
 
-	/* Adjust by relative CPU power of the group */
-	sgs->group_power = group->sgp->power;
-	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_power;
+	/* Adjust by relative CPU capacity of the group */
+	sgs->group_capacity = group->sgc->capacity;
+	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_capacity;
 
 	if (sgs->sum_nr_running)
 		sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
@@ -6009,8 +6010,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 			sgs = &sds->local_stat;
 
 			if (env->idle != CPU_NEWLY_IDLE ||
-			    time_after_eq(jiffies, sg->sgp->next_update))
-				update_group_power(env->sd, env->dst_cpu);
+			    time_after_eq(jiffies, sg->sgc->next_update))
+				update_group_capacity(env->sd, env->dst_cpu);
 		}
 
 		update_sg_lb_stats(env, sg, load_idx, local_group, sgs);
@@ -6040,7 +6041,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 next_group:
 		/* Now, start updating sd_lb_stats */
 		sds->total_load += sgs->group_load;
-		sds->total_pwr += sgs->group_power;
+		sds->total_capacity += sgs->group_capacity;
 
 		sg = sg->next;
 	} while (sg != env->sd->groups);
@@ -6087,7 +6088,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
 		return 0;
 
 	env->imbalance = DIV_ROUND_CLOSEST(
-		sds->busiest_stat.avg_load * sds->busiest_stat.group_power,
+		sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity,
 		SCHED_POWER_SCALE);
 
 	return 1;
@@ -6103,7 +6104,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
 static inline
 void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 {
-	unsigned long tmp, pwr_now = 0, pwr_move = 0;
+	unsigned long tmp, capa_now = 0, capa_move = 0;
 	unsigned int imbn = 2;
 	unsigned long scaled_busy_load_per_task;
 	struct sg_lb_stats *local, *busiest;
@@ -6118,7 +6119,7 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 
 	scaled_busy_load_per_task =
 		(busiest->load_per_task * SCHED_POWER_SCALE) /
-		busiest->group_power;
+		busiest->group_capacity;
 
 	if (busiest->avg_load + scaled_busy_load_per_task >=
 	    local->avg_load + (scaled_busy_load_per_task * imbn)) {
@@ -6132,34 +6133,34 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 	 * moving them.
 	 */
 
-	pwr_now += busiest->group_power *
+	capa_now += busiest->group_capacity *
 			min(busiest->load_per_task, busiest->avg_load);
-	pwr_now += local->group_power *
+	capa_now += local->group_capacity *
 			min(local->load_per_task, local->avg_load);
-	pwr_now /= SCHED_POWER_SCALE;
+	capa_now /= SCHED_POWER_SCALE;
 
 	/* Amount of load we'd subtract */
 	if (busiest->avg_load > scaled_busy_load_per_task) {
-		pwr_move += busiest->group_power *
+		capa_move += busiest->group_capacity *
 			    min(busiest->load_per_task,
 				busiest->avg_load - scaled_busy_load_per_task);
 	}
 
 	/* Amount of load we'd add */
-	if (busiest->avg_load * busiest->group_power <
+	if (busiest->avg_load * busiest->group_capacity <
 	    busiest->load_per_task * SCHED_POWER_SCALE) {
-		tmp = (busiest->avg_load * busiest->group_power) /
-		      local->group_power;
+		tmp = (busiest->avg_load * busiest->group_capacity) /
+		      local->group_capacity;
 	} else {
 		tmp = (busiest->load_per_task * SCHED_POWER_SCALE) /
-		      local->group_power;
+		      local->group_capacity;
 	}
-	pwr_move += local->group_power *
+	capa_move += local->group_capacity *
 		    min(local->load_per_task, local->avg_load + tmp);
-	pwr_move /= SCHED_POWER_SCALE;
+	capa_move /= SCHED_POWER_SCALE;
 
 	/* Move if we gain throughput */
-	if (pwr_move > pwr_now)
+	if (capa_move > capa_now)
 		env->imbalance = busiest->load_per_task;
 }
 
@@ -6207,7 +6208,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 			(busiest->sum_nr_running - busiest->group_capacity_factor);
 
 		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
-		load_above_capacity /= busiest->group_power;
+		load_above_capacity /= busiest->group_capacity;
 	}
 
 	/*
@@ -6222,8 +6223,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 
 	/* How much load to actually move to equalise the imbalance */
 	env->imbalance = min(
-		max_pull * busiest->group_power,
-		(sds->avg_load - local->avg_load) * local->group_power
+		max_pull * busiest->group_capacity,
+		(sds->avg_load - local->avg_load) * local->group_capacity
 	) / SCHED_POWER_SCALE;
 
 	/*
@@ -6278,7 +6279,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	if (!sds.busiest || busiest->sum_nr_running == 0)
 		goto out_balanced;
 
-	sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr;
+	sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_capacity;
 
 	/*
 	 * If the busiest group is imbalanced the below checks don't
@@ -6611,7 +6612,7 @@ more_balance:
 		 * We failed to reach balance because of affinity.
 		 */
 		if (sd_parent) {
-			int *group_imbalance = &sd_parent->groups->sgp->imbalance;
+			int *group_imbalance = &sd_parent->groups->sgc->imbalance;
 
 			if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0) {
 				*group_imbalance = 1;
@@ -6998,7 +6999,7 @@ static inline void set_cpu_sd_state_busy(void)
 		goto unlock;
 	sd->nohz_idle = 0;
 
-	atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+	atomic_inc(&sd->groups->sgc->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -7015,7 +7016,7 @@ void set_cpu_sd_state_idle(void)
 		goto unlock;
 	sd->nohz_idle = 1;
 
-	atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+	atomic_dec(&sd->groups->sgc->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -7219,7 +7220,7 @@ end:
  * of an idle cpu is the system.
  *   - This rq has more than one task.
  *   - At any scheduler domain level, this cpu's scheduler group has multiple
- *     busy cpu's exceeding the group's power.
+ *     busy cpu's exceeding the group's capacity.
  *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
  *     domain span are idle.
  */
@@ -7227,7 +7228,7 @@ static inline int nohz_kick_needed(struct rq *rq)
 {
 	unsigned long now = jiffies;
 	struct sched_domain *sd;
-	struct sched_group_power *sgp;
+	struct sched_group_capacity *sgc;
 	int nr_busy, cpu = rq->cpu;
 
 	if (unlikely(rq->idle_balance))
@@ -7257,8 +7258,8 @@ static inline int nohz_kick_needed(struct rq *rq)
 	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
 	if (sd) {
-		sgp = sd->groups->sgp;
-		nr_busy = atomic_read(&sgp->nr_busy_cpus);
+		sgc = sd->groups->sgc;
+		nr_busy = atomic_read(&sgc->nr_busy_cpus);
 
 		if (nr_busy > 1)
 			goto need_kick_unlock;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 600e2291a75c..a5b957d53c92 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -728,15 +728,15 @@ DECLARE_PER_CPU(struct sched_domain *, sd_numa);
 DECLARE_PER_CPU(struct sched_domain *, sd_busy);
 DECLARE_PER_CPU(struct sched_domain *, sd_asym);
 
-struct sched_group_power {
+struct sched_group_capacity {
 	atomic_t ref;
 	/*
-	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
-	 * single CPU.
+	 * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
+	 * for a single CPU.
 	 */
-	unsigned int power, power_orig;
+	unsigned int capacity, capacity_orig;
 	unsigned long next_update;
-	int imbalance; /* XXX unrelated to power but shared group state */
+	int imbalance; /* XXX unrelated to capacity but shared group state */
 	/*
 	 * Number of busy cpus in this group.
 	 */
@@ -750,7 +750,7 @@ struct sched_group {
 	atomic_t ref;
 
 	unsigned int group_weight;
-	struct sched_group_power *sgp;
+	struct sched_group_capacity *sgc;
 
 	/*
 	 * The CPUs this group covers.
@@ -773,7 +773,7 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
  */
 static inline struct cpumask *sched_group_mask(struct sched_group *sg)
 {
-	return to_cpumask(sg->sgp->cpumask);
+	return to_cpumask(sg->sgc->cpumask);
 }
 
 /**
@@ -1167,7 +1167,7 @@ extern const struct sched_class idle_sched_class;
 
 #ifdef CONFIG_SMP
 
-extern void update_group_power(struct sched_domain *sd, int cpu);
+extern void update_group_capacity(struct sched_domain *sd, int cpu);
 
 extern void trigger_load_balance(struct rq *rq);
 
-- 
cgit v1.2.3-71-gd317


From ca8ce3d0b144c318a5a9ce99649053e9029061ea Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Mon, 26 May 2014 18:19:39 -0400
Subject: sched: Final power vs. capacity cleanups

It is better not to think about compute capacity as being equivalent
to "CPU power".  The upcoming "power aware" scheduler work may create
confusion with the notion of energy consumption if "power" is used too
liberally.

This contains the architecture visible changes.  Incidentally, only ARM
takes advantage of the available pow^H^H^Hcapacity scaling hooks and
therefore those changes outside kernel/sched/ are confined to one ARM
specific file.  The default arch_scale_smt_power() hook is not overridden
by anyone.

Replacements are as follows:

	arch_scale_freq_power  --> arch_scale_freq_capacity
	arch_scale_smt_power   --> arch_scale_smt_capacity
	SCHED_POWER_SCALE      --> SCHED_CAPACITY_SCALE
	SCHED_POWER_SHIFT      --> SCHED_CAPACITY_SHIFT

The local usage of "power" in arch/arm/kernel/topology.c is also changed
to "capacity" as appropriate.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: linaro-kernel@lists.linaro.org
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Brown <broonie@linaro.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: devicetree@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/n/tip-48zba9qbznvglwelgq2cfygh@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/kernel/topology.c | 54 +++++++++++++++++++++---------------------
 include/linux/sched.h      |  6 ++---
 kernel/sched/core.c        |  6 ++---
 kernel/sched/fair.c        | 59 +++++++++++++++++++++++-----------------------
 4 files changed, 63 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 71e1fec6d31a..d42a7db22236 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -26,30 +26,30 @@
 #include <asm/topology.h>
 
 /*
- * cpu power scale management
+ * cpu capacity scale management
  */
 
 /*
- * cpu power table
+ * cpu capacity table
  * This per cpu data structure describes the relative capacity of each core.
  * On a heteregenous system, cores don't have the same computation capacity
- * and we reflect that difference in the cpu_power field so the scheduler can
- * take this difference into account during load balance. A per cpu structure
- * is preferred because each CPU updates its own cpu_power field during the
- * load balance except for idle cores. One idle core is selected to run the
- * rebalance_domains for all idle cores and the cpu_power can be updated
- * during this sequence.
+ * and we reflect that difference in the cpu_capacity field so the scheduler
+ * can take this difference into account during load balance. A per cpu
+ * structure is preferred because each CPU updates its own cpu_capacity field
+ * during the load balance except for idle cores. One idle core is selected
+ * to run the rebalance_domains for all idle cores and the cpu_capacity can be
+ * updated during this sequence.
  */
 static DEFINE_PER_CPU(unsigned long, cpu_scale);
 
-unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 {
 	return per_cpu(cpu_scale, cpu);
 }
 
-static void set_power_scale(unsigned int cpu, unsigned long power)
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
 {
-	per_cpu(cpu_scale, cpu) = power;
+	per_cpu(cpu_scale, cpu) = capacity;
 }
 
 #ifdef CONFIG_OF
@@ -62,11 +62,11 @@ struct cpu_efficiency {
  * Table of relative efficiency of each processors
  * The efficiency value must fit in 20bit and the final
  * cpu_scale value must be in the range
- *   0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ *   0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2
  * in order to return at most 1 when DIV_ROUND_CLOSEST
  * is used to compute the capacity of a CPU.
  * Processors that are not defined in the table,
- * use the default SCHED_POWER_SCALE value for cpu_scale.
+ * use the default SCHED_CAPACITY_SCALE value for cpu_scale.
  */
 static const struct cpu_efficiency table_efficiency[] = {
 	{"arm,cortex-a15", 3891},
@@ -83,9 +83,9 @@ static unsigned long middle_capacity = 1;
  * Iterate all CPUs' descriptor in DT and compute the efficiency
  * (as per table_efficiency). Also calculate a middle efficiency
  * as close as possible to  (max{eff_i} - min{eff_i}) / 2
- * This is later used to scale the cpu_power field such that an
- * 'average' CPU is of middle power. Also see the comments near
- * table_efficiency[] and update_cpu_power().
+ * This is later used to scale the cpu_capacity field such that an
+ * 'average' CPU is of middle capacity. Also see the comments near
+ * table_efficiency[] and update_cpu_capacity().
  */
 static void __init parse_dt_topology(void)
 {
@@ -141,15 +141,15 @@ static void __init parse_dt_topology(void)
 	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
 	 * compute a middle_capacity factor that will ensure that the capacity
 	 * of an 'average' CPU of the system will be as close as possible to
-	 * SCHED_POWER_SCALE, which is the default value, but with the
+	 * SCHED_CAPACITY_SCALE, which is the default value, but with the
 	 * constraint explained near table_efficiency[].
 	 */
 	if (4*max_capacity < (3*(max_capacity + min_capacity)))
 		middle_capacity = (min_capacity + max_capacity)
-				>> (SCHED_POWER_SHIFT+1);
+				>> (SCHED_CAPACITY_SHIFT+1);
 	else
 		middle_capacity = ((max_capacity / 3)
-				>> (SCHED_POWER_SHIFT-1)) + 1;
+				>> (SCHED_CAPACITY_SHIFT-1)) + 1;
 
 }
 
@@ -158,20 +158,20 @@ static void __init parse_dt_topology(void)
  * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
  * function returns directly for SMP system.
  */
-static void update_cpu_power(unsigned int cpu)
+static void update_cpu_capacity(unsigned int cpu)
 {
 	if (!cpu_capacity(cpu))
 		return;
 
-	set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+	set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
 
-	printk(KERN_INFO "CPU%u: update cpu_power %lu\n",
-		cpu, arch_scale_freq_power(NULL, cpu));
+	printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n",
+		cpu, arch_scale_freq_capacity(NULL, cpu));
 }
 
 #else
 static inline void parse_dt_topology(void) {}
-static inline void update_cpu_power(unsigned int cpuid) {}
+static inline void update_cpu_capacity(unsigned int cpuid) {}
 #endif
 
  /*
@@ -267,7 +267,7 @@ void store_cpu_topology(unsigned int cpuid)
 
 	update_siblings_masks(cpuid);
 
-	update_cpu_power(cpuid);
+	update_cpu_capacity(cpuid);
 
 	printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
 		cpuid, cpu_topology[cpuid].thread_id,
@@ -297,7 +297,7 @@ void __init init_cpu_topology(void)
 {
 	unsigned int cpu;
 
-	/* init core mask and power*/
+	/* init core mask and capacity */
 	for_each_possible_cpu(cpu) {
 		struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
 
@@ -307,7 +307,7 @@ void __init init_cpu_topology(void)
 		cpumask_clear(&cpu_topo->core_sibling);
 		cpumask_clear(&cpu_topo->thread_sibling);
 
-		set_power_scale(cpu, SCHED_POWER_SCALE);
+		set_capacity_scale(cpu, SCHED_CAPACITY_SCALE);
 	}
 	smp_wmb();
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a96f03598c61..322110affe63 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -854,10 +854,10 @@ enum cpu_idle_type {
 };
 
 /*
- * Increase resolution of cpu_power calculations
+ * Increase resolution of cpu_capacity calculations
  */
-#define SCHED_POWER_SHIFT	10
-#define SCHED_POWER_SCALE	(1L << SCHED_POWER_SHIFT)
+#define SCHED_CAPACITY_SHIFT	10
+#define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)
 
 /*
  * sched-domains (multiprocessor balancing) declarations:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 07bc78a50329..7ba4f5413a10 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5249,7 +5249,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
 		printk(KERN_CONT " %s", str);
-		if (group->sgc->capacity != SCHED_POWER_SCALE) {
+		if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
 			printk(KERN_CONT " (cpu_capacity = %d)",
 				group->sgc->capacity);
 		}
@@ -5715,7 +5715,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		 * domains and no possible iteration will get us here, we won't
 		 * die on a /0 trap.
 		 */
-		sg->sgc->capacity = SCHED_POWER_SCALE * cpumask_weight(sg_span);
+		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
 		sg->sgc->capacity_orig = sg->sgc->capacity;
 
 		/*
@@ -6921,7 +6921,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
-		rq->cpu_capacity = SCHED_POWER_SCALE;
+		rq->cpu_capacity = SCHED_CAPACITY_SCALE;
 		rq->post_schedule = 0;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 58684f684fa8..dc7d6527a282 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1062,9 +1062,9 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
 	if (!cpus)
 		return;
 
-	ns->load = (ns->load * SCHED_POWER_SCALE) / ns->compute_capacity;
+	ns->load = (ns->load * SCHED_CAPACITY_SCALE) / ns->compute_capacity;
 	ns->task_capacity =
-		DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_POWER_SCALE);
+		DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE);
 	ns->has_free_capacity = (ns->nr_running < ns->task_capacity);
 }
 
@@ -4370,7 +4370,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 		}
 
 		/* Adjust by relative CPU capacity of the group */
-		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgc->capacity;
+		avg_load = (avg_load * SCHED_CAPACITY_SCALE) / group->sgc->capacity;
 
 		if (local_group) {
 			this_load = avg_load;
@@ -5609,10 +5609,10 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
 
 static unsigned long default_scale_capacity(struct sched_domain *sd, int cpu)
 {
-	return SCHED_POWER_SCALE;
+	return SCHED_CAPACITY_SCALE;
 }
 
-unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 {
 	return default_scale_capacity(sd, cpu);
 }
@@ -5627,7 +5627,7 @@ static unsigned long default_scale_smt_capacity(struct sched_domain *sd, int cpu
 	return smt_gain;
 }
 
-unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_smt_capacity(struct sched_domain *sd, int cpu)
 {
 	return default_scale_smt_capacity(sd, cpu);
 }
@@ -5658,10 +5658,10 @@ static unsigned long scale_rt_capacity(int cpu)
 		available = total - avg;
 	}
 
-	if (unlikely((s64)total < SCHED_POWER_SCALE))
-		total = SCHED_POWER_SCALE;
+	if (unlikely((s64)total < SCHED_CAPACITY_SCALE))
+		total = SCHED_CAPACITY_SCALE;
 
-	total >>= SCHED_POWER_SHIFT;
+	total >>= SCHED_CAPACITY_SHIFT;
 
 	return div_u64(available, total);
 }
@@ -5669,29 +5669,29 @@ static unsigned long scale_rt_capacity(int cpu)
 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 {
 	unsigned long weight = sd->span_weight;
-	unsigned long capacity = SCHED_POWER_SCALE;
+	unsigned long capacity = SCHED_CAPACITY_SCALE;
 	struct sched_group *sdg = sd->groups;
 
 	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
 		if (sched_feat(ARCH_POWER))
-			capacity *= arch_scale_smt_power(sd, cpu);
+			capacity *= arch_scale_smt_capacity(sd, cpu);
 		else
 			capacity *= default_scale_smt_capacity(sd, cpu);
 
-		capacity >>= SCHED_POWER_SHIFT;
+		capacity >>= SCHED_CAPACITY_SHIFT;
 	}
 
 	sdg->sgc->capacity_orig = capacity;
 
 	if (sched_feat(ARCH_POWER))
-		capacity *= arch_scale_freq_power(sd, cpu);
+		capacity *= arch_scale_freq_capacity(sd, cpu);
 	else
 		capacity *= default_scale_capacity(sd, cpu);
 
-	capacity >>= SCHED_POWER_SHIFT;
+	capacity >>= SCHED_CAPACITY_SHIFT;
 
 	capacity *= scale_rt_capacity(cpu);
-	capacity >>= SCHED_POWER_SHIFT;
+	capacity >>= SCHED_CAPACITY_SHIFT;
 
 	if (!capacity)
 		capacity = 1;
@@ -5780,7 +5780,7 @@ static inline int
 fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 {
 	/*
-	 * Only siblings can have significantly less than SCHED_POWER_SCALE
+	 * Only siblings can have significantly less than SCHED_CAPACITY_SCALE
 	 */
 	if (!(sd->flags & SD_SHARE_CPUPOWER))
 		return 0;
@@ -5845,11 +5845,11 @@ static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *gro
 	cpus = group->group_weight;
 
 	/* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */
-	smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, capacity_orig);
+	smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig);
 	capacity_factor = cpus / smt; /* cores */
 
 	capacity_factor = min_t(unsigned,
-		capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_POWER_SCALE));
+		capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE));
 	if (!capacity_factor)
 		capacity_factor = fix_small_capacity(env->sd, group);
 
@@ -5895,7 +5895,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 	/* Adjust by relative CPU capacity of the group */
 	sgs->group_capacity = group->sgc->capacity;
-	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_capacity;
+	sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
 
 	if (sgs->sum_nr_running)
 		sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
@@ -6089,7 +6089,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
 
 	env->imbalance = DIV_ROUND_CLOSEST(
 		sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity,
-		SCHED_POWER_SCALE);
+		SCHED_CAPACITY_SCALE);
 
 	return 1;
 }
@@ -6118,7 +6118,7 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 		imbn = 1;
 
 	scaled_busy_load_per_task =
-		(busiest->load_per_task * SCHED_POWER_SCALE) /
+		(busiest->load_per_task * SCHED_CAPACITY_SCALE) /
 		busiest->group_capacity;
 
 	if (busiest->avg_load + scaled_busy_load_per_task >=
@@ -6137,7 +6137,7 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 			min(busiest->load_per_task, busiest->avg_load);
 	capa_now += local->group_capacity *
 			min(local->load_per_task, local->avg_load);
-	capa_now /= SCHED_POWER_SCALE;
+	capa_now /= SCHED_CAPACITY_SCALE;
 
 	/* Amount of load we'd subtract */
 	if (busiest->avg_load > scaled_busy_load_per_task) {
@@ -6148,16 +6148,16 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 
 	/* Amount of load we'd add */
 	if (busiest->avg_load * busiest->group_capacity <
-	    busiest->load_per_task * SCHED_POWER_SCALE) {
+	    busiest->load_per_task * SCHED_CAPACITY_SCALE) {
 		tmp = (busiest->avg_load * busiest->group_capacity) /
 		      local->group_capacity;
 	} else {
-		tmp = (busiest->load_per_task * SCHED_POWER_SCALE) /
+		tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) /
 		      local->group_capacity;
 	}
 	capa_move += local->group_capacity *
 		    min(local->load_per_task, local->avg_load + tmp);
-	capa_move /= SCHED_POWER_SCALE;
+	capa_move /= SCHED_CAPACITY_SCALE;
 
 	/* Move if we gain throughput */
 	if (capa_move > capa_now)
@@ -6207,7 +6207,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 		load_above_capacity =
 			(busiest->sum_nr_running - busiest->group_capacity_factor);
 
-		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
+		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE);
 		load_above_capacity /= busiest->group_capacity;
 	}
 
@@ -6225,7 +6225,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 	env->imbalance = min(
 		max_pull * busiest->group_capacity,
 		(sds->avg_load - local->avg_load) * local->group_capacity
-	) / SCHED_POWER_SCALE;
+	) / SCHED_CAPACITY_SCALE;
 
 	/*
 	 * if *imbalance is less than the average load per runnable task
@@ -6279,7 +6279,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	if (!sds.busiest || busiest->sum_nr_running == 0)
 		goto out_balanced;
 
-	sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_capacity;
+	sds.avg_load = (SCHED_CAPACITY_SCALE * sds.total_load)
+						/ sds.total_capacity;
 
 	/*
 	 * If the busiest group is imbalanced the below checks don't
@@ -6378,7 +6379,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 			continue;
 
 		capacity = capacity_of(i);
-		capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_POWER_SCALE);
+		capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE);
 		if (!capacity_factor)
 			capacity_factor = fix_small_capacity(env->sd, group);
 
-- 
cgit v1.2.3-71-gd317


From 5d4dfddd4f02b028d6ddaaa04d75d3b0cad1c9ae Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 27 May 2014 13:50:41 -0400
Subject: sched: Rename capacity related flags

It is better not to think about compute capacity as being equivalent
to "CPU power".  The upcoming "power aware" scheduler work may create
confusion with the notion of energy consumption if "power" is used too
liberally.

Let's rename the following feature flags since they do relate to capacity:

	SD_SHARE_CPUPOWER  -> SD_SHARE_CPUCAPACITY
	ARCH_POWER         -> ARCH_CAPACITY
	NONTASK_POWER      -> NONTASK_CAPACITY

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: linaro-kernel@lists.linaro.org
Cc: Andy Fleming <afleming@freescale.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: devicetree@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/n/tip-e93lpnxb87owfievqatey6b5@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/kernel/smp.c |  2 +-
 include/linux/sched.h     |  4 ++--
 kernel/sched/core.c       | 14 +++++++-------
 kernel/sched/fair.c       |  8 ++++----
 kernel/sched/features.h   |  8 ++++----
 5 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 10ffffef0414..c51d16379cba 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -770,7 +770,7 @@ int setup_profiling_timer(unsigned int multiplier)
 /* cpumask of CPUs with asymetric SMT dependancy */
 static const int powerpc_smt_flags(void)
 {
-	int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
 
 	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
 		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 322110affe63..ce93768a3312 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -869,7 +869,7 @@ enum cpu_idle_type {
 #define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
 #define SD_BALANCE_WAKE		0x0010  /* Balance on wakeup */
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
-#define SD_SHARE_CPUPOWER	0x0080	/* Domain members share cpu power */
+#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu power */
 #define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
@@ -881,7 +881,7 @@ enum cpu_idle_type {
 #ifdef CONFIG_SCHED_SMT
 static inline const int cpu_smt_flags(void)
 {
-	return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+	return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
 }
 #endif
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7ba4f5413a10..5976ca579d3e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -872,7 +872,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 	rq->clock_task += delta;
 
 #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
-	if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
+	if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
 		sched_rt_avg_update(rq, irq_delta + steal);
 #endif
 }
@@ -5309,7 +5309,7 @@ static int sd_degenerate(struct sched_domain *sd)
 			 SD_BALANCE_NEWIDLE |
 			 SD_BALANCE_FORK |
 			 SD_BALANCE_EXEC |
-			 SD_SHARE_CPUPOWER |
+			 SD_SHARE_CPUCAPACITY |
 			 SD_SHARE_PKG_RESOURCES |
 			 SD_SHARE_POWERDOMAIN)) {
 		if (sd->groups != sd->groups->next)
@@ -5340,7 +5340,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 				SD_BALANCE_NEWIDLE |
 				SD_BALANCE_FORK |
 				SD_BALANCE_EXEC |
-				SD_SHARE_CPUPOWER |
+				SD_SHARE_CPUCAPACITY |
 				SD_SHARE_PKG_RESOURCES |
 				SD_PREFER_SIBLING |
 				SD_SHARE_POWERDOMAIN);
@@ -5947,7 +5947,7 @@ static int sched_domains_curr_level;
 /*
  * SD_flags allowed in topology descriptions.
  *
- * SD_SHARE_CPUPOWER      - describes SMT topologies
+ * SD_SHARE_CPUCAPACITY      - describes SMT topologies
  * SD_SHARE_PKG_RESOURCES - describes shared caches
  * SD_NUMA                - describes NUMA topologies
  * SD_SHARE_POWERDOMAIN   - describes shared power domain
@@ -5956,7 +5956,7 @@ static int sched_domains_curr_level;
  * SD_ASYM_PACKING        - describes SMT quirks
  */
 #define TOPOLOGY_SD_FLAGS		\
-	(SD_SHARE_CPUPOWER |		\
+	(SD_SHARE_CPUCAPACITY |		\
 	 SD_SHARE_PKG_RESOURCES |	\
 	 SD_NUMA |			\
 	 SD_ASYM_PACKING |		\
@@ -6002,7 +6002,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
 					| 1*SD_BALANCE_FORK
 					| 0*SD_BALANCE_WAKE
 					| 1*SD_WAKE_AFFINE
-					| 0*SD_SHARE_CPUPOWER
+					| 0*SD_SHARE_CPUCAPACITY
 					| 0*SD_SHARE_PKG_RESOURCES
 					| 0*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
@@ -6024,7 +6024,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
 	 * Convert topological properties into behaviour.
 	 */
 
-	if (sd->flags & SD_SHARE_CPUPOWER) {
+	if (sd->flags & SD_SHARE_CPUCAPACITY) {
 		sd->imbalance_pct = 110;
 		sd->smt_gain = 1178; /* ~15% */
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dc7d6527a282..d3c731222199 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5672,8 +5672,8 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 	unsigned long capacity = SCHED_CAPACITY_SCALE;
 	struct sched_group *sdg = sd->groups;
 
-	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
-		if (sched_feat(ARCH_POWER))
+	if ((sd->flags & SD_SHARE_CPUCAPACITY) && weight > 1) {
+		if (sched_feat(ARCH_CAPACITY))
 			capacity *= arch_scale_smt_capacity(sd, cpu);
 		else
 			capacity *= default_scale_smt_capacity(sd, cpu);
@@ -5683,7 +5683,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 
 	sdg->sgc->capacity_orig = capacity;
 
-	if (sched_feat(ARCH_POWER))
+	if (sched_feat(ARCH_CAPACITY))
 		capacity *= arch_scale_freq_capacity(sd, cpu);
 	else
 		capacity *= default_scale_capacity(sd, cpu);
@@ -5782,7 +5782,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 	/*
 	 * Only siblings can have significantly less than SCHED_CAPACITY_SCALE
 	 */
-	if (!(sd->flags & SD_SHARE_CPUPOWER))
+	if (!(sd->flags & SD_SHARE_CPUCAPACITY))
 		return 0;
 
 	/*
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 5716929a2e3a..90284d117fe6 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -37,18 +37,18 @@ SCHED_FEAT(CACHE_HOT_BUDDY, true)
 SCHED_FEAT(WAKEUP_PREEMPTION, true)
 
 /*
- * Use arch dependent cpu power functions
+ * Use arch dependent cpu capacity functions
  */
-SCHED_FEAT(ARCH_POWER, true)
+SCHED_FEAT(ARCH_CAPACITY, true)
 
 SCHED_FEAT(HRTICK, false)
 SCHED_FEAT(DOUBLE_TICK, false)
 SCHED_FEAT(LB_BIAS, true)
 
 /*
- * Decrement CPU power based on time not spent running tasks
+ * Decrement CPU capacity based on time not spent running tasks
  */
-SCHED_FEAT(NONTASK_POWER, true)
+SCHED_FEAT(NONTASK_CAPACITY, true)
 
 /*
  * Queue remote wakeups on the target CPU and process them
-- 
cgit v1.2.3-71-gd317


From dfc68f29ae67f2a6e799b44e6a4eb3417dffbfcd Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Wed, 4 Jun 2014 10:31:15 -0700
Subject: sched, trace: Add a tracepoint for IPI-less remote wakeups

Remote wakeups of polling CPUs are a valuable performance
improvement; add a tracepoint to make it much easier to verify that
they're working.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: nicolas.pitre@linaro.org
Cc: daniel.lezcano@linaro.org
Cc: umgwanakikbuti@gmail.com
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/16205aee116772aa686814f9b13bccb562108047.1401902905.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/trace/events/sched.h | 20 ++++++++++++++++++++
 kernel/sched/core.c          |  4 ++++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 67e1bbf83695..0a68d5ae584e 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -530,6 +530,26 @@ TRACE_EVENT(sched_swap_numa,
 			__entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
 			__entry->dst_cpu, __entry->dst_nid)
 );
+
+/*
+ * Tracepoint for waking a polling cpu without an IPI.
+ */
+TRACE_EVENT(sched_wake_idle_without_ipi,
+
+	TP_PROTO(int cpu),
+
+	TP_ARGS(cpu),
+
+	TP_STRUCT__entry(
+		__field(	int,	cpu	)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= cpu;
+	),
+
+	TP_printk("cpu=%d", __entry->cpu)
+);
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5976ca579d3e..e4c0ddd3db8e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -564,6 +564,8 @@ void resched_task(struct task_struct *p)
 
 	if (set_nr_and_not_polling(p))
 		smp_send_reschedule(cpu);
+	else
+		trace_sched_wake_idle_without_ipi(cpu);
 }
 
 void resched_cpu(int cpu)
@@ -647,6 +649,8 @@ static void wake_up_idle_cpu(int cpu)
 	smp_mb();
 	if (!tsk_is_polling(rq->idle))
 		smp_send_reschedule(cpu);
+	else
+		trace_sched_wake_idle_without_ipi(cpu);
 }
 
 static bool wake_up_full_nohz_cpu(int cpu)
-- 
cgit v1.2.3-71-gd317


From 53b25335dd60981ad608da7890420898a34469a6 Mon Sep 17 00:00:00 2001
From: Vince Weaver <vincent.weaver@maine.edu>
Date: Fri, 16 May 2014 17:12:12 -0400
Subject: perf: Disable sampled events if no PMU interrupt

Add common code to generate -ENOTSUPP at event creation time if an
architecture attempts to create a sampled event and
PERF_PMU_NO_INTERRUPT is set.

This adds a new pmu->capabilities flag.  Initially we only support
PERF_PMU_NO_INTERRUPT (to indicate a PMU has no support for generating
hardware interrupts) but there are other capabilities that can be
added later.

Signed-off-by: Vince Weaver <vincent.weaver@maine.edu>
Acked-by: Will Deacon <will.deacon@arm.com>
[peterz: rename to PERF_PMU_CAP_* and moved the pmu::capabilities word into a hole]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1405161708060.11099@vincent-weaver-1.umelst.maine.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 10 ++++++++++
 kernel/events/core.c       |  7 +++++++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index af6dcf1d9e47..267c8f37012c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -166,6 +166,11 @@ struct perf_event;
  */
 #define PERF_EVENT_TXN 0x1
 
+/**
+ * pmu::capabilities flags
+ */
+#define PERF_PMU_CAP_NO_INTERRUPT		0x01
+
 /**
  * struct pmu - generic performance monitoring unit
  */
@@ -178,6 +183,11 @@ struct pmu {
 	const char			*name;
 	int				type;
 
+	/*
+	 * various common per-pmu feature flags
+	 */
+	int				capabilities;
+
 	int * __percpu			pmu_disable_count;
 	struct perf_cpu_context * __percpu pmu_cpu_context;
 	int				task_ctx_nr;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a62d142ad498..e9ef0c6646af 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7120,6 +7120,13 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	}
 
+	if (is_sampling_event(event)) {
+		if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
+			err = -ENOTSUPP;
+			goto err_alloc;
+		}
+	}
+
 	account_event(event);
 
 	/*
-- 
cgit v1.2.3-71-gd317


From bac52139f0b7ab31330e98fd87fc5a2664951050 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Date: Thu, 22 May 2014 12:50:07 +0530
Subject: perf: Add new conditional branch filter 'PERF_SAMPLE_BRANCH_COND'

This patch introduces new branch filter PERF_SAMPLE_BRANCH_COND which
will extend the existing perf ABI. This will filter branches which are
conditional. Various architectures can provide this functionality either
with HW filtering support (if present) or with SW filtering of captured
branch instructions.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: mpe@ellerman.id.au
Cc: benh@kernel.crashing.org
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1400743210-32289-1-git-send-email-khandual@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index e3fc8f09d110..d9cd853818ad 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -163,8 +163,9 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_ABORT_TX	= 1U << 7, /* transaction aborts */
 	PERF_SAMPLE_BRANCH_IN_TX	= 1U << 8, /* in transaction */
 	PERF_SAMPLE_BRANCH_NO_TX	= 1U << 9, /* not in transaction */
+	PERF_SAMPLE_BRANCH_COND		= 1U << 10, /* conditional branches */
 
-	PERF_SAMPLE_BRANCH_MAX		= 1U << 10, /* non-ABI */
+	PERF_SAMPLE_BRANCH_MAX		= 1U << 11, /* non-ABI */
 };
 
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
-- 
cgit v1.2.3-71-gd317


From d16218030f7d5195fcf83a424d8e48b89b013a51 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 29 Apr 2014 17:19:57 +0200
Subject: drm/dsi: Support device shutdown

Hook up the MIPI DSI bus's .shutdown() function to allow drivers to
implement code that should be run when a device is shut down.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/drm_mipi_dsi.c | 10 ++++++++++
 include/drm/drm_mipi_dsi.h     |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index 09821f46d768..e633df2f68d8 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -282,6 +282,14 @@ static int mipi_dsi_drv_remove(struct device *dev)
 	return drv->remove(dsi);
 }
 
+static void mipi_dsi_drv_shutdown(struct device *dev)
+{
+	struct mipi_dsi_driver *drv = to_mipi_dsi_driver(dev->driver);
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev);
+
+	drv->shutdown(dsi);
+}
+
 /**
  * mipi_dsi_driver_register - register a driver for DSI devices
  * @drv: DSI driver structure
@@ -293,6 +301,8 @@ int mipi_dsi_driver_register(struct mipi_dsi_driver *drv)
 		drv->driver.probe = mipi_dsi_drv_probe;
 	if (drv->remove)
 		drv->driver.remove = mipi_dsi_drv_remove;
+	if (drv->shutdown)
+		drv->driver.shutdown = mipi_dsi_drv_shutdown;
 
 	return driver_register(&drv->driver);
 }
diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index 7209df15a3cd..944f33f8ba38 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -135,11 +135,13 @@ ssize_t mipi_dsi_dcs_read(struct mipi_dsi_device *dsi, unsigned int channel,
  * @driver: device driver model driver
  * @probe: callback for device binding
  * @remove: callback for device unbinding
+ * @shutdown: called at shutdown time to quiesce the device
  */
 struct mipi_dsi_driver {
 	struct device_driver driver;
 	int(*probe)(struct mipi_dsi_device *dsi);
 	int(*remove)(struct mipi_dsi_device *dsi);
+	void (*shutdown)(struct mipi_dsi_device *dsi);
 };
 
 #define to_mipi_dsi_driver(d) container_of(d, struct mipi_dsi_driver, driver)
-- 
cgit v1.2.3-71-gd317


From 762380ad9322951cea4ce9d24864265f9c66a916 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 5 Jun 2014 13:38:39 -0600
Subject: block: add notion of a chunk size for request merging

Some drivers have different limits on what size a request should
optimally be, depending on the offset of the request. Similar to
dividing a device into chunks. Add a setting that allows the driver
to inform the block layer of such a chunk size. The block layer will
then prevent merging across the chunks.

This is needed to optimally support NVMe with a non-zero stripe size.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c            |  3 ++-
 block/blk-settings.c   | 18 ++++++++++++++++++
 include/linux/blkdev.h | 22 +++++++++++++++++++++-
 3 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/block/bio.c b/block/bio.c
index 96d28eee8a1e..97e832cc9b9c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -849,7 +849,8 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
 		 unsigned int offset)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
+
+	return __bio_add_page(q, bio, page, len, offset, blk_max_size_offset(q, bio->bi_iter.bi_sector));
 }
 EXPORT_SYMBOL(bio_add_page);
 
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5d21239bc859..a2b9cb195e70 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
 	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+	lim->chunk_sectors = 0;
 	lim->max_write_same_sectors = 0;
 	lim->max_discard_sectors = 0;
 	lim->discard_granularity = 0;
@@ -276,6 +277,23 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
+/**
+ * blk_queue_chunk_sectors - set size of the chunk for this queue
+ * @q:  the request queue for the device
+ * @chunk_sectors:  chunk sectors in the usual 512b unit
+ *
+ * Description:
+ *    If a driver doesn't want IOs to cross a given chunk size, it can set
+ *    this limit and prevent merging across chunks. Note that the chunk size
+ *    must currently be a power-of-2 in sectors.
+ **/
+void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
+{
+	BUG_ON(!is_power_of_2(chunk_sectors));
+	q->limits.chunk_sectors = chunk_sectors;
+}
+EXPORT_SYMBOL(blk_queue_chunk_sectors);
+
 /**
  * blk_queue_max_discard_sectors - set max sectors for a single discard
  * @q:  the request queue for the device
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3cd426e971db..dc2c703f05fd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -280,6 +280,7 @@ struct queue_limits {
 	unsigned long		seg_boundary_mask;
 
 	unsigned int		max_hw_sectors;
+	unsigned int		chunk_sectors;
 	unsigned int		max_sectors;
 	unsigned int		max_segment_size;
 	unsigned int		physical_block_size;
@@ -910,6 +911,20 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 	return q->limits.max_sectors;
 }
 
+/*
+ * Return maximum size of a request at given offset. Only valid for
+ * file system requests.
+ */
+static inline unsigned int blk_max_size_offset(struct request_queue *q,
+					       sector_t offset)
+{
+	if (!q->limits.chunk_sectors)
+		return q->limits.max_hw_sectors;
+
+	return q->limits.chunk_sectors -
+			(offset & (q->limits.chunk_sectors - 1));
+}
+
 static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -917,7 +932,11 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
 	if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
 		return q->limits.max_hw_sectors;
 
-	return blk_queue_get_max_sectors(q, rq->cmd_flags);
+	if (!q->limits.chunk_sectors)
+		return blk_queue_get_max_sectors(q, rq->cmd_flags);
+
+	return min(blk_max_size_offset(q, blk_rq_pos(rq)),
+			blk_queue_get_max_sectors(q, rq->cmd_flags));
 }
 
 static inline unsigned int blk_rq_count_bios(struct request *rq)
@@ -983,6 +1002,7 @@ extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
+extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
-- 
cgit v1.2.3-71-gd317


From ca8e2ad71013049bc88a10b11d83712bfe56cdd4 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 11 Apr 2014 15:23:00 +0200
Subject: drm: Introduce drm_dev_set_unique()

Add a helper function that allows drivers to statically set the unique
name of the device. This will allow platform and USB drivers to get rid
of their DRM bus implementations and directly use drm_dev_alloc() and
drm_dev_register().

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/drm_ioctl.c | 24 ++++++++++++++++++------
 drivers/gpu/drm/drm_stub.c  | 26 ++++++++++++++++++++++++++
 include/drm/drmP.h          |  2 ++
 3 files changed, 46 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 38269d5aa333..69c61f392e66 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -131,13 +131,25 @@ static int drm_set_busid(struct drm_device *dev, struct drm_file *file_priv)
 	if (master->unique != NULL)
 		drm_unset_busid(dev, master);
 
-	ret = dev->driver->bus->set_busid(dev, master);
-	if (ret)
-		goto err;
+	if (dev->driver->bus && dev->driver->bus->set_busid) {
+		ret = dev->driver->bus->set_busid(dev, master);
+		if (ret) {
+			drm_unset_busid(dev, master);
+			return ret;
+		}
+	} else {
+		if (WARN(dev->unique == NULL,
+			 "No drm_bus.set_busid() implementation provided by "
+			 "%ps. Use drm_dev_set_unique() to set the unique "
+			 "name explicitly.", dev->driver))
+			return -EINVAL;
+
+		master->unique = kstrdup(dev->unique, GFP_KERNEL);
+		if (master->unique)
+			master->unique_len = strlen(dev->unique);
+	}
+
 	return 0;
-err:
-	drm_unset_busid(dev, master);
-	return ret;
 }
 
 /**
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 3727ac8bc310..6d55392376dd 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -650,6 +650,7 @@ static void drm_dev_release(struct kref *ref)
 	drm_minor_free(dev, DRM_MINOR_CONTROL);
 
 	mutex_destroy(&dev->master_mutex);
+	kfree(dev->unique);
 	kfree(dev);
 }
 
@@ -777,3 +778,28 @@ void drm_dev_unregister(struct drm_device *dev)
 	drm_minor_unregister(dev, DRM_MINOR_CONTROL);
 }
 EXPORT_SYMBOL(drm_dev_unregister);
+
+/**
+ * drm_dev_set_unique - Set the unique name of a DRM device
+ * @dev: device of which to set the unique name
+ * @fmt: format string for unique name
+ *
+ * Sets the unique name of a DRM device using the specified format string and
+ * a variable list of arguments. Drivers can use this at driver probe time if
+ * the unique name of the devices they drive is static.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int drm_dev_set_unique(struct drm_device *dev, const char *fmt, ...)
+{
+	va_list ap;
+
+	kfree(dev->unique);
+
+	va_start(ap, fmt);
+	dev->unique = kvasprintf(GFP_KERNEL, fmt, ap);
+	va_end(ap);
+
+	return dev->unique ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(drm_dev_set_unique);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 475ca5cf3c20..83222db41566 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1058,6 +1058,7 @@ struct drm_device {
 	struct drm_minor *render;		/**< Render node */
 	atomic_t unplugged;			/**< Flag whether dev is dead */
 	struct inode *anon_inode;		/**< inode for private address-space */
+	char *unique;				/**< unique name of the device */
 	/*@} */
 
 	/** \name Locks */
@@ -1617,6 +1618,7 @@ void drm_dev_ref(struct drm_device *dev);
 void drm_dev_unref(struct drm_device *dev);
 int drm_dev_register(struct drm_device *dev, unsigned long flags);
 void drm_dev_unregister(struct drm_device *dev);
+int drm_dev_set_unique(struct drm_device *dev, const char *fmt, ...);
 
 struct drm_minor *drm_minor_acquire(unsigned int minor_id);
 void drm_minor_release(struct drm_minor *minor);
-- 
cgit v1.2.3-71-gd317


From 1c03a2d04d7ab6d27c1fef8614f08187d974bd21 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 2 Jun 2014 22:49:28 +0530
Subject: cpufreq: add support for intermediate (stable) frequencies

Douglas Anderson, recently pointed out an interesting problem due to which
udelay() was expiring earlier than it should.

While transitioning between frequencies few platforms may temporarily switch to
a stable frequency, waiting for the main PLL to stabilize.

For example: When we transition between very low frequencies on exynos, like
between 200MHz and 300MHz, we may temporarily switch to a PLL running at 800MHz.
No CPUFREQ notification is sent for that. That means there's a period of time
when we're running at 800MHz but loops_per_jiffy is calibrated at between 200MHz
and 300MHz. And so udelay behaves badly.

To get this fixed in a generic way, introduce another set of callbacks
get_intermediate() and target_intermediate(), only for drivers with
target_index() and CPUFREQ_ASYNC_NOTIFICATION unset.

get_intermediate() should return a stable intermediate frequency platform wants
to switch to, and target_intermediate() should set CPU to that frequency,
before jumping to the frequency corresponding to 'index'. Core will take care of
sending notifications and driver doesn't have to handle them in
target_intermediate() or target_index().

NOTE: ->target_index() should restore to policy->restore_freq in case of
failures as core would send notifications for that.

Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/cpu-freq/cpu-drivers.txt | 29 ++++++++++++++-
 drivers/cpufreq/cpufreq.c              | 67 ++++++++++++++++++++++++++++++----
 include/linux/cpufreq.h                | 25 +++++++++++++
 3 files changed, 112 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index b045fe54986a..14f4e6336d88 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -26,6 +26,7 @@ Contents:
 1.4  target/target_index or setpolicy?
 1.5  target/target_index
 1.6  setpolicy
+1.7  get_intermediate and target_intermediate
 2.   Frequency Table Helpers
 
 
@@ -79,6 +80,10 @@ cpufreq_driver.attr -		A pointer to a NULL-terminated list of
 				"struct freq_attr" which allow to
 				export values to sysfs.
 
+cpufreq_driver.get_intermediate
+and target_intermediate		Used to switch to stable frequency while
+				changing CPU frequency.
+
 
 1.2 Per-CPU Initialization
 --------------------------
@@ -151,7 +156,7 @@ Some cpufreq-capable processors switch the frequency between certain
 limits on their own. These shall use the ->setpolicy call
 
 
-1.4. target/target_index
+1.5. target/target_index
 -------------
 
 The target_index call has two arguments: struct cpufreq_policy *policy,
@@ -160,6 +165,9 @@ and unsigned int index (into the exposed frequency table).
 The CPUfreq driver must set the new frequency when called here. The
 actual frequency must be determined by freq_table[index].frequency.
 
+It should always restore to earlier frequency (i.e. policy->restore_freq) in
+case of errors, even if we switched to intermediate frequency earlier.
+
 Deprecated:
 ----------
 The target call has three arguments: struct cpufreq_policy *policy,
@@ -179,7 +187,7 @@ Here again the frequency table helper might assist you - see section 2
 for details.
 
 
-1.5 setpolicy
+1.6 setpolicy
 ---------------
 
 The setpolicy call only takes a struct cpufreq_policy *policy as
@@ -190,6 +198,23 @@ setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a
 powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check
 the reference implementation in drivers/cpufreq/longrun.c
 
+1.7 get_intermediate and target_intermediate
+--------------------------------------------
+
+Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset.
+
+get_intermediate should return a stable intermediate frequency platform wants to
+switch to, and target_intermediate() should set CPU to to that frequency, before
+jumping to the frequency corresponding to 'index'. Core will take care of
+sending notifications and driver doesn't have to handle them in
+target_intermediate() or target_index().
+
+Drivers can return '0' from get_intermediate() in case they don't wish to switch
+to intermediate frequency for some target frequency. In that case core will
+directly call ->target_index().
+
+NOTE: ->target_index() should restore to policy->restore_freq in case of
+failures as core would send notifications for that.
 
 
 2. Frequency Table Helpers
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index ae11dd51f81d..aed2b0cb83dc 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1816,20 +1816,55 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
  *                              GOVERNORS                            *
  *********************************************************************/
 
+/* Must set freqs->new to intermediate frequency */
+static int __target_intermediate(struct cpufreq_policy *policy,
+				 struct cpufreq_freqs *freqs, int index)
+{
+	int ret;
+
+	freqs->new = cpufreq_driver->get_intermediate(policy, index);
+
+	/* We don't need to switch to intermediate freq */
+	if (!freqs->new)
+		return 0;
+
+	pr_debug("%s: cpu: %d, switching to intermediate freq: oldfreq: %u, intermediate freq: %u\n",
+		 __func__, policy->cpu, freqs->old, freqs->new);
+
+	cpufreq_freq_transition_begin(policy, freqs);
+	ret = cpufreq_driver->target_intermediate(policy, index);
+	cpufreq_freq_transition_end(policy, freqs, ret);
+
+	if (ret)
+		pr_err("%s: Failed to change to intermediate frequency: %d\n",
+		       __func__, ret);
+
+	return ret;
+}
+
 static int __target_index(struct cpufreq_policy *policy,
 			  struct cpufreq_frequency_table *freq_table, int index)
 {
-	struct cpufreq_freqs freqs;
+	struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0};
+	unsigned int intermediate_freq = 0;
 	int retval = -EINVAL;
 	bool notify;
 
 	notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION);
-
 	if (notify) {
-		freqs.old = policy->cur;
-		freqs.new = freq_table[index].frequency;
-		freqs.flags = 0;
+		/* Handle switching to intermediate frequency */
+		if (cpufreq_driver->get_intermediate) {
+			retval = __target_intermediate(policy, &freqs, index);
+			if (retval)
+				return retval;
+
+			intermediate_freq = freqs.new;
+			/* Set old freq to intermediate */
+			if (intermediate_freq)
+				freqs.old = freqs.new;
+		}
 
+		freqs.new = freq_table[index].frequency;
 		pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n",
 			 __func__, policy->cpu, freqs.old, freqs.new);
 
@@ -1841,9 +1876,23 @@ static int __target_index(struct cpufreq_policy *policy,
 		pr_err("%s: Failed to change cpu frequency: %d\n", __func__,
 		       retval);
 
-	if (notify)
+	if (notify) {
 		cpufreq_freq_transition_end(policy, &freqs, retval);
 
+		/*
+		 * Failed after setting to intermediate freq? Driver should have
+		 * reverted back to initial frequency and so should we. Check
+		 * here for intermediate_freq instead of get_intermediate, in
+		 * case we have't switched to intermediate freq at all.
+		 */
+		if (unlikely(retval && intermediate_freq)) {
+			freqs.old = intermediate_freq;
+			freqs.new = policy->restore_freq;
+			cpufreq_freq_transition_begin(policy, &freqs);
+			cpufreq_freq_transition_end(policy, &freqs, 0);
+		}
+	}
+
 	return retval;
 }
 
@@ -1875,6 +1924,9 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 	if (target_freq == policy->cur)
 		return 0;
 
+	/* Save last value to restore later on errors */
+	policy->restore_freq = policy->cur;
+
 	if (cpufreq_driver->target)
 		retval = cpufreq_driver->target(policy, target_freq, relation);
 	else if (cpufreq_driver->target_index) {
@@ -2361,7 +2413,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	    !(driver_data->setpolicy || driver_data->target_index ||
 		    driver_data->target) ||
 	     (driver_data->setpolicy && (driver_data->target_index ||
-		    driver_data->target)))
+		    driver_data->target)) ||
+	     (!!driver_data->get_intermediate != !!driver_data->target_intermediate))
 		return -EINVAL;
 
 	pr_debug("trying to register driver %s\n", driver_data->name);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 3f458896d45c..ec4112d257bc 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -75,6 +75,7 @@ struct cpufreq_policy {
 	unsigned int		max;    /* in kHz */
 	unsigned int		cur;    /* in kHz, only needed if cpufreq
 					 * governors are used */
+	unsigned int		restore_freq; /* = policy->cur before transition */
 	unsigned int		suspend_freq; /* freq to set during suspend */
 
 	unsigned int		policy; /* see above */
@@ -221,11 +222,35 @@ struct cpufreq_driver {
 
 	/* define one out of two */
 	int	(*setpolicy)	(struct cpufreq_policy *policy);
+
+	/*
+	 * On failure, should always restore frequency to policy->restore_freq
+	 * (i.e. old freq).
+	 */
 	int	(*target)	(struct cpufreq_policy *policy,	/* Deprecated */
 				 unsigned int target_freq,
 				 unsigned int relation);
 	int	(*target_index)	(struct cpufreq_policy *policy,
 				 unsigned int index);
+	/*
+	 * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION
+	 * unset.
+	 *
+	 * get_intermediate should return a stable intermediate frequency
+	 * platform wants to switch to and target_intermediate() should set CPU
+	 * to to that frequency, before jumping to the frequency corresponding
+	 * to 'index'. Core will take care of sending notifications and driver
+	 * doesn't have to handle them in target_intermediate() or
+	 * target_index().
+	 *
+	 * Drivers can return '0' from get_intermediate() in case they don't
+	 * wish to switch to intermediate frequency for some target frequency.
+	 * In that case core will directly call ->target_index().
+	 */
+	unsigned int (*get_intermediate)(struct cpufreq_policy *policy,
+					 unsigned int index);
+	int	(*target_intermediate)(struct cpufreq_policy *policy,
+				       unsigned int index);
 
 	/* should be defined, if possible */
 	unsigned int	(*get)	(unsigned int cpu);
-- 
cgit v1.2.3-71-gd317


From 46cfd6ea23b0a207c87269d86457727dc4485708 Mon Sep 17 00:00:00 2001
From: Konrad Zapalowicz <bergo.torino@gmail.com>
Date: Thu, 5 Jun 2014 20:27:42 +0200
Subject: net: phy: fix sparse warning in fixed.c

This commit fixes the following sparse warning:

drivers/net/phy/fixed.c:207
    - warning: symbol 'fixed_phy_del' was not declared.
      Should it be static?

by adding symbol definition to the phy_fixed.h API file. It is ok to do
because the function in question is an exported symbol.

Signed-off-by: Konrad Zapalowicz <bergo.torino@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy_fixed.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 4f2478b47136..ae612acebb53 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -17,6 +17,7 @@ extern int fixed_phy_add(unsigned int irq, int phy_id,
 extern int fixed_phy_register(unsigned int irq,
 			      struct fixed_phy_status *status,
 			      struct device_node *np);
+extern void fixed_phy_del(int phy_addr);
 #else
 static inline int fixed_phy_add(unsigned int irq, int phy_id,
 				struct fixed_phy_status *status)
@@ -29,6 +30,10 @@ static inline int fixed_phy_register(unsigned int irq,
 {
 	return -ENODEV;
 }
+static inline int fixed_phy_del(int phy_addr)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_FIXED_PHY */
 
 /*
-- 
cgit v1.2.3-71-gd317


From f98a128a55ff85d0087de89f304f10bd75e792aa Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Thu, 17 Apr 2014 08:55:50 +0800
Subject: ceph: update inode fields according to issued caps

Cap message and request reply from non-auth MDS may carry stale
information (corresponding locks are in LOCK states) even they
have the newest inode version. So client should update inode fields
according to issued caps.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/caps.c               | 58 ++++++++++++++++++++----------------
 fs/ceph/inode.c              | 70 ++++++++++++++++++++++++--------------------
 include/linux/ceph/ceph_fs.h |  2 ++
 3 files changed, 73 insertions(+), 57 deletions(-)

(limited to 'include')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index de39a03f5b71..5f6d24ede794 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2476,7 +2476,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 
 	__check_cap_issue(ci, cap, newcaps);
 
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(grant->mode);
 		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
 		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
@@ -2485,7 +2486,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
+	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+	    (issued & CEPH_CAP_LINK_EXCL) == 0) {
 		set_nlink(inode, le32_to_cpu(grant->nlink));
 		if (inode->i_nlink == 0 &&
 		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
@@ -2512,31 +2514,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
 		queue_revalidate = 1;
 
-	/* size/ctime/mtime/atime? */
-	queue_trunc = ceph_fill_file_size(inode, issued,
-					  le32_to_cpu(grant->truncate_seq),
-					  le64_to_cpu(grant->truncate_size),
-					  size);
-	ceph_decode_timespec(&mtime, &grant->mtime);
-	ceph_decode_timespec(&atime, &grant->atime);
-	ceph_decode_timespec(&ctime, &grant->ctime);
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(grant->time_warp_seq), &ctime, &mtime,
-			    &atime);
-
-
-	/* file layout may have changed */
-	ci->i_layout = grant->layout;
-
-	/* max size increase? */
-	if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
-		dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
-		ci->i_max_size = max_size;
-		if (max_size >= ci->i_wanted_max_size) {
-			ci->i_wanted_max_size = 0;  /* reset */
-			ci->i_requested_max_size = 0;
+	if (newcaps & CEPH_CAP_ANY_RD) {
+		/* ctime/mtime/atime? */
+		ceph_decode_timespec(&mtime, &grant->mtime);
+		ceph_decode_timespec(&atime, &grant->atime);
+		ceph_decode_timespec(&ctime, &grant->ctime);
+		ceph_fill_file_time(inode, issued,
+				    le32_to_cpu(grant->time_warp_seq),
+				    &ctime, &mtime, &atime);
+	}
+
+	if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
+		/* file layout may have changed */
+		ci->i_layout = grant->layout;
+		/* size/truncate_seq? */
+		queue_trunc = ceph_fill_file_size(inode, issued,
+					le32_to_cpu(grant->truncate_seq),
+					le64_to_cpu(grant->truncate_size),
+					size);
+		/* max size increase? */
+		if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
+			dout("max_size %lld -> %llu\n",
+			     ci->i_max_size, max_size);
+			ci->i_max_size = max_size;
+			if (max_size >= ci->i_wanted_max_size) {
+				ci->i_wanted_max_size = 0;  /* reset */
+				ci->i_requested_max_size = 0;
+			}
+			wake = 1;
 		}
-		wake = 1;
 	}
 
 	/* check cap bits */
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 233c6f96910a..f9e7399877d6 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -585,14 +585,15 @@ static int fill_inode(struct inode *inode,
 	struct ceph_mds_reply_inode *info = iinfo->in;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int i;
-	int issued = 0, implemented;
+	int issued = 0, implemented, new_issued;
 	struct timespec mtime, atime, ctime;
 	u32 nsplits;
 	struct ceph_inode_frag *frag;
 	struct rb_node *rb_node;
 	struct ceph_buffer *xattr_blob = NULL;
 	int err = 0;
-	int queue_trunc = 0;
+	bool queue_trunc = false;
+	bool new_version = false;
 
 	dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
 	     inode, ceph_vinop(inode), le64_to_cpu(info->version),
@@ -623,19 +624,23 @@ static int fill_inode(struct inode *inode,
 	 *   3    2     skip
 	 *   3    3     update
 	 */
-	if (le64_to_cpu(info->version) > 0 &&
-	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
-		goto no_change;
-	
+	if (ci->i_version == 0 ||
+	    ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+	     le64_to_cpu(info->version) > (ci->i_version & ~1)))
+		new_version = true;
+
 	issued = __ceph_caps_issued(ci, &implemented);
 	issued |= implemented | __ceph_caps_dirty(ci);
+	new_issued = ~issued & le32_to_cpu(info->cap.caps);
 
 	/* update inode */
 	ci->i_version = le64_to_cpu(info->version);
 	inode->i_version++;
 	inode->i_rdev = le32_to_cpu(info->rdev);
+	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+	if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
+	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(info->mode);
 		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
 		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
@@ -644,23 +649,35 @@ static int fill_inode(struct inode *inode,
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
+	if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
+	    (issued & CEPH_CAP_LINK_EXCL) == 0)
 		set_nlink(inode, le32_to_cpu(info->nlink));
 
-	/* be careful with mtime, atime, size */
-	ceph_decode_timespec(&atime, &info->atime);
-	ceph_decode_timespec(&mtime, &info->mtime);
-	ceph_decode_timespec(&ctime, &info->ctime);
-	queue_trunc = ceph_fill_file_size(inode, issued,
-					  le32_to_cpu(info->truncate_seq),
-					  le64_to_cpu(info->truncate_size),
-					  le64_to_cpu(info->size));
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(info->time_warp_seq),
-			    &ctime, &mtime, &atime);
-
-	ci->i_layout = info->layout;
-	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+	if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
+		/* be careful with mtime, atime, size */
+		ceph_decode_timespec(&atime, &info->atime);
+		ceph_decode_timespec(&mtime, &info->mtime);
+		ceph_decode_timespec(&ctime, &info->ctime);
+		ceph_fill_file_time(inode, issued,
+				le32_to_cpu(info->time_warp_seq),
+				&ctime, &mtime, &atime);
+	}
+
+	if (new_version ||
+	    (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+		ci->i_layout = info->layout;
+		queue_trunc = ceph_fill_file_size(inode, issued,
+					le32_to_cpu(info->truncate_seq),
+					le64_to_cpu(info->truncate_size),
+					le64_to_cpu(info->size));
+		/* only update max_size on auth cap */
+		if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+		    ci->i_max_size != le64_to_cpu(info->max_size)) {
+			dout("max_size %lld -> %llu\n", ci->i_max_size,
+					le64_to_cpu(info->max_size));
+			ci->i_max_size = le64_to_cpu(info->max_size);
+		}
+	}
 
 	/* xattrs */
 	/* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
@@ -745,15 +762,6 @@ static int fill_inode(struct inode *inode,
 		dout(" marking %p complete (empty)\n", inode);
 		__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
 	}
-no_change:
-	/* only update max_size on auth cap */
-	if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
-	    ci->i_max_size != le64_to_cpu(info->max_size)) {
-		dout("max_size %lld -> %llu\n", ci->i_max_size,
-		     le64_to_cpu(info->max_size));
-		ci->i_max_size = le64_to_cpu(info->max_size);
-	}
-
 	spin_unlock(&ci->i_ceph_lock);
 
 	/* queue truncate if we saw i_size decrease */
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 5f6db18d72e8..3c97d5e9b951 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -625,6 +625,8 @@ int ceph_flags_to_mode(int flags);
 			   CEPH_CAP_LINK_EXCL |		\
 			   CEPH_CAP_XATTR_EXCL |	\
 			   CEPH_CAP_FILE_EXCL)
+#define CEPH_CAP_ANY_FILE_RD (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | \
+			      CEPH_CAP_FILE_SHARED)
 #define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |	\
 			      CEPH_CAP_FILE_EXCL)
 #define CEPH_CAP_ANY_WR   (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)
-- 
cgit v1.2.3-71-gd317


From 513a8243d67f8e8d27f2883bd2f18bc87c7ca376 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Tue, 13 May 2014 11:19:26 +0400
Subject: libceph: mon_get_version request infrastructure

Add support for mon_get_version requests to libceph.  This reuses much
of the ceph_mon_generic_request infrastructure, with one exception.
Older OSDs don't set mon_get_version reply hdr->tid even if the
original request had a non-zero tid, which makes it impossible to
lookup ceph_mon_generic_request contexts by tid in get_generic_reply()
for such replies.  As a workaround, we allocate a reply message on the
reply path.  This can probably interfere with revoke, but I don't see
a better way.

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
---
 include/linux/ceph/mon_client.h |   9 ++-
 net/ceph/ceph_common.c          |   2 +
 net/ceph/debugfs.c              |   2 +
 net/ceph/mon_client.c           | 123 ++++++++++++++++++++++++++++++++++++++--
 4 files changed, 128 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index a486f390dfbe..585ef9450e9d 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -40,9 +40,9 @@ struct ceph_mon_request {
 };
 
 /*
- * ceph_mon_generic_request is being used for the statfs and poolop requests
- * which are bening done a bit differently because we need to get data back
- * to the caller
+ * ceph_mon_generic_request is being used for the statfs, poolop and
+ * mon_get_version requests which are being done a bit differently
+ * because we need to get data back to the caller
  */
 struct ceph_mon_generic_request {
 	struct kref kref;
@@ -108,6 +108,9 @@ extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
 extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
 			       struct ceph_statfs *buf);
 
+extern int ceph_monc_do_get_version(struct ceph_mon_client *monc,
+				    const char *what, u64 *newest);
+
 extern int ceph_monc_open_session(struct ceph_mon_client *monc);
 
 extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 67d7721d237e..1675021d8c12 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -72,6 +72,8 @@ const char *ceph_msg_type_name(int type)
 	case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
 	case CEPH_MSG_STATFS: return "statfs";
 	case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+	case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
+	case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
 	case CEPH_MSG_MDS_MAP: return "mds_map";
 	case CEPH_MSG_CLIENT_SESSION: return "client_session";
 	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 8903dcee8d8e..d1a62c69a9f4 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -129,6 +129,8 @@ static int monc_show(struct seq_file *s, void *p)
 			seq_printf(s, "%llu statfs\n", req->tid);
 		else if (op == CEPH_MSG_POOLOP)
 			seq_printf(s, "%llu poolop\n", req->tid);
+		else if (op == CEPH_MSG_MON_GET_VERSION)
+			seq_printf(s, "%llu mon_get_version", req->tid);
 		else
 			seq_printf(s, "%llu unknown\n", req->tid);
 	}
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2ac9ef35110b..11d8d2f2708a 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -477,14 +477,13 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
 	return m;
 }
 
-static int do_generic_request(struct ceph_mon_client *monc,
-			      struct ceph_mon_generic_request *req)
+static int __do_generic_request(struct ceph_mon_client *monc, u64 tid,
+				struct ceph_mon_generic_request *req)
 {
 	int err;
 
 	/* register request */
-	mutex_lock(&monc->mutex);
-	req->tid = ++monc->last_tid;
+	req->tid = tid != 0 ? tid : ++monc->last_tid;
 	req->request->hdr.tid = cpu_to_le64(req->tid);
 	__insert_generic_request(monc, req);
 	monc->num_generic_requests++;
@@ -496,13 +495,24 @@ static int do_generic_request(struct ceph_mon_client *monc,
 	mutex_lock(&monc->mutex);
 	rb_erase(&req->node, &monc->generic_request_tree);
 	monc->num_generic_requests--;
-	mutex_unlock(&monc->mutex);
 
 	if (!err)
 		err = req->result;
 	return err;
 }
 
+static int do_generic_request(struct ceph_mon_client *monc,
+			      struct ceph_mon_generic_request *req)
+{
+	int err;
+
+	mutex_lock(&monc->mutex);
+	err = __do_generic_request(monc, 0, req);
+	mutex_unlock(&monc->mutex);
+
+	return err;
+}
+
 /*
  * statfs
  */
@@ -579,6 +589,96 @@ out:
 }
 EXPORT_SYMBOL(ceph_monc_do_statfs);
 
+static void handle_get_version_reply(struct ceph_mon_client *monc,
+				     struct ceph_msg *msg)
+{
+	struct ceph_mon_generic_request *req;
+	u64 tid = le64_to_cpu(msg->hdr.tid);
+	void *p = msg->front.iov_base;
+	void *end = p + msg->front_alloc_len;
+	u64 handle;
+
+	dout("%s %p tid %llu\n", __func__, msg, tid);
+
+	ceph_decode_need(&p, end, 2*sizeof(u64), bad);
+	handle = ceph_decode_64(&p);
+	if (tid != 0 && tid != handle)
+		goto bad;
+
+	mutex_lock(&monc->mutex);
+	req = __lookup_generic_req(monc, handle);
+	if (req) {
+		*(u64 *)req->buf = ceph_decode_64(&p);
+		req->result = 0;
+		get_generic_request(req);
+	}
+	mutex_unlock(&monc->mutex);
+	if (req) {
+		complete_all(&req->completion);
+		put_generic_request(req);
+	}
+
+	return;
+bad:
+	pr_err("corrupt mon_get_version reply\n");
+	ceph_msg_dump(msg);
+}
+
+/*
+ * Send MMonGetVersion and wait for the reply.
+ *
+ * @what: one of "mdsmap", "osdmap" or "monmap"
+ */
+int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what,
+			     u64 *newest)
+{
+	struct ceph_mon_generic_request *req;
+	void *p, *end;
+	u64 tid;
+	int err;
+
+	req = kzalloc(sizeof(*req), GFP_NOFS);
+	if (!req)
+		return -ENOMEM;
+
+	kref_init(&req->kref);
+	req->buf = newest;
+	req->buf_len = sizeof(*newest);
+	init_completion(&req->completion);
+
+	req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION,
+				    sizeof(u64) + sizeof(u32) + strlen(what),
+				    GFP_NOFS, true);
+	if (!req->request) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	req->reply = ceph_msg_new(CEPH_MSG_MON_GET_VERSION_REPLY, 1024,
+				  GFP_NOFS, true);
+	if (!req->reply) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	p = req->request->front.iov_base;
+	end = p + req->request->front_alloc_len;
+
+	/* fill out request */
+	mutex_lock(&monc->mutex);
+	tid = ++monc->last_tid;
+	ceph_encode_64(&p, tid); /* handle */
+	ceph_encode_string(&p, end, what, strlen(what));
+
+	err = __do_generic_request(monc, tid, req);
+
+	mutex_unlock(&monc->mutex);
+out:
+	kref_put(&req->kref, release_generic_request);
+	return err;
+}
+EXPORT_SYMBOL(ceph_monc_do_get_version);
+
 /*
  * pool ops
  */
@@ -981,6 +1081,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 		handle_statfs_reply(monc, msg);
 		break;
 
+	case CEPH_MSG_MON_GET_VERSION_REPLY:
+		handle_get_version_reply(monc, msg);
+		break;
+
 	case CEPH_MSG_POOLOP_REPLY:
 		handle_poolop_reply(monc, msg);
 		break;
@@ -1029,6 +1133,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
 	case CEPH_MSG_AUTH_REPLY:
 		m = ceph_msg_get(monc->m_auth_reply);
 		break;
+	case CEPH_MSG_MON_GET_VERSION_REPLY:
+		if (le64_to_cpu(hdr->tid) != 0)
+			return get_generic_reply(con, hdr, skip);
+
+		/*
+		 * Older OSDs don't set reply tid even if the orignal
+		 * request had a non-zero tid.  Workaround this weirdness
+		 * by falling through to the allocate case.
+		 */
 	case CEPH_MSG_MON_MAP:
 	case CEPH_MSG_MDS_MAP:
 	case CEPH_MSG_OSD_MAP:
-- 
cgit v1.2.3-71-gd317


From 6044cde6f2a94d88142d4401624152a741866338 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Tue, 13 May 2014 11:19:27 +0400
Subject: libceph: add ceph_monc_wait_osdmap()

Add ceph_monc_wait_osdmap(), which will block until the osdmap with the
specified epoch is received or timeout occurs.

Export both of these as they are going to be needed by rbd.

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
---
 include/linux/ceph/mon_client.h |  2 ++
 net/ceph/mon_client.c           | 27 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index 585ef9450e9d..deb47e45ac7c 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -104,6 +104,8 @@ extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
 extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
 
 extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
+extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+				 unsigned long timeout);
 
 extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
 			       struct ceph_statfs *buf);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 11d8d2f2708a..067d3af2eaf6 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -296,6 +296,33 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
 		__send_subscribe(monc);
 	mutex_unlock(&monc->mutex);
 }
+EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
+
+int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+			  unsigned long timeout)
+{
+	unsigned long started = jiffies;
+	int ret;
+
+	mutex_lock(&monc->mutex);
+	while (monc->have_osdmap < epoch) {
+		mutex_unlock(&monc->mutex);
+
+		if (timeout != 0 && time_after_eq(jiffies, started + timeout))
+			return -ETIMEDOUT;
+
+		ret = wait_event_interruptible_timeout(monc->client->auth_wq,
+					 monc->have_osdmap >= epoch, timeout);
+		if (ret < 0)
+			return ret;
+
+		mutex_lock(&monc->mutex);
+	}
+
+	mutex_unlock(&monc->mutex);
+	return 0;
+}
+EXPORT_SYMBOL(ceph_monc_wait_osdmap);
 
 /*
  *
-- 
cgit v1.2.3-71-gd317


From e041e328c4b41e1db79bfe5ba9992c2ed771ad19 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 21 May 2014 17:32:19 +0200
Subject: perf: Fix perf_event_comm() vs. exec() assumption

perf_event_comm() assumes that set_task_comm() is only called on
exec(), and in particular that its only called on current.

Neither are true, as Dave reported a WARN triggered by set_task_comm()
being called on !current.

Separate the exec() hook from the comm hook.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/20140521153219.GH5226@laptop.programming.kicks-ass.net
[ Build fix. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/exec.c                  |  1 +
 include/linux/perf_event.h |  4 +++-
 kernel/events/core.c       | 28 ++++++++++++++++------------
 3 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index 238b7aa26f68..a038a41a3677 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1110,6 +1110,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 	else
 		set_dumpable(current->mm, suid_dumpable);
 
+	perf_event_exec();
 	set_task_comm(current, kbasename(bprm->filename));
 
 	/* Set the new mm task size. We have to do that late because it may
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3ef6ea12806a..9b5cd1992a88 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -695,6 +695,7 @@ extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 
+extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
 
@@ -772,7 +773,7 @@ extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
 extern int __perf_event_disable(void *info);
 extern void perf_event_task_tick(void);
-#else
+#else /* !CONFIG_PERF_EVENTS: */
 static inline void
 perf_event_task_sched_in(struct task_struct *prev,
 			 struct task_struct *task)			{ }
@@ -802,6 +803,7 @@ static inline int perf_unregister_guest_info_callbacks
 (struct perf_guest_info_callbacks *callbacks)				{ return 0; }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
+static inline void perf_event_exec(void)				{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 440eefc67397..647698f91988 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2970,6 +2970,22 @@ out:
 	local_irq_restore(flags);
 }
 
+void perf_event_exec(void)
+{
+	struct perf_event_context *ctx;
+	int ctxn;
+
+	rcu_read_lock();
+	for_each_task_context_nr(ctxn) {
+		ctx = current->perf_event_ctxp[ctxn];
+		if (!ctx)
+			continue;
+
+		perf_event_enable_on_exec(ctx);
+	}
+	rcu_read_unlock();
+}
+
 /*
  * Cross CPU call to read the hardware event
  */
@@ -5057,18 +5073,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 void perf_event_comm(struct task_struct *task)
 {
 	struct perf_comm_event comm_event;
-	struct perf_event_context *ctx;
-	int ctxn;
-
-	rcu_read_lock();
-	for_each_task_context_nr(ctxn) {
-		ctx = task->perf_event_ctxp[ctxn];
-		if (!ctx)
-			continue;
-
-		perf_event_enable_on_exec(ctx);
-	}
-	rcu_read_unlock();
 
 	if (!atomic_read(&nr_comm_events))
 		return;
-- 
cgit v1.2.3-71-gd317


From 82b897782d10fcc4930c9d4a15b175348fdd2871 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 28 May 2014 11:45:04 +0300
Subject: perf: Differentiate exec() and non-exec() comm events

perf tools like 'perf report' can aggregate samples by comm strings,
which generally works.  However, there are other potential use-cases.
For example, to pair up 'calls' with 'returns' accurately (from branch
events like Intel BTS) it is necessary to identify whether the process
has exec'd.  Although a comm event is generated when an 'exec' happens
it is also generated whenever the comm string is changed on a whim
(e.g. by prctl PR_SET_NAME).  This patch adds a flag to the comm event
to differentiate one case from the other.

In order to determine whether the kernel supports the new flag, a
selection bit named 'exec' is added to struct perf_event_attr.  The
bit does nothing but will cause perf_event_open() to fail if the bit
is set on kernels that do not have it defined.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/537D9EBE.7030806@intel.com
Cc: Paul Mackerras <paulus@samba.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/exec.c                       | 6 +++---
 include/linux/perf_event.h      | 4 ++--
 include/linux/sched.h           | 6 +++++-
 include/uapi/linux/perf_event.h | 9 +++++++--
 kernel/events/core.c            | 4 ++--
 5 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index a038a41a3677..a3d33fe592d6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm);
  * so that a new one can be started
  */
 
-void set_task_comm(struct task_struct *tsk, const char *buf)
+void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
 {
 	task_lock(tsk);
 	trace_task_rename(tsk, buf);
 	strlcpy(tsk->comm, buf, sizeof(tsk->comm));
 	task_unlock(tsk);
-	perf_event_comm(tsk);
+	perf_event_comm(tsk, exec);
 }
 
 int flush_old_exec(struct linux_binprm * bprm)
@@ -1111,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 		set_dumpable(current->mm, suid_dumpable);
 
 	perf_event_exec();
-	set_task_comm(current, kbasename(bprm->filename));
+	__set_task_comm(current, kbasename(bprm->filename), true);
 
 	/* Set the new mm task size. We have to do that late because it may
 	 * depend on TIF_32BIT which is only updated in flush_thread() on
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b4c1d4685bf0..707617a8c0f6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -707,7 +707,7 @@ extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 
 extern void perf_event_exec(void);
-extern void perf_event_comm(struct task_struct *tsk);
+extern void perf_event_comm(struct task_struct *tsk, bool exec);
 extern void perf_event_fork(struct task_struct *tsk);
 
 /* Callchains */
@@ -815,7 +815,7 @@ static inline int perf_unregister_guest_info_callbacks
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_exec(void)				{ }
-static inline void perf_event_comm(struct task_struct *tsk)		{ }
+static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
 static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 221b2bde3723..ad86e1d7dbc2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2379,7 +2379,11 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i
 struct task_struct *fork_idle(int);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 
-extern void set_task_comm(struct task_struct *tsk, const char *from);
+extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
+static inline void set_task_comm(struct task_struct *tsk, const char *from)
+{
+	__set_task_comm(tsk, from, false);
+}
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d9cd853818ad..5312fae47218 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -302,8 +302,8 @@ struct perf_event_attr {
 				exclude_callchain_kernel : 1, /* exclude kernel callchains */
 				exclude_callchain_user   : 1, /* exclude user callchains */
 				mmap2          :  1, /* include mmap with inode data     */
-
-				__reserved_1   : 40;
+				comm_exec      :  1, /* flag comm events that are due to an exec */
+				__reserved_1   : 39;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -502,7 +502,12 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
 
+/*
+ * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
+ * different events so can reuse the same bit position.
+ */
 #define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
+#define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)
 /*
  * Indicates that the content of PERF_SAMPLE_IP points to
  * the actual instruction that triggered the event. See also
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8fac2056d51e..7da5e561e89a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5090,7 +5090,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 		       NULL);
 }
 
-void perf_event_comm(struct task_struct *task)
+void perf_event_comm(struct task_struct *task, bool exec)
 {
 	struct perf_comm_event comm_event;
 
@@ -5104,7 +5104,7 @@ void perf_event_comm(struct task_struct *task)
 		.event_id  = {
 			.header = {
 				.type = PERF_RECORD_COMM,
-				.misc = 0,
+				.misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
 				/* .size */
 			},
 			/* .pid */
-- 
cgit v1.2.3-71-gd317


From 70af2f8a4f48d6cebdf92d533d3aef37853ce6de Mon Sep 17 00:00:00 2001
From: Waiman Long <Waiman.Long@hp.com>
Date: Mon, 3 Feb 2014 13:18:49 +0100
Subject: locking/rwlocks: Introduce 'qrwlocks' - fair, queued rwlocks

This rwlock uses the arch_spin_lock_t as a waitqueue, and assuming the
arch_spin_lock_t is a fair lock (ticket,mcs etc..) the resulting
rwlock is a fair lock.

It fits in the same 8 bytes as the regular rwlock_t by folding the
reader and writer count into a single integer, using the remaining 4
bytes for the arch_spinlock_t.

Architectures that can single-copy adress bytes can optimize
queue_write_unlock() with a 0 write to the LSB (the write count).

Performance as measured by Davidlohr Bueso (rwlock_t -> qrwlock_t):

 +--------------+-------------+---------------+
 |   Workload   |   #users    |     delta     |
 +--------------+-------------+---------------+
 | alltests     | > 1400      | -4.83%        |
 | custom       | 0-100,> 100 | +1.43%,-1.57% |
 | high_systime | > 1000      | -2.61         |
 | shared       | all         | +0.32         |
 +--------------+-------------+---------------+

http://www.stgolabs.net/qrwlock-stuff/aim7-results-vs-rwsem_optsin/

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
[peterz: near complete rewrite]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Paul E.McKenney" <paulmck@linux.vnet.ibm.com>
Cc: linux-arch@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/n/tip-gac1nnl3wvs2ij87zv2xkdzq@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/qrwlock.h       | 166 ++++++++++++++++++++++++++++++++++++
 include/asm-generic/qrwlock_types.h |  21 +++++
 kernel/Kconfig.locks                |   7 ++
 kernel/locking/Makefile             |   1 +
 kernel/locking/qrwlock.c            | 133 +++++++++++++++++++++++++++++
 5 files changed, 328 insertions(+)
 create mode 100644 include/asm-generic/qrwlock.h
 create mode 100644 include/asm-generic/qrwlock_types.h
 create mode 100644 kernel/locking/qrwlock.c

(limited to 'include')

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
new file mode 100644
index 000000000000..6383d54bf983
--- /dev/null
+++ b/include/asm-generic/qrwlock.h
@@ -0,0 +1,166 @@
+/*
+ * Queue read/write lock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#ifndef __ASM_GENERIC_QRWLOCK_H
+#define __ASM_GENERIC_QRWLOCK_H
+
+#include <linux/atomic.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
+#include <asm-generic/qrwlock_types.h>
+
+/*
+ * Writer states & reader shift and bias
+ */
+#define	_QW_WAITING	1		/* A writer is waiting	   */
+#define	_QW_LOCKED	0xff		/* A writer holds the lock */
+#define	_QW_WMASK	0xff		/* Writer mask		   */
+#define	_QR_SHIFT	8		/* Reader count shift	   */
+#define _QR_BIAS	(1U << _QR_SHIFT)
+
+/*
+ * External function declarations
+ */
+extern void queue_read_lock_slowpath(struct qrwlock *lock);
+extern void queue_write_lock_slowpath(struct qrwlock *lock);
+
+/**
+ * queue_read_can_lock- would read_trylock() succeed?
+ * @lock: Pointer to queue rwlock structure
+ */
+static inline int queue_read_can_lock(struct qrwlock *lock)
+{
+	return !(atomic_read(&lock->cnts) & _QW_WMASK);
+}
+
+/**
+ * queue_write_can_lock- would write_trylock() succeed?
+ * @lock: Pointer to queue rwlock structure
+ */
+static inline int queue_write_can_lock(struct qrwlock *lock)
+{
+	return !atomic_read(&lock->cnts);
+}
+
+/**
+ * queue_read_trylock - try to acquire read lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static inline int queue_read_trylock(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	cnts = atomic_read(&lock->cnts);
+	if (likely(!(cnts & _QW_WMASK))) {
+		cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
+		if (likely(!(cnts & _QW_WMASK)))
+			return 1;
+		atomic_sub(_QR_BIAS, &lock->cnts);
+	}
+	return 0;
+}
+
+/**
+ * queue_write_trylock - try to acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static inline int queue_write_trylock(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	cnts = atomic_read(&lock->cnts);
+	if (unlikely(cnts))
+		return 0;
+
+	return likely(atomic_cmpxchg(&lock->cnts,
+				     cnts, cnts | _QW_LOCKED) == cnts);
+}
+/**
+ * queue_read_lock - acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+static inline void queue_read_lock(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
+	if (likely(!(cnts & _QW_WMASK)))
+		return;
+
+	/* The slowpath will decrement the reader count, if necessary. */
+	queue_read_lock_slowpath(lock);
+}
+
+/**
+ * queue_write_lock - acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queue_write_lock(struct qrwlock *lock)
+{
+	/* Optimize for the unfair lock case where the fair flag is 0. */
+	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
+		return;
+
+	queue_write_lock_slowpath(lock);
+}
+
+/**
+ * queue_read_unlock - release read lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queue_read_unlock(struct qrwlock *lock)
+{
+	/*
+	 * Atomically decrement the reader count
+	 */
+	smp_mb__before_atomic();
+	atomic_sub(_QR_BIAS, &lock->cnts);
+}
+
+#ifndef queue_write_unlock
+/**
+ * queue_write_unlock - release write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+static inline void queue_write_unlock(struct qrwlock *lock)
+{
+	/*
+	 * If the writer field is atomic, it can be cleared directly.
+	 * Otherwise, an atomic subtraction will be used to clear it.
+	 */
+	smp_mb__before_atomic();
+	atomic_sub(_QW_LOCKED, &lock->cnts);
+}
+#endif
+
+/*
+ * Remapping rwlock architecture specific functions to the corresponding
+ * queue rwlock functions.
+ */
+#define arch_read_can_lock(l)	queue_read_can_lock(l)
+#define arch_write_can_lock(l)	queue_write_can_lock(l)
+#define arch_read_lock(l)	queue_read_lock(l)
+#define arch_write_lock(l)	queue_write_lock(l)
+#define arch_read_trylock(l)	queue_read_trylock(l)
+#define arch_write_trylock(l)	queue_write_trylock(l)
+#define arch_read_unlock(l)	queue_read_unlock(l)
+#define arch_write_unlock(l)	queue_write_unlock(l)
+
+#endif /* __ASM_GENERIC_QRWLOCK_H */
diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h
new file mode 100644
index 000000000000..4d76f24df518
--- /dev/null
+++ b/include/asm-generic/qrwlock_types.h
@@ -0,0 +1,21 @@
+#ifndef __ASM_GENERIC_QRWLOCK_TYPES_H
+#define __ASM_GENERIC_QRWLOCK_TYPES_H
+
+#include <linux/types.h>
+#include <asm/spinlock_types.h>
+
+/*
+ * The queue read/write lock data structure
+ */
+
+typedef struct qrwlock {
+	atomic_t		cnts;
+	arch_spinlock_t		lock;
+} arch_rwlock_t;
+
+#define	__ARCH_RW_LOCK_UNLOCKED {		\
+	.cnts = ATOMIC_INIT(0),			\
+	.lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
+}
+
+#endif /* __ASM_GENERIC_QRWLOCK_TYPES_H */
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index d2b32ac27a39..35536d9c0964 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -223,3 +223,10 @@ endif
 config MUTEX_SPIN_ON_OWNER
 	def_bool y
 	depends on SMP && !DEBUG_MUTEXES
+
+config ARCH_USE_QUEUE_RWLOCK
+	bool
+
+config QUEUE_RWLOCK
+	def_bool y if ARCH_USE_QUEUE_RWLOCK
+	depends on SMP
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index b8bdcd4785b7..8541bfdfd232 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -24,4 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
 obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
+obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
 obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
new file mode 100644
index 000000000000..fb5b8ac411a5
--- /dev/null
+++ b/kernel/locking/qrwlock.c
@@ -0,0 +1,133 @@
+/*
+ * Queue read/write lock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ */
+#include <linux/smp.h>
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/mutex.h>
+#include <asm/qrwlock.h>
+
+/**
+ * rspin_until_writer_unlock - inc reader count & spin until writer is gone
+ * @lock  : Pointer to queue rwlock structure
+ * @writer: Current queue rwlock writer status byte
+ *
+ * In interrupt context or at the head of the queue, the reader will just
+ * increment the reader count & wait until the writer releases the lock.
+ */
+static __always_inline void
+rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
+{
+	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
+		arch_mutex_cpu_relax();
+		cnts = smp_load_acquire((u32 *)&lock->cnts);
+	}
+}
+
+/**
+ * queue_read_lock_slowpath - acquire read lock of a queue rwlock
+ * @lock: Pointer to queue rwlock structure
+ */
+void queue_read_lock_slowpath(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	/*
+	 * Readers come here when they cannot get the lock without waiting
+	 */
+	if (unlikely(in_interrupt())) {
+		/*
+		 * Readers in interrupt context will spin until the lock is
+		 * available without waiting in the queue.
+		 */
+		cnts = smp_load_acquire((u32 *)&lock->cnts);
+		rspin_until_writer_unlock(lock, cnts);
+		return;
+	}
+	atomic_sub(_QR_BIAS, &lock->cnts);
+
+	/*
+	 * Put the reader into the wait queue
+	 */
+	arch_spin_lock(&lock->lock);
+
+	/*
+	 * At the head of the wait queue now, wait until the writer state
+	 * goes to 0 and then try to increment the reader count and get
+	 * the lock. It is possible that an incoming writer may steal the
+	 * lock in the interim, so it is necessary to check the writer byte
+	 * to make sure that the write lock isn't taken.
+	 */
+	while (atomic_read(&lock->cnts) & _QW_WMASK)
+		arch_mutex_cpu_relax();
+
+	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
+	rspin_until_writer_unlock(lock, cnts);
+
+	/*
+	 * Signal the next one in queue to become queue head
+	 */
+	arch_spin_unlock(&lock->lock);
+}
+EXPORT_SYMBOL(queue_read_lock_slowpath);
+
+/**
+ * queue_write_lock_slowpath - acquire write lock of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ */
+void queue_write_lock_slowpath(struct qrwlock *lock)
+{
+	u32 cnts;
+
+	/* Put the writer into the wait queue */
+	arch_spin_lock(&lock->lock);
+
+	/* Try to acquire the lock directly if no reader is present */
+	if (!atomic_read(&lock->cnts) &&
+	    (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
+		goto unlock;
+
+	/*
+	 * Set the waiting flag to notify readers that a writer is pending,
+	 * or wait for a previous writer to go away.
+	 */
+	for (;;) {
+		cnts = atomic_read(&lock->cnts);
+		if (!(cnts & _QW_WMASK) &&
+		    (atomic_cmpxchg(&lock->cnts, cnts,
+				    cnts | _QW_WAITING) == cnts))
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+
+	/* When no more readers, set the locked flag */
+	for (;;) {
+		cnts = atomic_read(&lock->cnts);
+		if ((cnts == _QW_WAITING) &&
+		    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
+				    _QW_LOCKED) == _QW_WAITING))
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+unlock:
+	arch_spin_unlock(&lock->lock);
+}
+EXPORT_SYMBOL(queue_write_lock_slowpath);
-- 
cgit v1.2.3-71-gd317


From f27b087b81b70513b8c61ec20596c868f7b93474 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 6 Jun 2014 07:57:37 -0600
Subject: block: add blk_rq_set_block_pc()

With the optimizations around not clearing the full request at alloc
time, we are leaving some of the needed init for REQ_TYPE_BLOCK_PC
up to the user allocating the request.

Add a blk_rq_set_block_pc() that sets the command type to
REQ_TYPE_BLOCK_PC, and properly initializes the members associated
with this type of request. Update callers to use this function instead
of manipulating rq->cmd_type directly.

Includes fixes from Christoph Hellwig <hch@lst.de> for my half-assed
attempt.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c                            | 18 ++++++++++++++++++
 block/bsg.c                                 |  3 ++-
 block/scsi_ioctl.c                          |  6 +++---
 drivers/block/pktcdvd.c                     |  2 +-
 drivers/cdrom/cdrom.c                       |  2 +-
 drivers/scsi/device_handler/scsi_dh_alua.c  |  2 +-
 drivers/scsi/device_handler/scsi_dh_emc.c   |  2 +-
 drivers/scsi/device_handler/scsi_dh_hp_sw.c |  4 ++--
 drivers/scsi/device_handler/scsi_dh_rdac.c  |  2 +-
 drivers/scsi/osd/osd_initiator.c            |  4 ++--
 drivers/scsi/osst.c                         |  2 +-
 drivers/scsi/scsi_error.c                   |  3 ++-
 drivers/scsi/scsi_lib.c                     |  2 +-
 drivers/scsi/sg.c                           |  3 +--
 drivers/scsi/st.c                           |  2 +-
 drivers/target/target_core_pscsi.c          |  3 ++-
 include/linux/blkdev.h                      |  1 +
 17 files changed, 41 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 40d654861c33..9aca8c71e70b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1218,6 +1218,8 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio,
 	if (unlikely(!rq))
 		return ERR_PTR(-ENOMEM);
 
+	blk_rq_set_block_pc(rq);
+
 	for_each_bio(bio) {
 		struct bio *bounce_bio = bio;
 		int ret;
@@ -1234,6 +1236,22 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio,
 }
 EXPORT_SYMBOL(blk_make_request);
 
+/**
+ * blk_rq_set_block_pc - initialize a requeest to type BLOCK_PC
+ * @rq:		request to be initialized
+ *
+ */
+void blk_rq_set_block_pc(struct request *rq)
+{
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->__data_len = 0;
+	rq->__sector = (sector_t) -1;
+	rq->bio = rq->biotail = NULL;
+	memset(rq->__cmd, 0, sizeof(rq->__cmd));
+	rq->cmd = rq->__cmd;
+}
+EXPORT_SYMBOL(blk_rq_set_block_pc);
+
 /**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
diff --git a/block/bsg.c b/block/bsg.c
index e5214c148096..ff46addde5d8 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -196,7 +196,6 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
 	 * fill in request structure
 	 */
 	rq->cmd_len = hdr->request_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	rq->timeout = msecs_to_jiffies(hdr->timeout);
 	if (!rq->timeout)
@@ -273,6 +272,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
 	rq = blk_get_request(q, rw, GFP_KERNEL);
 	if (!rq)
 		return ERR_PTR(-ENOMEM);
+	blk_rq_set_block_pc(rq);
+
 	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
 	if (ret)
 		goto out;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 9c28a5b38042..14695c6221c8 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -229,7 +229,6 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 	 * fill in request structure
 	 */
 	rq->cmd_len = hdr->cmd_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	rq->timeout = msecs_to_jiffies(hdr->timeout);
 	if (!rq->timeout)
@@ -311,6 +310,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
 	if (!rq)
 		return -ENOMEM;
+	blk_rq_set_block_pc(rq);
 
 	if (blk_fill_sghdr_rq(q, rq, hdr, mode)) {
 		blk_put_request(rq);
@@ -491,7 +491,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 	memset(sense, 0, sizeof(sense));
 	rq->sense = sense;
 	rq->sense_len = 0;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(rq);
 
 	blk_execute_rq(q, disk, rq, 0);
 
@@ -524,7 +524,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 	int err;
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(rq);
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
 	rq->cmd[0] = cmd;
 	rq->cmd[4] = data;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ef166ad2dbad..758ac442c5b5 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -704,6 +704,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 
 	rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
 			     WRITE : READ, __GFP_WAIT);
+	blk_rq_set_block_pc(rq);
 
 	if (cgc->buflen) {
 		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -716,7 +717,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 	memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
 
 	rq->timeout = 60*HZ;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	if (cgc->quiet)
 		rq->cmd_flags |= REQ_QUIET;
 
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 49ac5662585b..0f40c95049c0 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2184,6 +2184,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 			ret = -ENOMEM;
 			break;
 		}
+		blk_rq_set_block_pc(rq);
 
 		ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 		if (ret) {
@@ -2203,7 +2204,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 		rq->cmd[9] = 0xf8;
 
 		rq->cmd_len = 12;
-		rq->cmd_type = REQ_TYPE_BLOCK_PC;
 		rq->timeout = 60 * HZ;
 		bio = rq->bio;
 
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 5248c888552b..7bcf67eec921 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -120,6 +120,7 @@ static struct request *get_alua_req(struct scsi_device *sdev,
 			    "%s: blk_get_request failed\n", __func__);
 		return NULL;
 	}
+	blk_rq_set_block_pc(rq);
 
 	if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
 		blk_put_request(rq);
@@ -128,7 +129,6 @@ static struct request *get_alua_req(struct scsi_device *sdev,
 		return NULL;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->retries = ALUA_FAILOVER_RETRIES;
diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c
index e1c8be06de9d..6f07f7fe3aa1 100644
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c
@@ -280,6 +280,7 @@ static struct request *get_req(struct scsi_device *sdev, int cmd,
 		return NULL;
 	}
 
+	blk_rq_set_block_pc(rq);
 	rq->cmd_len = COMMAND_SIZE(cmd);
 	rq->cmd[0] = cmd;
 
@@ -304,7 +305,6 @@ static struct request *get_req(struct scsi_device *sdev, int cmd,
 		break;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->timeout = CLARIION_TIMEOUT;
diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
index 084062bb8ee9..e9d9fea9e272 100644
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
@@ -120,7 +120,7 @@ retry:
 	if (!req)
 		return SCSI_DH_RES_TEMP_UNAVAIL;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			  REQ_FAILFAST_DRIVER;
 	req->cmd_len = COMMAND_SIZE(TEST_UNIT_READY);
@@ -250,7 +250,7 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h)
 	if (!req)
 		return SCSI_DH_RES_TEMP_UNAVAIL;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			  REQ_FAILFAST_DRIVER;
 	req->cmd_len = COMMAND_SIZE(START_STOP);
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 4b9cf93f3fb6..826069db9848 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -279,6 +279,7 @@ static struct request *get_rdac_req(struct scsi_device *sdev,
 				"get_rdac_req: blk_get_request failed.\n");
 		return NULL;
 	}
+	blk_rq_set_block_pc(rq);
 
 	if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
 		blk_put_request(rq);
@@ -287,7 +288,6 @@ static struct request *get_rdac_req(struct scsi_device *sdev,
 		return NULL;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->retries = RDAC_RETRIES;
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index bac04c2335aa..5f4cbf0c4759 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1570,6 +1570,7 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
 		if (unlikely(!req))
 			return ERR_PTR(-ENOMEM);
 
+		blk_rq_set_block_pc(req);
 		return req;
 	}
 }
@@ -1590,7 +1591,6 @@ static int _init_blk_request(struct osd_request *or,
 	}
 
 	or->request = req;
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= REQ_QUIET;
 
 	req->timeout = or->timeout;
@@ -1608,7 +1608,7 @@ static int _init_blk_request(struct osd_request *or,
 				ret = PTR_ERR(req);
 				goto out;
 			}
-			req->cmd_type = REQ_TYPE_BLOCK_PC;
+			blk_rq_set_block_pc(req);
 			or->in.req = or->request->next_rq = req;
 		}
 	} else if (has_in)
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 21883a2d6324..0727ea7cc387 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -365,7 +365,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
 	if (!req)
 		return DRIVER_ERROR << 24;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_QUIET;
 
 	SRpnt->bio = NULL;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index f17aa7aa7879..af624619d547 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1951,6 +1951,8 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 	 */
 	req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
 
+	blk_rq_set_block_pc(req);
+
 	req->cmd[0] = ALLOW_MEDIUM_REMOVAL;
 	req->cmd[1] = 0;
 	req->cmd[2] = 0;
@@ -1960,7 +1962,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 
 	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= REQ_QUIET;
 	req->timeout = 10 * HZ;
 	req->retries = 5;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index a0c95cac91f0..c3c1697b143e 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -195,6 +195,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);
 	if (!req)
 		return ret;
+	blk_rq_set_block_pc(req);
 
 	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
 					buffer, bufflen, __GFP_WAIT))
@@ -206,7 +207,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	req->sense_len = 0;
 	req->retries = retries;
 	req->timeout = timeout;
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
 
 	/*
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index df5e961484e1..53268aaba559 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1653,10 +1653,9 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd)
 	if (!rq)
 		return -ENOMEM;
 
+	blk_rq_set_block_pc(rq);
 	memcpy(rq->cmd, cmd, hp->cmd_len);
-
 	rq->cmd_len = hp->cmd_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	srp->rq = rq;
 	rq->end_io_data = srp;
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index afc834e172c6..14eb4b256a03 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -484,7 +484,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
 	if (!req)
 		return DRIVER_ERROR << 24;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_QUIET;
 
 	mdata->null_mapped = 1;
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 0f199f6a0738..94d00df28f39 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -1055,6 +1055,8 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 			ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 			goto fail;
 		}
+
+		blk_rq_set_block_pc(req);
 	} else {
 		BUG_ON(!cmd->data_length);
 
@@ -1071,7 +1073,6 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 		}
 	}
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->end_io = pscsi_req_done;
 	req->end_io_data = cmd;
 	req->cmd_len = scsi_command_size(pt->pscsi_cdb);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dc2c703f05fd..31e11051f1ba 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -796,6 +796,7 @@ extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern struct request *blk_make_request(struct request_queue *, struct bio *,
 					gfp_t);
+extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern void blk_add_request_payload(struct request *rq, struct page *page,
 		unsigned int len);
-- 
cgit v1.2.3-71-gd317


From a4391c6465d9c978fd4bded12e34bdde3f5458f0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 5 Jun 2014 15:21:56 -0600
Subject: blk-mq: bump max tag depth to 10K tags

For some scsi-mq cases, the tag map can be huge. So increase the
max number of tags we support.

Additionally, don't fail with EINVAL if a user requests too many
tags. Warn that the tag depth has been adjusted down, and store
the new value inside the tag_set passed in.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 13 ++++++++++++-
 include/linux/blk-mq.h |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4e4cd6208052..a6ee74e27957 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1967,13 +1967,19 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+/*
+ * Alloc a tag set to be associated with one or more request queues.
+ * May fail with EINVAL for various error conditions. May adjust the
+ * requested depth down, if if it too large. In that case, the set
+ * value will be stored in set->queue_depth.
+ */
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 {
 	int i;
 
 	if (!set->nr_hw_queues)
 		return -EINVAL;
-	if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH)
+	if (!set->queue_depth)
 		return -EINVAL;
 	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
 		return -EINVAL;
@@ -1981,6 +1987,11 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
 		return -EINVAL;
 
+	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
+		pr_info("blk-mq: reduced tag depth to %u\n",
+			BLK_MQ_MAX_DEPTH);
+		set->queue_depth = BLK_MQ_MAX_DEPTH;
+	}
 
 	set->tags = kmalloc_node(set->nr_hw_queues *
 				 sizeof(struct blk_mq_tags *),
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0feedebfde48..a002cf191427 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -135,7 +135,7 @@ enum {
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
 
-	BLK_MQ_MAX_DEPTH	= 2048,
+	BLK_MQ_MAX_DEPTH	= 10240,
 
 	BLK_MQ_CPU_WORK_BATCH	= 8,
 };
-- 
cgit v1.2.3-71-gd317


From ce369a545aac3da653dd95d8117093a862bf94d3 Mon Sep 17 00:00:00 2001
From: Andrii Tseglytskyi <andrii.tseglytskyi@ti.com>
Date: Fri, 16 May 2014 05:45:58 -0500
Subject: ARM: OMAP5+: dpll: support Duty Cycle Correction(DCC)

Duty Cycle Correction(DCC) needs to be enabled if the MPU is to run at
frequencies beyond 1.4GHz for OMAP5, DRA75x, DRA72x.

MPU DPLL has a limitation on the maximum frequency it can be locked
at. Duty Cycle Correction circuit is used to recover a correct duty
cycle for achieving higher frequencies (hardware internally switches
output to M3 output(CLKOUTHIF) from M2 output (CLKOUT)).

For further information, See the note on OMAP5432 Technical Reference
Manual(SWPU282U) chapter 3.6.3.3.1 "DPLLs Output Clocks Parameters",
and also the "OMAP543x ES2.0 DM Operating Conditions Addendum v0.5"
chapter 2.1 "Micro Processor Unit (MPU)". Equivalent information is
present in relevant DRA75x, 72x documentation(SPRUHP2E, SPRUHI2P).

Signed-off-by: Andrii Tseglytskyi <andrii.tseglytskyi@ti.com>
Signed-off-by: Taras Kondratiuk <taras@ti.com>
Signed-off-by: J Keerthy <j-keerthy@ti.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
[t-kristo@ti.com: added TRM / DM references for DCC clock rate]
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 arch/arm/mach-omap2/dpll3xxx.c | 9 +++++++++
 include/linux/clk/ti.h         | 4 ++++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c
index fcd8036af910..6d7ba37e2257 100644
--- a/arch/arm/mach-omap2/dpll3xxx.c
+++ b/arch/arm/mach-omap2/dpll3xxx.c
@@ -319,6 +319,15 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel)
 
 	/* Set DPLL multiplier, divider */
 	v = omap2_clk_readl(clk, dd->mult_div1_reg);
+
+	/* Handle Duty Cycle Correction */
+	if (dd->dcc_mask) {
+		if (dd->last_rounded_rate >= dd->dcc_rate)
+			v |= dd->dcc_mask; /* Enable DCC */
+		else
+			v &= ~dd->dcc_mask; /* Disable DCC */
+	}
+
 	v &= ~(dd->mult_mask | dd->div1_mask);
 	v |= dd->last_rounded_m << __ffs(dd->mult_mask);
 	v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask);
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 4231c41bed51..e8d8a35034a5 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -41,6 +41,8 @@
  * @idlest_reg: register containing the DPLL idle status bitfield
  * @autoidle_mask: mask of the DPLL autoidle mode bitfield in @autoidle_reg
  * @freqsel_mask: mask of the DPLL jitter correction bitfield in @control_reg
+ * @dcc_mask: mask of the DPLL DCC correction bitfield @mult_div1_reg
+ * @dcc_rate: rate atleast which DCC @dcc_mask must be set
  * @idlest_mask: mask of the DPLL idle status bitfield in @idlest_reg
  * @lpmode_mask: mask of the DPLL low-power mode bitfield in @control_reg
  * @m4xen_mask: mask of the DPLL M4X multiplier bitfield in @control_reg
@@ -86,6 +88,8 @@ struct dpll_data {
 	u32			idlest_mask;
 	u32			dco_mask;
 	u32			sddiv_mask;
+	u32			dcc_mask;
+	unsigned long		dcc_rate;
 	u32			lpmode_mask;
 	u32			m4xen_mask;
 	u8			auto_recal_bit;
-- 
cgit v1.2.3-71-gd317


From bb3632c6101b2fad07e6246721466b984b1e0e9d Mon Sep 17 00:00:00 2001
From: Todd E Brandt <todd.e.brandt@linux.intel.com>
Date: Fri, 6 Jun 2014 05:40:17 -0700
Subject: PM / sleep: trace events for suspend/resume

Adds trace events that give finer resolution into suspend/resume. These
events are graphed in the timelines generated by the analyze_suspend.py
script. They represent large areas of time consumed that are typical to
suspend and resume.

The event is triggered by calling the function "trace_suspend_resume"
with three arguments: a string (the name of the event to be displayed
in the timeline), an integer (case specific number, such as the power
state or cpu number), and a boolean (where true is used to denote the start
of the timeline event, and false to denote the end).

The suspend_resume trace event reproduces the data that the machine_suspend
trace event did, so the latter has been removed.

Signed-off-by: Todd Brandt <todd.e.brandt@intel.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sleep.c         |  3 +++
 drivers/base/power/main.c    | 16 ++++++++++++++++
 drivers/base/syscore.c       |  5 +++++
 include/trace/events/power.h | 42 +++++++++++++++++++++++++-----------------
 kernel/cpu.c                 |  5 +++++
 kernel/power/hibernate.c     |  3 +++
 kernel/power/process.c       |  3 +++
 kernel/power/suspend.c       | 14 ++++++++++++--
 8 files changed, 72 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index c11e3795431b..b3e3cc73ba79 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -19,6 +19,7 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <asm/io.h>
+#include <trace/events/power.h>
 
 #include "internal.h"
 #include "sleep.h"
@@ -501,6 +502,7 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
 
 	ACPI_FLUSH_CPU_CACHE();
 
+	trace_suspend_resume(TPS("acpi_suspend"), acpi_state, true);
 	switch (acpi_state) {
 	case ACPI_STATE_S1:
 		barrier();
@@ -516,6 +518,7 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
 		pr_info(PREFIX "Low-level resume complete\n");
 		break;
 	}
+	trace_suspend_resume(TPS("acpi_suspend"), acpi_state, false);
 
 	/* This violates the spec but is required for bug compatibility. */
 	acpi_write_bit_register(ACPI_BITREG_SCI_ENABLE, 1);
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 343ffad59377..de3fe4fe350a 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -545,6 +545,7 @@ static void dpm_resume_noirq(pm_message_t state)
 	struct device *dev;
 	ktime_t starttime = ktime_get();
 
+	trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, true);
 	mutex_lock(&dpm_list_mtx);
 	pm_transition = state;
 
@@ -587,6 +588,7 @@ static void dpm_resume_noirq(pm_message_t state)
 	dpm_show_time(starttime, state, "noirq");
 	resume_device_irqs();
 	cpuidle_resume();
+	trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false);
 }
 
 /**
@@ -664,6 +666,7 @@ static void dpm_resume_early(pm_message_t state)
 	struct device *dev;
 	ktime_t starttime = ktime_get();
 
+	trace_suspend_resume(TPS("dpm_resume_early"), state.event, true);
 	mutex_lock(&dpm_list_mtx);
 	pm_transition = state;
 
@@ -703,6 +706,7 @@ static void dpm_resume_early(pm_message_t state)
 	mutex_unlock(&dpm_list_mtx);
 	async_synchronize_full();
 	dpm_show_time(starttime, state, "early");
+	trace_suspend_resume(TPS("dpm_resume_early"), state.event, false);
 }
 
 /**
@@ -834,6 +838,7 @@ void dpm_resume(pm_message_t state)
 	struct device *dev;
 	ktime_t starttime = ktime_get();
 
+	trace_suspend_resume(TPS("dpm_resume"), state.event, true);
 	might_sleep();
 
 	mutex_lock(&dpm_list_mtx);
@@ -875,6 +880,7 @@ void dpm_resume(pm_message_t state)
 	dpm_show_time(starttime, state, NULL);
 
 	cpufreq_resume();
+	trace_suspend_resume(TPS("dpm_resume"), state.event, false);
 }
 
 /**
@@ -932,6 +938,7 @@ void dpm_complete(pm_message_t state)
 {
 	struct list_head list;
 
+	trace_suspend_resume(TPS("dpm_complete"), state.event, true);
 	might_sleep();
 
 	INIT_LIST_HEAD(&list);
@@ -951,6 +958,7 @@ void dpm_complete(pm_message_t state)
 	}
 	list_splice(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
+	trace_suspend_resume(TPS("dpm_complete"), state.event, false);
 }
 
 /**
@@ -1086,6 +1094,7 @@ static int dpm_suspend_noirq(pm_message_t state)
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
+	trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true);
 	cpuidle_pause();
 	suspend_device_irqs();
 	mutex_lock(&dpm_list_mtx);
@@ -1126,6 +1135,7 @@ static int dpm_suspend_noirq(pm_message_t state)
 	} else {
 		dpm_show_time(starttime, state, "noirq");
 	}
+	trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, false);
 	return error;
 }
 
@@ -1222,6 +1232,7 @@ static int dpm_suspend_late(pm_message_t state)
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
+	trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true);
 	mutex_lock(&dpm_list_mtx);
 	pm_transition = state;
 	async_error = 0;
@@ -1257,6 +1268,7 @@ static int dpm_suspend_late(pm_message_t state)
 	} else {
 		dpm_show_time(starttime, state, "late");
 	}
+	trace_suspend_resume(TPS("dpm_suspend_late"), state.event, false);
 	return error;
 }
 
@@ -1461,6 +1473,7 @@ int dpm_suspend(pm_message_t state)
 	ktime_t starttime = ktime_get();
 	int error = 0;
 
+	trace_suspend_resume(TPS("dpm_suspend"), state.event, true);
 	might_sleep();
 
 	cpufreq_suspend();
@@ -1498,6 +1511,7 @@ int dpm_suspend(pm_message_t state)
 		dpm_save_failed_step(SUSPEND_SUSPEND);
 	} else
 		dpm_show_time(starttime, state, NULL);
+	trace_suspend_resume(TPS("dpm_suspend"), state.event, false);
 	return error;
 }
 
@@ -1582,6 +1596,7 @@ int dpm_prepare(pm_message_t state)
 {
 	int error = 0;
 
+	trace_suspend_resume(TPS("dpm_prepare"), state.event, true);
 	might_sleep();
 
 	mutex_lock(&dpm_list_mtx);
@@ -1612,6 +1627,7 @@ int dpm_prepare(pm_message_t state)
 		put_device(dev);
 	}
 	mutex_unlock(&dpm_list_mtx);
+	trace_suspend_resume(TPS("dpm_prepare"), state.event, false);
 	return error;
 }
 
diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
index e8d11b6630ee..dbb8350ea8dc 100644
--- a/drivers/base/syscore.c
+++ b/drivers/base/syscore.c
@@ -10,6 +10,7 @@
 #include <linux/mutex.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <trace/events/power.h>
 
 static LIST_HEAD(syscore_ops_list);
 static DEFINE_MUTEX(syscore_ops_lock);
@@ -49,6 +50,7 @@ int syscore_suspend(void)
 	struct syscore_ops *ops;
 	int ret = 0;
 
+	trace_suspend_resume(TPS("syscore_suspend"), 0, true);
 	pr_debug("Checking wakeup interrupts\n");
 
 	/* Return error code if there are any wakeup interrupts pending. */
@@ -70,6 +72,7 @@ int syscore_suspend(void)
 				"Interrupts enabled after %pF\n", ops->suspend);
 		}
 
+	trace_suspend_resume(TPS("syscore_suspend"), 0, false);
 	return 0;
 
  err_out:
@@ -92,6 +95,7 @@ void syscore_resume(void)
 {
 	struct syscore_ops *ops;
 
+	trace_suspend_resume(TPS("syscore_resume"), 0, true);
 	WARN_ONCE(!irqs_disabled(),
 		"Interrupts enabled before system core resume.\n");
 
@@ -103,6 +107,7 @@ void syscore_resume(void)
 			WARN_ONCE(!irqs_disabled(),
 				"Interrupts enabled after %pF\n", ops->resume);
 		}
+	trace_suspend_resume(TPS("syscore_resume"), 0, false);
 }
 EXPORT_SYMBOL_GPL(syscore_resume);
 #endif /* CONFIG_PM_SLEEP */
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 9a7e08d61258..f88c8573e66c 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -7,6 +7,9 @@
 #include <linux/ktime.h>
 #include <linux/pm_qos.h>
 #include <linux/tracepoint.h>
+#include <linux/ftrace_event.h>
+
+#define TPS(x)  tracepoint_string(x)
 
 DECLARE_EVENT_CLASS(cpu,
 
@@ -97,23 +100,6 @@ DEFINE_EVENT(cpu, cpu_frequency,
 	TP_ARGS(frequency, cpu_id)
 );
 
-TRACE_EVENT(machine_suspend,
-
-	TP_PROTO(unsigned int state),
-
-	TP_ARGS(state),
-
-	TP_STRUCT__entry(
-		__field(	u32,		state		)
-	),
-
-	TP_fast_assign(
-		__entry->state = state;
-	),
-
-	TP_printk("state=%lu", (unsigned long)__entry->state)
-);
-
 TRACE_EVENT(device_pm_report_time,
 
 	TP_PROTO(struct device *dev, const char *pm_ops, s64 ops_time,
@@ -151,6 +137,28 @@ TRACE_EVENT(device_pm_report_time,
 		__entry->ops_time, __entry->error)
 );
 
+TRACE_EVENT(suspend_resume,
+
+	TP_PROTO(const char *action, int val, bool start),
+
+	TP_ARGS(action, val, start),
+
+	TP_STRUCT__entry(
+		__field(const char *, action)
+		__field(int, val)
+		__field(bool, start)
+	),
+
+	TP_fast_assign(
+		__entry->action = action;
+		__entry->val = val;
+		__entry->start = start;
+	),
+
+	TP_printk("%s[%u] %s", __entry->action, (unsigned int)__entry->val,
+		(__entry->start)?"begin":"end")
+);
+
 DECLARE_EVENT_CLASS(wakeup_source,
 
 	TP_PROTO(const char *name, unsigned int state),
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a9e710eef0e2..76deba01322a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -20,6 +20,7 @@
 #include <linux/gfp.h>
 #include <linux/suspend.h>
 #include <linux/lockdep.h>
+#include <trace/events/power.h>
 
 #include "smpboot.h"
 
@@ -522,7 +523,9 @@ int disable_nonboot_cpus(void)
 	for_each_online_cpu(cpu) {
 		if (cpu == first_cpu)
 			continue;
+		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
 		error = _cpu_down(cpu, 1);
+		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
 		if (!error)
 			cpumask_set_cpu(cpu, frozen_cpus);
 		else {
@@ -566,7 +569,9 @@ void __ref enable_nonboot_cpus(void)
 	arch_enable_nonboot_cpus_begin();
 
 	for_each_cpu(cpu, frozen_cpus) {
+		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
 		error = _cpu_up(cpu, 1);
+		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
 		if (!error) {
 			printk(KERN_INFO "CPU%d is up\n", cpu);
 			continue;
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index df88d55dc436..49e0a20fd010 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -28,6 +28,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
+#include <trace/events/power.h>
 
 #include "power.h"
 
@@ -292,7 +293,9 @@ static int create_image(int platform_mode)
 
 	in_suspend = 1;
 	save_processor_state();
+	trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
 	error = swsusp_arch_suspend();
+	trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
 	if (error)
 		printk(KERN_ERR "PM: Error %d creating hibernation image\n",
 			error);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 06ec8869dbf1..0ca8d83e2369 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -17,6 +17,7 @@
 #include <linux/delay.h>
 #include <linux/workqueue.h>
 #include <linux/kmod.h>
+#include <trace/events/power.h>
 
 /* 
  * Timeout for stopping processes
@@ -175,6 +176,7 @@ void thaw_processes(void)
 	struct task_struct *g, *p;
 	struct task_struct *curr = current;
 
+	trace_suspend_resume(TPS("thaw_processes"), 0, true);
 	if (pm_freezing)
 		atomic_dec(&system_freezing_cnt);
 	pm_freezing = false;
@@ -201,6 +203,7 @@ void thaw_processes(void)
 
 	schedule();
 	printk("done.\n");
+	trace_suspend_resume(TPS("thaw_processes"), 0, false);
 }
 
 void thaw_kernel_threads(void)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 963e6d0f050b..4dd8822f732a 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -177,7 +177,9 @@ static int suspend_prepare(suspend_state_t state)
 	if (error)
 		goto Finish;
 
+	trace_suspend_resume(TPS("freeze_processes"), 0, true);
 	error = suspend_freeze_processes();
+	trace_suspend_resume(TPS("freeze_processes"), 0, false);
 	if (!error)
 		return 0;
 
@@ -240,7 +242,9 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	 * all the devices are suspended.
 	 */
 	if (state == PM_SUSPEND_FREEZE) {
+		trace_suspend_resume(TPS("machine_suspend"), state, true);
 		freeze_enter();
+		trace_suspend_resume(TPS("machine_suspend"), state, false);
 		goto Platform_wake;
 	}
 
@@ -256,7 +260,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	if (!error) {
 		*wakeup = pm_wakeup_pending();
 		if (!(suspend_test(TEST_CORE) || *wakeup)) {
+			trace_suspend_resume(TPS("machine_suspend"),
+				state, true);
 			error = suspend_ops->enter(state);
+			trace_suspend_resume(TPS("machine_suspend"),
+				state, false);
 			events_check_enabled = false;
 		}
 		syscore_resume();
@@ -294,7 +302,6 @@ int suspend_devices_and_enter(suspend_state_t state)
 	if (need_suspend_ops(state) && !suspend_ops)
 		return -ENOSYS;
 
-	trace_machine_suspend(state);
 	if (need_suspend_ops(state) && suspend_ops->begin) {
 		error = suspend_ops->begin(state);
 		if (error)
@@ -331,7 +338,6 @@ int suspend_devices_and_enter(suspend_state_t state)
 	else if (state == PM_SUSPEND_FREEZE && freeze_ops->end)
 		freeze_ops->end();
 
-	trace_machine_suspend(PWR_EVENT_EXIT);
 	return error;
 
  Recover_platform:
@@ -365,6 +371,7 @@ static int enter_state(suspend_state_t state)
 {
 	int error;
 
+	trace_suspend_resume(TPS("suspend_enter"), state, true);
 	if (state == PM_SUSPEND_FREEZE) {
 #ifdef CONFIG_PM_DEBUG
 		if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
@@ -382,9 +389,11 @@ static int enter_state(suspend_state_t state)
 	if (state == PM_SUSPEND_FREEZE)
 		freeze_begin();
 
+	trace_suspend_resume(TPS("sync_filesystems"), 0, true);
 	printk(KERN_INFO "PM: Syncing filesystems ... ");
 	sys_sync();
 	printk("done.\n");
+	trace_suspend_resume(TPS("sync_filesystems"), 0, false);
 
 	pr_debug("PM: Preparing system for %s sleep\n", pm_states[state].label);
 	error = suspend_prepare(state);
@@ -394,6 +403,7 @@ static int enter_state(suspend_state_t state)
 	if (suspend_test(TEST_FREEZER))
 		goto Finish;
 
+	trace_suspend_resume(TPS("suspend_enter"), state, false);
 	pr_debug("PM: Entering %s sleep\n", pm_states[state].label);
 	pm_restrict_gfp_mask();
 	error = suspend_devices_and_enter(state);
-- 
cgit v1.2.3-71-gd317


From 999e568354b8c797f344a2154761dd94cc84e4ac Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 3 Jun 2014 17:33:35 -0400
Subject: nfs4: remove unused CHANGE_SECURITY_LABEL

This constant has the wrong value.  And we don't use it.  And it's been
removed from the 4.2 spec anyway.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfs/nfs4proc.c    | 2 +-
 include/linux/nfs4.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 397be39c6dc8..7f55fed8dc64 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2750,7 +2750,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
 
 #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
 #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL)
 
 static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 {
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 12c2cb947df5..a1e3064a8d99 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -399,8 +399,6 @@ enum lock_type4 {
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
-#define FATTR4_WORD2_CHANGE_SECURITY_LABEL \
-					(1UL << 17)
 
 /* MDS threshold bitmap bits */
 #define THRESHOLD_RD                    (1UL << 0)
-- 
cgit v1.2.3-71-gd317


From 0bf4828983dff062cd502f27ab8644b32774e72e Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Wed, 28 May 2014 15:12:01 -0500
Subject: svcrdma: refactor marshalling logic

This patch refactors the NFSRDMA server marshalling logic to
remove the intermediary map structures.  It also fixes an existing bug
where the NFSRDMA server was not minding the device fast register page
list length limitations.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          |   3 +-
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 643 +++++++++++++------------------
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 230 ++---------
 net/sunrpc/xprtrdma/svc_rdma_transport.c |  62 +--
 4 files changed, 332 insertions(+), 606 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 0b8e3e6bdacf..5cf99a016368 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -115,14 +115,13 @@ struct svc_rdma_fastreg_mr {
 	struct list_head frmr_list;
 };
 struct svc_rdma_req_map {
-	struct svc_rdma_fastreg_mr *frmr;
 	unsigned long count;
 	union {
 		struct kvec sge[RPCSVC_MAXPAGES];
 		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
+		unsigned long lkey[RPCSVC_MAXPAGES];
 	};
 };
-#define RDMACTXT_F_FAST_UNREG	1
 #define RDMACTXT_F_LAST_CTXT	2
 
 #define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 8d904e4eef15..52d9f2ce20b0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -69,7 +70,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 
 	/* Set up the XDR head */
 	rqstp->rq_arg.head[0].iov_base = page_address(page);
-	rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length);
+	rqstp->rq_arg.head[0].iov_len =
+		min_t(size_t, byte_count, ctxt->sge[0].length);
 	rqstp->rq_arg.len = byte_count;
 	rqstp->rq_arg.buflen = byte_count;
 
@@ -85,7 +87,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 		page = ctxt->pages[sge_no];
 		put_page(rqstp->rq_pages[sge_no]);
 		rqstp->rq_pages[sge_no] = page;
-		bc -= min(bc, ctxt->sge[sge_no].length);
+		bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
 		rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
 		sge_no++;
 	}
@@ -113,291 +115,265 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.tail[0].iov_len = 0;
 }
 
-/* Encode a read-chunk-list as an array of IB SGE
- *
- * Assumptions:
- * - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contiguous and consists of
- *   PAGE_SIZE elements.
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- *   chunk in the read list
- *
- */
-static int map_read_chunks(struct svcxprt_rdma *xprt,
-			   struct svc_rqst *rqstp,
-			   struct svc_rdma_op_ctxt *head,
-			   struct rpcrdma_msg *rmsgp,
-			   struct svc_rdma_req_map *rpl_map,
-			   struct svc_rdma_req_map *chl_map,
-			   int ch_count,
-			   int byte_count)
+static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
 {
-	int sge_no;
-	int sge_bytes;
-	int page_off;
-	int page_no;
-	int ch_bytes;
-	int ch_no;
-	struct rpcrdma_read_chunk *ch;
+	if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
+	     RDMA_TRANSPORT_IWARP)
+		return 1;
+	else
+		return min_t(int, sge_count, xprt->sc_max_sge);
+}
 
-	sge_no = 0;
-	page_no = 0;
-	page_off = 0;
-	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-	ch_no = 0;
-	ch_bytes = ntohl(ch->rc_target.rs_length);
-	head->arg.head[0] = rqstp->rq_arg.head[0];
-	head->arg.tail[0] = rqstp->rq_arg.tail[0];
-	head->arg.pages = &head->pages[head->count];
-	head->hdr_count = head->count; /* save count of hdr pages */
-	head->arg.page_base = 0;
-	head->arg.page_len = ch_bytes;
-	head->arg.len = rqstp->rq_arg.len + ch_bytes;
-	head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
-	head->count++;
-	chl_map->ch[0].start = 0;
-	while (byte_count) {
-		rpl_map->sge[sge_no].iov_base =
-			page_address(rqstp->rq_arg.pages[page_no]) + page_off;
-		sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
-		rpl_map->sge[sge_no].iov_len = sge_bytes;
-		/*
-		 * Don't bump head->count here because the same page
-		 * may be used by multiple SGE.
-		 */
-		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
-		rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
+typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
+			      struct svc_rqst *rqstp,
+			      struct svc_rdma_op_ctxt *head,
+			      int *page_no,
+			      u32 *page_offset,
+			      u32 rs_handle,
+			      u32 rs_length,
+			      u64 rs_offset,
+			      int last);
+
+/* Issue an RDMA_READ using the local lkey to map the data sink */
+static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
+			       struct svc_rqst *rqstp,
+			       struct svc_rdma_op_ctxt *head,
+			       int *page_no,
+			       u32 *page_offset,
+			       u32 rs_handle,
+			       u32 rs_length,
+			       u64 rs_offset,
+			       int last)
+{
+	struct ib_send_wr read_wr;
+	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
+	int ret, read, pno;
+	u32 pg_off = *page_offset;
+	u32 pg_no = *page_no;
+
+	ctxt->direction = DMA_FROM_DEVICE;
+	ctxt->read_hdr = head;
+	pages_needed =
+		min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));
+	read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
+
+	for (pno = 0; pno < pages_needed; pno++) {
+		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+		head->arg.page_len += len;
+		head->arg.len += len;
+		if (!pg_off)
+			head->count++;
+		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
 		rqstp->rq_next_page = rqstp->rq_respages + 1;
+		ctxt->sge[pno].addr =
+			ib_dma_map_page(xprt->sc_cm_id->device,
+					head->arg.pages[pg_no], pg_off,
+					PAGE_SIZE - pg_off,
+					DMA_FROM_DEVICE);
+		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+					   ctxt->sge[pno].addr);
+		if (ret)
+			goto err;
+		atomic_inc(&xprt->sc_dma_used);
 
-		byte_count -= sge_bytes;
-		ch_bytes -= sge_bytes;
-		sge_no++;
-		/*
-		 * If all bytes for this chunk have been mapped to an
-		 * SGE, move to the next SGE
-		 */
-		if (ch_bytes == 0) {
-			chl_map->ch[ch_no].count =
-				sge_no - chl_map->ch[ch_no].start;
-			ch_no++;
-			ch++;
-			chl_map->ch[ch_no].start = sge_no;
-			ch_bytes = ntohl(ch->rc_target.rs_length);
-			/* If bytes remaining account for next chunk */
-			if (byte_count) {
-				head->arg.page_len += ch_bytes;
-				head->arg.len += ch_bytes;
-				head->arg.buflen += ch_bytes;
-			}
+		/* The lkey here is either a local dma lkey or a dma_mr lkey */
+		ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+		ctxt->sge[pno].length = len;
+		ctxt->count++;
+
+		/* adjust offset and wrap to next page if needed */
+		pg_off += len;
+		if (pg_off == PAGE_SIZE) {
+			pg_off = 0;
+			pg_no++;
 		}
-		/*
-		 * If this SGE consumed all of the page, move to the
-		 * next page
-		 */
-		if ((sge_bytes + page_off) == PAGE_SIZE) {
-			page_no++;
-			page_off = 0;
-			/*
-			 * If there are still bytes left to map, bump
-			 * the page count
-			 */
-			if (byte_count)
-				head->count++;
-		} else
-			page_off += sge_bytes;
+		rs_length -= len;
 	}
-	BUG_ON(byte_count != 0);
-	return sge_no;
+
+	if (last && rs_length == 0)
+		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+	else
+		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+
+	memset(&read_wr, 0, sizeof(read_wr));
+	read_wr.wr_id = (unsigned long)ctxt;
+	read_wr.opcode = IB_WR_RDMA_READ;
+	ctxt->wr_op = read_wr.opcode;
+	read_wr.send_flags = IB_SEND_SIGNALED;
+	read_wr.wr.rdma.rkey = rs_handle;
+	read_wr.wr.rdma.remote_addr = rs_offset;
+	read_wr.sg_list = ctxt->sge;
+	read_wr.num_sge = pages_needed;
+
+	ret = svc_rdma_send(xprt, &read_wr);
+	if (ret) {
+		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		goto err;
+	}
+
+	/* return current location in page array */
+	*page_no = pg_no;
+	*page_offset = pg_off;
+	ret = read;
+	atomic_inc(&rdma_stat_read);
+	return ret;
+ err:
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_context(ctxt, 0);
+	return ret;
 }
 
-/* Map a read-chunk-list to an XDR and fast register the page-list.
- *
- * Assumptions:
- * - chunk[0]	position points to pages[0] at an offset of 0
- * - pages[]	will be made physically contiguous by creating a one-off memory
- *		region using the fastreg verb.
- * - byte_count is # of bytes in read-chunk-list
- * - ch_count	is # of chunks in read-chunk-list
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- *   chunk in the read list
- */
-static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
+/* Issue an RDMA_READ using an FRMR to map the data sink */
+static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 				struct svc_rqst *rqstp,
 				struct svc_rdma_op_ctxt *head,
-				struct rpcrdma_msg *rmsgp,
-				struct svc_rdma_req_map *rpl_map,
-				struct svc_rdma_req_map *chl_map,
-				int ch_count,
-				int byte_count)
+				int *page_no,
+				u32 *page_offset,
+				u32 rs_handle,
+				u32 rs_length,
+				u64 rs_offset,
+				int last)
 {
-	int page_no;
-	int ch_no;
-	u32 offset;
-	struct rpcrdma_read_chunk *ch;
-	struct svc_rdma_fastreg_mr *frmr;
-	int ret = 0;
+	struct ib_send_wr read_wr;
+	struct ib_send_wr inv_wr;
+	struct ib_send_wr fastreg_wr;
+	u8 key;
+	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
+	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
+	int ret, read, pno;
+	u32 pg_off = *page_offset;
+	u32 pg_no = *page_no;
 
-	frmr = svc_rdma_get_frmr(xprt);
 	if (IS_ERR(frmr))
 		return -ENOMEM;
 
-	head->frmr = frmr;
-	head->arg.head[0] = rqstp->rq_arg.head[0];
-	head->arg.tail[0] = rqstp->rq_arg.tail[0];
-	head->arg.pages = &head->pages[head->count];
-	head->hdr_count = head->count; /* save count of hdr pages */
-	head->arg.page_base = 0;
-	head->arg.page_len = byte_count;
-	head->arg.len = rqstp->rq_arg.len + byte_count;
-	head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
+	ctxt->direction = DMA_FROM_DEVICE;
+	ctxt->frmr = frmr;
+	pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
+	read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
 
-	/* Fast register the page list */
-	frmr->kva = page_address(rqstp->rq_arg.pages[0]);
+	frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
 	frmr->direction = DMA_FROM_DEVICE;
 	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
-	frmr->map_len = byte_count;
-	frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
-	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
-		frmr->page_list->page_list[page_no] =
+	frmr->map_len = pages_needed << PAGE_SHIFT;
+	frmr->page_list_len = pages_needed;
+
+	for (pno = 0; pno < pages_needed; pno++) {
+		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+		head->arg.page_len += len;
+		head->arg.len += len;
+		if (!pg_off)
+			head->count++;
+		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
+		rqstp->rq_next_page = rqstp->rq_respages + 1;
+		frmr->page_list->page_list[pno] =
 			ib_dma_map_page(xprt->sc_cm_id->device,
-					rqstp->rq_arg.pages[page_no], 0,
+					head->arg.pages[pg_no], 0,
 					PAGE_SIZE, DMA_FROM_DEVICE);
-		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-					 frmr->page_list->page_list[page_no]))
-			goto fatal_err;
+		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+					   frmr->page_list->page_list[pno]);
+		if (ret)
+			goto err;
 		atomic_inc(&xprt->sc_dma_used);
-		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
-	}
-	head->count += page_no;
-
-	/* rq_respages points one past arg pages */
-	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
-	rqstp->rq_next_page = rqstp->rq_respages + 1;
 
-	/* Create the reply and chunk maps */
-	offset = 0;
-	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-	for (ch_no = 0; ch_no < ch_count; ch_no++) {
-		int len = ntohl(ch->rc_target.rs_length);
-		rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
-		rpl_map->sge[ch_no].iov_len = len;
-		chl_map->ch[ch_no].count = 1;
-		chl_map->ch[ch_no].start = ch_no;
-		offset += len;
-		ch++;
+		/* adjust offset and wrap to next page if needed */
+		pg_off += len;
+		if (pg_off == PAGE_SIZE) {
+			pg_off = 0;
+			pg_no++;
+		}
+		rs_length -= len;
 	}
 
-	ret = svc_rdma_fastreg(xprt, frmr);
-	if (ret)
-		goto fatal_err;
-
-	return ch_no;
-
- fatal_err:
-	printk("svcrdma: error fast registering xdr for xprt %p", xprt);
-	svc_rdma_put_frmr(xprt, frmr);
-	return -EIO;
-}
-
-static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
-			     struct svc_rdma_op_ctxt *ctxt,
-			     struct svc_rdma_fastreg_mr *frmr,
-			     struct kvec *vec,
-			     u64 *sgl_offset,
-			     int count)
-{
-	int i;
-	unsigned long off;
+	if (last && rs_length == 0)
+		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+	else
+		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
-	ctxt->count = count;
-	ctxt->direction = DMA_FROM_DEVICE;
-	for (i = 0; i < count; i++) {
-		ctxt->sge[i].length = 0; /* in case map fails */
-		if (!frmr) {
-			BUG_ON(!virt_to_page(vec[i].iov_base));
-			off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
-			ctxt->sge[i].addr =
-				ib_dma_map_page(xprt->sc_cm_id->device,
-						virt_to_page(vec[i].iov_base),
-						off,
-						vec[i].iov_len,
-						DMA_FROM_DEVICE);
-			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-						 ctxt->sge[i].addr))
-				return -EINVAL;
-			ctxt->sge[i].lkey = xprt->sc_dma_lkey;
-			atomic_inc(&xprt->sc_dma_used);
-		} else {
-			ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
-			ctxt->sge[i].lkey = frmr->mr->lkey;
-		}
-		ctxt->sge[i].length = vec[i].iov_len;
-		*sgl_offset = *sgl_offset + vec[i].iov_len;
+	/* Bump the key */
+	key = (u8)(frmr->mr->lkey & 0x000000FF);
+	ib_update_fast_reg_key(frmr->mr, ++key);
+
+	ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
+	ctxt->sge[0].lkey = frmr->mr->lkey;
+	ctxt->sge[0].length = read;
+	ctxt->count = 1;
+	ctxt->read_hdr = head;
+
+	/* Prepare FASTREG WR */
+	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+	fastreg_wr.send_flags = IB_SEND_SIGNALED;
+	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fastreg_wr.wr.fast_reg.length = frmr->map_len;
+	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+	fastreg_wr.next = &read_wr;
+
+	/* Prepare RDMA_READ */
+	memset(&read_wr, 0, sizeof(read_wr));
+	read_wr.send_flags = IB_SEND_SIGNALED;
+	read_wr.wr.rdma.rkey = rs_handle;
+	read_wr.wr.rdma.remote_addr = rs_offset;
+	read_wr.sg_list = ctxt->sge;
+	read_wr.num_sge = 1;
+	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
+		read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+		read_wr.wr_id = (unsigned long)ctxt;
+		read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
+	} else {
+		read_wr.opcode = IB_WR_RDMA_READ;
+		read_wr.next = &inv_wr;
+		/* Prepare invalidate */
+		memset(&inv_wr, 0, sizeof(inv_wr));
+		inv_wr.wr_id = (unsigned long)ctxt;
+		inv_wr.opcode = IB_WR_LOCAL_INV;
+		inv_wr.send_flags = IB_SEND_SIGNALED;
+		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
+	}
+	ctxt->wr_op = read_wr.opcode;
+
+	/* Post the chain */
+	ret = svc_rdma_send(xprt, &fastreg_wr);
+	if (ret) {
+		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		goto err;
 	}
-	return 0;
-}
 
-static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
-{
-	if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
-	     RDMA_TRANSPORT_IWARP) &&
-	    sge_count > 1)
-		return 1;
-	else
-		return min_t(int, sge_count, xprt->sc_max_sge);
+	/* return current location in page array */
+	*page_no = pg_no;
+	*page_offset = pg_off;
+	ret = read;
+	atomic_inc(&rdma_stat_read);
+	return ret;
+ err:
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_context(ctxt, 0);
+	svc_rdma_put_frmr(xprt, frmr);
+	return ret;
 }
 
-/*
- * Use RDMA_READ to read data from the advertised client buffer into the
- * XDR stream starting at rq_arg.head[0].iov_base.
- * Each chunk in the array
- * contains the following fields:
- * discrim      - '1', This isn't used for data placement
- * position     - The xdr stream offset (the same for every chunk)
- * handle       - RMR for client memory region
- * length       - data transfer length
- * offset       - 64 bit tagged offset in remote memory region
- *
- * On our side, we need to read into a pagelist. The first page immediately
- * follows the RPC header.
- *
- * This function returns:
- * 0 - No error and no read-list found.
- *
- * 1 - Successful read-list processing. The data is not yet in
- * the pagelist and therefore the RPC request must be deferred. The
- * I/O completion will enqueue the transport again and
- * svc_rdma_recvfrom will complete the request.
- *
- * <0 - Error processing/posting read-list.
- *
- * NOTE: The ctxt must not be touched after the last WR has been posted
- * because the I/O completion processing may occur on another
- * processor and free / modify the context. Ne touche pas!
- */
-static int rdma_read_xdr(struct svcxprt_rdma *xprt,
-			 struct rpcrdma_msg *rmsgp,
-			 struct svc_rqst *rqstp,
-			 struct svc_rdma_op_ctxt *hdr_ctxt)
+static int rdma_read_chunks(struct svcxprt_rdma *xprt,
+			    struct rpcrdma_msg *rmsgp,
+			    struct svc_rqst *rqstp,
+			    struct svc_rdma_op_ctxt *head)
 {
-	struct ib_send_wr read_wr;
-	struct ib_send_wr inv_wr;
-	int err = 0;
-	int ch_no;
-	int ch_count;
-	int byte_count;
-	int sge_count;
-	u64 sgl_offset;
+	int page_no, ch_count, ret;
 	struct rpcrdma_read_chunk *ch;
-	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct svc_rdma_req_map *rpl_map;
-	struct svc_rdma_req_map *chl_map;
+	u32 page_offset, byte_count;
+	u64 rs_offset;
+	rdma_reader_fn reader;
 
 	/* If no read list is present, return 0 */
 	ch = svc_rdma_get_read_chunk(rmsgp);
@@ -408,122 +384,55 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	if (ch_count > RPCSVC_MAXPAGES)
 		return -EINVAL;
 
-	/* Allocate temporary reply and chunk maps */
-	rpl_map = svc_rdma_get_req_map();
-	chl_map = svc_rdma_get_req_map();
+	/* The request is completed when the RDMA_READs complete. The
+	 * head context keeps all the pages that comprise the
+	 * request.
+	 */
+	head->arg.head[0] = rqstp->rq_arg.head[0];
+	head->arg.tail[0] = rqstp->rq_arg.tail[0];
+	head->arg.pages = &head->pages[head->count];
+	head->hdr_count = head->count;
+	head->arg.page_base = 0;
+	head->arg.page_len = 0;
+	head->arg.len = rqstp->rq_arg.len;
+	head->arg.buflen = rqstp->rq_arg.buflen;
 
-	if (!xprt->sc_frmr_pg_list_len)
-		sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
-					    rpl_map, chl_map, ch_count,
-					    byte_count);
+	/* Use FRMR if supported */
+	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
+		reader = rdma_read_chunk_frmr;
 	else
-		sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
-						 rpl_map, chl_map, ch_count,
-						 byte_count);
-	if (sge_count < 0) {
-		err = -EIO;
-		goto out;
-	}
-
-	sgl_offset = 0;
-	ch_no = 0;
+		reader = rdma_read_chunk_lcl;
 
+	page_no = 0; page_offset = 0;
 	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-	     ch->rc_discrim != 0; ch++, ch_no++) {
-		u64 rs_offset;
-next_sge:
-		ctxt = svc_rdma_get_context(xprt);
-		ctxt->direction = DMA_FROM_DEVICE;
-		ctxt->frmr = hdr_ctxt->frmr;
-		ctxt->read_hdr = NULL;
-		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+	     ch->rc_discrim != 0; ch++) {
 
-		/* Prepare READ WR */
-		memset(&read_wr, 0, sizeof read_wr);
-		read_wr.wr_id = (unsigned long)ctxt;
-		read_wr.opcode = IB_WR_RDMA_READ;
-		ctxt->wr_op = read_wr.opcode;
-		read_wr.send_flags = IB_SEND_SIGNALED;
-		read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle);
 		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
 				 &rs_offset);
-		read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset;
-		read_wr.sg_list = ctxt->sge;
-		read_wr.num_sge =
-			rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
-		err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
-					&rpl_map->sge[chl_map->ch[ch_no].start],
-					&sgl_offset,
-					read_wr.num_sge);
-		if (err) {
-			svc_rdma_unmap_dma(ctxt);
-			svc_rdma_put_context(ctxt, 0);
-			goto out;
-		}
-		if (((ch+1)->rc_discrim == 0) &&
-		    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
-			/*
-			 * Mark the last RDMA_READ with a bit to
-			 * indicate all RPC data has been fetched from
-			 * the client and the RPC needs to be enqueued.
-			 */
-			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-			if (hdr_ctxt->frmr) {
-				set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
-				/*
-				 * Invalidate the local MR used to map the data
-				 * sink.
-				 */
-				if (xprt->sc_dev_caps &
-				    SVCRDMA_DEVCAP_READ_W_INV) {
-					read_wr.opcode =
-						IB_WR_RDMA_READ_WITH_INV;
-					ctxt->wr_op = read_wr.opcode;
-					read_wr.ex.invalidate_rkey =
-						ctxt->frmr->mr->lkey;
-				} else {
-					/* Prepare INVALIDATE WR */
-					memset(&inv_wr, 0, sizeof inv_wr);
-					inv_wr.opcode = IB_WR_LOCAL_INV;
-					inv_wr.send_flags = IB_SEND_SIGNALED;
-					inv_wr.ex.invalidate_rkey =
-						hdr_ctxt->frmr->mr->lkey;
-					read_wr.next = &inv_wr;
-				}
-			}
-			ctxt->read_hdr = hdr_ctxt;
-		}
-		/* Post the read */
-		err = svc_rdma_send(xprt, &read_wr);
-		if (err) {
-			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
-			       err);
-			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-			svc_rdma_unmap_dma(ctxt);
-			svc_rdma_put_context(ctxt, 0);
-			goto out;
+		byte_count = ntohl(ch->rc_target.rs_length);
+
+		while (byte_count > 0) {
+			ret = reader(xprt, rqstp, head,
+				     &page_no, &page_offset,
+				     ntohl(ch->rc_target.rs_handle),
+				     byte_count, rs_offset,
+				     ((ch+1)->rc_discrim == 0) /* last */
+				     );
+			if (ret < 0)
+				goto err;
+			byte_count -= ret;
+			rs_offset += ret;
+			head->arg.buflen += ret;
 		}
-		atomic_inc(&rdma_stat_read);
-
-		if (read_wr.num_sge < chl_map->ch[ch_no].count) {
-			chl_map->ch[ch_no].count -= read_wr.num_sge;
-			chl_map->ch[ch_no].start += read_wr.num_sge;
-			goto next_sge;
-		}
-		sgl_offset = 0;
-		err = 1;
 	}
-
- out:
-	svc_rdma_put_req_map(rpl_map);
-	svc_rdma_put_req_map(chl_map);
-
+	ret = 1;
+ err:
 	/* Detach arg pages. svc_recv will replenish them */
-	for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
-		rqstp->rq_pages[ch_no] = NULL;
+	for (page_no = 0;
+	     &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
+		rqstp->rq_pages[page_no] = NULL;
 
-	return err;
+	return ret;
 }
 
 static int rdma_read_complete(struct svc_rqst *rqstp,
@@ -595,13 +504,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 				  struct svc_rdma_op_ctxt,
 				  dto_q);
 		list_del_init(&ctxt->dto_q);
-	}
-	if (ctxt) {
 		spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
 		return rdma_read_complete(rqstp, ctxt);
-	}
-
-	if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
+	} else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
 		ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
 				  struct svc_rdma_op_ctxt,
 				  dto_q);
@@ -621,7 +526,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
 			goto close_out;
 
-		BUG_ON(ret);
 		goto out;
 	}
 	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
@@ -644,12 +548,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	}
 
 	/* Read read-list data. */
-	ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
+	ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
 	if (ret > 0) {
 		/* read-list posted, defer until data received from client. */
 		goto defer;
-	}
-	if (ret < 0) {
+	} else if (ret < 0) {
 		/* Post of read-list failed, free context. */
 		svc_rdma_put_context(ctxt, 1);
 		return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 7e024a51617e..49fd21a5c215 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -49,152 +50,6 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
-/* Encode an XDR as an array of IB SGE
- *
- * Assumptions:
- * - head[0] is physically contiguous.
- * - tail[0] is physically contiguous.
- * - pages[] is not physically or virtually contiguous and consists of
- *   PAGE_SIZE elements.
- *
- * Output:
- * SGE[0]              reserved for RCPRDMA header
- * SGE[1]              data from xdr->head[]
- * SGE[2..sge_count-2] data from xdr->pages[]
- * SGE[sge_count-1]    data from xdr->tail.
- *
- * The max SGE we need is the length of the XDR / pagesize + one for
- * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
- * reserves a page for both the request and the reply header, and this
- * array is only concerned with the reply we are assured that we have
- * on extra page for the RPCRMDA header.
- */
-static int fast_reg_xdr(struct svcxprt_rdma *xprt,
-			struct xdr_buf *xdr,
-			struct svc_rdma_req_map *vec)
-{
-	int sge_no;
-	u32 sge_bytes;
-	u32 page_bytes;
-	u32 page_off;
-	int page_no = 0;
-	u8 *frva;
-	struct svc_rdma_fastreg_mr *frmr;
-
-	frmr = svc_rdma_get_frmr(xprt);
-	if (IS_ERR(frmr))
-		return -ENOMEM;
-	vec->frmr = frmr;
-
-	/* Skip the RPCRDMA header */
-	sge_no = 1;
-
-	/* Map the head. */
-	frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
-	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
-	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
-	vec->count = 2;
-	sge_no++;
-
-	/* Map the XDR head */
-	frmr->kva = frva;
-	frmr->direction = DMA_TO_DEVICE;
-	frmr->access_flags = 0;
-	frmr->map_len = PAGE_SIZE;
-	frmr->page_list_len = 1;
-	page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
-	frmr->page_list->page_list[page_no] =
-		ib_dma_map_page(xprt->sc_cm_id->device,
-				virt_to_page(xdr->head[0].iov_base),
-				page_off,
-				PAGE_SIZE - page_off,
-				DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-				 frmr->page_list->page_list[page_no]))
-		goto fatal_err;
-	atomic_inc(&xprt->sc_dma_used);
-
-	/* Map the XDR page list */
-	page_off = xdr->page_base;
-	page_bytes = xdr->page_len + page_off;
-	if (!page_bytes)
-		goto encode_tail;
-
-	/* Map the pages */
-	vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
-	vec->sge[sge_no].iov_len = page_bytes;
-	sge_no++;
-	while (page_bytes) {
-		struct page *page;
-
-		page = xdr->pages[page_no++];
-		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
-		page_bytes -= sge_bytes;
-
-		frmr->page_list->page_list[page_no] =
-			ib_dma_map_page(xprt->sc_cm_id->device,
-					page, page_off,
-					sge_bytes, DMA_TO_DEVICE);
-		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-					 frmr->page_list->page_list[page_no]))
-			goto fatal_err;
-
-		atomic_inc(&xprt->sc_dma_used);
-		page_off = 0; /* reset for next time through loop */
-		frmr->map_len += PAGE_SIZE;
-		frmr->page_list_len++;
-	}
-	vec->count++;
-
- encode_tail:
-	/* Map tail */
-	if (0 == xdr->tail[0].iov_len)
-		goto done;
-
-	vec->count++;
-	vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
-
-	if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
-	    ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
-		/*
-		 * If head and tail use the same page, we don't need
-		 * to map it again.
-		 */
-		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
-	} else {
-		void *va;
-
-		/* Map another page for the tail */
-		page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
-		va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
-		vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
-
-		frmr->page_list->page_list[page_no] =
-		    ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
-				    page_off,
-				    PAGE_SIZE,
-				    DMA_TO_DEVICE);
-		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-					 frmr->page_list->page_list[page_no]))
-			goto fatal_err;
-		atomic_inc(&xprt->sc_dma_used);
-		frmr->map_len += PAGE_SIZE;
-		frmr->page_list_len++;
-	}
-
- done:
-	if (svc_rdma_fastreg(xprt, frmr))
-		goto fatal_err;
-
-	return 0;
-
- fatal_err:
-	printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
-	vec->frmr = NULL;
-	svc_rdma_put_frmr(xprt, frmr);
-	return -EIO;
-}
-
 static int map_xdr(struct svcxprt_rdma *xprt,
 		   struct xdr_buf *xdr,
 		   struct svc_rdma_req_map *vec)
@@ -208,9 +63,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
 	BUG_ON(xdr->len !=
 	       (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
 
-	if (xprt->sc_frmr_pg_list_len)
-		return fast_reg_xdr(xprt, xdr, vec);
-
 	/* Skip the first sge, this is for the RPCRDMA header */
 	sge_no = 1;
 
@@ -282,8 +134,6 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
 }
 
 /* Assumptions:
- * - We are using FRMR
- *     - or -
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
 static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -327,23 +177,16 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 		sge_bytes = min_t(size_t,
 			  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
 		sge[sge_no].length = sge_bytes;
-		if (!vec->frmr) {
-			sge[sge_no].addr =
-				dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
-					    sge_bytes, DMA_TO_DEVICE);
-			xdr_off += sge_bytes;
-			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-						 sge[sge_no].addr))
-				goto err;
-			atomic_inc(&xprt->sc_dma_used);
-			sge[sge_no].lkey = xprt->sc_dma_lkey;
-		} else {
-			sge[sge_no].addr = (unsigned long)
-				vec->sge[xdr_sge_no].iov_base + sge_off;
-			sge[sge_no].lkey = vec->frmr->mr->lkey;
-		}
+		sge[sge_no].addr =
+			dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
+				    sge_bytes, DMA_TO_DEVICE);
+		xdr_off += sge_bytes;
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 sge[sge_no].addr))
+			goto err;
+		atomic_inc(&xprt->sc_dma_used);
+		sge[sge_no].lkey = xprt->sc_dma_lkey;
 		ctxt->count++;
-		ctxt->frmr = vec->frmr;
 		sge_off = 0;
 		sge_no++;
 		xdr_sge_no++;
@@ -369,7 +212,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 	return 0;
  err:
 	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_frmr(xprt, vec->frmr);
 	svc_rdma_put_context(ctxt, 0);
 	/* Fatal error, close transport */
 	return -EIO;
@@ -397,10 +239,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[1];
 
-	if (vec->frmr)
-		max_write = vec->frmr->map_len;
-	else
-		max_write = xprt->sc_max_sge * PAGE_SIZE;
+	max_write = xprt->sc_max_sge * PAGE_SIZE;
 
 	/* Write chunks start at the pagelist */
 	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -472,10 +311,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[2];
 
-	if (vec->frmr)
-		max_write = vec->frmr->map_len;
-	else
-		max_write = xprt->sc_max_sge * PAGE_SIZE;
+	max_write = xprt->sc_max_sge * PAGE_SIZE;
 
 	/* xdr offset starts at RPC message */
 	nchunks = ntohl(arg_ary->wc_nchunks);
@@ -545,7 +381,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		      int byte_count)
 {
 	struct ib_send_wr send_wr;
-	struct ib_send_wr inv_wr;
 	int sge_no;
 	int sge_bytes;
 	int page_no;
@@ -559,7 +394,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		       "svcrdma: could not post a receive buffer, err=%d."
 		       "Closing transport %p.\n", ret, rdma);
 		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
-		svc_rdma_put_frmr(rdma, vec->frmr);
 		svc_rdma_put_context(ctxt, 0);
 		return -ENOTCONN;
 	}
@@ -567,11 +401,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	/* Prepare the context */
 	ctxt->pages[0] = page;
 	ctxt->count = 1;
-	ctxt->frmr = vec->frmr;
-	if (vec->frmr)
-		set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
-	else
-		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
 
 	/* Prepare the SGE for the RPCRDMA Header */
 	ctxt->sge[0].lkey = rdma->sc_dma_lkey;
@@ -590,21 +419,15 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		int xdr_off = 0;
 		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
 		byte_count -= sge_bytes;
-		if (!vec->frmr) {
-			ctxt->sge[sge_no].addr =
-				dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
-					    sge_bytes, DMA_TO_DEVICE);
-			xdr_off += sge_bytes;
-			if (ib_dma_mapping_error(rdma->sc_cm_id->device,
-						 ctxt->sge[sge_no].addr))
-				goto err;
-			atomic_inc(&rdma->sc_dma_used);
-			ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
-		} else {
-			ctxt->sge[sge_no].addr = (unsigned long)
-				vec->sge[sge_no].iov_base;
-			ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
-		}
+		ctxt->sge[sge_no].addr =
+			dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
+				    sge_bytes, DMA_TO_DEVICE);
+		xdr_off += sge_bytes;
+		if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+					 ctxt->sge[sge_no].addr))
+			goto err;
+		atomic_inc(&rdma->sc_dma_used);
+		ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
 		ctxt->sge[sge_no].length = sge_bytes;
 	}
 	BUG_ON(byte_count != 0);
@@ -627,6 +450,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 			ctxt->sge[page_no+1].length = 0;
 	}
 	rqstp->rq_next_page = rqstp->rq_respages + 1;
+
 	BUG_ON(sge_no > rdma->sc_max_sge);
 	memset(&send_wr, 0, sizeof send_wr);
 	ctxt->wr_op = IB_WR_SEND;
@@ -635,15 +459,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	send_wr.num_sge = sge_no;
 	send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags =  IB_SEND_SIGNALED;
-	if (vec->frmr) {
-		/* Prepare INVALIDATE WR */
-		memset(&inv_wr, 0, sizeof inv_wr);
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.send_flags = IB_SEND_SIGNALED;
-		inv_wr.ex.invalidate_rkey =
-			vec->frmr->mr->lkey;
-		send_wr.next = &inv_wr;
-	}
 
 	ret = svc_rdma_send(rdma, &send_wr);
 	if (ret)
@@ -653,7 +468,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 
  err:
 	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_frmr(rdma, vec->frmr);
 	svc_rdma_put_context(ctxt, 1);
 	return -EIO;
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 02db8d9cc994..e7323fbbd348 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -162,7 +163,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
 		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
 	}
 	map->count = 0;
-	map->frmr = NULL;
 	return map;
 }
 
@@ -338,22 +338,21 @@ static void process_context(struct svcxprt_rdma *xprt,
 
 	switch (ctxt->wr_op) {
 	case IB_WR_SEND:
-		if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
-			svc_rdma_put_frmr(xprt, ctxt->frmr);
+		BUG_ON(ctxt->frmr);
 		svc_rdma_put_context(ctxt, 1);
 		break;
 
 	case IB_WR_RDMA_WRITE:
+		BUG_ON(ctxt->frmr);
 		svc_rdma_put_context(ctxt, 0);
 		break;
 
 	case IB_WR_RDMA_READ:
 	case IB_WR_RDMA_READ_WITH_INV:
+		svc_rdma_put_frmr(xprt, ctxt->frmr);
 		if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
 			struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
 			BUG_ON(!read_hdr);
-			if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
-				svc_rdma_put_frmr(xprt, ctxt->frmr);
 			spin_lock_bh(&xprt->sc_rq_dto_lock);
 			set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 			list_add_tail(&read_hdr->dto_q,
@@ -365,6 +364,7 @@ static void process_context(struct svcxprt_rdma *xprt,
 		break;
 
 	default:
+		BUG_ON(1);
 		printk(KERN_ERR "svcrdma: unexpected completion type, "
 		       "opcode=%d\n",
 		       ctxt->wr_op);
@@ -380,29 +380,42 @@ static void process_context(struct svcxprt_rdma *xprt,
 static void sq_cq_reap(struct svcxprt_rdma *xprt)
 {
 	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct ib_wc wc;
+	struct ib_wc wc_a[6];
+	struct ib_wc *wc;
 	struct ib_cq *cq = xprt->sc_sq_cq;
 	int ret;
 
+	memset(wc_a, 0, sizeof(wc_a));
+
 	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
 		return;
 
 	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
 	atomic_inc(&rdma_stat_sq_poll);
-	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
-		if (wc.status != IB_WC_SUCCESS)
-			/* Close the transport */
-			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+	while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
+		int i;
 
-		/* Decrement used SQ WR count */
-		atomic_dec(&xprt->sc_sq_count);
-		wake_up(&xprt->sc_send_wait);
+		for (i = 0; i < ret; i++) {
+			wc = &wc_a[i];
+			if (wc->status != IB_WC_SUCCESS) {
+				dprintk("svcrdma: sq wc err status %d\n",
+					wc->status);
 
-		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
-		if (ctxt)
-			process_context(xprt, ctxt);
+				/* Close the transport */
+				set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			}
 
-		svc_xprt_put(&xprt->sc_xprt);
+			/* Decrement used SQ WR count */
+			atomic_dec(&xprt->sc_sq_count);
+			wake_up(&xprt->sc_send_wait);
+
+			ctxt = (struct svc_rdma_op_ctxt *)
+				(unsigned long)wc->wr_id;
+			if (ctxt)
+				process_context(xprt, ctxt);
+
+			svc_xprt_put(&xprt->sc_xprt);
+		}
 	}
 
 	if (ctxt)
@@ -995,7 +1008,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 			need_dma_mr = 0;
 		break;
 	case RDMA_TRANSPORT_IB:
-		if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+		if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+			need_dma_mr = 1;
+			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+		} else if (!(devattr.device_cap_flags &
+			     IB_DEVICE_LOCAL_DMA_LKEY)) {
 			need_dma_mr = 1;
 			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
 		} else
@@ -1192,14 +1209,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
 
 	/*
-	 * If there are fewer SQ WR available than required to send a
-	 * simple response, return false.
-	 */
-	if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3))
-		return 0;
-
-	/*
-	 * ...or there are already waiters on the SQ,
+	 * If there are already waiters on the SQ,
 	 * return false.
 	 */
 	if (waitqueue_active(&rdma->sc_send_wait))
-- 
cgit v1.2.3-71-gd317


From f972eb63b1003fae68d7b7e9b674d4ba5db681c2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 19 May 2014 15:13:47 -0400
Subject: perf: Pass protection and flags bits through mmap2 interface

The mmap2 interface was missing the protection and flags bits needed to
accurately determine if a mmap memory area was shared or private and
if it was readable or not.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
[tweaked patch to compile and wrote changelog]
Signed-off-by: Don Zickus <dzickus@redhat.com>
Link: http://lkml.kernel.org/r/1400526833-141779-2-git-send-email-dzickus@redhat.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 include/uapi/linux/perf_event.h |  1 +
 kernel/events/core.c            | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 5312fae47218..9269de254874 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -705,6 +705,7 @@ enum perf_event_type {
 	 *	u32				min;
 	 *	u64				ino;
 	 *	u64				ino_generation;
+	 *	u32				prot, flags;
 	 *	char				filename[];
 	 * 	struct sample_id		sample_id;
 	 * };
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7da5e561e89a..eea1955c7868 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -40,6 +40,7 @@
 #include <linux/mm_types.h>
 #include <linux/cgroup.h>
 #include <linux/module.h>
+#include <linux/mman.h>
 
 #include "internal.h"
 
@@ -5127,6 +5128,7 @@ struct perf_mmap_event {
 	int			maj, min;
 	u64			ino;
 	u64			ino_generation;
+	u32			prot, flags;
 
 	struct {
 		struct perf_event_header	header;
@@ -5168,6 +5170,8 @@ static void perf_event_mmap_output(struct perf_event *event,
 		mmap_event->event_id.header.size += sizeof(mmap_event->min);
 		mmap_event->event_id.header.size += sizeof(mmap_event->ino);
 		mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
+		mmap_event->event_id.header.size += sizeof(mmap_event->prot);
+		mmap_event->event_id.header.size += sizeof(mmap_event->flags);
 	}
 
 	perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
@@ -5186,6 +5190,8 @@ static void perf_event_mmap_output(struct perf_event *event,
 		perf_output_put(&handle, mmap_event->min);
 		perf_output_put(&handle, mmap_event->ino);
 		perf_output_put(&handle, mmap_event->ino_generation);
+		perf_output_put(&handle, mmap_event->prot);
+		perf_output_put(&handle, mmap_event->flags);
 	}
 
 	__output_copy(&handle, mmap_event->file_name,
@@ -5204,6 +5210,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	struct file *file = vma->vm_file;
 	int maj = 0, min = 0;
 	u64 ino = 0, gen = 0;
+	u32 prot = 0, flags = 0;
 	unsigned int size;
 	char tmp[16];
 	char *buf = NULL;
@@ -5234,6 +5241,28 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 		gen = inode->i_generation;
 		maj = MAJOR(dev);
 		min = MINOR(dev);
+
+		if (vma->vm_flags & VM_READ)
+			prot |= PROT_READ;
+		if (vma->vm_flags & VM_WRITE)
+			prot |= PROT_WRITE;
+		if (vma->vm_flags & VM_EXEC)
+			prot |= PROT_EXEC;
+
+		if (vma->vm_flags & VM_MAYSHARE)
+			flags = MAP_SHARED;
+		else
+			flags = MAP_PRIVATE;
+
+		if (vma->vm_flags & VM_DENYWRITE)
+			flags |= MAP_DENYWRITE;
+		if (vma->vm_flags & VM_MAYEXEC)
+			flags |= MAP_EXECUTABLE;
+		if (vma->vm_flags & VM_LOCKED)
+			flags |= MAP_LOCKED;
+		if (vma->vm_flags & VM_HUGETLB)
+			flags |= MAP_HUGETLB;
+
 		goto got_name;
 	} else {
 		name = (char *)arch_vma_name(vma);
@@ -5274,6 +5303,8 @@ got_name:
 	mmap_event->min = min;
 	mmap_event->ino = ino;
 	mmap_event->ino_generation = gen;
+	mmap_event->prot = prot;
+	mmap_event->flags = flags;
 
 	if (!(vma->vm_flags & VM_EXEC))
 		mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -5314,6 +5345,8 @@ void perf_event_mmap(struct vm_area_struct *vma)
 		/* .min (attr_mmap2 only) */
 		/* .ino (attr_mmap2 only) */
 		/* .ino_generation (attr_mmap2 only) */
+		/* .prot (attr_mmap2 only) */
+		/* .flags (attr_mmap2 only) */
 	};
 
 	perf_event_mmap_event(&mmap_event);
-- 
cgit v1.2.3-71-gd317


From f95aeb17f57c4c98b7f33627e5f51353fd094a93 Mon Sep 17 00:00:00 2001
From: Damien Lespiau <damien.lespiau@intel.com>
Date: Mon, 9 Jun 2014 14:39:49 +0100
Subject: drm: Remove DRM_ARRAY_SIZE() for ARRAY_SIZE()

I cannot see a need to provide a DRM_ version of ARRAY_SIZE(), only used
in a few places. I suspect its usage has been spread by copy & paste
rather than anything else.

Let's just remove it for plain ARRAY_SIZE().

Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/armada/armada_drv.c     | 2 +-
 drivers/gpu/drm/exynos/exynos_drm_drv.c | 2 +-
 drivers/gpu/drm/gma500/psb_drv.c        | 2 +-
 drivers/gpu/drm/i810/i810_dma.c         | 2 +-
 drivers/gpu/drm/i915/i915_dma.c         | 2 +-
 drivers/gpu/drm/i915/i915_ioc32.c       | 2 +-
 drivers/gpu/drm/mga/mga_ioc32.c         | 2 +-
 drivers/gpu/drm/mga/mga_state.c         | 2 +-
 drivers/gpu/drm/nouveau/nouveau_ioc32.c | 2 +-
 drivers/gpu/drm/qxl/qxl_ioctl.c         | 2 +-
 drivers/gpu/drm/r128/r128_ioc32.c       | 2 +-
 drivers/gpu/drm/r128/r128_state.c       | 2 +-
 drivers/gpu/drm/radeon/radeon_ioc32.c   | 2 +-
 drivers/gpu/drm/radeon/radeon_kms.c     | 2 +-
 drivers/gpu/drm/radeon/radeon_state.c   | 2 +-
 drivers/gpu/drm/savage/savage_bci.c     | 2 +-
 drivers/gpu/drm/sis/sis_mm.c            | 2 +-
 drivers/gpu/drm/via/via_dma.c           | 2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c     | 2 +-
 include/drm/drmP.h                      | 2 --
 20 files changed, 19 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c
index 567cfbde0883..8ab3cd1a8cdb 100644
--- a/drivers/gpu/drm/armada/armada_drv.c
+++ b/drivers/gpu/drm/armada/armada_drv.c
@@ -402,7 +402,7 @@ static struct platform_driver armada_drm_platform_driver = {
 
 static int __init armada_drm_init(void)
 {
-	armada_drm_driver.num_ioctls = DRM_ARRAY_SIZE(armada_ioctls);
+	armada_drm_driver.num_ioctls = ARRAY_SIZE(armada_ioctls);
 	return platform_driver_register(&armada_drm_platform_driver);
 }
 module_init(armada_drm_init);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 5d225dd58a87..d91f27777537 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -573,7 +573,7 @@ static int exynos_drm_platform_probe(struct platform_device *pdev)
 	int ret;
 
 	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
-	exynos_drm_driver.num_ioctls = DRM_ARRAY_SIZE(exynos_ioctls);
+	exynos_drm_driver.num_ioctls = ARRAY_SIZE(exynos_ioctls);
 
 #ifdef CONFIG_DRM_EXYNOS_FIMD
 	ret = platform_driver_register(&fimd_driver);
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 59ea45e5c97e..6e8fe9ec02b5 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -477,7 +477,7 @@ static struct drm_driver driver = {
 	.lastclose = psb_driver_lastclose,
 	.preclose = psb_driver_preclose,
 
-	.num_ioctls = DRM_ARRAY_SIZE(psb_ioctls),
+	.num_ioctls = ARRAY_SIZE(psb_ioctls),
 	.device_is_agp = psb_driver_device_is_agp,
 	.irq_preinstall = psb_irq_preinstall,
 	.irq_postinstall = psb_irq_postinstall,
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index aeace37415aa..e88bac1d781f 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c
@@ -1251,7 +1251,7 @@ const struct drm_ioctl_desc i810_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I810_FLIP, i810_flip_bufs, DRM_AUTH|DRM_UNLOCKED),
 };
 
-int i810_max_ioctl = DRM_ARRAY_SIZE(i810_ioctls);
+int i810_max_ioctl = ARRAY_SIZE(i810_ioctls);
 
 /**
  * Determine if the device really is AGP or not.
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index b9159ade5e85..4c22a5b7f4c5 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1986,7 +1986,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
-int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
+int i915_max_ioctl = ARRAY_SIZE(i915_ioctls);
 
 /*
  * This is really ugly: Because old userspace abused the linux agp interface to
diff --git a/drivers/gpu/drm/i915/i915_ioc32.c b/drivers/gpu/drm/i915/i915_ioc32.c
index 3c59584161c2..2e0613e26251 100644
--- a/drivers/gpu/drm/i915/i915_ioc32.c
+++ b/drivers/gpu/drm/i915/i915_ioc32.c
@@ -208,7 +208,7 @@ long i915_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	if (nr < DRM_COMMAND_BASE)
 		return drm_compat_ioctl(filp, cmd, arg);
 
-	if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(i915_compat_ioctls))
+	if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(i915_compat_ioctls))
 		fn = i915_compat_ioctls[nr - DRM_COMMAND_BASE];
 
 	if (fn != NULL)
diff --git a/drivers/gpu/drm/mga/mga_ioc32.c b/drivers/gpu/drm/mga/mga_ioc32.c
index 86b4bb804852..729bfd56b55f 100644
--- a/drivers/gpu/drm/mga/mga_ioc32.c
+++ b/drivers/gpu/drm/mga/mga_ioc32.c
@@ -214,7 +214,7 @@ long mga_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	if (nr < DRM_COMMAND_BASE)
 		return drm_compat_ioctl(filp, cmd, arg);
 
-	if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(mga_compat_ioctls))
+	if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(mga_compat_ioctls))
 		fn = mga_compat_ioctls[nr - DRM_COMMAND_BASE];
 
 	if (fn != NULL)
diff --git a/drivers/gpu/drm/mga/mga_state.c b/drivers/gpu/drm/mga/mga_state.c
index 3cb58df5237e..792f924496fc 100644
--- a/drivers/gpu/drm/mga/mga_state.c
+++ b/drivers/gpu/drm/mga/mga_state.c
@@ -1099,4 +1099,4 @@ const struct drm_ioctl_desc mga_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(MGA_DMA_BOOTSTRAP, mga_dma_bootstrap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 };
 
-int mga_max_ioctl = DRM_ARRAY_SIZE(mga_ioctls);
+int mga_max_ioctl = ARRAY_SIZE(mga_ioctls);
diff --git a/drivers/gpu/drm/nouveau/nouveau_ioc32.c b/drivers/gpu/drm/nouveau/nouveau_ioc32.c
index c1a7e5a73a26..462679a8fec5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ioc32.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ioc32.c
@@ -57,7 +57,7 @@ long nouveau_compat_ioctl(struct file *filp, unsigned int cmd,
 		return drm_compat_ioctl(filp, cmd, arg);
 
 #if 0
-	if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(mga_compat_ioctls))
+	if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(mga_compat_ioctls))
 		fn = nouveau_compat_ioctls[nr - DRM_COMMAND_BASE];
 #endif
 	if (fn != NULL)
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 0bb86e6d41b4..b110883f8253 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -451,4 +451,4 @@ const struct drm_ioctl_desc qxl_ioctls[] = {
 			  DRM_AUTH|DRM_UNLOCKED),
 };
 
-int qxl_max_ioctls = DRM_ARRAY_SIZE(qxl_ioctls);
+int qxl_max_ioctls = ARRAY_SIZE(qxl_ioctls);
diff --git a/drivers/gpu/drm/r128/r128_ioc32.c b/drivers/gpu/drm/r128/r128_ioc32.c
index b0d0fd3e4376..663f38c63ba6 100644
--- a/drivers/gpu/drm/r128/r128_ioc32.c
+++ b/drivers/gpu/drm/r128/r128_ioc32.c
@@ -203,7 +203,7 @@ long r128_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	if (nr < DRM_COMMAND_BASE)
 		return drm_compat_ioctl(filp, cmd, arg);
 
-	if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(r128_compat_ioctls))
+	if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(r128_compat_ioctls))
 		fn = r128_compat_ioctls[nr - DRM_COMMAND_BASE];
 
 	if (fn != NULL)
diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c
index 97064dd434c2..575e986f82a7 100644
--- a/drivers/gpu/drm/r128/r128_state.c
+++ b/drivers/gpu/drm/r128/r128_state.c
@@ -1641,4 +1641,4 @@ const struct drm_ioctl_desc r128_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(R128_GETPARAM, r128_getparam, DRM_AUTH),
 };
 
-int r128_max_ioctl = DRM_ARRAY_SIZE(r128_ioctls);
+int r128_max_ioctl = ARRAY_SIZE(r128_ioctls);
diff --git a/drivers/gpu/drm/radeon/radeon_ioc32.c b/drivers/gpu/drm/radeon/radeon_ioc32.c
index bdb0f93e73bc..0b98ea134579 100644
--- a/drivers/gpu/drm/radeon/radeon_ioc32.c
+++ b/drivers/gpu/drm/radeon/radeon_ioc32.c
@@ -399,7 +399,7 @@ long radeon_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	if (nr < DRM_COMMAND_BASE)
 		return drm_compat_ioctl(filp, cmd, arg);
 
-	if (nr < DRM_COMMAND_BASE + DRM_ARRAY_SIZE(radeon_compat_ioctls))
+	if (nr < DRM_COMMAND_BASE + ARRAY_SIZE(radeon_compat_ioctls))
 		fn = radeon_compat_ioctls[nr - DRM_COMMAND_BASE];
 
 	if (fn != NULL)
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index eaaedba04675..f0717373a508 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -859,4 +859,4 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
-int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
+int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index b576549fc783..23bb64fd775f 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -3258,4 +3258,4 @@ struct drm_ioctl_desc radeon_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
 };
 
-int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
+int radeon_max_ioctl = ARRAY_SIZE(radeon_ioctls);
diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c
index d2b2df9e26f3..c97cdc9ab239 100644
--- a/drivers/gpu/drm/savage/savage_bci.c
+++ b/drivers/gpu/drm/savage/savage_bci.c
@@ -1079,4 +1079,4 @@ const struct drm_ioctl_desc savage_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_WAIT, savage_bci_event_wait, DRM_AUTH),
 };
 
-int savage_max_ioctl = DRM_ARRAY_SIZE(savage_ioctls);
+int savage_max_ioctl = ARRAY_SIZE(savage_ioctls);
diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c
index 0573be0d2933..77f288e4a0a6 100644
--- a/drivers/gpu/drm/sis/sis_mm.c
+++ b/drivers/gpu/drm/sis/sis_mm.c
@@ -359,4 +359,4 @@ const struct drm_ioctl_desc sis_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(SIS_FB_INIT, sis_fb_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY),
 };
 
-int sis_max_ioctl = DRM_ARRAY_SIZE(sis_ioctls);
+int sis_max_ioctl = ARRAY_SIZE(sis_ioctls);
diff --git a/drivers/gpu/drm/via/via_dma.c b/drivers/gpu/drm/via/via_dma.c
index a18479c6b6da..6fc0648dd37f 100644
--- a/drivers/gpu/drm/via/via_dma.c
+++ b/drivers/gpu/drm/via/via_dma.c
@@ -737,4 +737,4 @@ const struct drm_ioctl_desc via_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH)
 };
 
-int via_max_ioctl = DRM_ARRAY_SIZE(via_ioctls);
+int via_max_ioctl = ARRAY_SIZE(via_ioctls);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 6bdd15eea7e8..246a62bab378 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1417,7 +1417,7 @@ static struct drm_driver driver = {
 	.enable_vblank = vmw_enable_vblank,
 	.disable_vblank = vmw_disable_vblank,
 	.ioctls = vmw_ioctls,
-	.num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls),
+	.num_ioctls = ARRAY_SIZE(vmw_ioctls),
 	.master_create = vmw_master_create,
 	.master_destroy = vmw_master_destroy,
 	.master_set = vmw_master_set,
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 83222db41566..8af71a8e2c00 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -234,8 +234,6 @@ int drm_err(const char *func, const char *format, ...);
 /** \name Internal types and structures */
 /*@{*/
 
-#define DRM_ARRAY_SIZE(x) ARRAY_SIZE(x)
-
 #define DRM_IF_VERSION(maj, min) (maj << 16 | min)
 
 /**
-- 
cgit v1.2.3-71-gd317


From 7d824b6f9cf28917d8a05891ef423fb0e4e34c69 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.cz>
Date: Wed, 7 May 2014 17:37:51 +0200
Subject: btrfs: balance filter: add limit of processed chunks

This started as debugging helper, to watch the effects of converting
between raid levels on multiple devices, but could be useful standalone.

In my case the usage filter was not finegrained enough and led to
converting too many chunks at once. Another example use is in connection
with drange+devid or vrange filters that allow to work with a specific
chunk or even with a chunk on a given device.

The limit filter applies last, the value of 0 means no limiting.

CC: Ilya Dryomov <idryomov@gmail.com>
CC: Hugo Mills <hugo@carfax.org.uk>
Signed-off-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/ctree.h           |  7 ++++++-
 fs/btrfs/volumes.c         | 18 ++++++++++++++++++
 fs/btrfs/volumes.h         |  1 +
 include/uapi/linux/btrfs.h |  3 ++-
 4 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ba6b88528dc7..e6f899dc5e47 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -840,7 +840,10 @@ struct btrfs_disk_balance_args {
 	/* BTRFS_BALANCE_ARGS_* */
 	__le64 flags;
 
-	__le64 unused[8];
+	/* BTRFS_BALANCE_ARGS_LIMIT value */
+	__le64 limit;
+
+	__le64 unused[7];
 } __attribute__ ((__packed__));
 
 /*
@@ -2897,6 +2900,7 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
 	cpu->vend = le64_to_cpu(disk->vend);
 	cpu->target = le64_to_cpu(disk->target);
 	cpu->flags = le64_to_cpu(disk->flags);
+	cpu->limit = le64_to_cpu(disk->limit);
 }
 
 static inline void
@@ -2914,6 +2918,7 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
 	disk->vend = cpu_to_le64(cpu->vend);
 	disk->target = cpu_to_le64(cpu->target);
 	disk->flags = cpu_to_le64(cpu->flags);
+	disk->limit = cpu_to_le64(cpu->limit);
 }
 
 /* struct btrfs_super_block */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 49d7fab73360..3b761a456acd 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2922,6 +2922,16 @@ static int should_balance_chunk(struct btrfs_root *root,
 		return 0;
 	}
 
+	/*
+	 * limited by count, must be the last filter
+	 */
+	if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
+		if (bargs->limit == 0)
+			return 0;
+		else
+			bargs->limit--;
+	}
+
 	return 1;
 }
 
@@ -2944,6 +2954,9 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 	int ret;
 	int enospc_errors = 0;
 	bool counting = true;
+	u64 limit_data = bctl->data.limit;
+	u64 limit_meta = bctl->meta.limit;
+	u64 limit_sys = bctl->sys.limit;
 
 	/* step one make some room on all the devices */
 	devices = &fs_info->fs_devices->devices;
@@ -2982,6 +2995,11 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 	memset(&bctl->stat, 0, sizeof(bctl->stat));
 	spin_unlock(&fs_info->balance_lock);
 again:
+	if (!counting) {
+		bctl->data.limit = limit_data;
+		bctl->meta.limit = limit_meta;
+		bctl->sys.limit = limit_sys;
+	}
 	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
 	key.offset = (u64)-1;
 	key.type = BTRFS_CHUNK_ITEM_KEY;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 80754f9dd3df..1a15bbeb65e2 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -255,6 +255,7 @@ struct map_lookup {
 #define BTRFS_BALANCE_ARGS_DEVID	(1ULL << 2)
 #define BTRFS_BALANCE_ARGS_DRANGE	(1ULL << 3)
 #define BTRFS_BALANCE_ARGS_VRANGE	(1ULL << 4)
+#define BTRFS_BALANCE_ARGS_LIMIT	(1ULL << 5)
 
 /*
  * Profile changing flags.  When SOFT is set we won't relocate chunk if
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index b4d69092fbdb..901a3c563f60 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -211,7 +211,8 @@ struct btrfs_balance_args {
 
 	__u64 flags;
 
-	__u64 unused[8];
+	__u64 limit;		/* limit number of processed chunks */
+	__u64 unused[7];
 } __attribute__ ((__packed__));
 
 /* report balance progress to userspace */
-- 
cgit v1.2.3-71-gd317


From 80a773fbfc2d6b5b2478377e8ac271d495f55e73 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.cz>
Date: Wed, 7 May 2014 18:17:06 +0200
Subject: btrfs: retrieve more info from FS_INFO ioctl

Provide the basic information about filesystem through the ioctl:
* b-tree node size (same as leaf size)
* sector size
* expected alignment of CLONE_RANGE and EXTENT_SAME ioctl arguments

Backward compatibility: if the values are 0, kernel does not provide
this information, the applications should ignore them.

Signed-off-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/ioctl.c           | 4 ++++
 include/uapi/linux/btrfs.h | 6 +++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2f6d7b13b5bd..c6c8e3560e73 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2574,6 +2574,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
 	}
 	mutex_unlock(&fs_devices->device_list_mutex);
 
+	fi_args->nodesize = root->fs_info->super_copy->nodesize;
+	fi_args->sectorsize = root->fs_info->super_copy->sectorsize;
+	fi_args->clone_alignment = root->fs_info->super_copy->sectorsize;
+
 	if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
 		ret = -EFAULT;
 
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 901a3c563f60..7554fd381a56 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -181,7 +181,11 @@ struct btrfs_ioctl_fs_info_args {
 	__u64 max_id;				/* out */
 	__u64 num_devices;			/* out */
 	__u8 fsid[BTRFS_FSID_SIZE];		/* out */
-	__u64 reserved[124];			/* pad to 1k */
+	__u32 nodesize;				/* out */
+	__u32 sectorsize;			/* out */
+	__u32 clone_alignment;			/* out */
+	__u32 reserved32;
+	__u64 reserved[122];			/* pad to 1k */
 };
 
 struct btrfs_ioctl_feature_flags {
-- 
cgit v1.2.3-71-gd317


From 5d02626d3167cd4214bf611362e08dfd1e98c62e Mon Sep 17 00:00:00 2001
From: Mario Kleiner <mario.kleiner.de@gmail.com>
Date: Thu, 5 Jun 2014 09:52:10 -0400
Subject: drm/edid: Store all supported hdmi deep color modes in
 drm_display_info

HDMI deep color setup must know which modes are supported if
it needs to degrade gracefully, as only 12 bpc / dc_36 is
guaranteed, but 10 bpc / dc_30 is optional. The maximum bpc
is not sufficient for this.

Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/drm_edid.c | 3 +++
 include/drm/drm_crtc.h     | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 7be21781d3bd..dfa9769b26b5 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -3471,18 +3471,21 @@ static bool drm_assign_hdmi_deep_color_info(struct edid *edid,
 
 			if (hdmi[6] & DRM_EDID_HDMI_DC_30) {
 				dc_bpc = 10;
+				info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_30;
 				DRM_DEBUG("%s: HDMI sink does deep color 30.\n",
 						  connector->name);
 			}
 
 			if (hdmi[6] & DRM_EDID_HDMI_DC_36) {
 				dc_bpc = 12;
+				info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_36;
 				DRM_DEBUG("%s: HDMI sink does deep color 36.\n",
 						  connector->name);
 			}
 
 			if (hdmi[6] & DRM_EDID_HDMI_DC_48) {
 				dc_bpc = 16;
+				info->edid_hdmi_dc_modes |= DRM_EDID_HDMI_DC_48;
 				DRM_DEBUG("%s: HDMI sink does deep color 48.\n",
 						  connector->name);
 			}
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index a7fac5686915..251b75e6bf7a 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -121,6 +121,9 @@ struct drm_display_info {
 	enum subpixel_order subpixel_order;
 	u32 color_formats;
 
+	/* Mask of supported hdmi deep color modes */
+	u8 edid_hdmi_dc_modes;
+
 	u8 cea_rev;
 };
 
-- 
cgit v1.2.3-71-gd317


From 65fcf668ee7f2de2fbd580e1297336045f1ef6f4 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 2 Jun 2014 16:13:21 -0400
Subject: drm/radeon: add query for number of active CUs

Query to find out how many compute units on a GPU.
Useful for OpenCL usermode drivers.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/cik.c        | 12 +++++++++++-
 drivers/gpu/drm/radeon/evergreen.c  | 12 ++++++++++++
 drivers/gpu/drm/radeon/ni.c         | 12 ++++++++++++
 drivers/gpu/drm/radeon/r600.c       |  3 +++
 drivers/gpu/drm/radeon/radeon.h     |  6 ++++++
 drivers/gpu/drm/radeon/radeon_drv.c |  3 ++-
 drivers/gpu/drm/radeon/radeon_kms.c | 16 ++++++++++++++++
 drivers/gpu/drm/radeon/rv770.c      |  3 +++
 drivers/gpu/drm/radeon/si.c         | 11 ++++++++++-
 include/uapi/drm/radeon_drm.h       |  2 +-
 10 files changed, 76 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index e4b2f2b51bb8..dcd4518a9b08 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -80,6 +80,7 @@ extern int sumo_rlc_init(struct radeon_device *rdev);
 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 extern void si_rlc_reset(struct radeon_device *rdev);
 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
+static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 extern int cik_sdma_resume(struct radeon_device *rdev);
 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
 extern void cik_sdma_fini(struct radeon_device *rdev);
@@ -3257,7 +3258,7 @@ static void cik_gpu_init(struct radeon_device *rdev)
 	u32 mc_shared_chmap, mc_arb_ramcfg;
 	u32 hdp_host_path_cntl;
 	u32 tmp;
-	int i, j;
+	int i, j, k;
 
 	switch (rdev->family) {
 	case CHIP_BONAIRE:
@@ -3446,6 +3447,15 @@ static void cik_gpu_init(struct radeon_device *rdev)
 		     rdev->config.cik.max_sh_per_se,
 		     rdev->config.cik.max_backends_per_se);
 
+	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
+			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
+				rdev->config.cik.active_cus +=
+					hweight32(cik_get_cu_active_bitmap(rdev, i, j));
+			}
+		}
+	}
+
 	/* set HW defaults for 3D engine */
 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
 
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 653eff814504..e2f605224e8c 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3337,6 +3337,18 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
 			disabled_rb_mask &= ~(1 << i);
 	}
 
+	for (i = 0; i < rdev->config.evergreen.num_ses; i++) {
+		u32 simd_disable_bitmap;
+
+		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
+		simd_disable_bitmap |= 0xffffffff << rdev->config.evergreen.max_simds;
+		tmp <<= 16;
+		tmp |= simd_disable_bitmap;
+	}
+	rdev->config.evergreen.active_simds = hweight32(~tmp);
+
 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index c0fd8f647cf0..5a33ca681867 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1057,6 +1057,18 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 			disabled_rb_mask &= ~(1 << i);
 	}
 
+	for (i = 0; i < rdev->config.cayman.max_shader_engines; i++) {
+		u32 simd_disable_bitmap;
+
+		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
+		simd_disable_bitmap = (RREG32(CC_GC_SHADER_PIPE_CONFIG) & 0xffff0000) >> 16;
+		simd_disable_bitmap |= 0xffffffff << rdev->config.cayman.max_simds_per_se;
+		tmp <<= 16;
+		tmp |= simd_disable_bitmap;
+	}
+	rdev->config.cayman.active_simds = hweight32(~tmp);
+
 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
 
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index c2ff17cebd91..c66952d4b00c 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1958,6 +1958,9 @@ static void r600_gpu_init(struct radeon_device *rdev)
 	if (tmp < rdev->config.r600.max_simds) {
 		rdev->config.r600.max_simds = tmp;
 	}
+	tmp = rdev->config.r600.max_simds -
+		r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R6XX_MAX_SIMDS_MASK);
+	rdev->config.r600.active_simds = tmp;
 
 	disabled_rb_mask = (RREG32(CC_RB_BACKEND_DISABLE) >> 16) & R6XX_MAX_BACKENDS_MASK;
 	tmp = (tiling_config & PIPE_TILING__MASK) >> PIPE_TILING__SHIFT;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index dd77111096bd..4b0bbf88d5c0 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1932,6 +1932,7 @@ struct r600_asic {
 	unsigned		tiling_group_size;
 	unsigned		tile_config;
 	unsigned		backend_map;
+	unsigned		active_simds;
 };
 
 struct rv770_asic {
@@ -1957,6 +1958,7 @@ struct rv770_asic {
 	unsigned		tiling_group_size;
 	unsigned		tile_config;
 	unsigned		backend_map;
+	unsigned		active_simds;
 };
 
 struct evergreen_asic {
@@ -1983,6 +1985,7 @@ struct evergreen_asic {
 	unsigned tiling_group_size;
 	unsigned tile_config;
 	unsigned backend_map;
+	unsigned active_simds;
 };
 
 struct cayman_asic {
@@ -2021,6 +2024,7 @@ struct cayman_asic {
 	unsigned multi_gpu_tile_size;
 
 	unsigned tile_config;
+	unsigned active_simds;
 };
 
 struct si_asic {
@@ -2051,6 +2055,7 @@ struct si_asic {
 
 	unsigned tile_config;
 	uint32_t tile_mode_array[32];
+	uint32_t active_cus;
 };
 
 struct cik_asic {
@@ -2082,6 +2087,7 @@ struct cik_asic {
 	unsigned tile_config;
 	uint32_t tile_mode_array[32];
 	uint32_t macrotile_mode_array[16];
+	uint32_t active_cus;
 };
 
 union radeon_asic_config {
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index b7a2ec2d1598..6e3017413386 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -81,9 +81,10 @@
  *   2.37.0 - allow GS ring setup on r6xx/r7xx
  *   2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN),
  *            CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
+ *   2.39.0 - Add INFO query for number of active CUs
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	38
+#define KMS_DRIVER_MINOR	39
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index eaaedba04675..5cd70f9e7311 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -513,6 +513,22 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		value_size = sizeof(uint64_t);
 		value64 = atomic64_read(&rdev->gtt_usage);
 		break;
+	case RADEON_INFO_ACTIVE_CU_COUNT:
+		if (rdev->family >= CHIP_BONAIRE)
+			*value = rdev->config.cik.active_cus;
+		else if (rdev->family >= CHIP_TAHITI)
+			*value = rdev->config.si.active_cus;
+		else if (rdev->family >= CHIP_CAYMAN)
+			*value = rdev->config.cayman.active_simds;
+		else if (rdev->family >= CHIP_CEDAR)
+			*value = rdev->config.evergreen.active_simds;
+		else if (rdev->family >= CHIP_RV770)
+			*value = rdev->config.rv770.active_simds;
+		else if (rdev->family >= CHIP_R600)
+			*value = rdev->config.r600.active_simds;
+		else
+			*value = 1;
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 97b776666b75..da8703d8d455 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -1327,6 +1327,9 @@ static void rv770_gpu_init(struct radeon_device *rdev)
 	if (tmp < rdev->config.rv770.max_simds) {
 		rdev->config.rv770.max_simds = tmp;
 	}
+	tmp = rdev->config.rv770.max_simds -
+		r600_count_pipe_bits((cc_gc_shader_pipe_config >> 16) & R7XX_MAX_SIMDS_MASK);
+	rdev->config.rv770.active_simds = tmp;
 
 	switch (rdev->config.rv770.max_tile_pipes) {
 	case 1:
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index ec13e8df4c30..730cee2c34cf 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -71,6 +71,7 @@ MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
 
+static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 static void si_pcie_gen3_enable(struct radeon_device *rdev);
 static void si_program_aspm(struct radeon_device *rdev);
 extern void sumo_rlc_fini(struct radeon_device *rdev);
@@ -2900,7 +2901,7 @@ static void si_gpu_init(struct radeon_device *rdev)
 	u32 sx_debug_1;
 	u32 hdp_host_path_cntl;
 	u32 tmp;
-	int i, j;
+	int i, j, k;
 
 	switch (rdev->family) {
 	case CHIP_TAHITI:
@@ -3098,6 +3099,14 @@ static void si_gpu_init(struct radeon_device *rdev)
 		     rdev->config.si.max_sh_per_se,
 		     rdev->config.si.max_cu_per_sh);
 
+	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
+		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
+			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
+				rdev->config.si.active_cus +=
+					hweight32(si_get_cu_active_bitmap(rdev, i, j));
+			}
+		}
+	}
 
 	/* set HW defaults for 3D engine */
 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index aefa2f6afa3b..1cc0b610f162 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -1007,7 +1007,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_NUM_BYTES_MOVED	0x1d
 #define RADEON_INFO_VRAM_USAGE		0x1e
 #define RADEON_INFO_GTT_USAGE		0x1f
-
+#define RADEON_INFO_ACTIVE_CU_COUNT	0x20
 
 struct drm_radeon_info {
 	uint32_t		request;
-- 
cgit v1.2.3-71-gd317


From 8b8b36834d0fff67fc8668093f4312dd04dcf21d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 10 Jun 2014 09:46:00 -0400
Subject: ring-buffer: Check if buffer exists before polling

The per_cpu buffers are created one per possible CPU. But these do
not mean that those CPUs are online, nor do they even exist.

With the addition of the ring buffer polling, it assumes that the
caller polls on an existing buffer. But this is not the case if
the user reads trace_pipe from a CPU that does not exist, and this
causes the kernel to crash.

Simple fix is to check the cpu against buffer bitmask against to see
if the buffer was allocated or not and return -ENODEV if it is
not.

More updates were done to pass the -ENODEV back up to userspace.

Link: http://lkml.kernel.org/r/5393DB61.6060707@oracle.com

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  2 +-
 kernel/trace/ring_buffer.c  |  5 ++++-
 kernel/trace/trace.c        | 22 ++++++++++++++++------
 3 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index d69cf637a15a..49a4d6f59108 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
-void ring_buffer_wait(struct ring_buffer *buffer, int cpu);
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu);
 int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 			  struct file *filp, poll_table *poll_table);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c634868c2921..7c56c3d06943 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -543,7 +543,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
  * as data is added to any of the @buffer's cpu buffers. Otherwise
  * it will wait for data to be added to a specific cpu buffer.
  */
-void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	DEFINE_WAIT(wait);
@@ -557,6 +557,8 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 	if (cpu == RING_BUFFER_ALL_CPUS)
 		work = &buffer->irq_work;
 	else {
+		if (!cpumask_test_cpu(cpu, buffer->cpumask))
+			return -ENODEV;
 		cpu_buffer = buffer->buffers[cpu];
 		work = &cpu_buffer->irq_work;
 	}
@@ -591,6 +593,7 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 		schedule();
 
 	finish_wait(&work->waiters, &wait);
+	return 0;
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 16f7038d1f4d..56422f1decba 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1085,13 +1085,13 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
-static void wait_on_pipe(struct trace_iterator *iter)
+static int wait_on_pipe(struct trace_iterator *iter)
 {
 	/* Iterators are static, they should be filled or empty */
 	if (trace_buffer_iter(iter, iter->cpu_file))
-		return;
+		return 0;
 
-	ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
+	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
 }
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -4378,6 +4378,7 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
 static int tracing_wait_pipe(struct file *filp)
 {
 	struct trace_iterator *iter = filp->private_data;
+	int ret;
 
 	while (trace_empty(iter)) {
 
@@ -4399,10 +4400,13 @@ static int tracing_wait_pipe(struct file *filp)
 
 		mutex_unlock(&iter->mutex);
 
-		wait_on_pipe(iter);
+		ret = wait_on_pipe(iter);
 
 		mutex_lock(&iter->mutex);
 
+		if (ret)
+			return ret;
+
 		if (signal_pending(current))
 			return -EINTR;
 	}
@@ -5327,8 +5331,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 				goto out_unlock;
 			}
 			mutex_unlock(&trace_types_lock);
-			wait_on_pipe(iter);
+			ret = wait_on_pipe(iter);
 			mutex_lock(&trace_types_lock);
+			if (ret) {
+				size = ret;
+				goto out_unlock;
+			}
 			if (signal_pending(current)) {
 				size = -EINTR;
 				goto out_unlock;
@@ -5538,8 +5546,10 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 			goto out;
 		}
 		mutex_unlock(&trace_types_lock);
-		wait_on_pipe(iter);
+		ret = wait_on_pipe(iter);
 		mutex_lock(&trace_types_lock);
+		if (ret)
+			goto out;
 		if (signal_pending(current)) {
 			ret = -EINTR;
 			goto out;
-- 
cgit v1.2.3-71-gd317


From 962bd40bc30e412828e091bfda041b7547e779c8 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Tue, 10 Jun 2014 12:24:40 -0400
Subject: locks: add missing memory barrier in break_deleg

break_deleg is subject to the same potential race as break_lease. Add
a memory barrier to prevent it.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
---
 include/linux/fs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3f46e499dd0..22ae79650b82 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1914,6 +1914,12 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
 
 static inline int break_deleg(struct inode *inode, unsigned int mode)
 {
+	/*
+	 * Since this check is lockless, we must ensure that any refcounts
+	 * taken are done before checking inode->i_flock. Otherwise, we could
+	 * end up racing with tasks trying to set a new lease on this file.
+	 */
+	smp_mb();
 	if (inode->i_flock)
 		return __break_lease(inode, mode, FL_DELEG);
 	return 0;
-- 
cgit v1.2.3-71-gd317


From 30dc5e63d6a5ad24894b5512d10b228d73645a44 Mon Sep 17 00:00:00 2001
From: Tatyana Nikolova <Tatyana.E.Nikolova@intel.com>
Date: Wed, 26 Mar 2014 17:07:35 -0500
Subject: RDMA/core: Add support for iWARP Port Mapper user space service

This patch adds iWARP Port Mapper (IWPM) Version 2 support.  The iWARP
Port Mapper implementation is based on the port mapper specification
section in the Sockets Direct Protocol paper -
http://www.rdmaconsortium.org/home/draft-pinkerton-iwarp-sdp-v1.0.pdf

Existing iWARP RDMA providers use the same IP address as the native
TCP/IP stack when creating RDMA connections.  They need a mechanism to
claim the TCP ports used for RDMA connections to prevent TCP port
collisions when other host applications use TCP ports.  The iWARP Port
Mapper provides a standard mechanism to accomplish this.  Without this
service it is possible for RDMA application to bind/listen on the same
port which is already being used by native TCP host application.  If
that happens the incoming TCP connection data can be passed to the
RDMA stack with error.

The iWARP Port Mapper solution doesn't contain any changes to the
existing network stack in the kernel space.  All the changes are
contained with the infiniband tree and also in user space.

The iWARP Port Mapper service is implemented as a user space daemon
process.  Source for the IWPM service is located at
http://git.openfabrics.org/git?p=~tnikolova/libiwpm-1.0.0/.git;a=summary

The iWARP driver (port mapper client) sends to the IWPM service the
local IP address and TCP port it has received from the RDMA
application, when starting a connection.  The IWPM service performs a
socket bind from user space to get an available TCP port, called a
mapped port, and communicates it back to the client.  In that sense,
the IWPM service is used to map the TCP port, which the RDMA
application uses to any port available from the host TCP port
space. The mapped ports are used in iWARP RDMA connections to avoid
collisions with native TCP stack which is aware that these ports are
taken. When an RDMA connection using a mapped port is terminated, the
client notifies the IWPM service, which then releases the TCP port.

The message exchange between the IWPM service and the iWARP drivers
(between user space and kernel space) is implemented using netlink
sockets.

1) Netlink interface functions are added: ibnl_unicast() and
   ibnl_mulitcast() for sending netlink messages to user space

2) The signature of the existing ibnl_put_msg() is changed to be more
   generic

3) Two netlink clients are added: RDMA_NL_NES, RDMA_NL_C4IW
   corresponding to the two iWarp drivers - nes and cxgb4 which use
   the IWPM service

4) Enums are added to enumerate the attributes in the netlink
   messages, which are exchanged between the user space IWPM service
   and the iWARP drivers

Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: PJ Waskiewicz <pj.waskiewicz@solidfire.com>

[ Fold in range checking fixes and nlh_next removal as suggested by Dan
  Carpenter and Steve Wise.  Fix sparse endianness in hash.  - Roland ]

Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/core/Makefile    |   2 +-
 drivers/infiniband/core/cma.c       |   3 +-
 drivers/infiniband/core/iwpm_msg.c  | 685 ++++++++++++++++++++++++++++++++++++
 drivers/infiniband/core/iwpm_util.c | 607 ++++++++++++++++++++++++++++++++
 drivers/infiniband/core/iwpm_util.h | 238 +++++++++++++
 drivers/infiniband/core/netlink.c   |  18 +-
 include/rdma/iw_portmap.h           | 199 +++++++++++
 include/rdma/rdma_netlink.h         |  23 +-
 include/uapi/rdma/rdma_netlink.h    |  96 ++++-
 9 files changed, 1865 insertions(+), 6 deletions(-)
 create mode 100644 drivers/infiniband/core/iwpm_msg.c
 create mode 100644 drivers/infiniband/core/iwpm_util.c
 create mode 100644 drivers/infiniband/core/iwpm_util.h
 create mode 100644 include/rdma/iw_portmap.h

(limited to 'include')

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 3ab3865544bb..ffd0af6734af 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -18,7 +18,7 @@ ib_sa-y :=			sa_query.o multicast.o
 
 ib_cm-y :=			cm.o
 
-iw_cm-y :=			iwcm.o
+iw_cm-y :=			iwcm.o iwpm_util.o iwpm_msg.o
 
 rdma_cm-y :=			cma.o
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 42c3058e6e9c..d570030d899c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3607,7 +3607,8 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
 
 			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
 						sizeof *id_stats, RDMA_NL_RDMA_CM,
-						RDMA_NL_RDMA_CM_ID_STATS);
+						RDMA_NL_RDMA_CM_ID_STATS,
+						NLM_F_MULTI);
 			if (!id_stats)
 				goto out;
 
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
new file mode 100644
index 000000000000..b85ddbc979e0
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "iwpm_util.h"
+
+static const char iwpm_ulib_name[] = "iWarpPortMapperUser";
+static int iwpm_ulib_version = 3;
+static int iwpm_user_pid = IWPM_PID_UNDEFINED;
+static atomic_t echo_nlmsg_seq;
+
+int iwpm_valid_pid(void)
+{
+	return iwpm_user_pid > 0;
+}
+EXPORT_SYMBOL(iwpm_valid_pid);
+
+/*
+ * iwpm_register_pid - Send a netlink query to user space
+ *                     for the iwarp port mapper pid
+ *
+ * nlmsg attributes:
+ *	[IWPM_NLA_REG_PID_SEQ]
+ *	[IWPM_NLA_REG_IF_NAME]
+ *	[IWPM_NLA_REG_IBDEV_NAME]
+ *	[IWPM_NLA_REG_ULIB_NAME]
+ */
+int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
+{
+	struct sk_buff *skb = NULL;
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client)) {
+		err_str = "Invalid port mapper client";
+		goto pid_query_error;
+	}
+	if (iwpm_registered_client(nl_client))
+		return 0;
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client);
+	if (!skb) {
+		err_str = "Unable to create a nlmsg";
+		goto pid_query_error;
+	}
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
+	if (!nlmsg_request) {
+		err_str = "Unable to allocate netlink request";
+		goto pid_query_error;
+	}
+	msg_seq = atomic_read(&echo_nlmsg_seq);
+
+	/* fill in the pid request message */
+	err_str = "Unable to put attribute of the nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ);
+	if (ret)
+		goto pid_query_error;
+	ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE,
+				pm_msg->if_name, IWPM_NLA_REG_IF_NAME);
+	if (ret)
+		goto pid_query_error;
+	ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE,
+				pm_msg->dev_name, IWPM_NLA_REG_IBDEV_NAME);
+	if (ret)
+		goto pid_query_error;
+	ret = ibnl_put_attr(skb, nlh, IWPM_ULIBNAME_SIZE,
+				(char *)iwpm_ulib_name, IWPM_NLA_REG_ULIB_NAME);
+	if (ret)
+		goto pid_query_error;
+
+	pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
+		__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
+
+	ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
+	if (ret) {
+		skb = NULL; /* skb is freed in the netlink send-op handling */
+		iwpm_set_registered(nl_client, 1);
+		iwpm_user_pid = IWPM_PID_UNAVAILABLE;
+		err_str = "Unable to send a nlmsg";
+		goto pid_query_error;
+	}
+	nlmsg_request->req_buffer = pm_msg;
+	ret = iwpm_wait_complete_req(nlmsg_request);
+	return ret;
+pid_query_error:
+	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+	if (skb)
+		dev_kfree_skb(skb);
+	if (nlmsg_request)
+		iwpm_free_nlmsg_request(&nlmsg_request->kref);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_register_pid);
+
+/*
+ * iwpm_add_mapping - Send a netlink add mapping message
+ *                    to the port mapper
+ * nlmsg attributes:
+ *	[IWPM_NLA_MANAGE_MAPPING_SEQ]
+ *	[IWPM_NLA_MANAGE_ADDR]
+ */
+int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
+{
+	struct sk_buff *skb = NULL;
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client)) {
+		err_str = "Invalid port mapper client";
+		goto add_mapping_error;
+	}
+	if (!iwpm_registered_client(nl_client)) {
+		err_str = "Unregistered port mapper client";
+		goto add_mapping_error;
+	}
+	if (!iwpm_valid_pid())
+		return 0;
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, nl_client);
+	if (!skb) {
+		err_str = "Unable to create a nlmsg";
+		goto add_mapping_error;
+	}
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
+	if (!nlmsg_request) {
+		err_str = "Unable to allocate netlink request";
+		goto add_mapping_error;
+	}
+	msg_seq = atomic_read(&echo_nlmsg_seq);
+	/* fill in the add mapping message */
+	err_str = "Unable to put attribute of the nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+				IWPM_NLA_MANAGE_MAPPING_SEQ);
+	if (ret)
+		goto add_mapping_error;
+	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+				&pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR);
+	if (ret)
+		goto add_mapping_error;
+	nlmsg_request->req_buffer = pm_msg;
+
+	ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+	if (ret) {
+		skb = NULL; /* skb is freed in the netlink send-op handling */
+		iwpm_user_pid = IWPM_PID_UNDEFINED;
+		err_str = "Unable to send a nlmsg";
+		goto add_mapping_error;
+	}
+	ret = iwpm_wait_complete_req(nlmsg_request);
+	return ret;
+add_mapping_error:
+	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+	if (skb)
+		dev_kfree_skb(skb);
+	if (nlmsg_request)
+		iwpm_free_nlmsg_request(&nlmsg_request->kref);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_add_mapping);
+
+/*
+ * iwpm_add_and_query_mapping - Send a netlink add and query
+ *                              mapping message to the port mapper
+ * nlmsg attributes:
+ *	[IWPM_NLA_QUERY_MAPPING_SEQ]
+ *	[IWPM_NLA_QUERY_LOCAL_ADDR]
+ *	[IWPM_NLA_QUERY_REMOTE_ADDR]
+ */
+int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
+{
+	struct sk_buff *skb = NULL;
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client)) {
+		err_str = "Invalid port mapper client";
+		goto query_mapping_error;
+	}
+	if (!iwpm_registered_client(nl_client)) {
+		err_str = "Unregistered port mapper client";
+		goto query_mapping_error;
+	}
+	if (!iwpm_valid_pid())
+		return 0;
+	ret = -ENOMEM;
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client);
+	if (!skb) {
+		err_str = "Unable to create a nlmsg";
+		goto query_mapping_error;
+	}
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq,
+				nl_client, GFP_KERNEL);
+	if (!nlmsg_request) {
+		err_str = "Unable to allocate netlink request";
+		goto query_mapping_error;
+	}
+	msg_seq = atomic_read(&echo_nlmsg_seq);
+
+	/* fill in the query message */
+	err_str = "Unable to put attribute of the nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+				IWPM_NLA_QUERY_MAPPING_SEQ);
+	if (ret)
+		goto query_mapping_error;
+	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+				&pm_msg->loc_addr, IWPM_NLA_QUERY_LOCAL_ADDR);
+	if (ret)
+		goto query_mapping_error;
+	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+				&pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR);
+	if (ret)
+		goto query_mapping_error;
+	nlmsg_request->req_buffer = pm_msg;
+
+	ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+	if (ret) {
+		skb = NULL; /* skb is freed in the netlink send-op handling */
+		err_str = "Unable to send a nlmsg";
+		goto query_mapping_error;
+	}
+	ret = iwpm_wait_complete_req(nlmsg_request);
+	return ret;
+query_mapping_error:
+	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+	if (skb)
+		dev_kfree_skb(skb);
+	if (nlmsg_request)
+		iwpm_free_nlmsg_request(&nlmsg_request->kref);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_add_and_query_mapping);
+
+/*
+ * iwpm_remove_mapping - Send a netlink remove mapping message
+ *                       to the port mapper
+ * nlmsg attributes:
+ *	[IWPM_NLA_MANAGE_MAPPING_SEQ]
+ *	[IWPM_NLA_MANAGE_ADDR]
+ */
+int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
+{
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client)) {
+		err_str = "Invalid port mapper client";
+		goto remove_mapping_error;
+	}
+	if (!iwpm_registered_client(nl_client)) {
+		err_str = "Unregistered port mapper client";
+		goto remove_mapping_error;
+	}
+	if (!iwpm_valid_pid())
+		return 0;
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client);
+	if (!skb) {
+		ret = -ENOMEM;
+		err_str = "Unable to create a nlmsg";
+		goto remove_mapping_error;
+	}
+	msg_seq = atomic_read(&echo_nlmsg_seq);
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	err_str = "Unable to put attribute of the nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+				IWPM_NLA_MANAGE_MAPPING_SEQ);
+	if (ret)
+		goto remove_mapping_error;
+	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+				local_addr, IWPM_NLA_MANAGE_ADDR);
+	if (ret)
+		goto remove_mapping_error;
+
+	ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+	if (ret) {
+		skb = NULL; /* skb is freed in the netlink send-op handling */
+		iwpm_user_pid = IWPM_PID_UNDEFINED;
+		err_str = "Unable to send a nlmsg";
+		goto remove_mapping_error;
+	}
+	iwpm_print_sockaddr(local_addr,
+			"remove_mapping: Local sockaddr:");
+	return 0;
+remove_mapping_error:
+	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+	if (skb)
+		dev_kfree_skb_any(skb);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_remove_mapping);
+
+/* netlink attribute policy for the received response to register pid request */
+static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
+	[IWPM_NLA_RREG_PID_SEQ]     = { .type = NLA_U32 },
+	[IWPM_NLA_RREG_IBDEV_NAME]  = { .type = NLA_STRING,
+					.len = IWPM_DEVNAME_SIZE - 1 },
+	[IWPM_NLA_RREG_ULIB_NAME]   = { .type = NLA_STRING,
+					.len = IWPM_ULIBNAME_SIZE - 1 },
+	[IWPM_NLA_RREG_ULIB_VER]    = { .type = NLA_U16 },
+	[IWPM_NLA_RREG_PID_ERR]     = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_register_pid_cb - Process a port mapper response to
+ *                        iwpm_register_pid()
+ */
+int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlattr *nltb[IWPM_NLA_RREG_PID_MAX];
+	struct iwpm_dev_data *pm_msg;
+	char *dev_name, *iwpm_name;
+	u32 msg_seq;
+	u8 nl_client;
+	u16 iwpm_version;
+	const char *msg_type = "Register Pid response";
+
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RREG_PID_MAX,
+				resp_reg_policy, nltb, msg_type))
+		return -EINVAL;
+
+	msg_seq = nla_get_u32(nltb[IWPM_NLA_RREG_PID_SEQ]);
+	nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+	if (!nlmsg_request) {
+		pr_info("%s: Could not find a matching request (seq = %u)\n",
+				 __func__, msg_seq);
+		return -EINVAL;
+	}
+	pm_msg = nlmsg_request->req_buffer;
+	nl_client = nlmsg_request->nl_client;
+	dev_name = (char *)nla_data(nltb[IWPM_NLA_RREG_IBDEV_NAME]);
+	iwpm_name = (char *)nla_data(nltb[IWPM_NLA_RREG_ULIB_NAME]);
+	iwpm_version = nla_get_u16(nltb[IWPM_NLA_RREG_ULIB_VER]);
+
+	/* check device name, ulib name and version */
+	if (strcmp(pm_msg->dev_name, dev_name) ||
+			strcmp(iwpm_ulib_name, iwpm_name) ||
+			iwpm_version != iwpm_ulib_version) {
+
+		pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
+				__func__, dev_name, iwpm_name, iwpm_version);
+		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+		goto register_pid_response_exit;
+	}
+	iwpm_user_pid = cb->nlh->nlmsg_pid;
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+	pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
+			__func__, iwpm_user_pid);
+	if (iwpm_valid_client(nl_client))
+		iwpm_set_registered(nl_client, 1);
+register_pid_response_exit:
+	nlmsg_request->request_done = 1;
+	/* always for found nlmsg_request */
+	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+	barrier();
+	wake_up(&nlmsg_request->waitq);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_register_pid_cb);
+
+/* netlink attribute policy for the received response to add mapping request */
+static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
+	[IWPM_NLA_MANAGE_MAPPING_SEQ]     = { .type = NLA_U32 },
+	[IWPM_NLA_MANAGE_ADDR]            = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RMANAGE_MAPPING_ERR]	  = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_add_mapping_cb - Process a port mapper response to
+ *                       iwpm_add_mapping()
+ */
+int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct iwpm_sa_data *pm_msg;
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlattr *nltb[IWPM_NLA_RMANAGE_MAPPING_MAX];
+	struct sockaddr_storage *local_sockaddr;
+	struct sockaddr_storage *mapped_sockaddr;
+	const char *msg_type;
+	u32 msg_seq;
+
+	msg_type = "Add Mapping response";
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RMANAGE_MAPPING_MAX,
+				resp_add_policy, nltb, msg_type))
+		return -EINVAL;
+
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+	msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]);
+	nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+	if (!nlmsg_request) {
+		pr_info("%s: Could not find a matching request (seq = %u)\n",
+				 __func__, msg_seq);
+		return -EINVAL;
+	}
+	pm_msg = nlmsg_request->req_buffer;
+	local_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_MANAGE_ADDR]);
+	mapped_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]);
+
+	if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) {
+		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+		goto add_mapping_response_exit;
+	}
+	if (mapped_sockaddr->ss_family != local_sockaddr->ss_family) {
+		pr_info("%s: Sockaddr family doesn't match the requested one\n",
+				__func__);
+		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+		goto add_mapping_response_exit;
+	}
+	memcpy(&pm_msg->mapped_loc_addr, mapped_sockaddr,
+			sizeof(*mapped_sockaddr));
+	iwpm_print_sockaddr(&pm_msg->loc_addr,
+			"add_mapping: Local sockaddr:");
+	iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
+			"add_mapping: Mapped local sockaddr:");
+
+add_mapping_response_exit:
+	nlmsg_request->request_done = 1;
+	/* always for found request */
+	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+	barrier();
+	wake_up(&nlmsg_request->waitq);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_add_mapping_cb);
+
+/* netlink attribute policy for the response to add and query mapping request */
+static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
+	[IWPM_NLA_QUERY_MAPPING_SEQ]      = { .type = NLA_U32 },
+	[IWPM_NLA_QUERY_LOCAL_ADDR]       = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_QUERY_REMOTE_ADDR]      = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RQUERY_MAPPING_ERR]	  = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_add_and_query_mapping_cb - Process a port mapper response to
+ *                                 iwpm_add_and_query_mapping()
+ */
+int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
+				struct netlink_callback *cb)
+{
+	struct iwpm_sa_data *pm_msg;
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX];
+	struct sockaddr_storage *local_sockaddr, *remote_sockaddr;
+	struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr;
+	const char *msg_type;
+	u32 msg_seq;
+	u16 err_code;
+
+	msg_type = "Query Mapping response";
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX,
+				resp_query_policy, nltb, msg_type))
+		return -EINVAL;
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+	msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]);
+	nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+	if (!nlmsg_request) {
+		pr_info("%s: Could not find a matching request (seq = %u)\n",
+				 __func__, msg_seq);
+			return -EINVAL;
+	}
+	pm_msg = nlmsg_request->req_buffer;
+	local_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
+	remote_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
+	mapped_loc_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
+	mapped_rem_sockaddr = (struct sockaddr_storage *)
+			nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]);
+
+	err_code = nla_get_u16(nltb[IWPM_NLA_RQUERY_MAPPING_ERR]);
+	if (err_code == IWPM_REMOTE_QUERY_REJECT) {
+		pr_info("%s: Received a Reject (pid = %u, echo seq = %u)\n",
+			__func__, cb->nlh->nlmsg_pid, msg_seq);
+		nlmsg_request->err_code = IWPM_REMOTE_QUERY_REJECT;
+	}
+	if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr) ||
+		iwpm_compare_sockaddr(remote_sockaddr, &pm_msg->rem_addr)) {
+		pr_info("%s: Incorrect local sockaddr\n", __func__);
+		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+		goto query_mapping_response_exit;
+	}
+	if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family ||
+		mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) {
+		pr_info("%s: Sockaddr family doesn't match the requested one\n",
+				__func__);
+		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+		goto query_mapping_response_exit;
+	}
+	memcpy(&pm_msg->mapped_loc_addr, mapped_loc_sockaddr,
+			sizeof(*mapped_loc_sockaddr));
+	memcpy(&pm_msg->mapped_rem_addr, mapped_rem_sockaddr,
+			sizeof(*mapped_rem_sockaddr));
+
+	iwpm_print_sockaddr(&pm_msg->loc_addr,
+			"query_mapping: Local sockaddr:");
+	iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
+			"query_mapping: Mapped local sockaddr:");
+	iwpm_print_sockaddr(&pm_msg->rem_addr,
+			"query_mapping: Remote sockaddr:");
+	iwpm_print_sockaddr(&pm_msg->mapped_rem_addr,
+			"query_mapping: Mapped remote sockaddr:");
+query_mapping_response_exit:
+	nlmsg_request->request_done = 1;
+	/* always for found request */
+	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+	barrier();
+	wake_up(&nlmsg_request->waitq);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
+
+/* netlink attribute policy for the received request for mapping info */
+static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
+	[IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING,
+					.len = IWPM_ULIBNAME_SIZE - 1 },
+	[IWPM_NLA_MAPINFO_ULIB_VER]  = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_mapping_info_cb - Process a port mapper request for mapping info
+ */
+int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX];
+	const char *msg_type = "Mapping Info response";
+	int iwpm_pid;
+	u8 nl_client;
+	char *iwpm_name;
+	u16 iwpm_version;
+	int ret = -EINVAL;
+
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_REQ_MAX,
+				resp_mapinfo_policy, nltb, msg_type)) {
+		pr_info("%s: Unable to parse nlmsg\n", __func__);
+		return ret;
+	}
+	iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]);
+	iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]);
+	if (strcmp(iwpm_ulib_name, iwpm_name) ||
+			iwpm_version != iwpm_ulib_version) {
+		pr_info("%s: Invalid port mapper name = %s version = %d\n",
+				__func__, iwpm_name, iwpm_version);
+		return ret;
+	}
+	nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+	if (!iwpm_valid_client(nl_client)) {
+		pr_info("%s: Invalid port mapper client = %d\n",
+				__func__, nl_client);
+		return ret;
+	}
+	iwpm_set_registered(nl_client, 0);
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+	if (!iwpm_mapinfo_available())
+		return 0;
+	iwpm_pid = cb->nlh->nlmsg_pid;
+	pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
+		 __func__, iwpm_pid);
+	ret = iwpm_send_mapinfo(nl_client, iwpm_pid);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_mapping_info_cb);
+
+/* netlink attribute policy for the received mapping info ack */
+static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
+	[IWPM_NLA_MAPINFO_SEQ]    =   { .type = NLA_U32 },
+	[IWPM_NLA_MAPINFO_SEND_NUM] = { .type = NLA_U32 },
+	[IWPM_NLA_MAPINFO_ACK_NUM] =  { .type = NLA_U32 }
+};
+
+/*
+ * iwpm_ack_mapping_info_cb - Process a port mapper ack for
+ *                            the provided mapping info records
+ */
+int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlattr *nltb[IWPM_NLA_MAPINFO_NUM_MAX];
+	u32 mapinfo_send, mapinfo_ack;
+	const char *msg_type = "Mapping Info Ack";
+
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_NUM_MAX,
+				ack_mapinfo_policy, nltb, msg_type))
+		return -EINVAL;
+	mapinfo_send = nla_get_u32(nltb[IWPM_NLA_MAPINFO_SEND_NUM]);
+	mapinfo_ack = nla_get_u32(nltb[IWPM_NLA_MAPINFO_ACK_NUM]);
+	if (mapinfo_ack != mapinfo_send)
+		pr_info("%s: Invalid mapinfo number (sent = %u ack-ed = %u)\n",
+			__func__, mapinfo_send, mapinfo_ack);
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_ack_mapping_info_cb);
+
+/* netlink attribute policy for the received port mapper error message */
+static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
+	[IWPM_NLA_ERR_SEQ]        = { .type = NLA_U32 },
+	[IWPM_NLA_ERR_CODE]       = { .type = NLA_U16 },
+};
+
+/*
+ * iwpm_mapping_error_cb - Process a port mapper error message
+ */
+int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	int nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+	struct nlattr *nltb[IWPM_NLA_ERR_MAX];
+	u32 msg_seq;
+	u16 err_code;
+	const char *msg_type = "Mapping Error Msg";
+
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_ERR_MAX,
+				map_error_policy, nltb, msg_type))
+		return -EINVAL;
+
+	msg_seq = nla_get_u32(nltb[IWPM_NLA_ERR_SEQ]);
+	err_code = nla_get_u16(nltb[IWPM_NLA_ERR_CODE]);
+	pr_info("%s: Received msg seq = %u err code = %u client = %d\n",
+				__func__, msg_seq, err_code, nl_client);
+	/* look for nlmsg_request */
+	nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+	if (!nlmsg_request) {
+		/* not all errors have associated requests */
+		pr_debug("Could not find matching req (seq = %u)\n", msg_seq);
+		return 0;
+	}
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+	nlmsg_request->err_code = err_code;
+	nlmsg_request->request_done = 1;
+	/* always for found request */
+	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+	barrier();
+	wake_up(&nlmsg_request->waitq);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_mapping_error_cb);
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
new file mode 100644
index 000000000000..69e9f84c1605
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "iwpm_util.h"
+
+#define IWPM_HASH_BUCKET_SIZE	512
+#define IWPM_HASH_BUCKET_MASK	(IWPM_HASH_BUCKET_SIZE - 1)
+
+static LIST_HEAD(iwpm_nlmsg_req_list);
+static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
+
+static struct hlist_head *iwpm_hash_bucket;
+static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
+
+static DEFINE_MUTEX(iwpm_admin_lock);
+static struct iwpm_admin_data iwpm_admin;
+
+int iwpm_init(u8 nl_client)
+{
+	if (iwpm_valid_client(nl_client))
+		return -EINVAL;
+	mutex_lock(&iwpm_admin_lock);
+	if (atomic_read(&iwpm_admin.refcount) == 0) {
+		iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE *
+					sizeof(struct hlist_head), GFP_KERNEL);
+		if (!iwpm_hash_bucket) {
+			mutex_unlock(&iwpm_admin_lock);
+			pr_err("%s Unable to create mapinfo hash table\n", __func__);
+			return -ENOMEM;
+		}
+	}
+	atomic_inc(&iwpm_admin.refcount);
+	mutex_unlock(&iwpm_admin_lock);
+	iwpm_set_valid(nl_client, 1);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_init);
+
+static void free_hash_bucket(void);
+
+int iwpm_exit(u8 nl_client)
+{
+
+	if (!iwpm_valid_client(nl_client))
+		return -EINVAL;
+	mutex_lock(&iwpm_admin_lock);
+	if (atomic_read(&iwpm_admin.refcount) == 0) {
+		mutex_unlock(&iwpm_admin_lock);
+		pr_err("%s Incorrect usage - negative refcount\n", __func__);
+		return -EINVAL;
+	}
+	if (atomic_dec_and_test(&iwpm_admin.refcount)) {
+		free_hash_bucket();
+		pr_debug("%s: Mapinfo hash table is destroyed\n", __func__);
+	}
+	mutex_unlock(&iwpm_admin_lock);
+	iwpm_set_valid(nl_client, 0);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_exit);
+
+static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *,
+					       struct sockaddr_storage *);
+
+int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
+			struct sockaddr_storage *mapped_sockaddr,
+			u8 nl_client)
+{
+	struct hlist_head *hash_bucket_head;
+	struct iwpm_mapping_info *map_info;
+	unsigned long flags;
+
+	if (!iwpm_valid_client(nl_client))
+		return -EINVAL;
+	map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
+	if (!map_info) {
+		pr_err("%s: Unable to allocate a mapping info\n", __func__);
+		return -ENOMEM;
+	}
+	memcpy(&map_info->local_sockaddr, local_sockaddr,
+	       sizeof(struct sockaddr_storage));
+	memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
+	       sizeof(struct sockaddr_storage));
+	map_info->nl_client = nl_client;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+	if (iwpm_hash_bucket) {
+		hash_bucket_head = get_hash_bucket_head(
+					&map_info->local_sockaddr,
+					&map_info->mapped_sockaddr);
+		hlist_add_head(&map_info->hlist_node, hash_bucket_head);
+	}
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_create_mapinfo);
+
+int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
+			struct sockaddr_storage *mapped_local_addr)
+{
+	struct hlist_node *tmp_hlist_node;
+	struct hlist_head *hash_bucket_head;
+	struct iwpm_mapping_info *map_info = NULL;
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+	if (iwpm_hash_bucket) {
+		hash_bucket_head = get_hash_bucket_head(
+					local_sockaddr,
+					mapped_local_addr);
+		hlist_for_each_entry_safe(map_info, tmp_hlist_node,
+					hash_bucket_head, hlist_node) {
+
+			if (!iwpm_compare_sockaddr(&map_info->mapped_sockaddr,
+						mapped_local_addr)) {
+
+				hlist_del_init(&map_info->hlist_node);
+				kfree(map_info);
+				ret = 0;
+				break;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_remove_mapinfo);
+
+static void free_hash_bucket(void)
+{
+	struct hlist_node *tmp_hlist_node;
+	struct iwpm_mapping_info *map_info;
+	unsigned long flags;
+	int i;
+
+	/* remove all the mapinfo data from the list */
+	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+	for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+		hlist_for_each_entry_safe(map_info, tmp_hlist_node,
+			&iwpm_hash_bucket[i], hlist_node) {
+
+				hlist_del_init(&map_info->hlist_node);
+				kfree(map_info);
+			}
+	}
+	/* free the hash list */
+	kfree(iwpm_hash_bucket);
+	iwpm_hash_bucket = NULL;
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+}
+
+struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
+					u8 nl_client, gfp_t gfp)
+{
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	unsigned long flags;
+
+	nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
+	if (!nlmsg_request) {
+		pr_err("%s Unable to allocate a nlmsg_request\n", __func__);
+		return NULL;
+	}
+	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
+	list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list);
+	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
+
+	kref_init(&nlmsg_request->kref);
+	kref_get(&nlmsg_request->kref);
+	nlmsg_request->nlmsg_seq = nlmsg_seq;
+	nlmsg_request->nl_client = nl_client;
+	nlmsg_request->request_done = 0;
+	nlmsg_request->err_code = 0;
+	return nlmsg_request;
+}
+
+void iwpm_free_nlmsg_request(struct kref *kref)
+{
+	struct iwpm_nlmsg_request *nlmsg_request;
+	unsigned long flags;
+
+	nlmsg_request = container_of(kref, struct iwpm_nlmsg_request, kref);
+
+	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
+	list_del_init(&nlmsg_request->inprocess_list);
+	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
+
+	if (!nlmsg_request->request_done)
+		pr_debug("%s Freeing incomplete nlmsg request (seq = %u).\n",
+			__func__, nlmsg_request->nlmsg_seq);
+	kfree(nlmsg_request);
+}
+
+struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq)
+{
+	struct iwpm_nlmsg_request *nlmsg_request;
+	struct iwpm_nlmsg_request *found_request = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
+	list_for_each_entry(nlmsg_request, &iwpm_nlmsg_req_list,
+			    inprocess_list) {
+		if (nlmsg_request->nlmsg_seq == echo_seq) {
+			found_request = nlmsg_request;
+			kref_get(&nlmsg_request->kref);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
+	return found_request;
+}
+
+int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request)
+{
+	int ret;
+	init_waitqueue_head(&nlmsg_request->waitq);
+
+	ret = wait_event_timeout(nlmsg_request->waitq,
+			(nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT);
+	if (!ret) {
+		ret = -EINVAL;
+		pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n",
+			__func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq);
+	} else {
+		ret = nlmsg_request->err_code;
+	}
+	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+	return ret;
+}
+
+int iwpm_get_nlmsg_seq(void)
+{
+	return atomic_inc_return(&iwpm_admin.nlmsg_seq);
+}
+
+int iwpm_valid_client(u8 nl_client)
+{
+	if (nl_client >= RDMA_NL_NUM_CLIENTS)
+		return 0;
+	return iwpm_admin.client_list[nl_client];
+}
+
+void iwpm_set_valid(u8 nl_client, int valid)
+{
+	if (nl_client >= RDMA_NL_NUM_CLIENTS)
+		return;
+	iwpm_admin.client_list[nl_client] = valid;
+}
+
+/* valid client */
+int iwpm_registered_client(u8 nl_client)
+{
+	return iwpm_admin.reg_list[nl_client];
+}
+
+/* valid client */
+void iwpm_set_registered(u8 nl_client, int reg)
+{
+	iwpm_admin.reg_list[nl_client] = reg;
+}
+
+int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
+				struct sockaddr_storage *b_sockaddr)
+{
+	if (a_sockaddr->ss_family != b_sockaddr->ss_family)
+		return 1;
+	if (a_sockaddr->ss_family == AF_INET) {
+		struct sockaddr_in *a4_sockaddr =
+			(struct sockaddr_in *)a_sockaddr;
+		struct sockaddr_in *b4_sockaddr =
+			(struct sockaddr_in *)b_sockaddr;
+		if (!memcmp(&a4_sockaddr->sin_addr,
+			&b4_sockaddr->sin_addr, sizeof(struct in_addr))
+			&& a4_sockaddr->sin_port == b4_sockaddr->sin_port)
+				return 0;
+
+	} else if (a_sockaddr->ss_family == AF_INET6) {
+		struct sockaddr_in6 *a6_sockaddr =
+			(struct sockaddr_in6 *)a_sockaddr;
+		struct sockaddr_in6 *b6_sockaddr =
+			(struct sockaddr_in6 *)b_sockaddr;
+		if (!memcmp(&a6_sockaddr->sin6_addr,
+			&b6_sockaddr->sin6_addr, sizeof(struct in6_addr))
+			&& a6_sockaddr->sin6_port == b6_sockaddr->sin6_port)
+				return 0;
+
+	} else {
+		pr_err("%s: Invalid sockaddr family\n", __func__);
+	}
+	return 1;
+}
+
+struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
+						int nl_client)
+{
+	struct sk_buff *skb = NULL;
+
+	skb = dev_alloc_skb(NLMSG_GOODSIZE);
+	if (!skb) {
+		pr_err("%s Unable to allocate skb\n", __func__);
+		goto create_nlmsg_exit;
+	}
+	if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op,
+			   NLM_F_REQUEST))) {
+		pr_warn("%s: Unable to put the nlmsg header\n", __func__);
+		dev_kfree_skb(skb);
+		skb = NULL;
+	}
+create_nlmsg_exit:
+	return skb;
+}
+
+int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
+				   const struct nla_policy *nlmsg_policy,
+				   struct nlattr *nltb[], const char *msg_type)
+{
+	int nlh_len = 0;
+	int ret;
+	const char *err_str = "";
+
+	ret = nlmsg_validate(cb->nlh, nlh_len, policy_max-1, nlmsg_policy);
+	if (ret) {
+		err_str = "Invalid attribute";
+		goto parse_nlmsg_error;
+	}
+	ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max-1, nlmsg_policy);
+	if (ret) {
+		err_str = "Unable to parse the nlmsg";
+		goto parse_nlmsg_error;
+	}
+	ret = iwpm_validate_nlmsg_attr(nltb, policy_max);
+	if (ret) {
+		err_str = "Invalid NULL attribute";
+		goto parse_nlmsg_error;
+	}
+	return 0;
+parse_nlmsg_error:
+	pr_warn("%s: %s (msg type %s ret = %d)\n",
+			__func__, err_str, msg_type, ret);
+	return ret;
+}
+
+void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg)
+{
+	struct sockaddr_in6 *sockaddr_v6;
+	struct sockaddr_in *sockaddr_v4;
+
+	switch (sockaddr->ss_family) {
+	case AF_INET:
+		sockaddr_v4 = (struct sockaddr_in *)sockaddr;
+		pr_debug("%s IPV4 %pI4: %u(0x%04X)\n",
+			msg, &sockaddr_v4->sin_addr,
+			ntohs(sockaddr_v4->sin_port),
+			ntohs(sockaddr_v4->sin_port));
+		break;
+	case AF_INET6:
+		sockaddr_v6 = (struct sockaddr_in6 *)sockaddr;
+		pr_debug("%s IPV6 %pI6: %u(0x%04X)\n",
+			msg, &sockaddr_v6->sin6_addr,
+			ntohs(sockaddr_v6->sin6_port),
+			ntohs(sockaddr_v6->sin6_port));
+		break;
+	default:
+		break;
+	}
+}
+
+static u32 iwpm_ipv6_jhash(struct sockaddr_in6 *ipv6_sockaddr)
+{
+	u32 ipv6_hash = jhash(&ipv6_sockaddr->sin6_addr, sizeof(struct in6_addr), 0);
+	u32 hash = jhash_2words(ipv6_hash, (__force u32) ipv6_sockaddr->sin6_port, 0);
+	return hash;
+}
+
+static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr)
+{
+	u32 ipv4_hash = jhash(&ipv4_sockaddr->sin_addr, sizeof(struct in_addr), 0);
+	u32 hash = jhash_2words(ipv4_hash, (__force u32) ipv4_sockaddr->sin_port, 0);
+	return hash;
+}
+
+static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage
+					       *local_sockaddr,
+					       struct sockaddr_storage
+					       *mapped_sockaddr)
+{
+	u32 local_hash, mapped_hash, hash;
+
+	if (local_sockaddr->ss_family == AF_INET) {
+		local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr);
+		mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr);
+
+	} else if (local_sockaddr->ss_family == AF_INET6) {
+		local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr);
+		mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr);
+	} else {
+		pr_err("%s: Invalid sockaddr family\n", __func__);
+		return NULL;
+	}
+
+	if (local_hash == mapped_hash) /* if port mapper isn't available */
+		hash = local_hash;
+	else
+		hash = jhash_2words(local_hash, mapped_hash, 0);
+
+	return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK];
+}
+
+static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
+{
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_MAPINFO_NUM, &nlh, nl_client);
+	if (!skb) {
+		err_str = "Unable to create a nlmsg";
+		goto mapinfo_num_error;
+	}
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	msg_seq = 0;
+	err_str = "Unable to put attribute of mapinfo number nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_MAPINFO_SEQ);
+	if (ret)
+		goto mapinfo_num_error;
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32),
+				&mapping_num, IWPM_NLA_MAPINFO_SEND_NUM);
+	if (ret)
+		goto mapinfo_num_error;
+	ret = ibnl_unicast(skb, nlh, iwpm_pid);
+	if (ret) {
+		skb = NULL;
+		err_str = "Unable to send a nlmsg";
+		goto mapinfo_num_error;
+	}
+	pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num);
+	return 0;
+mapinfo_num_error:
+	pr_info("%s: %s\n", __func__, err_str);
+	if (skb)
+		dev_kfree_skb(skb);
+	return ret;
+}
+
+static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
+{
+	struct nlmsghdr *nlh = NULL;
+	int ret = 0;
+
+	if (!skb)
+		return ret;
+	if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
+			   RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
+		pr_warn("%s Unable to put NLMSG_DONE\n", __func__);
+		return -ENOMEM;
+	}
+	nlh->nlmsg_type = NLMSG_DONE;
+	ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid);
+	if (ret)
+		pr_warn("%s Unable to send a nlmsg\n", __func__);
+	return ret;
+}
+
+int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
+{
+	struct iwpm_mapping_info *map_info;
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	int skb_num = 0, mapping_num = 0;
+	int i = 0, nlmsg_bytes = 0;
+	unsigned long flags;
+	const char *err_str = "";
+	int ret;
+
+	skb = dev_alloc_skb(NLMSG_GOODSIZE);
+	if (!skb) {
+		ret = -ENOMEM;
+		err_str = "Unable to allocate skb";
+		goto send_mapping_info_exit;
+	}
+	skb_num++;
+	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+	for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+		hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
+				     hlist_node) {
+			if (map_info->nl_client != nl_client)
+				continue;
+			nlh = NULL;
+			if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
+					RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
+				ret = -ENOMEM;
+				err_str = "Unable to put the nlmsg header";
+				goto send_mapping_info_unlock;
+			}
+			err_str = "Unable to put attribute of the nlmsg";
+			ret = ibnl_put_attr(skb, nlh,
+					sizeof(struct sockaddr_storage),
+					&map_info->local_sockaddr,
+					IWPM_NLA_MAPINFO_LOCAL_ADDR);
+			if (ret)
+				goto send_mapping_info_unlock;
+
+			ret = ibnl_put_attr(skb, nlh,
+					sizeof(struct sockaddr_storage),
+					&map_info->mapped_sockaddr,
+					IWPM_NLA_MAPINFO_MAPPED_ADDR);
+			if (ret)
+				goto send_mapping_info_unlock;
+
+			iwpm_print_sockaddr(&map_info->local_sockaddr,
+				"send_mapping_info: Local sockaddr:");
+			iwpm_print_sockaddr(&map_info->mapped_sockaddr,
+				"send_mapping_info: Mapped local sockaddr:");
+			mapping_num++;
+			nlmsg_bytes += nlh->nlmsg_len;
+
+			/* check if all mappings can fit in one skb */
+			if (NLMSG_GOODSIZE - nlmsg_bytes < nlh->nlmsg_len * 2) {
+				/* and leave room for NLMSG_DONE */
+				nlmsg_bytes = 0;
+				skb_num++;
+				spin_unlock_irqrestore(&iwpm_mapinfo_lock,
+						       flags);
+				/* send the skb */
+				ret = send_nlmsg_done(skb, nl_client, iwpm_pid);
+				skb = NULL;
+				if (ret) {
+					err_str = "Unable to send map info";
+					goto send_mapping_info_exit;
+				}
+				if (skb_num == IWPM_MAPINFO_SKB_COUNT) {
+					ret = -ENOMEM;
+					err_str = "Insufficient skbs for map info";
+					goto send_mapping_info_exit;
+				}
+				skb = dev_alloc_skb(NLMSG_GOODSIZE);
+				if (!skb) {
+					ret = -ENOMEM;
+					err_str = "Unable to allocate skb";
+					goto send_mapping_info_exit;
+				}
+				spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+			}
+		}
+	}
+send_mapping_info_unlock:
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+send_mapping_info_exit:
+	if (ret) {
+		pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret);
+		if (skb)
+			dev_kfree_skb(skb);
+		return ret;
+	}
+	send_nlmsg_done(skb, nl_client, iwpm_pid);
+	return send_mapinfo_num(mapping_num, nl_client, iwpm_pid);
+}
+
+int iwpm_mapinfo_available(void)
+{
+	unsigned long flags;
+	int full_bucket = 0, i = 0;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+	if (iwpm_hash_bucket) {
+		for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+			if (!hlist_empty(&iwpm_hash_bucket[i])) {
+				full_bucket = 1;
+				break;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+	return full_bucket;
+}
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
new file mode 100644
index 000000000000..9777c869a140
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IWPM_UTIL_H
+#define _IWPM_UTIL_H
+
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/jhash.h>
+#include <linux/kref.h>
+#include <net/netlink.h>
+#include <linux/errno.h>
+#include <rdma/iw_portmap.h>
+#include <rdma/rdma_netlink.h>
+
+
+#define IWPM_NL_RETRANS		3
+#define IWPM_NL_TIMEOUT		(10*HZ)
+#define IWPM_MAPINFO_SKB_COUNT	20
+
+#define IWPM_PID_UNDEFINED     -1
+#define IWPM_PID_UNAVAILABLE   -2
+
+struct iwpm_nlmsg_request {
+	struct list_head    inprocess_list;
+	__u32               nlmsg_seq;
+	void                *req_buffer;
+	u8	            nl_client;
+	u8                  request_done;
+	u16                 err_code;
+	wait_queue_head_t   waitq;
+	struct kref         kref;
+};
+
+struct iwpm_mapping_info {
+	struct hlist_node hlist_node;
+	struct sockaddr_storage local_sockaddr;
+	struct sockaddr_storage mapped_sockaddr;
+	u8     nl_client;
+};
+
+struct iwpm_admin_data {
+	atomic_t refcount;
+	atomic_t nlmsg_seq;
+	int      client_list[RDMA_NL_NUM_CLIENTS];
+	int      reg_list[RDMA_NL_NUM_CLIENTS];
+};
+
+/**
+ * iwpm_get_nlmsg_request - Allocate and initialize netlink message request
+ * @nlmsg_seq: Sequence number of the netlink message
+ * @nl_client: The index of the netlink client
+ * @gfp: Indicates how the memory for the request should be allocated
+ *
+ * Returns the newly allocated netlink request object if successful,
+ * otherwise returns NULL
+ */
+struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
+						u8 nl_client, gfp_t gfp);
+
+/**
+ * iwpm_free_nlmsg_request - Deallocate netlink message request
+ * @kref: Holds reference of netlink message request
+ */
+void iwpm_free_nlmsg_request(struct kref *kref);
+
+/**
+ * iwpm_find_nlmsg_request - Find netlink message request in the request list
+ * @echo_seq: Sequence number of the netlink request to find
+ *
+ * Returns the found netlink message request,
+ * if not found, returns NULL
+ */
+struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq);
+
+/**
+ * iwpm_wait_complete_req - Block while servicing the netlink request
+ * @nlmsg_request: Netlink message request to service
+ *
+ * Wakes up, after the request is completed or expired
+ * Returns 0 if the request is complete without error
+ */
+int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
+
+/**
+ * iwpm_get_nlmsg_seq - Get the sequence number for a netlink
+ *			message to send to the port mapper
+ *
+ * Returns the sequence number for the netlink message.
+ */
+int iwpm_get_nlmsg_seq(void);
+
+/**
+ * iwpm_valid_client - Check if the port mapper client is valid
+ * @nl_client: The index of the netlink client
+ *
+ * Valid clients need to call iwpm_init() before using
+ * the port mapper
+ */
+int iwpm_valid_client(u8 nl_client);
+
+/**
+ * iwpm_set_valid - Set the port mapper client to valid or not
+ * @nl_client: The index of the netlink client
+ * @valid: 1 if valid or 0 if invalid
+ */
+void iwpm_set_valid(u8 nl_client, int valid);
+
+/**
+ * iwpm_registered_client - Check if the port mapper client is registered
+ * @nl_client: The index of the netlink client
+ *
+ * Call iwpm_register_pid() to register a client
+ */
+int iwpm_registered_client(u8 nl_client);
+
+/**
+ * iwpm_set_registered - Set the port mapper client to registered or not
+ * @nl_client: The index of the netlink client
+ * @reg: 1 if registered or 0 if not
+ */
+void iwpm_set_registered(u8 nl_client, int reg);
+
+/**
+ * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of
+ *                     a client to the user space port mapper
+ * @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
+ *
+ * If successful, returns the number of sent mapping info records
+ */
+int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid);
+
+/**
+ * iwpm_mapinfo_available - Check if any mapping info records is available
+ *		            in the hash table
+ *
+ * Returns 1 if mapping information is available, otherwise returns 0
+ */
+int iwpm_mapinfo_available(void);
+
+/**
+ * iwpm_compare_sockaddr - Compare two sockaddr storage structs
+ *
+ * Returns 0 if they are holding the same ip/tcp address info,
+ * otherwise returns 1
+ */
+int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
+			struct sockaddr_storage *b_sockaddr);
+
+/**
+ * iwpm_validate_nlmsg_attr - Check for NULL netlink attributes
+ * @nltb: Holds address of each netlink message attributes
+ * @nla_count: Number of netlink message attributes
+ *
+ * Returns error if any of the nla_count attributes is NULL
+ */
+static inline int iwpm_validate_nlmsg_attr(struct nlattr *nltb[],
+					   int nla_count)
+{
+	int i;
+	for (i = 1; i < nla_count; i++) {
+		if (!nltb[i])
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * iwpm_create_nlmsg - Allocate skb and form a netlink message
+ * @nl_op: Netlink message opcode
+ * @nlh: Holds address of the netlink message header in skb
+ * @nl_client: The index of the netlink client
+ *
+ * Returns the newly allcated skb, or NULL if the tailroom of the skb
+ * is insufficient to store the message header and payload
+ */
+struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
+					int nl_client);
+
+/**
+ * iwpm_parse_nlmsg - Validate and parse the received netlink message
+ * @cb: Netlink callback structure
+ * @policy_max: Maximum attribute type to be expected
+ * @nlmsg_policy: Validation policy
+ * @nltb: Array to store policy_max parsed elements
+ * @msg_type: Type of netlink message
+ *
+ * Returns 0 on success or a negative error code
+ */
+int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
+				const struct nla_policy *nlmsg_policy,
+				struct nlattr *nltb[], const char *msg_type);
+
+/**
+ * iwpm_print_sockaddr - Print IPv4/IPv6 address and TCP port
+ * @sockaddr: Socket address to print
+ * @msg: Message to print
+ */
+void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
+#endif
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index a1e9cba84944..23dd5a5c7597 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -103,13 +103,13 @@ int ibnl_remove_client(int index)
 EXPORT_SYMBOL(ibnl_remove_client);
 
 void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
-		   int len, int client, int op)
+		   int len, int client, int op, int flags)
 {
 	unsigned char *prev_tail;
 
 	prev_tail = skb_tail_pointer(skb);
 	*nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
-			 len, NLM_F_MULTI);
+			 len, flags);
 	if (!*nlh)
 		goto out_nlmsg_trim;
 	(*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
@@ -172,6 +172,20 @@ static void ibnl_rcv(struct sk_buff *skb)
 	mutex_unlock(&ibnl_mutex);
 }
 
+int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+			__u32 pid)
+{
+	return nlmsg_unicast(nls, skb, pid);
+}
+EXPORT_SYMBOL(ibnl_unicast);
+
+int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+			unsigned int group, gfp_t flags)
+{
+	return nlmsg_multicast(nls, skb, 0, group, flags);
+}
+EXPORT_SYMBOL(ibnl_multicast);
+
 int __init ibnl_init(void)
 {
 	struct netlink_kernel_cfg cfg = {
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
new file mode 100644
index 000000000000..928b2775e992
--- /dev/null
+++ b/include/rdma/iw_portmap.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IW_PORTMAP_H
+#define _IW_PORTMAP_H
+
+#define IWPM_ULIBNAME_SIZE	32
+#define IWPM_DEVNAME_SIZE	32
+#define IWPM_IFNAME_SIZE	16
+#define IWPM_IPADDR_SIZE	16
+
+enum {
+	IWPM_INVALID_NLMSG_ERR = 10,
+	IWPM_CREATE_MAPPING_ERR,
+	IWPM_DUPLICATE_MAPPING_ERR,
+	IWPM_UNKNOWN_MAPPING_ERR,
+	IWPM_CLIENT_DEV_INFO_ERR,
+	IWPM_USER_LIB_INFO_ERR,
+	IWPM_REMOTE_QUERY_REJECT
+};
+
+struct iwpm_dev_data {
+	char dev_name[IWPM_DEVNAME_SIZE];
+	char if_name[IWPM_IFNAME_SIZE];
+};
+
+struct iwpm_sa_data {
+	struct sockaddr_storage loc_addr;
+	struct sockaddr_storage mapped_loc_addr;
+	struct sockaddr_storage rem_addr;
+	struct sockaddr_storage mapped_rem_addr;
+};
+
+/**
+ * iwpm_init - Allocate resources for the iwarp port mapper
+ *
+ * Should be called when network interface goes up.
+ */
+int iwpm_init(u8);
+
+/**
+ * iwpm_exit - Deallocate resources for the iwarp port mapper
+ *
+ * Should be called when network interface goes down.
+ */
+int iwpm_exit(u8);
+
+/**
+ * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid
+ *
+ * Returns true if the pid is greater than zero, otherwise returns false
+ */
+int iwpm_valid_pid(void);
+
+/**
+ * iwpm_register_pid - Send a netlink query to userspace
+ *                     to get the iwarp port mapper pid
+ * @pm_msg: Contains driver info to send to the userspace port mapper
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_add_mapping - Send a netlink add mapping request to
+ *                    the userspace port mapper
+ * @pm_msg: Contains the local ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
+ * If the request is successful, the pm_msg stores
+ * the port mapper response (mapped address info)
+ */
+int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_add_and_query_mapping - Send a netlink add and query mapping request
+ *				 to the userspace port mapper
+ * @pm_msg: Contains the local and remote ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
+ * If the request is successful, the pm_msg stores the
+ * port mapper response (mapped local and remote address info)
+ */
+int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+
+/**
+ * iwpm_remove_mapping - Send a netlink remove mapping request
+ *                       to the userspace port mapper
+ *
+ * @local_addr: Local ip/tcp address to remove
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client);
+
+/**
+ * iwpm_register_pid_cb - Process the port mapper response to
+ *                        iwpm_register_pid query
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * If successful, the function receives the userspace port mapper pid
+ * which is used in future communication with the port mapper
+ */
+int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_add_mapping_cb - Process the port mapper response to
+ *                       iwpm_add_mapping request
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_add_and_query_mapping_cb - Process the port mapper response to
+ *                                 iwpm_add_and_query_mapping request
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_mapping_error_cb - Process port mapper notification for error
+ *
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_mapping_info_cb - Process a notification that the userspace
+ *                        port mapper daemon is started
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Using the received port mapper pid, send all the local mapping
+ * info records to the userspace port mapper
+ */
+int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_ack_mapping_info_cb - Process the port mapper ack for
+ *                            the provided local mapping info records
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ */
+int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+
+/**
+ * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
+ *                       info in a hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_addr: Mapped local ip/tcp address
+ * @nl_client: The index of the netlink client
+ */
+int iwpm_create_mapinfo(struct sockaddr_storage *local_addr,
+			struct sockaddr_storage *mapped_addr, u8 nl_client);
+
+/**
+ * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
+ *                       info from the hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_addr: Mapped local ip/tcp address
+ *
+ * Returns err code if mapping info is not found in the hash table,
+ * otherwise returns 0
+ */
+int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr,
+			struct sockaddr_storage *mapped_addr);
+
+#endif /* _IW_PORTMAP_H */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index e38de79eeb48..0790882e0c9b 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -43,7 +43,7 @@ int ibnl_remove_client(int index);
  * Returns the allocated buffer on success and NULL on failure.
  */
 void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
-		   int len, int client, int op);
+		   int len, int client, int op, int flags);
 /**
  * Put a new attribute in a supplied skb.
  * @skb: The netlink skb.
@@ -56,4 +56,25 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
 int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
 		  int len, void *data, int type);
 
+/**
+ * Send the supplied skb to a specific userspace PID.
+ * @skb: The netlink skb
+ * @nlh: Header of the netlink message to send
+ * @pid: Userspace netlink process ID
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+			__u32 pid);
+
+/**
+ * Send the supplied skb to a netlink group.
+ * @skb: The netlink skb
+ * @nlh: Header of the netlink message to send
+ * @group: Netlink group ID
+ * @flags: allocation flags
+ * Returns 0 on success or a negative error code.
+ */
+int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+			unsigned int group, gfp_t flags);
+
 #endif /* _RDMA_NETLINK_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 8297285b6288..de69170a30ce 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -4,7 +4,16 @@
 #include <linux/types.h>
 
 enum {
-	RDMA_NL_RDMA_CM = 1
+	RDMA_NL_RDMA_CM = 1,
+	RDMA_NL_NES,
+	RDMA_NL_C4IW,
+	RDMA_NL_NUM_CLIENTS
+};
+
+enum {
+	RDMA_NL_GROUP_CM = 1,
+	RDMA_NL_GROUP_IWPM,
+	RDMA_NL_NUM_GROUPS
 };
 
 #define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
@@ -22,6 +31,18 @@ enum {
 	RDMA_NL_RDMA_CM_NUM_ATTR,
 };
 
+/* iwarp port mapper op-codes */
+enum {
+	RDMA_NL_IWPM_REG_PID = 0,
+	RDMA_NL_IWPM_ADD_MAPPING,
+	RDMA_NL_IWPM_QUERY_MAPPING,
+	RDMA_NL_IWPM_REMOVE_MAPPING,
+	RDMA_NL_IWPM_HANDLE_ERR,
+	RDMA_NL_IWPM_MAPINFO,
+	RDMA_NL_IWPM_MAPINFO_NUM,
+	RDMA_NL_IWPM_NUM_OPS
+};
+
 struct rdma_cm_id_stats {
 	__u32	qp_num;
 	__u32	bound_dev_if;
@@ -33,5 +54,78 @@ struct rdma_cm_id_stats {
 	__u8	qp_type;
 };
 
+enum {
+	IWPM_NLA_REG_PID_UNSPEC = 0,
+	IWPM_NLA_REG_PID_SEQ,
+	IWPM_NLA_REG_IF_NAME,
+	IWPM_NLA_REG_IBDEV_NAME,
+	IWPM_NLA_REG_ULIB_NAME,
+	IWPM_NLA_REG_PID_MAX
+};
+
+enum {
+	IWPM_NLA_RREG_PID_UNSPEC = 0,
+	IWPM_NLA_RREG_PID_SEQ,
+	IWPM_NLA_RREG_IBDEV_NAME,
+	IWPM_NLA_RREG_ULIB_NAME,
+	IWPM_NLA_RREG_ULIB_VER,
+	IWPM_NLA_RREG_PID_ERR,
+	IWPM_NLA_RREG_PID_MAX
+
+};
+
+enum {
+	IWPM_NLA_MANAGE_MAPPING_UNSPEC = 0,
+	IWPM_NLA_MANAGE_MAPPING_SEQ,
+	IWPM_NLA_MANAGE_ADDR,
+	IWPM_NLA_MANAGE_MAPPED_LOC_ADDR,
+	IWPM_NLA_RMANAGE_MAPPING_ERR,
+	IWPM_NLA_RMANAGE_MAPPING_MAX
+};
+
+#define IWPM_NLA_MANAGE_MAPPING_MAX 3
+#define IWPM_NLA_QUERY_MAPPING_MAX  4
+#define IWPM_NLA_MAPINFO_SEND_MAX   3
+
+enum {
+	IWPM_NLA_QUERY_MAPPING_UNSPEC = 0,
+	IWPM_NLA_QUERY_MAPPING_SEQ,
+	IWPM_NLA_QUERY_LOCAL_ADDR,
+	IWPM_NLA_QUERY_REMOTE_ADDR,
+	IWPM_NLA_RQUERY_MAPPED_LOC_ADDR,
+	IWPM_NLA_RQUERY_MAPPED_REM_ADDR,
+	IWPM_NLA_RQUERY_MAPPING_ERR,
+	IWPM_NLA_RQUERY_MAPPING_MAX
+};
+
+enum {
+	IWPM_NLA_MAPINFO_REQ_UNSPEC = 0,
+	IWPM_NLA_MAPINFO_ULIB_NAME,
+	IWPM_NLA_MAPINFO_ULIB_VER,
+	IWPM_NLA_MAPINFO_REQ_MAX
+};
+
+enum {
+	IWPM_NLA_MAPINFO_UNSPEC = 0,
+	IWPM_NLA_MAPINFO_LOCAL_ADDR,
+	IWPM_NLA_MAPINFO_MAPPED_ADDR,
+	IWPM_NLA_MAPINFO_MAX
+};
+
+enum {
+	IWPM_NLA_MAPINFO_NUM_UNSPEC = 0,
+	IWPM_NLA_MAPINFO_SEQ,
+	IWPM_NLA_MAPINFO_SEND_NUM,
+	IWPM_NLA_MAPINFO_ACK_NUM,
+	IWPM_NLA_MAPINFO_NUM_MAX
+};
+
+enum {
+	IWPM_NLA_ERR_UNSPEC = 0,
+	IWPM_NLA_ERR_SEQ,
+	IWPM_NLA_ERR_CODE,
+	IWPM_NLA_ERR_MAX
+};
+
 
 #endif /* _UAPI_RDMA_NETLINK_H */
-- 
cgit v1.2.3-71-gd317


From b5097e956a4d2919ee248d6481e4204c5568ed5c Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Tue, 10 Jun 2014 20:04:50 +0200
Subject: block: add __init to elv_register

elv_register is only called by elevator init functions:

__init cfq_init
__init deadline_init
__init noop_init

Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/elevator.c         | 2 +-
 include/linux/elevator.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/block/elevator.c b/block/elevator.c
index 1e01b66a0b92..f35edddfe9b5 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -845,7 +845,7 @@ void elv_unregister_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(elv_unregister_queue);
 
-int elv_register(struct elevator_type *e)
+int __init elv_register(struct elevator_type *e)
 {
 	char *def = "";
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index df63bd3a8cf1..4ff262e2bf37 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -144,7 +144,7 @@ extern void elv_drain_elevator(struct request_queue *);
  * io scheduler registration
  */
 extern void __init load_default_elevator_module(void);
-extern int elv_register(struct elevator_type *);
+extern int __init elv_register(struct elevator_type *);
 extern void elv_unregister(struct elevator_type *);
 
 /*
-- 
cgit v1.2.3-71-gd317


From 87a1ef8058d9ab26bc289ea4f27bc3c11ce9acb8 Mon Sep 17 00:00:00 2001
From: David Cohen <david.a.cohen@linux.intel.com>
Date: Tue, 15 Apr 2014 13:06:05 -0700
Subject: watchdog: add Intel MID watchdog driver support

Add initial Intel MID watchdog driver support.

This driver is an initial implementation of generic Intel MID watchdog
driver. Currently it supports Intel Merrifield platform.

Signed-off-by: Eric Ernst <eric.ernst@intel.com>
Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/Kconfig                    |  13 ++
 drivers/watchdog/Makefile                   |   1 +
 drivers/watchdog/intel-mid_wdt.c            | 184 ++++++++++++++++++++++++++++
 include/linux/platform_data/intel-mid_wdt.h |  22 ++++
 4 files changed, 220 insertions(+)
 create mode 100644 drivers/watchdog/intel-mid_wdt.c
 create mode 100644 include/linux/platform_data/intel-mid_wdt.h

(limited to 'include')

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index cbd5ac7b8832..c845527b503a 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -665,6 +665,19 @@ config INTEL_SCU_WATCHDOG
 
 	  To compile this driver as a module, choose M here.
 
+config INTEL_MID_WATCHDOG
+	tristate "Intel MID Watchdog Timer"
+	depends on X86_INTEL_MID
+	select WATCHDOG_CORE
+	---help---
+	  Watchdog timer driver built into the Intel SCU for Intel MID
+	  Platforms.
+
+	  This driver currently supports only the watchdog evolution
+	  implementation in SCU, available for Merrifield generation.
+
+	  To compile this driver as a module, choose M here.
+
 config ITCO_WDT
 	tristate "Intel TCO Timer/Watchdog"
 	depends on (X86 || IA64) && PCI
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 1384531eaa45..7b8a91ed20e7 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -112,6 +112,7 @@ obj-$(CONFIG_W83977F_WDT) += w83977f_wdt.o
 obj-$(CONFIG_MACHZ_WDT) += machzwd.o
 obj-$(CONFIG_SBC_EPX_C3_WATCHDOG) += sbc_epx_c3.o
 obj-$(CONFIG_INTEL_SCU_WATCHDOG) += intel_scu_watchdog.o
+obj-$(CONFIG_INTEL_MID_WATCHDOG) += intel-mid_wdt.o
 
 # M32R Architecture
 
diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c
new file mode 100644
index 000000000000..ca66e8e74635
--- /dev/null
+++ b/drivers/watchdog/intel-mid_wdt.c
@@ -0,0 +1,184 @@
+/*
+ *      intel-mid_wdt: generic Intel MID SCU watchdog driver
+ *
+ *      Platforms supported so far:
+ *      - Merrifield only
+ *
+ *      Copyright (C) 2014 Intel Corporation. All rights reserved.
+ *      Contact: David Cohen <david.a.cohen@linux.intel.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of version 2 of the GNU General
+ *      Public License as published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/nmi.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+#include <linux/platform_data/intel-mid_wdt.h>
+
+#include <asm/intel_scu_ipc.h>
+#include <asm/intel-mid.h>
+
+#define IPC_WATCHDOG 0xf8
+
+#define MID_WDT_PRETIMEOUT		15
+#define MID_WDT_TIMEOUT_MIN		(1 + MID_WDT_PRETIMEOUT)
+#define MID_WDT_TIMEOUT_MAX		170
+#define MID_WDT_DEFAULT_TIMEOUT		90
+
+/* SCU watchdog messages */
+enum {
+	SCU_WATCHDOG_START = 0,
+	SCU_WATCHDOG_STOP,
+	SCU_WATCHDOG_KEEPALIVE,
+};
+
+static inline int wdt_command(int sub, u32 *in, int inlen)
+{
+	return intel_scu_ipc_command(IPC_WATCHDOG, sub, in, inlen, NULL, 0);
+}
+
+static int wdt_start(struct watchdog_device *wd)
+{
+	int ret, in_size;
+	int timeout = wd->timeout;
+	struct ipc_wd_start {
+		u32 pretimeout;
+		u32 timeout;
+	} ipc_wd_start = { timeout - MID_WDT_PRETIMEOUT, timeout };
+
+	/*
+	 * SCU expects the input size for watchdog IPC to
+	 * be based on 4 bytes
+	 */
+	in_size = DIV_ROUND_UP(sizeof(ipc_wd_start), 4);
+
+	ret = wdt_command(SCU_WATCHDOG_START, (u32 *)&ipc_wd_start, in_size);
+	if (ret) {
+		struct device *dev = watchdog_get_drvdata(wd);
+		dev_crit(dev, "error starting watchdog: %d\n", ret);
+	}
+
+	return ret;
+}
+
+static int wdt_ping(struct watchdog_device *wd)
+{
+	int ret;
+
+	ret = wdt_command(SCU_WATCHDOG_KEEPALIVE, NULL, 0);
+	if (ret) {
+		struct device *dev = watchdog_get_drvdata(wd);
+		dev_crit(dev, "Error executing keepalive: 0x%x\n", ret);
+	}
+
+	return ret;
+}
+
+static int wdt_stop(struct watchdog_device *wd)
+{
+	int ret;
+
+	ret = wdt_command(SCU_WATCHDOG_STOP, NULL, 0);
+	if (ret) {
+		struct device *dev = watchdog_get_drvdata(wd);
+		dev_crit(dev, "Error stopping watchdog: 0x%x\n", ret);
+	}
+
+	return ret;
+}
+
+static irqreturn_t mid_wdt_irq(int irq, void *dev_id)
+{
+	panic("Kernel Watchdog");
+
+	/* This code should not be reached */
+	return IRQ_HANDLED;
+}
+
+static const struct watchdog_info mid_wdt_info = {
+	.identity = "Intel MID SCU watchdog",
+	.options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT,
+};
+
+static const struct watchdog_ops mid_wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = wdt_start,
+	.stop = wdt_stop,
+	.ping = wdt_ping,
+};
+
+static int mid_wdt_probe(struct platform_device *pdev)
+{
+	struct watchdog_device *wdt_dev;
+	struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
+	int ret;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "missing platform data\n");
+		return -EINVAL;
+	}
+
+	if (pdata->probe) {
+		ret = pdata->probe(pdev);
+		if (ret)
+			return ret;
+	}
+
+	wdt_dev = devm_kzalloc(&pdev->dev, sizeof(*wdt_dev), GFP_KERNEL);
+	if (!wdt_dev)
+		return -ENOMEM;
+
+	wdt_dev->info = &mid_wdt_info;
+	wdt_dev->ops = &mid_wdt_ops;
+	wdt_dev->min_timeout = MID_WDT_TIMEOUT_MIN;
+	wdt_dev->max_timeout = MID_WDT_TIMEOUT_MAX;
+	wdt_dev->timeout = MID_WDT_DEFAULT_TIMEOUT;
+
+	watchdog_set_drvdata(wdt_dev, &pdev->dev);
+	platform_set_drvdata(pdev, wdt_dev);
+
+	ret = devm_request_irq(&pdev->dev, pdata->irq, mid_wdt_irq,
+			       IRQF_SHARED | IRQF_NO_SUSPEND, "watchdog",
+			       wdt_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "error requesting warning irq %d\n",
+			pdata->irq);
+		return ret;
+	}
+
+	ret = watchdog_register_device(wdt_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "error registering watchdog device\n");
+		return ret;
+	}
+
+	dev_info(&pdev->dev, "Intel MID watchdog device probed\n");
+
+	return 0;
+}
+
+static int mid_wdt_remove(struct platform_device *pdev)
+{
+	struct watchdog_device *wd = platform_get_drvdata(pdev);
+	watchdog_unregister_device(wd);
+	return 0;
+}
+
+static struct platform_driver mid_wdt_driver = {
+	.probe		= mid_wdt_probe,
+	.remove		= mid_wdt_remove,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "intel_mid_wdt",
+	},
+};
+
+module_platform_driver(mid_wdt_driver);
+
+MODULE_AUTHOR("David Cohen <david.a.cohen@linux.intel.com>");
+MODULE_DESCRIPTION("Watchdog Driver for Intel MID platform");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/intel-mid_wdt.h b/include/linux/platform_data/intel-mid_wdt.h
new file mode 100644
index 000000000000..b98253466ace
--- /dev/null
+++ b/include/linux/platform_data/intel-mid_wdt.h
@@ -0,0 +1,22 @@
+/*
+ *      intel-mid_wdt: generic Intel MID SCU watchdog driver
+ *
+ *      Copyright (C) 2014 Intel Corporation. All rights reserved.
+ *      Contact: David Cohen <david.a.cohen@linux.intel.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of version 2 of the GNU General
+ *      Public License as published by the Free Software Foundation.
+ */
+
+#ifndef __INTEL_MID_WDT_H__
+#define __INTEL_MID_WDT_H__
+
+#include <linux/platform_device.h>
+
+struct intel_mid_wdt_pdata {
+	int irq;
+	int (*probe)(struct platform_device *pdev);
+};
+
+#endif /*__INTEL_MID_WDT_H__*/
-- 
cgit v1.2.3-71-gd317


From 23adbe12ef7d3d4195e80800ab36b37bee28cd03 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Tue, 10 Jun 2014 12:45:42 -0700
Subject: fs,userns: Change inode_capable to capable_wrt_inode_uidgid

The kernel has no concept of capabilities with respect to inodes; inodes
exist independently of namespaces.  For example, inode_capable(inode,
CAP_LINUX_IMMUTABLE) would be nonsense.

This patch changes inode_capable to check for uid and gid mappings and
renames it to capable_wrt_inode_uidgid, which should make it more
obvious what it does.

Fixes CVE-2014-4014.

Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/attr.c                  |  8 ++++----
 fs/inode.c                 | 10 +++++++---
 fs/namei.c                 | 11 ++++++-----
 fs/xfs/xfs_ioctl.c         |  2 +-
 include/linux/capability.h |  2 +-
 kernel/capability.c        | 20 ++++++++------------
 6 files changed, 27 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/fs/attr.c b/fs/attr.c
index 5d4e59d56e85..6530ced19697 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -50,14 +50,14 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
 	if ((ia_valid & ATTR_UID) &&
 	    (!uid_eq(current_fsuid(), inode->i_uid) ||
 	     !uid_eq(attr->ia_uid, inode->i_uid)) &&
-	    !inode_capable(inode, CAP_CHOWN))
+	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
 		return -EPERM;
 
 	/* Make sure caller can chgrp. */
 	if ((ia_valid & ATTR_GID) &&
 	    (!uid_eq(current_fsuid(), inode->i_uid) ||
 	    (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
-	    !inode_capable(inode, CAP_CHOWN))
+	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
 		return -EPERM;
 
 	/* Make sure a caller can chmod. */
@@ -67,7 +67,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
 		/* Also check the setgid bit! */
 		if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
 				inode->i_gid) &&
-		    !inode_capable(inode, CAP_FSETID))
+		    !capable_wrt_inode_uidgid(inode, CAP_FSETID))
 			attr->ia_mode &= ~S_ISGID;
 	}
 
@@ -160,7 +160,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
 		umode_t mode = attr->ia_mode;
 
 		if (!in_group_p(inode->i_gid) &&
-		    !inode_capable(inode, CAP_FSETID))
+		    !capable_wrt_inode_uidgid(inode, CAP_FSETID))
 			mode &= ~S_ISGID;
 		inode->i_mode = mode;
 	}
diff --git a/fs/inode.c b/fs/inode.c
index 2feb9b69f1be..6eecb7ff0b9a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1839,14 +1839,18 @@ EXPORT_SYMBOL(inode_init_owner);
  * inode_owner_or_capable - check current task permissions to inode
  * @inode: inode being checked
  *
- * Return true if current either has CAP_FOWNER to the inode, or
- * owns the file.
+ * Return true if current either has CAP_FOWNER in a namespace with the
+ * inode owner uid mapped, or owns the file.
  */
 bool inode_owner_or_capable(const struct inode *inode)
 {
+	struct user_namespace *ns;
+
 	if (uid_eq(current_fsuid(), inode->i_uid))
 		return true;
-	if (inode_capable(inode, CAP_FOWNER))
+
+	ns = current_user_ns();
+	if (ns_capable(ns, CAP_FOWNER) && kuid_has_mapping(ns, inode->i_uid))
 		return true;
 	return false;
 }
diff --git a/fs/namei.c b/fs/namei.c
index 80168273396b..985c6f368485 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -332,10 +332,11 @@ int generic_permission(struct inode *inode, int mask)
 
 	if (S_ISDIR(inode->i_mode)) {
 		/* DACs are overridable for directories */
-		if (inode_capable(inode, CAP_DAC_OVERRIDE))
+		if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
 			return 0;
 		if (!(mask & MAY_WRITE))
-			if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+			if (capable_wrt_inode_uidgid(inode,
+						     CAP_DAC_READ_SEARCH))
 				return 0;
 		return -EACCES;
 	}
@@ -345,7 +346,7 @@ int generic_permission(struct inode *inode, int mask)
 	 * at least one exec bit set.
 	 */
 	if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
-		if (inode_capable(inode, CAP_DAC_OVERRIDE))
+		if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
 			return 0;
 
 	/*
@@ -353,7 +354,7 @@ int generic_permission(struct inode *inode, int mask)
 	 */
 	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
 	if (mask == MAY_READ)
-		if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+		if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH))
 			return 0;
 
 	return -EACCES;
@@ -2379,7 +2380,7 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
 		return 0;
 	if (uid_eq(dir->i_uid, fsuid))
 		return 0;
-	return !inode_capable(inode, CAP_FOWNER);
+	return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
 }
 
 /*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0b18776b075e..6152cbe353e8 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1215,7 +1215,7 @@ xfs_ioctl_setattr(
 		 * cleared upon successful return from chown()
 		 */
 		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-		    !inode_capable(VFS_I(ip), CAP_FSETID))
+		    !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
 			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
 
 		/*
diff --git a/include/linux/capability.h b/include/linux/capability.h
index a6ee1f9a5018..84b13ad67c1c 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -210,7 +210,7 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
 				      struct user_namespace *ns, int cap);
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
-extern bool inode_capable(const struct inode *inode, int cap);
+extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
 
 /* audit system wants to get cap info from files as well */
diff --git a/kernel/capability.c b/kernel/capability.c
index 84b2bbf443e7..a5cf13c018ce 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -424,23 +424,19 @@ bool capable(int cap)
 EXPORT_SYMBOL(capable);
 
 /**
- * inode_capable - Check superior capability over inode
+ * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
  * @inode: The inode in question
  * @cap: The capability in question
  *
- * Return true if the current task has the given superior capability
- * targeted at it's own user namespace and that the given inode is owned
- * by the current user namespace or a child namespace.
- *
- * Currently we check to see if an inode is owned by the current
- * user namespace by seeing if the inode's owner maps into the
- * current user namespace.
- *
+ * Return true if the current task has the given capability targeted at
+ * its own user namespace and that the given inode's uid and gid are
+ * mapped into the current user namespace.
  */
-bool inode_capable(const struct inode *inode, int cap)
+bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
 {
 	struct user_namespace *ns = current_user_ns();
 
-	return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid);
+	return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
+		kgid_has_mapping(ns, inode->i_gid);
 }
-EXPORT_SYMBOL(inode_capable);
+EXPORT_SYMBOL(capable_wrt_inode_uidgid);
-- 
cgit v1.2.3-71-gd317


From 602cb5bbae9868fe48989efa78aca62415309fcf Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Tue, 10 Jun 2014 15:18:40 -0700
Subject: mfd/rtc: sec/s5m: rename SEC* symbols to S5M

Prepare for adding support for S2MPS14 RTC device to the rtc-s5m driver:

1. Rename SEC* symbols to S5M.
2. Add S5M prefix to some of defines which are different between S5M876X
   and S2MPS14.

This is only a rename-like patch, new code is not added.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Sangbeom Kim <sbkim73@samsung.com>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-s5m.c           | 66 +++++++++++++++++------------------
 include/linux/mfd/samsung/rtc.h | 76 ++++++++++++++++++++---------------------
 2 files changed, 71 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 8ec2d6a1dbe1..b37df8c790f2 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -30,10 +30,10 @@
 
 /*
  * Maximum number of retries for checking changes in UDR field
- * of SEC_RTC_UDR_CON register (to limit possible endless loop).
+ * of S5M_RTC_UDR_CON register (to limit possible endless loop).
  *
  * After writing to RTC registers (setting time or alarm) read the UDR field
- * in SEC_RTC_UDR_CON register. UDR is auto-cleared when data have
+ * in S5M_RTC_UDR_CON register. UDR is auto-cleared when data have
  * been transferred.
  */
 #define UDR_READ_RETRY_CNT	5
@@ -54,7 +54,7 @@ static const struct regmap_config s5m_rtc_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
 
-	.max_register = SEC_RTC_REG_MAX,
+	.max_register = S5M_RTC_REG_MAX,
 };
 
 static const struct regmap_config s2mps14_rtc_regmap_config = {
@@ -119,8 +119,8 @@ static inline int s5m8767_wait_for_udr_update(struct s5m_rtc_info *info)
 	unsigned int data;
 
 	do {
-		ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
-	} while (--retry && (data & RTC_UDR_MASK) && !ret);
+		ret = regmap_read(info->regmap, S5M_RTC_UDR_CON, &data);
+	} while (--retry && (data & S5M_RTC_UDR_MASK) && !ret);
 
 	if (!retry)
 		dev_err(info->dev, "waiting for UDR update, reached max number of retries\n");
@@ -133,16 +133,16 @@ static inline int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info)
 	int ret;
 	unsigned int data;
 
-	ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
+	ret = regmap_read(info->regmap, S5M_RTC_UDR_CON, &data);
 	if (ret < 0) {
 		dev_err(info->dev, "failed to read update reg(%d)\n", ret);
 		return ret;
 	}
 
-	data |= RTC_TIME_EN_MASK;
-	data |= RTC_UDR_MASK;
+	data |= S5M_RTC_TIME_EN_MASK;
+	data |= S5M_RTC_UDR_MASK;
 
-	ret = regmap_write(info->regmap, SEC_RTC_UDR_CON, data);
+	ret = regmap_write(info->regmap, S5M_RTC_UDR_CON, data);
 	if (ret < 0) {
 		dev_err(info->dev, "failed to write update reg(%d)\n", ret);
 		return ret;
@@ -158,17 +158,17 @@ static inline int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info)
 	int ret;
 	unsigned int data;
 
-	ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
+	ret = regmap_read(info->regmap, S5M_RTC_UDR_CON, &data);
 	if (ret < 0) {
 		dev_err(info->dev, "%s: fail to read update reg(%d)\n",
 			__func__, ret);
 		return ret;
 	}
 
-	data &= ~RTC_TIME_EN_MASK;
-	data |= RTC_UDR_MASK;
+	data &= ~S5M_RTC_TIME_EN_MASK;
+	data |= S5M_RTC_UDR_MASK;
 
-	ret = regmap_write(info->regmap, SEC_RTC_UDR_CON, data);
+	ret = regmap_write(info->regmap, S5M_RTC_UDR_CON, data);
 	if (ret < 0) {
 		dev_err(info->dev, "%s: fail to write update reg(%d)\n",
 			__func__, ret);
@@ -218,7 +218,7 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	u8 data[8];
 	int ret;
 
-	ret = regmap_bulk_read(info->regmap, SEC_RTC_SEC, data, 8);
+	ret = regmap_bulk_read(info->regmap, S5M_RTC_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
@@ -266,7 +266,7 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
 		tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_wday);
 
-	ret = regmap_raw_write(info->regmap, SEC_RTC_SEC, data, 8);
+	ret = regmap_raw_write(info->regmap, S5M_RTC_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
@@ -282,20 +282,20 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	unsigned int val;
 	int ret, i;
 
-	ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_bulk_read(info->regmap, S5M_ALARM0_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
 	switch (info->device_type) {
 	case S5M8763X:
 		s5m8763_data_to_tm(data, &alrm->time);
-		ret = regmap_read(info->regmap, SEC_ALARM0_CONF, &val);
+		ret = regmap_read(info->regmap, S5M_ALARM0_CONF, &val);
 		if (ret < 0)
 			return ret;
 
 		alrm->enabled = !!val;
 
-		ret = regmap_read(info->regmap, SEC_RTC_STATUS, &val);
+		ret = regmap_read(info->regmap, S5M_RTC_STATUS, &val);
 		if (ret < 0)
 			return ret;
 
@@ -318,7 +318,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 		}
 
 		alrm->pending = 0;
-		ret = regmap_read(info->regmap, SEC_RTC_STATUS, &val);
+		ret = regmap_read(info->regmap, S5M_RTC_STATUS, &val);
 		if (ret < 0)
 			return ret;
 		break;
@@ -327,7 +327,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 		return -EINVAL;
 	}
 
-	if (val & ALARM0_STATUS)
+	if (val & S5M_ALARM0_STATUS)
 		alrm->pending = 1;
 	else
 		alrm->pending = 0;
@@ -341,7 +341,7 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
 	int ret, i;
 	struct rtc_time tm;
 
-	ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_bulk_read(info->regmap, S5M_ALARM0_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
@@ -352,14 +352,14 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
 
 	switch (info->device_type) {
 	case S5M8763X:
-		ret = regmap_write(info->regmap, SEC_ALARM0_CONF, 0);
+		ret = regmap_write(info->regmap, S5M_ALARM0_CONF, 0);
 		break;
 
 	case S5M8767X:
 		for (i = 0; i < 7; i++)
 			data[i] &= ~ALARM_ENABLE_MASK;
 
-		ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+		ret = regmap_raw_write(info->regmap, S5M_ALARM0_SEC, data, 8);
 		if (ret < 0)
 			return ret;
 
@@ -381,7 +381,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 	u8 alarm0_conf;
 	struct rtc_time tm;
 
-	ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_bulk_read(info->regmap, S5M_ALARM0_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
@@ -393,7 +393,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 	switch (info->device_type) {
 	case S5M8763X:
 		alarm0_conf = 0x77;
-		ret = regmap_write(info->regmap, SEC_ALARM0_CONF, alarm0_conf);
+		ret = regmap_write(info->regmap, S5M_ALARM0_CONF, alarm0_conf);
 		break;
 
 	case S5M8767X:
@@ -408,7 +408,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 		if (data[RTC_YEAR1] & 0x7f)
 			data[RTC_YEAR1] |= ALARM_ENABLE_MASK;
 
-		ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+		ret = regmap_raw_write(info->regmap, S5M_ALARM0_SEC, data, 8);
 		if (ret < 0)
 			return ret;
 		ret = s5m8767_rtc_set_alarm_reg(info);
@@ -450,7 +450,7 @@ static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	if (ret < 0)
 		return ret;
 
-	ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_raw_write(info->regmap, S5M_ALARM0_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
@@ -495,7 +495,7 @@ static const struct rtc_class_ops s5m_rtc_ops = {
 static void s5m_rtc_enable_wtsr(struct s5m_rtc_info *info, bool enable)
 {
 	int ret;
-	ret = regmap_update_bits(info->regmap, SEC_WTSR_SMPL_CNTL,
+	ret = regmap_update_bits(info->regmap, S5M_WTSR_SMPL_CNTL,
 				 WTSR_ENABLE_MASK,
 				 enable ? WTSR_ENABLE_MASK : 0);
 	if (ret < 0)
@@ -506,7 +506,7 @@ static void s5m_rtc_enable_wtsr(struct s5m_rtc_info *info, bool enable)
 static void s5m_rtc_enable_smpl(struct s5m_rtc_info *info, bool enable)
 {
 	int ret;
-	ret = regmap_update_bits(info->regmap, SEC_WTSR_SMPL_CNTL,
+	ret = regmap_update_bits(info->regmap, S5M_WTSR_SMPL_CNTL,
 				 SMPL_ENABLE_MASK,
 				 enable ? SMPL_ENABLE_MASK : 0);
 	if (ret < 0)
@@ -521,7 +521,7 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
 	int ret;
 	struct rtc_time tm;
 
-	ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &tp_read);
+	ret = regmap_read(info->regmap, S5M_RTC_UDR_CON, &tp_read);
 	if (ret < 0) {
 		dev_err(info->dev, "%s: fail to read control reg(%d)\n",
 			__func__, ret);
@@ -533,7 +533,7 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
 	data[1] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
 
 	info->rtc_24hr_mode = 1;
-	ret = regmap_raw_write(info->regmap, SEC_ALARM0_CONF, data, 2);
+	ret = regmap_raw_write(info->regmap, S5M_ALARM0_CONF, data, 2);
 	if (ret < 0) {
 		dev_err(info->dev, "%s: fail to write controlm reg(%d)\n",
 			__func__, ret);
@@ -555,7 +555,7 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
 		ret = s5m_rtc_set_time(info->dev, &tm);
 	}
 
-	ret = regmap_update_bits(info->regmap, SEC_RTC_UDR_CON,
+	ret = regmap_update_bits(info->regmap, S5M_RTC_UDR_CON,
 				 RTC_TCON_MASK, tp_read | RTC_TCON_MASK);
 	if (ret < 0)
 		dev_err(info->dev, "%s: fail to update TCON reg(%d)\n",
@@ -676,7 +676,7 @@ static void s5m_rtc_shutdown(struct platform_device *pdev)
 	if (info->wtsr_smpl) {
 		for (i = 0; i < 3; i++) {
 			s5m_rtc_enable_wtsr(info, false);
-			regmap_read(info->regmap, SEC_WTSR_SMPL_CNTL, &val);
+			regmap_read(info->regmap, S5M_WTSR_SMPL_CNTL, &val);
 			pr_debug("%s: WTSR_SMPL reg(0x%02x)\n", __func__, val);
 			if (val & WTSR_ENABLE_MASK)
 				pr_emerg("%s: fail to disable WTSR\n",
diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 3e02b768d537..207fcfbde82e 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h
@@ -18,38 +18,38 @@
 #ifndef __LINUX_MFD_SEC_RTC_H
 #define __LINUX_MFD_SEC_RTC_H
 
-enum sec_rtc_reg {
-	SEC_RTC_SEC,
-	SEC_RTC_MIN,
-	SEC_RTC_HOUR,
-	SEC_RTC_WEEKDAY,
-	SEC_RTC_DATE,
-	SEC_RTC_MONTH,
-	SEC_RTC_YEAR1,
-	SEC_RTC_YEAR2,
-	SEC_ALARM0_SEC,
-	SEC_ALARM0_MIN,
-	SEC_ALARM0_HOUR,
-	SEC_ALARM0_WEEKDAY,
-	SEC_ALARM0_DATE,
-	SEC_ALARM0_MONTH,
-	SEC_ALARM0_YEAR1,
-	SEC_ALARM0_YEAR2,
-	SEC_ALARM1_SEC,
-	SEC_ALARM1_MIN,
-	SEC_ALARM1_HOUR,
-	SEC_ALARM1_WEEKDAY,
-	SEC_ALARM1_DATE,
-	SEC_ALARM1_MONTH,
-	SEC_ALARM1_YEAR1,
-	SEC_ALARM1_YEAR2,
-	SEC_ALARM0_CONF,
-	SEC_ALARM1_CONF,
-	SEC_RTC_STATUS,
-	SEC_WTSR_SMPL_CNTL,
-	SEC_RTC_UDR_CON,
+enum s5m_rtc_reg {
+	S5M_RTC_SEC,
+	S5M_RTC_MIN,
+	S5M_RTC_HOUR,
+	S5M_RTC_WEEKDAY,
+	S5M_RTC_DATE,
+	S5M_RTC_MONTH,
+	S5M_RTC_YEAR1,
+	S5M_RTC_YEAR2,
+	S5M_ALARM0_SEC,
+	S5M_ALARM0_MIN,
+	S5M_ALARM0_HOUR,
+	S5M_ALARM0_WEEKDAY,
+	S5M_ALARM0_DATE,
+	S5M_ALARM0_MONTH,
+	S5M_ALARM0_YEAR1,
+	S5M_ALARM0_YEAR2,
+	S5M_ALARM1_SEC,
+	S5M_ALARM1_MIN,
+	S5M_ALARM1_HOUR,
+	S5M_ALARM1_WEEKDAY,
+	S5M_ALARM1_DATE,
+	S5M_ALARM1_MONTH,
+	S5M_ALARM1_YEAR1,
+	S5M_ALARM1_YEAR2,
+	S5M_ALARM0_CONF,
+	S5M_ALARM1_CONF,
+	S5M_RTC_STATUS,
+	S5M_WTSR_SMPL_CNTL,
+	S5M_RTC_UDR_CON,
 
-	SEC_RTC_REG_MAX,
+	S5M_RTC_REG_MAX,
 };
 
 enum s2mps_rtc_reg {
@@ -88,9 +88,9 @@ enum s2mps_rtc_reg {
 #define HOUR_12			(1 << 7)
 #define HOUR_AMPM		(1 << 6)
 #define HOUR_PM			(1 << 5)
-#define ALARM0_STATUS		(1 << 1)
-#define ALARM1_STATUS		(1 << 2)
-#define UPDATE_AD		(1 << 0)
+#define S5M_ALARM0_STATUS	(1 << 1)
+#define S5M_ALARM1_STATUS	(1 << 2)
+#define S5M_UPDATE_AD		(1 << 0)
 
 #define S2MPS_ALARM0_STATUS	(1 << 2)
 #define S2MPS_ALARM1_STATUS	(1 << 1)
@@ -101,16 +101,16 @@ enum s2mps_rtc_reg {
 #define MODEL24_SHIFT		1
 #define MODEL24_MASK		(1 << MODEL24_SHIFT)
 /* RTC Update Register1 */
-#define RTC_UDR_SHIFT		0
-#define RTC_UDR_MASK		(1 << RTC_UDR_SHIFT)
+#define S5M_RTC_UDR_SHIFT	0
+#define S5M_RTC_UDR_MASK	(1 << S5M_RTC_UDR_SHIFT)
 #define S2MPS_RTC_WUDR_SHIFT	4
 #define S2MPS_RTC_WUDR_MASK	(1 << S2MPS_RTC_WUDR_SHIFT)
 #define S2MPS_RTC_RUDR_SHIFT	0
 #define S2MPS_RTC_RUDR_MASK	(1 << S2MPS_RTC_RUDR_SHIFT)
 #define RTC_TCON_SHIFT		1
 #define RTC_TCON_MASK		(1 << RTC_TCON_SHIFT)
-#define RTC_TIME_EN_SHIFT	3
-#define RTC_TIME_EN_MASK	(1 << RTC_TIME_EN_SHIFT)
+#define S5M_RTC_TIME_EN_SHIFT	3
+#define S5M_RTC_TIME_EN_MASK	(1 << S5M_RTC_TIME_EN_SHIFT)
 
 /* RTC Hour register */
 #define HOUR_PM_SHIFT		6
-- 
cgit v1.2.3-71-gd317


From 0c5f5d9af311013aabc519b68df19533d0d51cda Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Tue, 10 Jun 2014 15:18:43 -0700
Subject: rtc: s5m: use shorter time of register update

Set the time needed for updating alarm and time registers to 0.45 ms.
The default is 7.32 ms which is too long and leads to warnings when
setting alarm or time:

	s5m-rtc: waiting for UDR update, reached max number of retries

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Sangbeom Kim <sbkim73@samsung.com>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-s5m.c           |  7 +++++++
 include/linux/mfd/samsung/rtc.h | 10 ++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 22137d4dbadf..3751ef90f93c 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -519,6 +519,13 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
 	u8 data[2];
 	int ret;
 
+	/* UDR update time. Default of 7.32 ms is too long. */
+	ret = regmap_update_bits(info->regmap, S5M_RTC_UDR_CON,
+			S5M_RTC_UDR_T_MASK, S5M_RTC_UDR_T_450_US);
+	if (ret < 0)
+		dev_err(info->dev, "%s: fail to change UDR time: %d\n",
+				__func__, ret);
+
 	/* Set RTC control register : Binary mode, 24hour mode */
 	data[0] = (1 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
 	data[1] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 207fcfbde82e..b6401e7661c7 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h
@@ -111,6 +111,16 @@ enum s2mps_rtc_reg {
 #define RTC_TCON_MASK		(1 << RTC_TCON_SHIFT)
 #define S5M_RTC_TIME_EN_SHIFT	3
 #define S5M_RTC_TIME_EN_MASK	(1 << S5M_RTC_TIME_EN_SHIFT)
+/*
+ * UDR_T field in S5M_RTC_UDR_CON register determines the time needed
+ * for updating alarm and time registers. Default is 7.32 ms.
+ */
+#define S5M_RTC_UDR_T_SHIFT	6
+#define S5M_RTC_UDR_T_MASK	(0x3 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_7320_US	(0x0 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_1830_US	(0x1 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_3660_US	(0x2 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_450_US	(0x3 << S5M_RTC_UDR_T_SHIFT)
 
 /* RTC Hour register */
 #define HOUR_PM_SHIFT		6
-- 
cgit v1.2.3-71-gd317


From e8bca479c3f269ebb3a3acea5ef63314bb677060 Mon Sep 17 00:00:00 2001
From: Todd E Brandt <todd.e.brandt@linux.intel.com>
Date: Tue, 10 Jun 2014 07:31:22 -0700
Subject: PM / sleep: trace events for device PM callbacks

Adds two trace events which supply the same info that initcall_debug
provides, but via ftrace instead of dmesg. The existing initcall_debug
calls require the pm_print_times_enabled var to be set (either via
sysfs or via the kernel cmd line). The new trace events provide all the
same info as the initcall_debug prints but with less overhead, and also
with coverage of device prepare and complete device callbacks.

These events replace the device_pm_report_time event (which has been
removed). device_pm_callback_start is called first and provides the device
and callback info. device_pm_callback_end is called after with the
device name and error info. The time and pid are gathered from the trace
data headers.

Signed-off-by: Todd Brandt <todd.e.brandt@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/main.c    | 14 ++++++++---
 include/trace/events/power.h | 60 +++++++++++++++++++++++++++++++-------------
 2 files changed, 52 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index de3fe4fe350a..bf412961a934 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -214,9 +214,6 @@ static void initcall_debug_report(struct device *dev, ktime_t calltime,
 		pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev),
 			error, (unsigned long long)nsecs >> 10);
 	}
-
-	trace_device_pm_report_time(dev, info, nsecs, pm_verb(state.event),
-				    error);
 }
 
 /**
@@ -387,7 +384,9 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev,
 	calltime = initcall_debug_start(dev);
 
 	pm_dev_dbg(dev, state, info);
+	trace_device_pm_callback_start(dev, info, state.event);
 	error = cb(dev);
+	trace_device_pm_callback_end(dev, error);
 	suspend_report_result(cb, error);
 
 	initcall_debug_report(dev, calltime, error, state, info);
@@ -919,7 +918,9 @@ static void device_complete(struct device *dev, pm_message_t state)
 
 	if (callback) {
 		pm_dev_dbg(dev, state, info);
+		trace_device_pm_callback_start(dev, info, state.event);
 		callback(dev);
+		trace_device_pm_callback_end(dev, 0);
 	}
 
 	device_unlock(dev);
@@ -1307,7 +1308,9 @@ static int legacy_suspend(struct device *dev, pm_message_t state,
 
 	calltime = initcall_debug_start(dev);
 
+	trace_device_pm_callback_start(dev, info, state.event);
 	error = cb(dev, state);
+	trace_device_pm_callback_end(dev, error);
 	suspend_report_result(cb, error);
 
 	initcall_debug_report(dev, calltime, error, state, info);
@@ -1563,8 +1566,11 @@ static int device_prepare(struct device *dev, pm_message_t state)
 		callback = dev->driver->pm->prepare;
 	}
 
-	if (callback)
+	if (callback) {
+		trace_device_pm_callback_start(dev, info, state.event);
 		ret = callback(dev);
+		trace_device_pm_callback_end(dev, ret);
+	}
 
 	device_unlock(dev);
 
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index f88c8573e66c..d19840b0cac8 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -93,6 +93,17 @@ TRACE_EVENT(pstate_sample,
 #define PWR_EVENT_EXIT -1
 #endif
 
+#define pm_verb_symbolic(event) \
+	__print_symbolic(event, \
+		{ PM_EVENT_SUSPEND, "suspend" }, \
+		{ PM_EVENT_RESUME, "resume" }, \
+		{ PM_EVENT_FREEZE, "freeze" }, \
+		{ PM_EVENT_QUIESCE, "quiesce" }, \
+		{ PM_EVENT_HIBERNATE, "hibernate" }, \
+		{ PM_EVENT_THAW, "thaw" }, \
+		{ PM_EVENT_RESTORE, "restore" }, \
+		{ PM_EVENT_RECOVER, "recover" })
+
 DEFINE_EVENT(cpu, cpu_frequency,
 
 	TP_PROTO(unsigned int frequency, unsigned int cpu_id),
@@ -100,41 +111,54 @@ DEFINE_EVENT(cpu, cpu_frequency,
 	TP_ARGS(frequency, cpu_id)
 );
 
-TRACE_EVENT(device_pm_report_time,
+TRACE_EVENT(device_pm_callback_start,
 
-	TP_PROTO(struct device *dev, const char *pm_ops, s64 ops_time,
-		 char *pm_event_str, int error),
+	TP_PROTO(struct device *dev, const char *pm_ops, int event),
 
-	TP_ARGS(dev, pm_ops, ops_time, pm_event_str, error),
+	TP_ARGS(dev, pm_ops, event),
 
 	TP_STRUCT__entry(
 		__string(device, dev_name(dev))
 		__string(driver, dev_driver_string(dev))
 		__string(parent, dev->parent ? dev_name(dev->parent) : "none")
 		__string(pm_ops, pm_ops ? pm_ops : "none ")
-		__string(pm_event_str, pm_event_str)
-		__field(s64, ops_time)
-		__field(int, error)
+		__field(int, event)
 	),
 
 	TP_fast_assign(
-		const char *tmp = dev->parent ? dev_name(dev->parent) : "none";
-		const char *tmp_i = pm_ops ? pm_ops : "none ";
+		__assign_str(device, dev_name(dev));
+		__assign_str(driver, dev_driver_string(dev));
+		__assign_str(parent,
+			dev->parent ? dev_name(dev->parent) : "none");
+		__assign_str(pm_ops, pm_ops ? pm_ops : "none ");
+		__entry->event = event;
+	),
+
+	TP_printk("%s %s, parent: %s, %s[%s]", __get_str(driver),
+		__get_str(device), __get_str(parent), __get_str(pm_ops),
+		pm_verb_symbolic(__entry->event))
+);
+
+TRACE_EVENT(device_pm_callback_end,
+
+	TP_PROTO(struct device *dev, int error),
 
+	TP_ARGS(dev, error),
+
+	TP_STRUCT__entry(
+		__string(device, dev_name(dev))
+		__string(driver, dev_driver_string(dev))
+		__field(int, error)
+	),
+
+	TP_fast_assign(
 		__assign_str(device, dev_name(dev));
 		__assign_str(driver, dev_driver_string(dev));
-		__assign_str(parent, tmp);
-		__assign_str(pm_ops, tmp_i);
-		__assign_str(pm_event_str, pm_event_str);
-		__entry->ops_time = ops_time;
 		__entry->error = error;
 	),
 
-	/* ops_str has an extra space at the end */
-	TP_printk("%s %s parent=%s state=%s ops=%snsecs=%lld err=%d",
-		__get_str(driver), __get_str(device), __get_str(parent),
-		__get_str(pm_event_str), __get_str(pm_ops),
-		__entry->ops_time, __entry->error)
+	TP_printk("%s %s, err=%d",
+		__get_str(driver), __get_str(device), __entry->error)
 );
 
 TRACE_EVENT(suspend_resume,
-- 
cgit v1.2.3-71-gd317


From 6cc55e096f1f2a8585cf8dc9049862f2376f66d4 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <octavian.purdila@intel.com>
Date: Fri, 6 Jun 2014 17:32:37 +0300
Subject: tcp: add gfp parameter to tcp_fragment

tcp_fragment can be called from process context (from tso_fragment).
Add a new gfp parameter to allow it to preserve atomic memory if
possible.

Signed-off-by: Octavian Purdila <octavian.purdila@intel.com>
Reviewed-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  2 +-
 net/ipv4/tcp_input.c  |  5 +++--
 net/ipv4/tcp_output.c | 15 ++++++++-------
 3 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index e80abe4486cb..7286db80e8b8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -535,7 +535,7 @@ void tcp_retransmit_timer(struct sock *sk);
 void tcp_xmit_retransmit_queue(struct sock *);
 void tcp_simple_retransmit(struct sock *);
 int tcp_trim_head(struct sock *, struct sk_buff *, u32);
-int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int);
+int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);
 
 void tcp_send_probe0(struct sock *);
 void tcp_send_partial(struct sock *);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 931529d5daa2..40661fc1e233 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1167,7 +1167,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 			}
 			pkt_len = new_len;
 		}
-		err = tcp_fragment(sk, skb, pkt_len, mss);
+		err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
 		if (err < 0)
 			return err;
 	}
@@ -2241,7 +2241,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 				break;
 
 			mss = skb_shinfo(skb)->gso_size;
-			err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
+			err = tcp_fragment(sk, skb, (packets - oldcnt) * mss,
+					   mss, GFP_ATOMIC);
 			if (err < 0)
 				break;
 			cnt = packets;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d463c35db33d..ad7549f1d0ad 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1074,7 +1074,7 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
  * Remember, these are still headerless SKBs at this point.
  */
 int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
-		 unsigned int mss_now)
+		 unsigned int mss_now, gfp_t gfp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
@@ -1089,11 +1089,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	if (nsize < 0)
 		nsize = 0;
 
-	if (skb_unclone(skb, GFP_ATOMIC))
+	if (skb_unclone(skb, gfp))
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
-	buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
+	buff = sk_stream_alloc_skb(sk, nsize, gfp);
 	if (buff == NULL)
 		return -ENOMEM; /* We'll just try again later. */
 
@@ -1625,7 +1625,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* All of a TSO frame must be composed of paged data.  */
 	if (skb->len != skb->data_len)
-		return tcp_fragment(sk, skb, len, mss_now);
+		return tcp_fragment(sk, skb, len, mss_now, gfp);
 
 	buff = sk_stream_alloc_skb(sk, 0, gfp);
 	if (unlikely(buff == NULL))
@@ -2122,7 +2122,8 @@ void tcp_send_loss_probe(struct sock *sk)
 		goto rearm_timer;
 
 	if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
-		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
+		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+					  GFP_ATOMIC)))
 			goto rearm_timer;
 		skb = tcp_write_queue_tail(sk);
 	}
@@ -2463,7 +2464,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		return -EAGAIN;
 
 	if (skb->len > cur_mss) {
-		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
+		if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC))
 			return -ENOMEM; /* We'll try again later. */
 	} else {
 		int oldpcount = tcp_skb_pcount(skb);
@@ -3244,7 +3245,7 @@ int tcp_write_wakeup(struct sock *sk)
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
 			TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
-			if (tcp_fragment(sk, skb, seg_size, mss))
+			if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
 				return -1;
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(sk, skb, mss);
-- 
cgit v1.2.3-71-gd317


From 07f8ac4a1e26e8283542cdaf658a6e2a12fd6980 Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@web.de>
Date: Sat, 7 Jun 2014 18:26:28 +0200
Subject: bridge: add export of multicast database adjacent to net_dev
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With this new, exported function br_multicast_list_adjacent(net_dev) a
list of IPv4/6 addresses is returned. This list contains all multicast
addresses sensed by the bridge multicast snooping feature on all bridge
ports of the bridge interface of net_dev, excluding addresses from the
specified net_device itself.

Adding bridge support to the batman-adv multicast optimization requires
batman-adv knowing about the existence of bridged-in multicast
listeners to be able to reliably serve them with multicast packets.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 18 +++++++++++++++
 net/bridge/br_multicast.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_private.h   | 12 ----------
 3 files changed, 76 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 1085ffeef956..44d6eb0eb852 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -16,9 +16,27 @@
 #include <linux/netdevice.h>
 #include <uapi/linux/if_bridge.h>
 
+struct br_ip {
+	union {
+		__be32	ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct in6_addr ip6;
+#endif
+	} u;
+	__be16		proto;
+	__u16           vid;
+};
+
+struct br_ip_list {
+	struct list_head list;
+	struct br_ip addr;
+};
+
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
 
 typedef int br_should_route_hook_t(struct sk_buff *skb);
 extern br_should_route_hook_t __rcu *br_should_route_hook;
+int br_multicast_list_adjacent(struct net_device *dev,
+			       struct list_head *br_ip_list);
 
 #endif
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b3f17c9b4d06..772476b7c4b7 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/export.h>
 #include <linux/if_ether.h>
 #include <linux/igmp.h>
 #include <linux/jhash.h>
@@ -2141,3 +2142,60 @@ unlock:
 
 	return err;
 }
+
+/**
+ * br_multicast_list_adjacent - Returns snooped multicast addresses
+ * @dev:	The bridge port adjacent to which to retrieve addresses
+ * @br_ip_list:	The list to store found, snooped multicast IP addresses in
+ *
+ * Creates a list of IP addresses (struct br_ip_list) sensed by the multicast
+ * snooping feature on all bridge ports of dev's bridge device, excluding
+ * the addresses from dev itself.
+ *
+ * Returns the number of items added to br_ip_list.
+ *
+ * Notes:
+ * - br_ip_list needs to be initialized by caller
+ * - br_ip_list might contain duplicates in the end
+ *   (needs to be taken care of by caller)
+ * - br_ip_list needs to be freed by caller
+ */
+int br_multicast_list_adjacent(struct net_device *dev,
+			       struct list_head *br_ip_list)
+{
+	struct net_bridge *br;
+	struct net_bridge_port *port;
+	struct net_bridge_port_group *group;
+	struct br_ip_list *entry;
+	int count = 0;
+
+	rcu_read_lock();
+	if (!br_ip_list || !br_port_exists(dev))
+		goto unlock;
+
+	port = br_port_get_rcu(dev);
+	if (!port || !port->br)
+		goto unlock;
+
+	br = port->br;
+
+	list_for_each_entry_rcu(port, &br->port_list, list) {
+		if (!port->dev || port->dev == dev)
+			continue;
+
+		hlist_for_each_entry_rcu(group, &port->mglist, mglist) {
+			entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+			if (!entry)
+				goto unlock;
+
+			entry->addr = group->addr;
+			list_add(&entry->list, br_ip_list);
+			count++;
+		}
+	}
+
+unlock:
+	rcu_read_unlock();
+	return count;
+}
+EXPORT_SYMBOL_GPL(br_multicast_list_adjacent);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 97c5e46dde72..50e2ab021484 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -54,18 +54,6 @@ struct mac_addr
 	unsigned char	addr[ETH_ALEN];
 };
 
-struct br_ip
-{
-	union {
-		__be32	ip4;
-#if IS_ENABLED(CONFIG_IPV6)
-		struct in6_addr ip6;
-#endif
-	} u;
-	__be16		proto;
-	__u16		vid;
-};
-
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 /* our own querier */
 struct bridge_mcast_own_query {
-- 
cgit v1.2.3-71-gd317


From 2cd4143192e8c60f66cb32c3a30c76d0470a372d Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@web.de>
Date: Sat, 7 Jun 2014 18:26:29 +0200
Subject: bridge: memorize and export selected IGMP/MLD querier port
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adding bridge support to the batman-adv multicast optimization requires
batman-adv knowing about the existence of bridged-in IGMP/MLD queriers
to be able to reliably serve any multicast listener behind this same
bridge.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h |  1 +
 net/bridge/br_multicast.c | 72 +++++++++++++++++++++++++++++++++++++++++++----
 net/bridge/br_private.h   |  1 +
 3 files changed, 68 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 44d6eb0eb852..fd22789d7b2e 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -38,5 +38,6 @@ typedef int br_should_route_hook_t(struct sk_buff *skb);
 extern br_should_route_hook_t __rcu *br_should_route_hook;
 int br_multicast_list_adjacent(struct net_device *dev,
 			       struct list_head *br_ip_list);
+bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
 
 #endif
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 772476b7c4b7..cd3cf394c477 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1081,6 +1081,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 #endif
 
 static bool br_ip4_multicast_select_querier(struct net_bridge *br,
+					    struct net_bridge_port *port,
 					    __be32 saddr)
 {
 	if (!timer_pending(&br->ip4_own_query.timer) &&
@@ -1098,11 +1099,15 @@ static bool br_ip4_multicast_select_querier(struct net_bridge *br,
 update:
 	br->ip4_querier.addr.u.ip4 = saddr;
 
+	/* update protected by general multicast_lock by caller */
+	rcu_assign_pointer(br->ip4_querier.port, port);
+
 	return true;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static bool br_ip6_multicast_select_querier(struct net_bridge *br,
+					    struct net_bridge_port *port,
 					    struct in6_addr *saddr)
 {
 	if (!timer_pending(&br->ip6_own_query.timer) &&
@@ -1117,19 +1122,23 @@ static bool br_ip6_multicast_select_querier(struct net_bridge *br,
 update:
 	br->ip6_querier.addr.u.ip6 = *saddr;
 
+	/* update protected by general multicast_lock by caller */
+	rcu_assign_pointer(br->ip6_querier.port, port);
+
 	return true;
 }
 #endif
 
 static bool br_multicast_select_querier(struct net_bridge *br,
+					struct net_bridge_port *port,
 					struct br_ip *saddr)
 {
 	switch (saddr->proto) {
 	case htons(ETH_P_IP):
-		return br_ip4_multicast_select_querier(br, saddr->u.ip4);
+		return br_ip4_multicast_select_querier(br, port, saddr->u.ip4);
 #if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
-		return br_ip6_multicast_select_querier(br, &saddr->u.ip6);
+		return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6);
 #endif
 	}
 
@@ -1201,7 +1210,7 @@ static void br_multicast_query_received(struct net_bridge *br,
 					struct br_ip *saddr,
 					unsigned long max_delay)
 {
-	if (!br_multicast_select_querier(br, saddr))
+	if (!br_multicast_select_querier(br, port, saddr))
 		return;
 
 	br_multicast_update_query_timer(br, query, max_delay);
@@ -1804,12 +1813,14 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 }
 
 static void br_multicast_query_expired(struct net_bridge *br,
-				       struct bridge_mcast_own_query *query)
+				       struct bridge_mcast_own_query *query,
+				       struct bridge_mcast_querier *querier)
 {
 	spin_lock(&br->multicast_lock);
 	if (query->startup_sent < br->multicast_startup_query_count)
 		query->startup_sent++;
 
+	rcu_assign_pointer(querier, NULL);
 	br_multicast_send_query(br, NULL, query);
 	spin_unlock(&br->multicast_lock);
 }
@@ -1818,7 +1829,7 @@ static void br_ip4_multicast_query_expired(unsigned long data)
 {
 	struct net_bridge *br = (void *)data;
 
-	br_multicast_query_expired(br, &br->ip4_own_query);
+	br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1826,7 +1837,7 @@ static void br_ip6_multicast_query_expired(unsigned long data)
 {
 	struct net_bridge *br = (void *)data;
 
-	br_multicast_query_expired(br, &br->ip6_own_query);
+	br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
 }
 #endif
 
@@ -1849,8 +1860,10 @@ void br_multicast_init(struct net_bridge *br)
 	br->multicast_membership_interval = 260 * HZ;
 
 	br->ip4_other_query.delay_time = 0;
+	br->ip4_querier.port = NULL;
 #if IS_ENABLED(CONFIG_IPV6)
 	br->ip6_other_query.delay_time = 0;
+	br->ip6_querier.port = NULL;
 #endif
 
 	spin_lock_init(&br->multicast_lock);
@@ -2199,3 +2212,50 @@ unlock:
 	return count;
 }
 EXPORT_SYMBOL_GPL(br_multicast_list_adjacent);
+
+/**
+ * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port
+ * @dev: The bridge port adjacent to which to check for a querier
+ * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6
+ *
+ * Checks whether the given interface has a bridge on top and if so returns
+ * true if a selected querier is behind one of the other ports of this
+ * bridge. Otherwise returns false.
+ */
+bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
+{
+	struct net_bridge *br;
+	struct net_bridge_port *port;
+	bool ret = false;
+
+	rcu_read_lock();
+	if (!br_port_exists(dev))
+		goto unlock;
+
+	port = br_port_get_rcu(dev);
+	if (!port || !port->br)
+		goto unlock;
+
+	br = port->br;
+
+	switch (proto) {
+	case ETH_P_IP:
+		if (!timer_pending(&br->ip4_other_query.timer) ||
+		    rcu_dereference(br->ip4_querier.port) == port)
+			goto unlock;
+		break;
+	case ETH_P_IPV6:
+		if (!timer_pending(&br->ip6_other_query.timer) ||
+		    rcu_dereference(br->ip6_querier.port) == port)
+			goto unlock;
+		break;
+	default:
+		goto unlock;
+	}
+
+	ret = true;
+unlock:
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 50e2ab021484..8346e9504cdb 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -70,6 +70,7 @@ struct bridge_mcast_other_query {
 /* selected querier */
 struct bridge_mcast_querier {
 	struct br_ip addr;
+	struct net_bridge_port __rcu	*port;
 };
 #endif
 
-- 
cgit v1.2.3-71-gd317


From e430f34ee5192c84bcabd3c79ab7e2388b5eec74 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 6 Jun 2014 14:46:06 -0700
Subject: net: filter: cleanup A/X name usage

The macro 'A' used in internal BPF interpreter:
 #define A regs[insn->a_reg]
was easily confused with the name of classic BPF register 'A', since
'A' would mean two different things depending on context.

This patch is trying to clean up the naming and clarify its usage in the
following way:

- A and X are names of two classic BPF registers

- BPF_REG_A denotes internal BPF register R0 used to map classic register A
  in internal BPF programs generated from classic

- BPF_REG_X denotes internal BPF register R7 used to map classic register X
  in internal BPF programs generated from classic

- internal BPF instruction format:
struct sock_filter_int {
        __u8    code;           /* opcode */
        __u8    dst_reg:4;      /* dest register */
        __u8    src_reg:4;      /* source register */
        __s16   off;            /* signed offset */
        __s32   imm;            /* signed immediate constant */
};

- BPF_X/BPF_K is 1 bit used to encode source operand of instruction
In classic:
  BPF_X - means use register X as source operand
  BPF_K - means use 32-bit immediate as source operand
In internal:
  BPF_X - means use 'src_reg' register as source operand
  BPF_K - means use 32-bit immediate as source operand

Suggested-by: Chema Gonzalez <chema@google.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Chema Gonzalez <chema@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/filter.txt |   2 +-
 arch/x86/net/bpf_jit_comp.c         | 260 ++++++++++++++++++------------------
 include/linux/filter.h              | 156 ++++++++++++----------
 net/core/filter.c                   | 198 +++++++++++++--------------
 4 files changed, 314 insertions(+), 302 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 58c443926647..9f49b8690500 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -805,7 +805,7 @@ to seccomp_data, for converted BPF filters R1 points to a skb.
 
 A program, that is translated internally consists of the following elements:
 
-  op:16, jt:8, jf:8, k:32    ==>    op:8, a_reg:4, x_reg:4, off:16, imm:32
+  op:16, jt:8, jf:8, k:32    ==>    op:8, dst_reg:4, src_reg:4, off:16, imm:32
 
 So far 87 internal BPF instructions were implemented. 8-bit 'op' opcode field
 has room for new instructions. Some of them may use 16/24/32 byte encoding. New
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 080f3f071bb0..99bef86ed6df 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -64,10 +64,10 @@ static inline bool is_simm32(s64 value)
 	return value == (s64) (s32) value;
 }
 
-/* mov A, X */
-#define EMIT_mov(A, X) \
-	do {if (A != X) \
-		EMIT3(add_2mod(0x48, A, X), 0x89, add_2reg(0xC0, A, X)); \
+/* mov dst, src */
+#define EMIT_mov(DST, SRC) \
+	do {if (DST != SRC) \
+		EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
 	} while (0)
 
 static int bpf_size_to_x86_bytes(int bpf_size)
@@ -194,16 +194,16 @@ static inline u8 add_2mod(u8 byte, u32 r1, u32 r2)
 	return byte;
 }
 
-/* encode dest register 'a_reg' into x64 opcode 'byte' */
-static inline u8 add_1reg(u8 byte, u32 a_reg)
+/* encode 'dst_reg' register into x64 opcode 'byte' */
+static inline u8 add_1reg(u8 byte, u32 dst_reg)
 {
-	return byte + reg2hex[a_reg];
+	return byte + reg2hex[dst_reg];
 }
 
-/* encode dest 'a_reg' and src 'x_reg' registers into x64 opcode 'byte' */
-static inline u8 add_2reg(u8 byte, u32 a_reg, u32 x_reg)
+/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */
+static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
 {
-	return byte + reg2hex[a_reg] + (reg2hex[x_reg] << 3);
+	return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
 }
 
 struct jit_context {
@@ -286,9 +286,9 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 	}
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
-		const s32 K = insn->imm;
-		u32 a_reg = insn->a_reg;
-		u32 x_reg = insn->x_reg;
+		const s32 imm32 = insn->imm;
+		u32 dst_reg = insn->dst_reg;
+		u32 src_reg = insn->src_reg;
 		u8 b1 = 0, b2 = 0, b3 = 0;
 		s64 jmp_offset;
 		u8 jmp_cond;
@@ -315,32 +315,32 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			case BPF_XOR: b2 = 0x31; break;
 			}
 			if (BPF_CLASS(insn->code) == BPF_ALU64)
-				EMIT1(add_2mod(0x48, a_reg, x_reg));
-			else if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT1(add_2mod(0x40, a_reg, x_reg));
-			EMIT2(b2, add_2reg(0xC0, a_reg, x_reg));
+				EMIT1(add_2mod(0x48, dst_reg, src_reg));
+			else if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT1(add_2mod(0x40, dst_reg, src_reg));
+			EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
 			break;
 
-			/* mov A, X */
+			/* mov dst, src */
 		case BPF_ALU64 | BPF_MOV | BPF_X:
-			EMIT_mov(a_reg, x_reg);
+			EMIT_mov(dst_reg, src_reg);
 			break;
 
-			/* mov32 A, X */
+			/* mov32 dst, src */
 		case BPF_ALU | BPF_MOV | BPF_X:
-			if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT1(add_2mod(0x40, a_reg, x_reg));
-			EMIT2(0x89, add_2reg(0xC0, a_reg, x_reg));
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT1(add_2mod(0x40, dst_reg, src_reg));
+			EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
 			break;
 
-			/* neg A */
+			/* neg dst */
 		case BPF_ALU | BPF_NEG:
 		case BPF_ALU64 | BPF_NEG:
 			if (BPF_CLASS(insn->code) == BPF_ALU64)
-				EMIT1(add_1mod(0x48, a_reg));
-			else if (is_ereg(a_reg))
-				EMIT1(add_1mod(0x40, a_reg));
-			EMIT2(0xF7, add_1reg(0xD8, a_reg));
+				EMIT1(add_1mod(0x48, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
+			EMIT2(0xF7, add_1reg(0xD8, dst_reg));
 			break;
 
 		case BPF_ALU | BPF_ADD | BPF_K:
@@ -354,9 +354,9 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 		case BPF_ALU64 | BPF_OR | BPF_K:
 		case BPF_ALU64 | BPF_XOR | BPF_K:
 			if (BPF_CLASS(insn->code) == BPF_ALU64)
-				EMIT1(add_1mod(0x48, a_reg));
-			else if (is_ereg(a_reg))
-				EMIT1(add_1mod(0x40, a_reg));
+				EMIT1(add_1mod(0x48, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
 
 			switch (BPF_OP(insn->code)) {
 			case BPF_ADD: b3 = 0xC0; break;
@@ -366,10 +366,10 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			case BPF_XOR: b3 = 0xF0; break;
 			}
 
-			if (is_imm8(K))
-				EMIT3(0x83, add_1reg(b3, a_reg), K);
+			if (is_imm8(imm32))
+				EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
 			else
-				EMIT2_off32(0x81, add_1reg(b3, a_reg), K);
+				EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
 			break;
 
 		case BPF_ALU64 | BPF_MOV | BPF_K:
@@ -377,23 +377,23 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			 * use 'mov eax, imm32' (which zero-extends imm32)
 			 * to save 2 bytes
 			 */
-			if (K < 0) {
+			if (imm32 < 0) {
 				/* 'mov rax, imm32' sign extends imm32 */
-				b1 = add_1mod(0x48, a_reg);
+				b1 = add_1mod(0x48, dst_reg);
 				b2 = 0xC7;
 				b3 = 0xC0;
-				EMIT3_off32(b1, b2, add_1reg(b3, a_reg), K);
+				EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
 				break;
 			}
 
 		case BPF_ALU | BPF_MOV | BPF_K:
 			/* mov %eax, imm32 */
-			if (is_ereg(a_reg))
-				EMIT1(add_1mod(0x40, a_reg));
-			EMIT1_off32(add_1reg(0xB8, a_reg), K);
+			if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
+			EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
 			break;
 
-			/* A %= X, A /= X, A %= K, A /= K */
+			/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
 		case BPF_ALU | BPF_MOD | BPF_X:
 		case BPF_ALU | BPF_DIV | BPF_X:
 		case BPF_ALU | BPF_MOD | BPF_K:
@@ -406,14 +406,14 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			EMIT1(0x52); /* push rdx */
 
 			if (BPF_SRC(insn->code) == BPF_X)
-				/* mov r11, X */
-				EMIT_mov(AUX_REG, x_reg);
+				/* mov r11, src_reg */
+				EMIT_mov(AUX_REG, src_reg);
 			else
-				/* mov r11, K */
-				EMIT3_off32(0x49, 0xC7, 0xC3, K);
+				/* mov r11, imm32 */
+				EMIT3_off32(0x49, 0xC7, 0xC3, imm32);
 
-			/* mov rax, A */
-			EMIT_mov(BPF_REG_0, a_reg);
+			/* mov rax, dst_reg */
+			EMIT_mov(BPF_REG_0, dst_reg);
 
 			/* xor edx, edx
 			 * equivalent to 'xor rdx, rdx', but one byte less
@@ -421,7 +421,7 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			EMIT2(0x31, 0xd2);
 
 			if (BPF_SRC(insn->code) == BPF_X) {
-				/* if (X == 0) return 0 */
+				/* if (src_reg == 0) return 0 */
 
 				/* cmp r11, 0 */
 				EMIT4(0x49, 0x83, 0xFB, 0x00);
@@ -457,8 +457,8 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			EMIT1(0x5A); /* pop rdx */
 			EMIT1(0x58); /* pop rax */
 
-			/* mov A, r11 */
-			EMIT_mov(a_reg, AUX_REG);
+			/* mov dst_reg, r11 */
+			EMIT_mov(dst_reg, AUX_REG);
 			break;
 
 		case BPF_ALU | BPF_MUL | BPF_K:
@@ -468,15 +468,15 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			EMIT1(0x50); /* push rax */
 			EMIT1(0x52); /* push rdx */
 
-			/* mov r11, A */
-			EMIT_mov(AUX_REG, a_reg);
+			/* mov r11, dst_reg */
+			EMIT_mov(AUX_REG, dst_reg);
 
 			if (BPF_SRC(insn->code) == BPF_X)
-				/* mov rax, X */
-				EMIT_mov(BPF_REG_0, x_reg);
+				/* mov rax, src_reg */
+				EMIT_mov(BPF_REG_0, src_reg);
 			else
-				/* mov rax, K */
-				EMIT3_off32(0x48, 0xC7, 0xC0, K);
+				/* mov rax, imm32 */
+				EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
 
 			if (BPF_CLASS(insn->code) == BPF_ALU64)
 				EMIT1(add_1mod(0x48, AUX_REG));
@@ -491,8 +491,8 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			EMIT1(0x5A); /* pop rdx */
 			EMIT1(0x58); /* pop rax */
 
-			/* mov A, r11 */
-			EMIT_mov(a_reg, AUX_REG);
+			/* mov dst_reg, r11 */
+			EMIT_mov(dst_reg, AUX_REG);
 			break;
 
 			/* shifts */
@@ -503,39 +503,39 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 		case BPF_ALU64 | BPF_RSH | BPF_K:
 		case BPF_ALU64 | BPF_ARSH | BPF_K:
 			if (BPF_CLASS(insn->code) == BPF_ALU64)
-				EMIT1(add_1mod(0x48, a_reg));
-			else if (is_ereg(a_reg))
-				EMIT1(add_1mod(0x40, a_reg));
+				EMIT1(add_1mod(0x48, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
 
 			switch (BPF_OP(insn->code)) {
 			case BPF_LSH: b3 = 0xE0; break;
 			case BPF_RSH: b3 = 0xE8; break;
 			case BPF_ARSH: b3 = 0xF8; break;
 			}
-			EMIT3(0xC1, add_1reg(b3, a_reg), K);
+			EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
 			break;
 
 		case BPF_ALU | BPF_END | BPF_FROM_BE:
-			switch (K) {
+			switch (imm32) {
 			case 16:
 				/* emit 'ror %ax, 8' to swap lower 2 bytes */
 				EMIT1(0x66);
-				if (is_ereg(a_reg))
+				if (is_ereg(dst_reg))
 					EMIT1(0x41);
-				EMIT3(0xC1, add_1reg(0xC8, a_reg), 8);
+				EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
 				break;
 			case 32:
 				/* emit 'bswap eax' to swap lower 4 bytes */
-				if (is_ereg(a_reg))
+				if (is_ereg(dst_reg))
 					EMIT2(0x41, 0x0F);
 				else
 					EMIT1(0x0F);
-				EMIT1(add_1reg(0xC8, a_reg));
+				EMIT1(add_1reg(0xC8, dst_reg));
 				break;
 			case 64:
 				/* emit 'bswap rax' to swap 8 bytes */
-				EMIT3(add_1mod(0x48, a_reg), 0x0F,
-				      add_1reg(0xC8, a_reg));
+				EMIT3(add_1mod(0x48, dst_reg), 0x0F,
+				      add_1reg(0xC8, dst_reg));
 				break;
 			}
 			break;
@@ -543,117 +543,117 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 		case BPF_ALU | BPF_END | BPF_FROM_LE:
 			break;
 
-			/* ST: *(u8*)(a_reg + off) = imm */
+			/* ST: *(u8*)(dst_reg + off) = imm */
 		case BPF_ST | BPF_MEM | BPF_B:
-			if (is_ereg(a_reg))
+			if (is_ereg(dst_reg))
 				EMIT2(0x41, 0xC6);
 			else
 				EMIT1(0xC6);
 			goto st;
 		case BPF_ST | BPF_MEM | BPF_H:
-			if (is_ereg(a_reg))
+			if (is_ereg(dst_reg))
 				EMIT3(0x66, 0x41, 0xC7);
 			else
 				EMIT2(0x66, 0xC7);
 			goto st;
 		case BPF_ST | BPF_MEM | BPF_W:
-			if (is_ereg(a_reg))
+			if (is_ereg(dst_reg))
 				EMIT2(0x41, 0xC7);
 			else
 				EMIT1(0xC7);
 			goto st;
 		case BPF_ST | BPF_MEM | BPF_DW:
-			EMIT2(add_1mod(0x48, a_reg), 0xC7);
+			EMIT2(add_1mod(0x48, dst_reg), 0xC7);
 
 st:			if (is_imm8(insn->off))
-				EMIT2(add_1reg(0x40, a_reg), insn->off);
+				EMIT2(add_1reg(0x40, dst_reg), insn->off);
 			else
-				EMIT1_off32(add_1reg(0x80, a_reg), insn->off);
+				EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);
 
-			EMIT(K, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
+			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
 			break;
 
-			/* STX: *(u8*)(a_reg + off) = x_reg */
+			/* STX: *(u8*)(dst_reg + off) = src_reg */
 		case BPF_STX | BPF_MEM | BPF_B:
 			/* emit 'mov byte ptr [rax + off], al' */
-			if (is_ereg(a_reg) || is_ereg(x_reg) ||
+			if (is_ereg(dst_reg) || is_ereg(src_reg) ||
 			    /* have to add extra byte for x86 SIL, DIL regs */
-			    x_reg == BPF_REG_1 || x_reg == BPF_REG_2)
-				EMIT2(add_2mod(0x40, a_reg, x_reg), 0x88);
+			    src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
 			else
 				EMIT1(0x88);
 			goto stx;
 		case BPF_STX | BPF_MEM | BPF_H:
-			if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT3(0x66, add_2mod(0x40, a_reg, x_reg), 0x89);
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
 			else
 				EMIT2(0x66, 0x89);
 			goto stx;
 		case BPF_STX | BPF_MEM | BPF_W:
-			if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT2(add_2mod(0x40, a_reg, x_reg), 0x89);
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
 			else
 				EMIT1(0x89);
 			goto stx;
 		case BPF_STX | BPF_MEM | BPF_DW:
-			EMIT2(add_2mod(0x48, a_reg, x_reg), 0x89);
+			EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
 stx:			if (is_imm8(insn->off))
-				EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
+				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
 			else
-				EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
+				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
 					    insn->off);
 			break;
 
-			/* LDX: a_reg = *(u8*)(x_reg + off) */
+			/* LDX: dst_reg = *(u8*)(src_reg + off) */
 		case BPF_LDX | BPF_MEM | BPF_B:
 			/* emit 'movzx rax, byte ptr [rax + off]' */
-			EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB6);
+			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
 			goto ldx;
 		case BPF_LDX | BPF_MEM | BPF_H:
 			/* emit 'movzx rax, word ptr [rax + off]' */
-			EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB7);
+			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
 			goto ldx;
 		case BPF_LDX | BPF_MEM | BPF_W:
 			/* emit 'mov eax, dword ptr [rax+0x14]' */
-			if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT2(add_2mod(0x40, x_reg, a_reg), 0x8B);
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
 			else
 				EMIT1(0x8B);
 			goto ldx;
 		case BPF_LDX | BPF_MEM | BPF_DW:
 			/* emit 'mov rax, qword ptr [rax+0x14]' */
-			EMIT2(add_2mod(0x48, x_reg, a_reg), 0x8B);
+			EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
 ldx:			/* if insn->off == 0 we can save one extra byte, but
 			 * special case of x86 r13 which always needs an offset
 			 * is not worth the hassle
 			 */
 			if (is_imm8(insn->off))
-				EMIT2(add_2reg(0x40, x_reg, a_reg), insn->off);
+				EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
 			else
-				EMIT1_off32(add_2reg(0x80, x_reg, a_reg),
+				EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
 					    insn->off);
 			break;
 
-			/* STX XADD: lock *(u32*)(a_reg + off) += x_reg */
+			/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
 		case BPF_STX | BPF_XADD | BPF_W:
 			/* emit 'lock add dword ptr [rax + off], eax' */
-			if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT3(0xF0, add_2mod(0x40, a_reg, x_reg), 0x01);
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
 			else
 				EMIT2(0xF0, 0x01);
 			goto xadd;
 		case BPF_STX | BPF_XADD | BPF_DW:
-			EMIT3(0xF0, add_2mod(0x48, a_reg, x_reg), 0x01);
+			EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
 xadd:			if (is_imm8(insn->off))
-				EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
+				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
 			else
-				EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
+				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
 					    insn->off);
 			break;
 
 			/* call */
 		case BPF_JMP | BPF_CALL:
-			func = (u8 *) __bpf_call_base + K;
+			func = (u8 *) __bpf_call_base + imm32;
 			jmp_offset = func - (image + addrs[i]);
 			if (ctx->seen_ld_abs) {
 				EMIT2(0x41, 0x52); /* push %r10 */
@@ -663,9 +663,9 @@ xadd:			if (is_imm8(insn->off))
 				 */
 				jmp_offset += 4;
 			}
-			if (!K || !is_simm32(jmp_offset)) {
+			if (!imm32 || !is_simm32(jmp_offset)) {
 				pr_err("unsupported bpf func %d addr %p image %p\n",
-				       K, func, image);
+				       imm32, func, image);
 				return -EINVAL;
 			}
 			EMIT1_off32(0xE8, jmp_offset);
@@ -682,21 +682,21 @@ xadd:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_JGE | BPF_X:
 		case BPF_JMP | BPF_JSGT | BPF_X:
 		case BPF_JMP | BPF_JSGE | BPF_X:
-			/* cmp a_reg, x_reg */
-			EMIT3(add_2mod(0x48, a_reg, x_reg), 0x39,
-			      add_2reg(0xC0, a_reg, x_reg));
+			/* cmp dst_reg, src_reg */
+			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
+			      add_2reg(0xC0, dst_reg, src_reg));
 			goto emit_cond_jmp;
 
 		case BPF_JMP | BPF_JSET | BPF_X:
-			/* test a_reg, x_reg */
-			EMIT3(add_2mod(0x48, a_reg, x_reg), 0x85,
-			      add_2reg(0xC0, a_reg, x_reg));
+			/* test dst_reg, src_reg */
+			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
+			      add_2reg(0xC0, dst_reg, src_reg));
 			goto emit_cond_jmp;
 
 		case BPF_JMP | BPF_JSET | BPF_K:
-			/* test a_reg, imm32 */
-			EMIT1(add_1mod(0x48, a_reg));
-			EMIT2_off32(0xF7, add_1reg(0xC0, a_reg), K);
+			/* test dst_reg, imm32 */
+			EMIT1(add_1mod(0x48, dst_reg));
+			EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
 			goto emit_cond_jmp;
 
 		case BPF_JMP | BPF_JEQ | BPF_K:
@@ -705,13 +705,13 @@ xadd:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_JGE | BPF_K:
 		case BPF_JMP | BPF_JSGT | BPF_K:
 		case BPF_JMP | BPF_JSGE | BPF_K:
-			/* cmp a_reg, imm8/32 */
-			EMIT1(add_1mod(0x48, a_reg));
+			/* cmp dst_reg, imm8/32 */
+			EMIT1(add_1mod(0x48, dst_reg));
 
-			if (is_imm8(K))
-				EMIT3(0x83, add_1reg(0xF8, a_reg), K);
+			if (is_imm8(imm32))
+				EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
 			else
-				EMIT2_off32(0x81, add_1reg(0xF8, a_reg), K);
+				EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);
 
 emit_cond_jmp:		/* convert BPF opcode to x86 */
 			switch (BPF_OP(insn->code)) {
@@ -773,27 +773,27 @@ emit_jmp:
 			func = sk_load_word;
 			goto common_load;
 		case BPF_LD | BPF_ABS | BPF_W:
-			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
+			func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
 common_load:		ctx->seen_ld_abs = true;
 			jmp_offset = func - (image + addrs[i]);
 			if (!func || !is_simm32(jmp_offset)) {
 				pr_err("unsupported bpf func %d addr %p image %p\n",
-				       K, func, image);
+				       imm32, func, image);
 				return -EINVAL;
 			}
 			if (BPF_MODE(insn->code) == BPF_ABS) {
 				/* mov %esi, imm32 */
-				EMIT1_off32(0xBE, K);
+				EMIT1_off32(0xBE, imm32);
 			} else {
-				/* mov %rsi, x_reg */
-				EMIT_mov(BPF_REG_2, x_reg);
-				if (K) {
-					if (is_imm8(K))
+				/* mov %rsi, src_reg */
+				EMIT_mov(BPF_REG_2, src_reg);
+				if (imm32) {
+					if (is_imm8(imm32))
 						/* add %esi, imm8 */
-						EMIT3(0x83, 0xC6, K);
+						EMIT3(0x83, 0xC6, imm32);
 					else
 						/* add %esi, imm32 */
-						EMIT2_off32(0x81, 0xC6, K);
+						EMIT2_off32(0x81, 0xC6, imm32);
 				}
 			}
 			/* skb pointer is in R6 (%rbx), it will be copied into
@@ -808,13 +808,13 @@ common_load:		ctx->seen_ld_abs = true;
 			func = sk_load_half;
 			goto common_load;
 		case BPF_LD | BPF_ABS | BPF_H:
-			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
+			func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
 			goto common_load;
 		case BPF_LD | BPF_IND | BPF_B:
 			func = sk_load_byte;
 			goto common_load;
 		case BPF_LD | BPF_ABS | BPF_B:
-			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
+			func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
 			goto common_load;
 
 		case BPF_JMP | BPF_EXIT:
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f0c2ad43b4af..a7e3c48d73a7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -78,161 +78,173 @@ enum {
 
 /* Helper macros for filter block array initializers. */
 
-/* ALU ops on registers, bpf_add|sub|...: A += X */
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
 
-#define BPF_ALU64_REG(OP, A, X)					\
+#define BPF_ALU64_REG(OP, DST, SRC)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,	\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = 0 })
 
-#define BPF_ALU32_REG(OP, A, X)					\
+#define BPF_ALU32_REG(OP, DST, SRC)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_OP(OP) | BPF_X,		\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = 0 })
 
-/* ALU ops on immediates, bpf_add|sub|...: A += IMM */
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
 
-#define BPF_ALU64_IMM(OP, A, IMM)				\
+#define BPF_ALU64_IMM(OP, DST, IMM)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
-#define BPF_ALU32_IMM(OP, A, IMM)				\
+#define BPF_ALU32_IMM(OP, DST, IMM)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
 /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
 
-#define BPF_ENDIAN(TYPE, A, LEN)				\
+#define BPF_ENDIAN(TYPE, DST, LEN)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_END | BPF_SRC(TYPE),	\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = LEN })
 
-/* Short form of mov, A = X */
+/* Short form of mov, dst_reg = src_reg */
 
-#define BPF_MOV64_REG(A, X)					\
+#define BPF_MOV64_REG(DST, SRC)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = 0 })
 
-#define BPF_MOV32_REG(A, X)					\
+#define BPF_MOV32_REG(DST, SRC)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = 0 })
 
-/* Short form of mov, A = IMM */
+/* Short form of mov, dst_reg = imm32 */
 
-#define BPF_MOV64_IMM(A, IMM)					\
+#define BPF_MOV64_IMM(DST, IMM)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU64 | BPF_MOV | BPF_K,		\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
-#define BPF_MOV32_IMM(A, IMM)					\
+#define BPF_MOV32_IMM(DST, IMM)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_MOV | BPF_K,		\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
-/* Short form of mov based on type, BPF_X: A = X,  BPF_K: A = IMM */
+/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
 
-#define BPF_MOV64_RAW(TYPE, A, X, IMM)				\
+#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM)			\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE),	\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
-#define BPF_MOV32_RAW(TYPE, A, X, IMM)				\
+#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM)			\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_MOV | BPF_SRC(TYPE),	\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
-/* Direct packet access, R0 = *(uint *) (skb->data + OFF) */
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
 
-#define BPF_LD_ABS(SIZE, OFF)					\
+#define BPF_LD_ABS(SIZE, IMM)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,	\
-		.a_reg = 0,					\
-		.x_reg = 0,					\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
-		.imm   = OFF })
+		.imm   = IMM })
 
-/* Indirect packet access, R0 = *(uint *) (skb->data + X + OFF) */
+/* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */
 
-#define BPF_LD_IND(SIZE, X, OFF)				\
+#define BPF_LD_IND(SIZE, SRC, IMM)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_IND,	\
-		.a_reg = 0,					\
-		.x_reg = X,					\
+		.dst_reg = 0,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
-		.imm   = OFF })
+		.imm   = IMM })
 
-/* Memory store, A = *(uint *) (X + OFF), and vice versa */
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
 
-#define BPF_LDX_MEM(SIZE, A, X, OFF)				\
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)			\
 	((struct sock_filter_int) {				\
 		.code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,	\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = OFF,					\
 		.imm   = 0 })
 
-#define BPF_STX_MEM(SIZE, A, X, OFF)				\
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF)			\
 	((struct sock_filter_int) {				\
 		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,	\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = OFF,					\
 		.imm   = 0 })
 
-/* Conditional jumps against registers, if (A 'op' X) goto pc + OFF */
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
 
-#define BPF_JMP_REG(OP, A, X, OFF)				\
+#define BPF_JMP_REG(OP, DST, SRC, OFF)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_JMP | BPF_OP(OP) | BPF_X,		\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = OFF,					\
 		.imm   = 0 })
 
-/* Conditional jumps against immediates, if (A 'op' IMM) goto pc + OFF */
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
 
-#define BPF_JMP_IMM(OP, A, IMM, OFF)				\
+#define BPF_JMP_IMM(OP, DST, IMM, OFF)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_JMP | BPF_OP(OP) | BPF_K,		\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = OFF,					\
 		.imm   = IMM })
 
@@ -241,18 +253,18 @@ enum {
 #define BPF_EMIT_CALL(FUNC)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_JMP | BPF_CALL,			\
-		.a_reg = 0,					\
-		.x_reg = 0,					\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = ((FUNC) - __bpf_call_base) })
 
 /* Raw code statement block */
 
-#define BPF_RAW_INSN(CODE, A, X, OFF, IMM)			\
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)			\
 	((struct sock_filter_int) {				\
 		.code  = CODE,					\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = OFF,					\
 		.imm   = IMM })
 
@@ -261,8 +273,8 @@ enum {
 #define BPF_EXIT_INSN()						\
 	((struct sock_filter_int) {				\
 		.code  = BPF_JMP | BPF_EXIT,			\
-		.a_reg = 0,					\
-		.x_reg = 0,					\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = 0 })
 
@@ -287,8 +299,8 @@ enum {
 
 struct sock_filter_int {
 	__u8	code;		/* opcode */
-	__u8	a_reg:4;	/* dest register */
-	__u8	x_reg:4;	/* source register */
+	__u8	dst_reg:4;	/* dest register */
+	__u8	src_reg:4;	/* source register */
 	__s16	off;		/* signed offset */
 	__s32	imm;		/* signed immediate constant */
 };
diff --git a/net/core/filter.c b/net/core/filter.c
index 6bd2e350e751..b3f21751b238 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -59,12 +59,12 @@
 #define BPF_R10	regs[BPF_REG_10]
 
 /* Named registers */
-#define A	regs[insn->a_reg]
-#define X	regs[insn->x_reg]
+#define DST	regs[insn->dst_reg]
+#define SRC	regs[insn->src_reg]
 #define FP	regs[BPF_REG_FP]
 #define ARG1	regs[BPF_REG_ARG1]
 #define CTX	regs[BPF_REG_CTX]
-#define K	insn->imm
+#define IMM	insn->imm
 
 /* No hurry in this branch
  *
@@ -264,7 +264,7 @@ static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *ins
 	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
 	ARG1 = (u64) (unsigned long) ctx;
 
-	/* Register for user BPF programs need to be reset first. */
+	/* Registers used in classic BPF programs need to be reset first. */
 	regs[BPF_REG_A] = 0;
 	regs[BPF_REG_X] = 0;
 
@@ -274,16 +274,16 @@ select_insn:
 	/* ALU */
 #define ALU(OPCODE, OP)			\
 	ALU64_##OPCODE##_X:		\
-		A = A OP X;		\
+		DST = DST OP SRC;	\
 		CONT;			\
 	ALU_##OPCODE##_X:		\
-		A = (u32) A OP (u32) X;	\
+		DST = (u32) DST OP (u32) SRC;	\
 		CONT;			\
 	ALU64_##OPCODE##_K:		\
-		A = A OP K;		\
+		DST = DST OP IMM;		\
 		CONT;			\
 	ALU_##OPCODE##_K:		\
-		A = (u32) A OP (u32) K;	\
+		DST = (u32) DST OP (u32) IMM;	\
 		CONT;
 
 	ALU(ADD,  +)
@@ -296,92 +296,92 @@ select_insn:
 	ALU(MUL,  *)
 #undef ALU
 	ALU_NEG:
-		A = (u32) -A;
+		DST = (u32) -DST;
 		CONT;
 	ALU64_NEG:
-		A = -A;
+		DST = -DST;
 		CONT;
 	ALU_MOV_X:
-		A = (u32) X;
+		DST = (u32) SRC;
 		CONT;
 	ALU_MOV_K:
-		A = (u32) K;
+		DST = (u32) IMM;
 		CONT;
 	ALU64_MOV_X:
-		A = X;
+		DST = SRC;
 		CONT;
 	ALU64_MOV_K:
-		A = K;
+		DST = IMM;
 		CONT;
 	ALU64_ARSH_X:
-		(*(s64 *) &A) >>= X;
+		(*(s64 *) &DST) >>= SRC;
 		CONT;
 	ALU64_ARSH_K:
-		(*(s64 *) &A) >>= K;
+		(*(s64 *) &DST) >>= IMM;
 		CONT;
 	ALU64_MOD_X:
-		if (unlikely(X == 0))
+		if (unlikely(SRC == 0))
 			return 0;
-		tmp = A;
-		A = do_div(tmp, X);
+		tmp = DST;
+		DST = do_div(tmp, SRC);
 		CONT;
 	ALU_MOD_X:
-		if (unlikely(X == 0))
+		if (unlikely(SRC == 0))
 			return 0;
-		tmp = (u32) A;
-		A = do_div(tmp, (u32) X);
+		tmp = (u32) DST;
+		DST = do_div(tmp, (u32) SRC);
 		CONT;
 	ALU64_MOD_K:
-		tmp = A;
-		A = do_div(tmp, K);
+		tmp = DST;
+		DST = do_div(tmp, IMM);
 		CONT;
 	ALU_MOD_K:
-		tmp = (u32) A;
-		A = do_div(tmp, (u32) K);
+		tmp = (u32) DST;
+		DST = do_div(tmp, (u32) IMM);
 		CONT;
 	ALU64_DIV_X:
-		if (unlikely(X == 0))
+		if (unlikely(SRC == 0))
 			return 0;
-		do_div(A, X);
+		do_div(DST, SRC);
 		CONT;
 	ALU_DIV_X:
-		if (unlikely(X == 0))
+		if (unlikely(SRC == 0))
 			return 0;
-		tmp = (u32) A;
-		do_div(tmp, (u32) X);
-		A = (u32) tmp;
+		tmp = (u32) DST;
+		do_div(tmp, (u32) SRC);
+		DST = (u32) tmp;
 		CONT;
 	ALU64_DIV_K:
-		do_div(A, K);
+		do_div(DST, IMM);
 		CONT;
 	ALU_DIV_K:
-		tmp = (u32) A;
-		do_div(tmp, (u32) K);
-		A = (u32) tmp;
+		tmp = (u32) DST;
+		do_div(tmp, (u32) IMM);
+		DST = (u32) tmp;
 		CONT;
 	ALU_END_TO_BE:
-		switch (K) {
+		switch (IMM) {
 		case 16:
-			A = (__force u16) cpu_to_be16(A);
+			DST = (__force u16) cpu_to_be16(DST);
 			break;
 		case 32:
-			A = (__force u32) cpu_to_be32(A);
+			DST = (__force u32) cpu_to_be32(DST);
 			break;
 		case 64:
-			A = (__force u64) cpu_to_be64(A);
+			DST = (__force u64) cpu_to_be64(DST);
 			break;
 		}
 		CONT;
 	ALU_END_TO_LE:
-		switch (K) {
+		switch (IMM) {
 		case 16:
-			A = (__force u16) cpu_to_le16(A);
+			DST = (__force u16) cpu_to_le16(DST);
 			break;
 		case 32:
-			A = (__force u32) cpu_to_le32(A);
+			DST = (__force u32) cpu_to_le32(DST);
 			break;
 		case 64:
-			A = (__force u64) cpu_to_le64(A);
+			DST = (__force u64) cpu_to_le64(DST);
 			break;
 		}
 		CONT;
@@ -401,85 +401,85 @@ select_insn:
 		insn += insn->off;
 		CONT;
 	JMP_JEQ_X:
-		if (A == X) {
+		if (DST == SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JEQ_K:
-		if (A == K) {
+		if (DST == IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JNE_X:
-		if (A != X) {
+		if (DST != SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JNE_K:
-		if (A != K) {
+		if (DST != IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JGT_X:
-		if (A > X) {
+		if (DST > SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JGT_K:
-		if (A > K) {
+		if (DST > IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JGE_X:
-		if (A >= X) {
+		if (DST >= SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JGE_K:
-		if (A >= K) {
+		if (DST >= IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSGT_X:
-		if (((s64) A) > ((s64) X)) {
+		if (((s64) DST) > ((s64) SRC)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSGT_K:
-		if (((s64) A) > ((s64) K)) {
+		if (((s64) DST) > ((s64) IMM)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSGE_X:
-		if (((s64) A) >= ((s64) X)) {
+		if (((s64) DST) >= ((s64) SRC)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSGE_K:
-		if (((s64) A) >= ((s64) K)) {
+		if (((s64) DST) >= ((s64) IMM)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSET_X:
-		if (A & X) {
+		if (DST & SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSET_K:
-		if (A & K) {
+		if (DST & IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
@@ -488,15 +488,15 @@ select_insn:
 		return BPF_R0;
 
 	/* STX and ST and LDX*/
-#define LDST(SIZEOP, SIZE)					\
-	STX_MEM_##SIZEOP:					\
-		*(SIZE *)(unsigned long) (A + insn->off) = X;	\
-		CONT;						\
-	ST_MEM_##SIZEOP:					\
-		*(SIZE *)(unsigned long) (A + insn->off) = K;	\
-		CONT;						\
-	LDX_MEM_##SIZEOP:					\
-		A = *(SIZE *)(unsigned long) (X + insn->off);	\
+#define LDST(SIZEOP, SIZE)						\
+	STX_MEM_##SIZEOP:						\
+		*(SIZE *)(unsigned long) (DST + insn->off) = SRC;	\
+		CONT;							\
+	ST_MEM_##SIZEOP:						\
+		*(SIZE *)(unsigned long) (DST + insn->off) = IMM;	\
+		CONT;							\
+	LDX_MEM_##SIZEOP:						\
+		DST = *(SIZE *)(unsigned long) (SRC + insn->off);	\
 		CONT;
 
 	LDST(B,   u8)
@@ -504,16 +504,16 @@ select_insn:
 	LDST(W,  u32)
 	LDST(DW, u64)
 #undef LDST
-	STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */
-		atomic_add((u32) X, (atomic_t *)(unsigned long)
-			   (A + insn->off));
+	STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
+		atomic_add((u32) SRC, (atomic_t *)(unsigned long)
+			   (DST + insn->off));
 		CONT;
-	STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
-		atomic64_add((u64) X, (atomic64_t *)(unsigned long)
-			     (A + insn->off));
+	STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
+		atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
+			     (DST + insn->off));
 		CONT;
-	LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + K)) */
-		off = K;
+	LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
+		off = IMM;
 load_word:
 		/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
 		 * only appearing in the programs where ctx ==
@@ -527,51 +527,51 @@ load_word:
 		 * BPF_R6-BPF_R9, and store return value into BPF_R0.
 		 *
 		 * Implicit input:
-		 *   ctx
+		 *   ctx == skb == BPF_R6 == CTX
 		 *
 		 * Explicit input:
-		 *   X == any register
-		 *   K == 32-bit immediate
+		 *   SRC == any register
+		 *   IMM == 32-bit immediate
 		 *
 		 * Output:
 		 *   BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
 		 */
 
-		ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
+		ptr = load_pointer((struct sk_buff *) CTX, off, 4, &tmp);
 		if (likely(ptr != NULL)) {
 			BPF_R0 = get_unaligned_be32(ptr);
 			CONT;
 		}
 
 		return 0;
-	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */
-		off = K;
+	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
+		off = IMM;
 load_half:
-		ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
+		ptr = load_pointer((struct sk_buff *) CTX, off, 2, &tmp);
 		if (likely(ptr != NULL)) {
 			BPF_R0 = get_unaligned_be16(ptr);
 			CONT;
 		}
 
 		return 0;
-	LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */
-		off = K;
+	LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
+		off = IMM;
 load_byte:
-		ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
+		ptr = load_pointer((struct sk_buff *) CTX, off, 1, &tmp);
 		if (likely(ptr != NULL)) {
 			BPF_R0 = *(u8 *)ptr;
 			CONT;
 		}
 
 		return 0;
-	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */
-		off = K + X;
+	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
+		off = IMM + SRC;
 		goto load_word;
-	LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + X + K)) */
-		off = K + X;
+	LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
+		off = IMM + SRC;
 		goto load_half;
-	LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + X + K) */
-		off = K + X;
+	LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
+		off = IMM + SRC;
 		goto load_byte;
 
 	default_label:
@@ -675,7 +675,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_PROTOCOL:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 
-		/* A = *(u16 *) (ctx + offsetof(protocol)) */
+		/* A = *(u16 *) (CTX + offsetof(protocol)) */
 		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
 				      offsetof(struct sk_buff, protocol));
 		/* A = ntohs(A) [emitting a nop or swap16] */
@@ -741,7 +741,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
 
-		/* A = *(u16 *) (ctx + offsetof(vlan_tci)) */
+		/* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
 		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
 				      offsetof(struct sk_buff, vlan_tci));
 		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
@@ -760,13 +760,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
 	case SKF_AD_OFF + SKF_AD_CPU:
 	case SKF_AD_OFF + SKF_AD_RANDOM:
-		/* arg1 = ctx */
+		/* arg1 = CTX */
 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
 		/* arg2 = A */
 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
 		/* arg3 = X */
 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
-		/* Emit call(ctx, arg2=A, arg3=X) */
+		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
 		switch (fp->k) {
 		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
 			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
@@ -941,12 +941,12 @@ do_pass:
 				 */
 				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
 
-				insn->a_reg = BPF_REG_A;
-				insn->x_reg = BPF_REG_TMP;
+				insn->dst_reg = BPF_REG_A;
+				insn->src_reg = BPF_REG_TMP;
 				bpf_src = BPF_X;
 			} else {
-				insn->a_reg = BPF_REG_A;
-				insn->x_reg = BPF_REG_X;
+				insn->dst_reg = BPF_REG_A;
+				insn->src_reg = BPF_REG_X;
 				insn->imm = fp->k;
 				bpf_src = BPF_SRC(fp->code);
 			}
-- 
cgit v1.2.3-71-gd317


From 22c7aaa57e80853b4904a46c18f97db0036a3b97 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Tue, 10 Jun 2014 18:27:59 +0300
Subject: Target/iscsi: Fix sendtargets response pdu for iser transport

In case the transport is iser we should not include the
iscsi target info in the sendtargets text response pdu.
This causes sendtargets response to include the target
info twice.

Modify iscsit_build_sendtargets_response to filter
transport types that don't match.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Reported-by: Slava Shwartsman <valyushash@gmail.com>
Cc: stable@vger.kernel.org # 3.11+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c |  2 +-
 drivers/target/iscsi/iscsi_target.c     | 14 ++++++++++----
 include/target/iscsi/iscsi_transport.h  |  3 ++-
 3 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 647a5e2beee4..ba619fa84662 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -2318,7 +2318,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 	int rc;
 
 	isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
-	rc = iscsit_build_text_rsp(cmd, conn, hdr);
+	rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_INFINIBAND);
 	if (rc < 0)
 		return rc;
 
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index e0d98344e4bd..b87721a01b74 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -3389,7 +3389,9 @@ static bool iscsit_check_inaddr_any(struct iscsi_np *np)
 
 #define SENDTARGETS_BUF_LIMIT 32768U
 
-static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd)
+static int
+iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
+				  enum iscsit_transport_type network_transport)
 {
 	char *payload = NULL;
 	struct iscsi_conn *conn = cmd->conn;
@@ -3466,6 +3468,9 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd)
 				struct iscsi_np *np = tpg_np->tpg_np;
 				bool inaddr_any = iscsit_check_inaddr_any(np);
 
+				if (np->np_network_transport != network_transport)
+					continue;
+
 				if (!target_name_printed) {
 					len = sprintf(buf, "TargetName=%s",
 						      tiqn->tiqn);
@@ -3517,11 +3522,12 @@ eob:
 
 int
 iscsit_build_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
-		      struct iscsi_text_rsp *hdr)
+		      struct iscsi_text_rsp *hdr,
+		      enum iscsit_transport_type network_transport)
 {
 	int text_length, padding;
 
-	text_length = iscsit_build_sendtargets_response(cmd);
+	text_length = iscsit_build_sendtargets_response(cmd, network_transport);
 	if (text_length < 0)
 		return text_length;
 
@@ -3559,7 +3565,7 @@ static int iscsit_send_text_rsp(
 	u32 tx_size = 0;
 	int text_length, iov_count = 0, rc;
 
-	rc = iscsit_build_text_rsp(cmd, conn, hdr);
+	rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_TCP);
 	if (rc < 0)
 		return rc;
 
diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h
index 33b487b5da92..daef9daa500c 100644
--- a/include/target/iscsi/iscsi_transport.h
+++ b/include/target/iscsi/iscsi_transport.h
@@ -70,7 +70,8 @@ extern void iscsit_build_nopin_rsp(struct iscsi_cmd *, struct iscsi_conn *,
 extern void iscsit_build_task_mgt_rsp(struct iscsi_cmd *, struct iscsi_conn *,
 				struct iscsi_tm_rsp *);
 extern int iscsit_build_text_rsp(struct iscsi_cmd *, struct iscsi_conn *,
-				struct iscsi_text_rsp *);
+				struct iscsi_text_rsp *,
+				enum iscsit_transport_type);
 extern void iscsit_build_reject(struct iscsi_cmd *, struct iscsi_conn *,
 				struct iscsi_reject *);
 extern int iscsit_build_logout_rsp(struct iscsi_cmd *, struct iscsi_conn *,
-- 
cgit v1.2.3-71-gd317


From 2426bd456a61407388b6e61fc5f98dbcbebc50e2 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Tue, 10 Jun 2014 11:07:47 -0700
Subject: target: Report correct response length for some commands

When an initiator sends an allocation length bigger than what its
command consumes, the target should only return the actual response data
and set the residual length to the unused part of the allocation length.

Add a helper function that command handlers (INQUIRY, READ CAPACITY,
etc) can use to do this correctly, and use this code to get the correct
residual for commands that don't use the full initiator allocation in the
handlers for READ CAPACITY, READ CAPACITY(16), INQUIRY, MODE SENSE and
REPORT LUNS.

This addresses a handful of failures as reported by Christophe with
the Windows Certification Kit:

  http://permalink.gmane.org/gmane.linux.scsi.target.devel/6515

Signed-off-by: Roland Dreier <roland@purestorage.com>
Tested-by: Christophe Vu-Brugier <cvubrugier@yahoo.fr>
Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_sbc.c       |  4 ++--
 drivers/target/target_core_spc.c       |  9 ++++++---
 drivers/target/target_core_transport.c | 17 +++++++++++++++++
 include/target/target_core_backend.h   |  1 +
 4 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 97a33603795d..1d3a626bf24f 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -81,7 +81,7 @@ sbc_emulate_readcapacity(struct se_cmd *cmd)
 		transport_kunmap_data_sg(cmd);
 	}
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, 8);
 	return 0;
 }
 
@@ -137,7 +137,7 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd)
 		transport_kunmap_data_sg(cmd);
 	}
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, 32);
 	return 0;
 }
 
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 17b5b7e099fa..6cd7222738fc 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -716,6 +716,7 @@ spc_emulate_inquiry(struct se_cmd *cmd)
 	unsigned char *buf;
 	sense_reason_t ret;
 	int p;
+	int len = 0;
 
 	buf = kzalloc(SE_INQUIRY_BUF, GFP_KERNEL);
 	if (!buf) {
@@ -737,6 +738,7 @@ spc_emulate_inquiry(struct se_cmd *cmd)
 		}
 
 		ret = spc_emulate_inquiry_std(cmd, buf);
+		len = buf[4] + 5;
 		goto out;
 	}
 
@@ -744,6 +746,7 @@ spc_emulate_inquiry(struct se_cmd *cmd)
 		if (cdb[2] == evpd_handlers[p].page) {
 			buf[1] = cdb[2];
 			ret = evpd_handlers[p].emulate(cmd, buf);
+			len = get_unaligned_be16(&buf[2]) + 4;
 			goto out;
 		}
 	}
@@ -760,7 +763,7 @@ out:
 	kfree(buf);
 
 	if (!ret)
-		target_complete_cmd(cmd, GOOD);
+		target_complete_cmd_with_length(cmd, GOOD, len);
 	return ret;
 }
 
@@ -1098,7 +1101,7 @@ set_length:
 		transport_kunmap_data_sg(cmd);
 	}
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, length);
 	return 0;
 }
 
@@ -1274,7 +1277,7 @@ done:
 	buf[3] = (lun_count & 0xff);
 	transport_kunmap_data_sg(cmd);
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, 8 + lun_count * 8);
 	return 0;
 }
 EXPORT_SYMBOL(spc_emulate_report_luns);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 15dbf6e97289..c9e8b35a954f 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -703,6 +703,23 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 }
 EXPORT_SYMBOL(target_complete_cmd);
 
+void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length)
+{
+	if (scsi_status == SAM_STAT_GOOD && length < cmd->data_length) {
+		if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
+			cmd->residual_count += cmd->data_length - length;
+		} else {
+			cmd->se_cmd_flags |= SCF_UNDERFLOW_BIT;
+			cmd->residual_count = cmd->data_length - length;
+		}
+
+		cmd->data_length = length;
+	}
+
+	target_complete_cmd(cmd, scsi_status);
+}
+EXPORT_SYMBOL(target_complete_cmd_with_length);
+
 static void target_add_to_state_list(struct se_cmd *cmd)
 {
 	struct se_device *dev = cmd->se_dev;
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 3a1c1eea1fff..9adc1bca1178 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -59,6 +59,7 @@ int	transport_subsystem_register(struct se_subsystem_api *);
 void	transport_subsystem_release(struct se_subsystem_api *);
 
 void	target_complete_cmd(struct se_cmd *, u8);
+void	target_complete_cmd_with_length(struct se_cmd *, u8, int);
 
 sense_reason_t	spc_parse_cdb(struct se_cmd *cmd, unsigned int *size);
 sense_reason_t	spc_emulate_report_luns(struct se_cmd *cmd);
-- 
cgit v1.2.3-71-gd317


From 67cb9366ff5f99868100198efba5ca88aaa6ad25 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 11 Jun 2014 18:19:28 +0200
Subject: ktime: add ktime_after and ktime_before helper

Add two minimal helper functions analogous to time_before() and
time_after() that will later on both be needed by SCTP code.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ktime.h    | 24 ++++++++++++++++++++++++
 net/sctp/sm_make_chunk.c |  2 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 31c0cd1c941a..de9e46e6bcc9 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -304,6 +304,30 @@ static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2)
 	return 0;
 }
 
+/**
+ * ktime_after - Compare if a ktime_t value is bigger than another one.
+ * @cmp1:	comparable1
+ * @cmp2:	comparable2
+ *
+ * Return: true if cmp1 happened after cmp2.
+ */
+static inline bool ktime_after(const ktime_t cmp1, const ktime_t cmp2)
+{
+	return ktime_compare(cmp1, cmp2) > 0;
+}
+
+/**
+ * ktime_before - Compare if a ktime_t value is smaller than another one.
+ * @cmp1:	comparable1
+ * @cmp2:	comparable2
+ *
+ * Return: true if cmp1 happened before cmp2.
+ */
+static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2)
+{
+	return ktime_compare(cmp1, cmp2) < 0;
+}
+
 static inline s64 ktime_to_us(const ktime_t kt)
 {
 	struct timeval tv = ktime_to_timeval(kt);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fee5552ddf92..ae0e616a7ca5 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1782,7 +1782,7 @@ no_hmac:
 	else
 		kt = ktime_get();
 
-	if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) {
+	if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
 		/*
 		 * Section 3.3.10.3 Stale Cookie Error (3)
 		 *
-- 
cgit v1.2.3-71-gd317


From e575235fc6026bb75e166ff68f84118c62d73f94 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 11 Jun 2014 18:19:30 +0200
Subject: net: sctp: migrate most recently used transport to ktime

Be more precise in transport path selection and use ktime
helpers instead of jiffies to compare and pick the better
primary and secondary recently used transports. This also
avoids any side-effects during a possible roll-over, and
could lead to better path decision-making.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 6 +++---
 net/sctp/associola.c       | 8 +++++---
 net/sctp/endpointola.c     | 2 +-
 net/sctp/transport.c       | 2 +-
 4 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0dfcc92600e8..f38588bf3462 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -838,10 +838,10 @@ struct sctp_transport {
 	unsigned long sackdelay;
 	__u32 sackfreq;
 
-	/* When was the last time (in jiffies) that we heard from this
-	 * transport?  We use this to pick new active and retran paths.
+	/* When was the last time that we heard from this transport? We use
+	 * this to pick new active and retran paths.
 	 */
-	unsigned long last_time_heard;
+	ktime_t last_time_heard;
 
 	/* Last time(in jiffies) when cwnd is reduced due to the congestion
 	 * indication based on ECNE chunk.
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 9f1cc6f1535d..620c99e19e77 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1036,7 +1036,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 		}
 
 		if (chunk->transport)
-			chunk->transport->last_time_heard = jiffies;
+			chunk->transport->last_time_heard = ktime_get();
 
 		/* Run through the state machine. */
 		error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype,
@@ -1283,11 +1283,13 @@ static void sctp_select_active_and_retran_path(struct sctp_association *asoc)
 		    trans->state == SCTP_PF)
 			continue;
 		if (trans_pri == NULL ||
-		    trans->last_time_heard > trans_pri->last_time_heard) {
+		    ktime_after(trans->last_time_heard,
+				trans_pri->last_time_heard)) {
 			trans_sec = trans_pri;
 			trans_pri = trans;
 		} else if (trans_sec == NULL ||
-			   trans->last_time_heard > trans_sec->last_time_heard) {
+			   ktime_after(trans->last_time_heard,
+				       trans_sec->last_time_heard)) {
 			trans_sec = trans;
 		}
 	}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 3d9f429858dc..9da76ba4d10f 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -481,7 +481,7 @@ normal:
 		}
 
 		if (chunk->transport)
-			chunk->transport->last_time_heard = jiffies;
+			chunk->transport->last_time_heard = ktime_get();
 
 		error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state,
 				   ep, asoc, chunk, GFP_ATOMIC);
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1d348d15b33d..7dd672fa651f 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -72,7 +72,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
 	 */
 	peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
 
-	peer->last_time_heard = jiffies;
+	peer->last_time_heard = ktime_get();
 	peer->last_time_ecne_reduced = jiffies;
 
 	peer->param_flags = SPP_HB_DISABLE |
-- 
cgit v1.2.3-71-gd317


From 8846bab180fa2bcfe02d4ba5288fbaba12c8f4f3 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Wed, 11 Jun 2014 12:09:57 +0300
Subject: scsi_cmnd: Introduce scsi_transfer_length helper

In case protection information exists on the wire
scsi transports should include it in the transfer
byte count (even if protection information does not
exist in the host memory space). This helper will
compute the total transfer length from the scsi
command data length and protection attributes.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: stable@vger.kernel.org # 3.15+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/scsi/scsi_cmnd.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index dd7c998221b3..a100c6e266c7 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -7,6 +7,7 @@
 #include <linux/types.h>
 #include <linux/timer.h>
 #include <linux/scatterlist.h>
+#include <scsi/scsi_device.h>
 
 struct Scsi_Host;
 struct scsi_device;
@@ -306,4 +307,20 @@ static inline void set_driver_byte(struct scsi_cmnd *cmd, char status)
 	cmd->result = (cmd->result & 0x00ffffff) | (status << 24);
 }
 
+static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd)
+{
+	unsigned int xfer_len = blk_rq_bytes(scmd->request);
+	unsigned int prot_op = scsi_get_prot_op(scmd);
+	unsigned int sector_size = scmd->device->sector_size;
+
+	switch (prot_op) {
+	case SCSI_PROT_NORMAL:
+	case SCSI_PROT_WRITE_STRIP:
+	case SCSI_PROT_READ_INSERT:
+		return xfer_len;
+	}
+
+	return xfer_len + (xfer_len >> ilog2(sector_size)) * 8;
+}
+
 #endif /* _SCSI_SCSI_CMND_H */
-- 
cgit v1.2.3-71-gd317


From 2940474af79744411da0cb63b041ad52c57bc443 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jun 2014 13:49:23 +0200
Subject: block: remove elv_abort_queue and blk_abort_flushes

elv_abort_queue has no callers, and blk_abort_flushes is only called by
elv_abort_queue.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-flush.c        | 38 --------------------------------------
 block/blk.h              |  1 -
 block/elevator.c         | 20 --------------------
 include/linux/elevator.h |  1 -
 4 files changed, 60 deletions(-)

(limited to 'include')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 8ffee4b5f93d..3cb5e9e7108a 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -421,44 +421,6 @@ void blk_insert_flush(struct request *rq)
 	blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
 }
 
-/**
- * blk_abort_flushes - @q is being aborted, abort flush requests
- * @q: request_queue being aborted
- *
- * To be called from elv_abort_queue().  @q is being aborted.  Prepare all
- * FLUSH/FUA requests for abortion.
- *
- * CONTEXT:
- * spin_lock_irq(q->queue_lock)
- */
-void blk_abort_flushes(struct request_queue *q)
-{
-	struct request *rq, *n;
-	int i;
-
-	/*
-	 * Requests in flight for data are already owned by the dispatch
-	 * queue or the device driver.  Just restore for normal completion.
-	 */
-	list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) {
-		list_del_init(&rq->flush.list);
-		blk_flush_restore_request(rq);
-	}
-
-	/*
-	 * We need to give away requests on flush queues.  Restore for
-	 * normal completion and put them on the dispatch queue.
-	 */
-	for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) {
-		list_for_each_entry_safe(rq, n, &q->flush_queue[i],
-					 flush.list) {
-			list_del_init(&rq->flush.list);
-			blk_flush_restore_request(rq);
-			list_add_tail(&rq->queuelist, &q->queue_head);
-		}
-	}
-}
-
 /**
  * blkdev_issue_flush - queue a flush
  * @bdev:	blockdev to issue flush for
diff --git a/block/blk.h b/block/blk.h
index 45385e9abf6f..6748c4f8d7a1 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -84,7 +84,6 @@ static inline void blk_clear_rq_complete(struct request *rq)
 #define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED)
 
 void blk_insert_flush(struct request *rq);
-void blk_abort_flushes(struct request_queue *q);
 
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
diff --git a/block/elevator.c b/block/elevator.c
index f35edddfe9b5..34bded18910e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -729,26 +729,6 @@ int elv_may_queue(struct request_queue *q, int rw)
 	return ELV_MQUEUE_MAY;
 }
 
-void elv_abort_queue(struct request_queue *q)
-{
-	struct request *rq;
-
-	blk_abort_flushes(q);
-
-	while (!list_empty(&q->queue_head)) {
-		rq = list_entry_rq(q->queue_head.next);
-		rq->cmd_flags |= REQ_QUIET;
-		trace_block_rq_abort(q, rq);
-		/*
-		 * Mark this request as started so we don't trigger
-		 * any debug logic in the end I/O path.
-		 */
-		blk_start_request(rq);
-		__blk_end_request_all(rq, -EIO);
-	}
-}
-EXPORT_SYMBOL(elv_abort_queue);
-
 void elv_completed_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 4ff262e2bf37..e2a6bd7fb133 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -133,7 +133,6 @@ extern struct request *elv_latter_request(struct request_queue *, struct request
 extern int elv_register_queue(struct request_queue *q);
 extern void elv_unregister_queue(struct request_queue *q);
 extern int elv_may_queue(struct request_queue *, int);
-extern void elv_abort_queue(struct request_queue *);
 extern void elv_completed_request(struct request_queue *, struct request *);
 extern int elv_set_request(struct request_queue *q, struct request *rq,
 			   struct bio *bio, gfp_t gfp_mask);
-- 
cgit v1.2.3-71-gd317


From da91309e0a7e8966d916a74cce42ed170fde06bf Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Mon, 9 Jun 2014 10:24:38 +0300
Subject: cpumask: Utility function to set n'th cpu - local cpu first

This function sets the n'th cpu - local cpu's first.
For example: in a 16 cores server with even cpu's local, will get the
following values:
cpumask_set_cpu_local_first(0, numa, cpumask) => cpu 0 is set
cpumask_set_cpu_local_first(1, numa, cpumask) => cpu 2 is set
...
cpumask_set_cpu_local_first(7, numa, cpumask) => cpu 14 is set
cpumask_set_cpu_local_first(8, numa, cpumask) => cpu 1 is set
cpumask_set_cpu_local_first(9, numa, cpumask) => cpu 3 is set
...
cpumask_set_cpu_local_first(15, numa, cpumask) => cpu 15 is set

Curently this function will be used by multi queue networking devices to
calculate the irq affinity mask, such that as many local cpu's as
possible will be utilized to handle the mq device irq's.

Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cpumask.h |  8 +++++++
 lib/cpumask.c           | 63 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d08e4d2a9b92..d5ef249735d2 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -142,6 +142,13 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask,
 	return 1;
 }
 
+static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+{
+	set_bit(0, cpumask_bits(dstp));
+
+	return 0;
+}
+
 #define for_each_cpu(cpu, mask)			\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_not(cpu, mask)		\
@@ -192,6 +199,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
 
 int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
 int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
+int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);
 
 /**
  * for_each_cpu - iterate over every cpu in a mask
diff --git a/lib/cpumask.c b/lib/cpumask.c
index b810b753c607..c101230658eb 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -164,3 +164,66 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 	memblock_free_early(__pa(mask), cpumask_size());
 }
 #endif
+
+/**
+ * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first
+ *
+ * @i: index number
+ * @numa_node: local numa_node
+ * @dstp: cpumask with the relevant cpu bit set according to the policy
+ *
+ * This function sets the cpumask according to a numa aware policy.
+ * cpumask could be used as an affinity hint for the IRQ related to a
+ * queue. When the policy is to spread queues across cores - local cores
+ * first.
+ *
+ * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
+ * the cpu bit and need to re-call the function.
+ */
+int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+{
+	cpumask_var_t mask;
+	int cpu;
+	int ret = 0;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	i %= num_online_cpus();
+
+	if (!cpumask_of_node(numa_node)) {
+		/* Use all online cpu's for non numa aware system */
+		cpumask_copy(mask, cpu_online_mask);
+	} else {
+		int n;
+
+		cpumask_and(mask,
+			    cpumask_of_node(numa_node), cpu_online_mask);
+
+		n = cpumask_weight(mask);
+		if (i >= n) {
+			i -= n;
+
+			/* If index > number of local cpu's, mask out local
+			 * cpu's
+			 */
+			cpumask_andnot(mask, cpu_online_mask, mask);
+		}
+	}
+
+	for_each_cpu(cpu, mask) {
+		if (--i < 0)
+			goto out;
+	}
+
+	ret = -EAGAIN;
+
+out:
+	free_cpumask_var(mask);
+
+	if (!ret)
+		cpumask_set_cpu(cpu, dstp);
+
+	return ret;
+}
+EXPORT_SYMBOL(cpumask_set_cpu_local_first);
-- 
cgit v1.2.3-71-gd317


From bad93e9d4eeb0d2d6b79204d6cedc7f2e7b256f1 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <octavian.purdila@intel.com>
Date: Thu, 12 Jun 2014 01:36:26 +0300
Subject: net: add __pskb_copy_fclone and pskb_copy_for_clone

There are several instances where a pskb_copy or __pskb_copy is
immediately followed by an skb_clone.

Add a couple of new functions to allow the copy skb to be allocated
from the fclone cache and thus speed up subsequent skb_clone calls.

Cc: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: Marek Lindner <mareklindner@neomailbox.ch>
Cc: Simon Wunderlich <sw@simonwunderlich.de>
Cc: Antonio Quartulli <antonio@meshcoding.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Cc: Arvid Brodin <arvid.brodin@alten.se>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Cc: Lauro Ramos Venancio <lauro.venancio@openbossa.org>
Cc: Aloisio Almeida Jr <aloisio.almeida@openbossa.org>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Allan Stephens <allan.stephens@windriver.com>
Cc: Andrew Hendry <andrew.hendry@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Reviewed-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: Octavian Purdila <octavian.purdila@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                 | 16 +++++++++++++++-
 net/batman-adv/distributed-arp-table.c |  2 +-
 net/batman-adv/network-coding.c        |  2 +-
 net/bluetooth/hci_sock.c               |  6 +++---
 net/core/skbuff.c                      | 14 +++++++++-----
 net/nfc/llcp_core.c                    |  4 ++--
 net/nfc/rawsock.c                      |  4 ++--
 net/tipc/bcast.c                       |  2 +-
 8 files changed, 34 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c705808bef9c..1f50bfe2243d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -744,7 +744,13 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
-struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask);
+struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
+				   gfp_t gfp_mask, bool fclone);
+static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom,
+					  gfp_t gfp_mask)
+{
+	return __pskb_copy_fclone(skb, headroom, gfp_mask, false);
+}
 
 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
@@ -2238,6 +2244,14 @@ static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
 	return __pskb_copy(skb, skb_headroom(skb), gfp_mask);
 }
 
+
+static inline struct sk_buff *pskb_copy_for_clone(struct sk_buff *skb,
+						  gfp_t gfp_mask)
+{
+	return __pskb_copy_fclone(skb, skb_headroom(skb), gfp_mask, true);
+}
+
+
 /**
  *	skb_clone_writable - is the header of a clone writable
  *	@skb: buffer to check
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index dcd99b2bea3c..f2c066b21716 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -594,7 +594,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
 		if (!neigh_node)
 			goto free_orig;
 
-		tmp_skb = pskb_copy(skb, GFP_ATOMIC);
+		tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
 		if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb,
 							   cand[i].orig_node,
 							   packet_subtype)) {
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 40a2fc4bcf4c..8d04d174669e 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1344,7 +1344,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
 	struct ethhdr *ethhdr;
 
 	/* Copy skb header to change the mac header */
-	skb = pskb_copy(skb, GFP_ATOMIC);
+	skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
 	if (!skb)
 		return;
 
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f608bffdb8b9..80d25c150a65 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -143,7 +143,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 
 		if (!skb_copy) {
 			/* Create a private copy with headroom */
-			skb_copy = __pskb_copy(skb, 1, GFP_ATOMIC);
+			skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true);
 			if (!skb_copy)
 				continue;
 
@@ -247,8 +247,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
 			struct hci_mon_hdr *hdr;
 
 			/* Create a private copy with headroom */
-			skb_copy = __pskb_copy(skb, HCI_MON_HDR_SIZE,
-					       GFP_ATOMIC);
+			skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE,
+						      GFP_ATOMIC, true);
 			if (!skb_copy)
 				continue;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 05f4bef2ce12..b9e85e6cb26a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -951,10 +951,13 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 EXPORT_SYMBOL(skb_copy);
 
 /**
- *	__pskb_copy	-	create copy of an sk_buff with private head.
+ *	__pskb_copy_fclone	-  create copy of an sk_buff with private head.
  *	@skb: buffer to copy
  *	@headroom: headroom of new skb
  *	@gfp_mask: allocation priority
+ *	@fclone: if true allocate the copy of the skb from the fclone
+ *	cache instead of the head cache; it is recommended to set this
+ *	to true for the cases where the copy will likely be cloned
  *
  *	Make a copy of both an &sk_buff and part of its data, located
  *	in header. Fragmented data remain shared. This is used when
@@ -964,11 +967,12 @@ EXPORT_SYMBOL(skb_copy);
  *	The returned buffer has a reference count of 1.
  */
 
-struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
+struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
+				   gfp_t gfp_mask, bool fclone)
 {
 	unsigned int size = skb_headlen(skb) + headroom;
-	struct sk_buff *n = __alloc_skb(size, gfp_mask,
-					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
+	int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
 
 	if (!n)
 		goto out;
@@ -1008,7 +1012,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 out:
 	return n;
 }
-EXPORT_SYMBOL(__pskb_copy);
+EXPORT_SYMBOL(__pskb_copy_fclone);
 
 /**
  *	pskb_expand_head - reallocate header of &sk_buff
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index f6278da68763..51e788797317 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -680,8 +680,8 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
 			continue;
 
 		if (skb_copy == NULL) {
-			skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE,
-					       GFP_ATOMIC);
+			skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE,
+						      GFP_ATOMIC, true);
 
 			if (skb_copy == NULL)
 				continue;
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 55eefee311eb..11c3544ea546 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -378,8 +378,8 @@ void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb,
 
 	sk_for_each(sk, &raw_sk_list.head) {
 		if (!skb_copy) {
-			skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE,
-				     GFP_ATOMIC);
+			skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE,
+						      GFP_ATOMIC, true);
 			if (!skb_copy)
 				continue;
 
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 671f9817b4f4..26631679a1fa 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -653,7 +653,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
 			tipc_bearer_send(b->identity, buf, &b->bcast_addr);
 		} else {
 			/* Avoid concurrent buffer access */
-			tbuf = pskb_copy(buf, GFP_ATOMIC);
+			tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC);
 			if (!tbuf)
 				break;
 			tipc_bearer_send(b->identity, tbuf, &b->bcast_addr);
-- 
cgit v1.2.3-71-gd317


From efd0f11d85e16d375dddeb77e8e78f43c67c5b13 Mon Sep 17 00:00:00 2001
From: Dmitry Popov <ixaphire@qrator.net>
Date: Wed, 11 Jun 2014 15:09:14 +0400
Subject: ip_vti: fix sparse warnings for VTI_ISVTI

This patch fixes the following sparse warnings:

net/ipv4/ip_tunnel.c:245:53: warning: restricted __be16 degrades to integer
net/ipv4/ip_vti.c:321:19: warning: incorrect type in assignment (different base types)
net/ipv4/ip_vti.c:321:19:    expected restricted __be16 [addressable] [assigned] [usertype] i_flags
net/ipv4/ip_vti.c:321:19:    got int
net/ipv4/ip_vti.c:447:24: warning: incorrect type in assignment (different base types)
net/ipv4/ip_vti.c:447:24:    expected restricted __be16 [usertype] i_flags
net/ipv4/ip_vti.c:447:24:    got int

Since VTI_ISVTI is always used with ip_tunnel_parm->i_flags (which is __be16),
we can __force cast VTI_ISVTI to __be16 in header file.

Signed-off-by: Dmitry Popov <ixaphire@qrator.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_tunnel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index aee73d0611fb..3bce9e9d9f7c 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -100,7 +100,7 @@ enum {
 #define IFLA_GRE_MAX	(__IFLA_GRE_MAX - 1)
 
 /* VTI-mode i_flags */
-#define VTI_ISVTI 0x0001
+#define VTI_ISVTI ((__force __be16)0x0001)
 
 enum {
 	IFLA_VTI_UNSPEC,
-- 
cgit v1.2.3-71-gd317


From 5d0c2b95bc57cf8fdc0e7b3e9d7e751eb65ad052 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 10 Jun 2014 18:54:13 -0700
Subject: net: Preserve CHECKSUM_COMPLETE at validation

Currently when the first checksum in a packet is validated using
CHECKSUM_COMPLETE, ip_summed is overwritten to be CHECKSUM_UNNECESSARY
so that any subsequent checksums in the packet are not correctly
validated.

This patch adds csum_valid flag in sk_buff and uses that to indicate
validated checksum instead of setting CHECKSUM_UNNECESSARY. The bit
is set accordingly in the skb_checksum_validate_* functions. The flag
is checked in skb_checksum_complete, so that validation is communicated
between checksum_init and checksum_complete sequence in TCP and UDP.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1f50bfe2243d..72a53805858a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -572,7 +572,8 @@ struct sk_buff {
 	 */
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
-	/* 5/7 bit hole (depending on ndisc_nodetype presence) */
+	__u8			csum_valid:1;
+	/* 4/6 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
@@ -2735,7 +2736,7 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
 static inline int skb_csum_unnecessary(const struct sk_buff *skb)
 {
-	return skb->ip_summed & CHECKSUM_UNNECESSARY;
+	return ((skb->ip_summed & CHECKSUM_UNNECESSARY) || skb->csum_valid);
 }
 
 /**
@@ -2769,10 +2770,8 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
 						  bool zero_okay,
 						  __sum16 check)
 {
-	if (skb_csum_unnecessary(skb)) {
-		return false;
-	} else if (zero_okay && !check) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
+		skb->csum_valid = 1;
 		return false;
 	}
 
@@ -2799,15 +2798,20 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
 {
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!csum_fold(csum_add(psum, skb->csum))) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			skb->csum_valid = 1;
 			return 0;
 		}
 	}
 
 	skb->csum = psum;
 
-	if (complete || skb->len <= CHECKSUM_BREAK)
-		return __skb_checksum_complete(skb);
+	if (complete || skb->len <= CHECKSUM_BREAK) {
+		__sum16 csum;
+
+		csum = __skb_checksum_complete(skb);
+		skb->csum_valid = !csum;
+		return csum;
+	}
 
 	return 0;
 }
@@ -2831,6 +2835,7 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
 				zero_okay, check, compute_pseudo)	\
 ({									\
 	__sum16 __ret = 0;						\
+	skb->csum_valid = 0;						\
 	if (__skb_checksum_validate_needed(skb, zero_okay, check))	\
 		__ret = __skb_checksum_validate_complete(skb,		\
 				complete, compute_pseudo(skb, proto));	\
-- 
cgit v1.2.3-71-gd317


From 7e3cead5172927732f51fde77fef6f521e22f209 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 10 Jun 2014 18:54:19 -0700
Subject: net: Save software checksum complete

In skb_checksum complete, if we need to compute the checksum for the
packet (via skb_checksum) save the result as CHECKSUM_COMPLETE.
Subsequent checksum verification can use this.

Also, added csum_complete_sw flag to distinguish between software and
hardware generated checksum complete, we should always be able to trust
the software computation.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  3 ++-
 net/core/datagram.c    | 14 +++++++++-----
 net/ipv4/gre_offload.c |  6 ++++--
 net/sunrpc/socklib.c   |  3 ++-
 4 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 72a53805858a..5b5cd3189c98 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -573,7 +573,8 @@ struct sk_buff {
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
 	__u8			csum_valid:1;
-	/* 4/6 bit hole (depending on ndisc_nodetype presence) */
+	__u8			csum_complete_sw:1;
+	/* 3/5 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a16ed7bbe376..6b1c04ca1d50 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -739,11 +739,15 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
 	__sum16 sum;
 
 	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
-	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
-			netdev_rx_csum_fault(skb->dev);
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	}
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !sum &&
+	    !skb->csum_complete_sw)
+		netdev_rx_csum_fault(skb->dev);
+
+	/* Save checksum complete for later use */
+	skb->csum = sum;
+	skb->ip_summed = CHECKSUM_COMPLETE;
+	skb->csum_complete_sw = 1;
+
 	return sum;
 }
 EXPORT_SYMBOL(__skb_checksum_complete_head);
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 24deb3928b9e..eb92deb12666 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -131,10 +131,12 @@ static __sum16 gro_skb_checksum(struct sk_buff *skb)
 		csum_partial(skb->data, skb_gro_offset(skb), 0));
 	sum = csum_fold(NAPI_GRO_CB(skb)->csum);
 	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) {
-		if (unlikely(!sum))
+		if (unlikely(!sum) && !skb->csum_complete_sw)
 			netdev_rx_csum_fault(skb->dev);
-	} else
+	} else {
 		skb->ip_summed = CHECKSUM_COMPLETE;
+		skb->csum_complete_sw = 1;
+	}
 
 	return sum;
 }
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 0a648c502fc3..2df87f78e518 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -173,7 +173,8 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
 		return -1;
 	if (csum_fold(desc.csum))
 		return -1;
-	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+	    !skb->csum_complete_sw)
 		netdev_rx_csum_fault(skb->dev);
 	return 0;
 no_checksum:
-- 
cgit v1.2.3-71-gd317


From 6e765a009ad33845033f94cf47159327f2ba59db Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 11 Jun 2014 20:35:18 +0200
Subject: net_sched: drr: warn when qdisc is not work conserving

The DRR scheduler requires that items on the active list are work
conserving, i.e. do not hold on to skbs for throttling purposes, etc.
Attaching e.g. tbf renders DRR useless because all other classes on the
active list are delayed as well.

So, warn users that this configuration won't work as expected; we
already do this in couple of other qdiscs, see e.g.

commit b00355db3f88d96810a60011a30cfb2c3469409d
('pkt_sched: sch_hfsc: sch_htb: Add non-work-conserving warning handler')

The 'const' change is needed to avoid compiler warning ("discards 'const'
qualifier from pointer target type").

tested with:
drr_hier() {
        parent=$1
        classes=$2
        for i in  $(seq 1 $classes); do
                classid=$parent$(printf %x $i)
                tc class add dev eth0 parent $parent classid $classid drr
		tc qdisc add dev eth0 parent $classid tbf rate 64kbit burst 256kbit limit 64kbit
        done
}
tc qdisc add dev eth0 root handle 1: drr
drr_hier 1: 32
tc filter add dev eth0 protocol all pref 1 parent 1: handle 1 flow hash keys dst perturb 1 divisor 32

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 2 +-
 net/sched/sch_api.c     | 2 +-
 net/sched/sch_drr.c     | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 891d80d2c4d2..ec030cd76616 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -96,7 +96,7 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 					struct nlattr *tab);
 void qdisc_put_rtab(struct qdisc_rate_table *tab);
 void qdisc_put_stab(struct qdisc_size_table *tab);
-void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc);
+void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc);
 int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		    struct net_device *dev, struct netdev_queue *txq,
 		    spinlock_t *root_lock);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fd14df56e5ff..58bed7599db7 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -563,7 +563,7 @@ out:
 }
 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
 
-void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
+void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
 {
 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
 		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 8302717ea303..7bbbfe112192 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -391,8 +391,10 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
 	while (1) {
 		cl = list_first_entry(&q->active, struct drr_class, alist);
 		skb = cl->qdisc->ops->peek(cl->qdisc);
-		if (skb == NULL)
+		if (skb == NULL) {
+			qdisc_warn_nonwc(__func__, cl->qdisc);
 			goto out;
+		}
 
 		len = qdisc_pkt_len(skb);
 		if (len <= cl->deficit) {
-- 
cgit v1.2.3-71-gd317


From 8d0207652cbe27d1f962050737848e5ad4671958 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Apr 2014 04:27:08 -0400
Subject: ->splice_write() via ->write_iter()

iter_file_splice_write() - a ->splice_write() instance that gathers the
pipe buffers, builds a bio_vec-based iov_iter covering those and feeds
it to ->write_iter().  A bunch of simple cases coverted to that...

[AV: fixed the braino spotted by Cyrill]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c        |   2 +-
 fs/exofs/file.c       |   2 +-
 fs/ext2/file.c        |   2 +-
 fs/ext3/file.c        |   2 +-
 fs/ext4/file.c        |   2 +-
 fs/f2fs/file.c        |   2 +-
 fs/gfs2/file.c        |   4 +-
 fs/jfs/file.c         |   2 +-
 fs/ramfs/file-mmu.c   |   2 +-
 fs/ramfs/file-nommu.c |   2 +-
 fs/reiserfs/file.c    |   2 +-
 fs/splice.c           | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ubifs/file.c       |   2 +-
 fs/xfs/xfs_file.c     |  43 +---------------
 fs/xfs/xfs_trace.h    |   1 -
 include/linux/fs.h    |   2 +
 16 files changed, 156 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4e36b8ea8aa4..e68e150b1b16 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1583,7 +1583,7 @@ const struct file_operations def_blk_fops = {
 	.compat_ioctl	= compat_blkdev_ioctl,
 #endif
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 5b7f6be5a2d5..71bf8e4fb5d4 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -77,7 +77,7 @@ const struct file_operations exofs_file_operations = {
 	.fsync		= exofs_file_fsync,
 	.flush		= exofs_flush,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 const struct inode_operations exofs_file_inode_operations = {
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 970c6aca15cc..7c87b22a7228 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -75,7 +75,7 @@ const struct file_operations ext2_file_operations = {
 	.release	= ext2_release_file,
 	.fsync		= ext2_fsync,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 #ifdef CONFIG_EXT2_FS_XIP
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index c833b1226d4d..a062fa1e1b11 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -63,7 +63,7 @@ const struct file_operations ext3_file_operations = {
 	.release	= ext3_release_file,
 	.fsync		= ext3_sync_file,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 const struct inode_operations ext3_file_inode_operations = {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 48383a5f37a1..708aad768199 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -599,7 +599,7 @@ const struct file_operations ext4_file_operations = {
 	.release	= ext4_release_file,
 	.fsync		= ext4_sync_file,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.fallocate	= ext4_fallocate,
 };
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 22f4900dd8eb..e4ba4b93f96a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -692,5 +692,5 @@ const struct file_operations f2fs_file_operations = {
 	.compat_ioctl	= f2fs_compat_ioctl,
 #endif
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index ca932cd358d3..01b4c5b1bff8 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1068,7 +1068,7 @@ const struct file_operations gfs2_file_fops = {
 	.lock		= gfs2_lock,
 	.flock		= gfs2_flock,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.setlease	= gfs2_setlease,
 	.fallocate	= gfs2_fallocate,
 };
@@ -1098,7 +1098,7 @@ const struct file_operations gfs2_file_fops_nolock = {
 	.release	= gfs2_release,
 	.fsync		= gfs2_fsync,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.setlease	= generic_setlease,
 	.fallocate	= gfs2_fallocate,
 };
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index cc744ecaf51f..33aa0cc1f8b8 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -157,7 +157,7 @@ const struct file_operations jfs_file_operations = {
 	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.fsync		= jfs_fsync,
 	.release	= jfs_release,
 	.unlocked_ioctl = jfs_ioctl,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 6ea0b9718a9d..4f56de822d2f 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -38,7 +38,7 @@ const struct file_operations ramfs_file_operations = {
 	.mmap		= generic_file_mmap,
 	.fsync		= noop_fsync,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.llseek		= generic_file_llseek,
 };
 
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 9ed420f8f3ca..dda012ad4208 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -43,7 +43,7 @@ const struct file_operations ramfs_file_operations = {
 	.write_iter		= generic_file_write_iter,
 	.fsync			= noop_fsync,
 	.splice_read		= generic_file_splice_read,
-	.splice_write		= generic_file_splice_write,
+	.splice_write		= iter_file_splice_write,
 	.llseek			= generic_file_llseek,
 };
 
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 7c8ecd6468db..f070cc827456 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -248,7 +248,7 @@ const struct file_operations reiserfs_file_operations = {
 	.read_iter = generic_file_read_iter,
 	.write_iter = generic_file_write_iter,
 	.splice_read = generic_file_splice_read,
-	.splice_write = generic_file_splice_write,
+	.splice_write = iter_file_splice_write,
 	.llseek = generic_file_llseek,
 };
 
diff --git a/fs/splice.c b/fs/splice.c
index f99e420744c7..f195a9b89fb2 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -32,6 +32,7 @@
 #include <linux/gfp.h>
 #include <linux/socket.h>
 #include <linux/compat.h>
+#include <linux/aio.h>
 #include "internal.h"
 
 /*
@@ -1052,6 +1053,145 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 
 EXPORT_SYMBOL(generic_file_splice_write);
 
+/**
+ * iter_file_splice_write - splice data from a pipe to a file
+ * @pipe:	pipe info
+ * @out:	file to write to
+ * @ppos:	position in @out
+ * @len:	number of bytes to splice
+ * @flags:	splice modifier flags
+ *
+ * Description:
+ *    Will either move or copy pages (determined by @flags options) from
+ *    the given pipe inode to the given file.
+ *    This one is ->write_iter-based.
+ *
+ */
+ssize_t
+iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+			  loff_t *ppos, size_t len, unsigned int flags)
+{
+	struct splice_desc sd = {
+		.total_len = len,
+		.flags = flags,
+		.pos = *ppos,
+		.u.file = out,
+	};
+	int nbufs = pipe->buffers;
+	struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
+					GFP_KERNEL);
+	ssize_t ret;
+
+	if (unlikely(!array))
+		return -ENOMEM;
+
+	pipe_lock(pipe);
+
+	splice_from_pipe_begin(&sd);
+	while (sd.total_len) {
+		struct iov_iter from;
+		struct kiocb kiocb;
+		size_t left;
+		int n, idx;
+
+		ret = splice_from_pipe_next(pipe, &sd);
+		if (ret <= 0)
+			break;
+
+		if (unlikely(nbufs < pipe->buffers)) {
+			kfree(array);
+			nbufs = pipe->buffers;
+			array = kcalloc(nbufs, sizeof(struct bio_vec),
+					GFP_KERNEL);
+			if (!array) {
+				ret = -ENOMEM;
+				break;
+			}
+		}
+
+		/* build the vector */
+		left = sd.total_len;
+		for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) {
+			struct pipe_buffer *buf = pipe->bufs + idx;
+			size_t this_len = buf->len;
+
+			if (this_len > left)
+				this_len = left;
+
+			if (idx == pipe->buffers - 1)
+				idx = -1;
+
+			ret = buf->ops->confirm(pipe, buf);
+			if (unlikely(ret)) {
+				if (ret == -ENODATA)
+					ret = 0;
+				goto done;
+			}
+
+			array[n].bv_page = buf->page;
+			array[n].bv_len = this_len;
+			array[n].bv_offset = buf->offset;
+			left -= this_len;
+		}
+
+		/* ... iov_iter */
+		from.type = ITER_BVEC | WRITE;
+		from.bvec = array;
+		from.nr_segs = n;
+		from.count = sd.total_len - left;
+		from.iov_offset = 0;
+
+		/* ... and iocb */
+		init_sync_kiocb(&kiocb, out);
+		kiocb.ki_pos = sd.pos;
+		kiocb.ki_nbytes = sd.total_len - left;
+
+		/* now, send it */
+		ret = out->f_op->write_iter(&kiocb, &from);
+		if (-EIOCBQUEUED == ret)
+			ret = wait_on_sync_kiocb(&kiocb);
+
+		if (ret <= 0)
+			break;
+
+		sd.num_spliced += ret;
+		sd.total_len -= ret;
+		*ppos = sd.pos = kiocb.ki_pos;
+
+		/* dismiss the fully eaten buffers, adjust the partial one */
+		while (ret) {
+			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
+			if (ret >= buf->len) {
+				const struct pipe_buf_operations *ops = buf->ops;
+				ret -= buf->len;
+				buf->len = 0;
+				buf->ops = NULL;
+				ops->release(pipe, buf);
+				pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+				pipe->nrbufs--;
+				if (pipe->files)
+					sd.need_wakeup = true;
+			} else {
+				buf->offset += ret;
+				buf->len -= ret;
+				ret = 0;
+			}
+		}
+	}
+done:
+	kfree(array);
+	splice_from_pipe_end(pipe, &sd);
+
+	pipe_unlock(pipe);
+
+	if (sd.num_spliced)
+		ret = sd.num_spliced;
+
+	return ret;
+}
+
+EXPORT_SYMBOL(iter_file_splice_write);
+
 static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 			  struct splice_desc *sd)
 {
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 6bc4e8efbccf..0888502a6041 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1585,7 +1585,7 @@ const struct file_operations ubifs_file_operations = {
 	.fsync          = ubifs_fsync,
 	.unlocked_ioctl = ubifs_ioctl,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl   = ubifs_compat_ioctl,
 #endif
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5446e86d3485..b1c489c1fb2e 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -342,47 +342,6 @@ xfs_file_splice_read(
 	return ret;
 }
 
-/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * couuld cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
-	struct pipe_inode_info	*pipe,
-	struct file		*outfilp,
-	loff_t			*ppos,
-	size_t			count,
-	unsigned int		flags)
-{
-	struct inode		*inode = outfilp->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	int			ioflags = 0;
-	ssize_t			ret;
-
-	XFS_STATS_INC(xs_write_calls);
-
-	if (outfilp->f_mode & FMODE_NOCMTIME)
-		ioflags |= IO_INVIS;
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return -EIO;
-
-	xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-	trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
-	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
-	if (ret > 0)
-		XFS_STATS_ADD(xs_write_bytes, ret);
-
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	return ret;
-}
-
 /*
  * This routine is called to handle zeroing any space in the last block of the
  * file that is beyond the EOF.  We do this since the size is being increased
@@ -1442,7 +1401,7 @@ const struct file_operations xfs_file_operations = {
 	.read_iter	= xfs_file_read_iter,
 	.write_iter	= xfs_file_write_iter,
 	.splice_read	= xfs_file_splice_read,
-	.splice_write	= xfs_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.unlocked_ioctl	= xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= xfs_file_compat_ioctl,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 65d8c793a25c..53182f97cf01 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1060,7 +1060,6 @@ DEFINE_RW_EVENT(xfs_file_read);
 DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
 DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
 
 DECLARE_EVENT_CLASS(xfs_page_class,
 	TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a6448849dbce..8bd8ed357c7b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2434,6 +2434,8 @@ extern ssize_t default_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
+extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
+		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
 		struct file *out, loff_t *, size_t len, unsigned int flags);
 
-- 
cgit v1.2.3-71-gd317


From 96f9bc8fbc2440d90e15f02398e1de43f674b433 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Apr 2014 04:35:49 -0400
Subject: fs/splice.c: remove unneeded exports

ocfs2 was using a bunch of splice.c guts...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/splice.c            | 15 +++++----------
 include/linux/splice.h | 10 ----------
 2 files changed, 5 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/fs/splice.c b/fs/splice.c
index f195a9b89fb2..ab84051758a7 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -738,7 +738,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
  * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
  * a new page in the output file page cache and fill/dirty that.
  */
-int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 		 struct splice_desc *sd)
 {
 	struct file *file = sd->u.file;
@@ -773,7 +773,6 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 out:
 	return ret;
 }
-EXPORT_SYMBOL(pipe_to_file);
 
 static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
 {
@@ -803,7 +802,7 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
  *    locking is required around copying the pipe buffers to the
  *    destination.
  */
-int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
+static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
 			  splice_actor *actor)
 {
 	int ret;
@@ -850,7 +849,6 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
 
 	return 1;
 }
-EXPORT_SYMBOL(splice_from_pipe_feed);
 
 /**
  * splice_from_pipe_next - wait for some data to splice from
@@ -862,7 +860,7 @@ EXPORT_SYMBOL(splice_from_pipe_feed);
  *    value (one) if pipe buffers are available.  It will return zero
  *    or -errno if no more data needs to be spliced.
  */
-int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
+static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
 {
 	while (!pipe->nrbufs) {
 		if (!pipe->writers)
@@ -887,7 +885,6 @@ int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
 
 	return 1;
 }
-EXPORT_SYMBOL(splice_from_pipe_next);
 
 /**
  * splice_from_pipe_begin - start splicing from pipe
@@ -898,12 +895,11 @@ EXPORT_SYMBOL(splice_from_pipe_next);
  *    splice_from_pipe_next() and splice_from_pipe_feed() to
  *    initialize the necessary fields of @sd.
  */
-void splice_from_pipe_begin(struct splice_desc *sd)
+static void splice_from_pipe_begin(struct splice_desc *sd)
 {
 	sd->num_spliced = 0;
 	sd->need_wakeup = false;
 }
-EXPORT_SYMBOL(splice_from_pipe_begin);
 
 /**
  * splice_from_pipe_end - finish splicing from pipe
@@ -915,12 +911,11 @@ EXPORT_SYMBOL(splice_from_pipe_begin);
  *    be called after a loop containing splice_from_pipe_next() and
  *    splice_from_pipe_feed().
  */
-void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
+static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
 {
 	if (sd->need_wakeup)
 		wakeup_pipe_writers(pipe);
 }
-EXPORT_SYMBOL(splice_from_pipe_end);
 
 /**
  * __splice_from_pipe - splice data from a pipe to given actor
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 0e43906d2fda..da2751d3b93d 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -70,16 +70,6 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
 				splice_actor *);
 extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
 				  struct splice_desc *, splice_actor *);
-extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *,
-				 splice_actor *);
-extern int splice_from_pipe_next(struct pipe_inode_info *,
-				 struct splice_desc *);
-extern void splice_from_pipe_begin(struct splice_desc *);
-extern void splice_from_pipe_end(struct pipe_inode_info *,
-				 struct splice_desc *);
-extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *,
-			struct splice_desc *);
-
 extern ssize_t splice_to_pipe(struct pipe_inode_info *,
 			      struct splice_pipe_desc *);
 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
-- 
cgit v1.2.3-71-gd317


From 5f073850602084fbcbb987948ff3e70ae273f7d2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Apr 2014 13:31:50 -0400
Subject: kill generic_file_splice_write()

no callers left

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/splice.c        | 124 -----------------------------------------------------
 include/linux/fs.h |   2 -
 2 files changed, 126 deletions(-)

(limited to 'include')

diff --git a/fs/splice.c b/fs/splice.c
index ab84051758a7..8e7eef755a9b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -718,62 +718,6 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 				    sd->len, &pos, more);
 }
 
-/*
- * This is a little more tricky than the file -> pipe splicing. There are
- * basically three cases:
- *
- *	- Destination page already exists in the address space and there
- *	  are users of it. For that case we have no other option that
- *	  copying the data. Tough luck.
- *	- Destination page already exists in the address space, but there
- *	  are no users of it. Make sure it's uptodate, then drop it. Fall
- *	  through to last case.
- *	- Destination page does not exist, we can add the pipe page to
- *	  the page cache and avoid the copy.
- *
- * If asked to move pages to the output file (SPLICE_F_MOVE is set in
- * sd->flags), we attempt to migrate pages from the pipe to the output
- * file address space page cache. This is possible if no one else has
- * the pipe page referenced outside of the pipe and page cache. If
- * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
- * a new page in the output file page cache and fill/dirty that.
- */
-static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
-		 struct splice_desc *sd)
-{
-	struct file *file = sd->u.file;
-	struct address_space *mapping = file->f_mapping;
-	unsigned int offset, this_len;
-	struct page *page;
-	void *fsdata;
-	int ret;
-
-	offset = sd->pos & ~PAGE_CACHE_MASK;
-
-	this_len = sd->len;
-	if (this_len + offset > PAGE_CACHE_SIZE)
-		this_len = PAGE_CACHE_SIZE - offset;
-
-	ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
-	if (unlikely(ret))
-		goto out;
-
-	if (buf->page != page) {
-		char *src = kmap_atomic(buf->page);
-		char *dst = kmap_atomic(page);
-
-		memcpy(dst + offset, src + buf->offset, this_len);
-		flush_dcache_page(page);
-		kunmap_atomic(dst);
-		kunmap_atomic(src);
-	}
-	ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
-				page, fsdata);
-out:
-	return ret;
-}
-
 static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
 {
 	smp_mb();
@@ -980,74 +924,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	return ret;
 }
 
-/**
- * generic_file_splice_write - splice data from a pipe to a file
- * @pipe:	pipe info
- * @out:	file to write to
- * @ppos:	position in @out
- * @len:	number of bytes to splice
- * @flags:	splice modifier flags
- *
- * Description:
- *    Will either move or copy pages (determined by @flags options) from
- *    the given pipe inode to the given file.
- *
- */
-ssize_t
-generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
-			  loff_t *ppos, size_t len, unsigned int flags)
-{
-	struct address_space *mapping = out->f_mapping;
-	struct inode *inode = mapping->host;
-	struct splice_desc sd = {
-		.total_len = len,
-		.flags = flags,
-		.pos = *ppos,
-		.u.file = out,
-	};
-	ssize_t ret;
-
-	pipe_lock(pipe);
-
-	splice_from_pipe_begin(&sd);
-	do {
-		ret = splice_from_pipe_next(pipe, &sd);
-		if (ret <= 0)
-			break;
-
-		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
-		ret = file_remove_suid(out);
-		if (!ret) {
-			ret = file_update_time(out);
-			if (!ret)
-				ret = splice_from_pipe_feed(pipe, &sd,
-							    pipe_to_file);
-		}
-		mutex_unlock(&inode->i_mutex);
-	} while (ret > 0);
-	splice_from_pipe_end(pipe, &sd);
-
-	pipe_unlock(pipe);
-
-	if (sd.num_spliced)
-		ret = sd.num_spliced;
-
-	if (ret > 0) {
-		int err;
-
-		err = generic_write_sync(out, *ppos, ret);
-		if (err)
-			ret = err;
-		else
-			*ppos += ret;
-		balance_dirty_pages_ratelimited(mapping);
-	}
-
-	return ret;
-}
-
-EXPORT_SYMBOL(generic_file_splice_write);
-
 /**
  * iter_file_splice_write - splice data from a pipe to a file
  * @pipe:	pipe info
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8bd8ed357c7b..4e92d551518d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2432,8 +2432,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t default_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
-extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
-		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
-- 
cgit v1.2.3-71-gd317


From 2bd0ae464a6cf7363bbf72c8545e0aa43caa57f0 Mon Sep 17 00:00:00 2001
From: "Wang, Xiaoming" <xiaoming.wang@intel.com>
Date: Thu, 12 Jun 2014 18:47:07 -0400
Subject: ALSA: compress: Cancel the optimization of compiler and fix the size
 of struct for all platform.

Cancel the optimization of compiler for struct snd_compr_avail
which size will be 0x1c in 32bit kernel while 0x20 in 64bit
kernel under the optimizer. That will make compaction between
32bit and 64bit. So add packed to fix the size of struct
snd_compr_avail to 0x1c for all platform.

Signed-off-by: Zhang Dongxing <dongxing.zhang@intel.com>
Signed-off-by: xiaoming wang <xiaoming.wang@intel.com>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/uapi/sound/compress_offload.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
index 5759810e1c1b..21eed488783f 100644
--- a/include/uapi/sound/compress_offload.h
+++ b/include/uapi/sound/compress_offload.h
@@ -80,7 +80,7 @@ struct snd_compr_tstamp {
 struct snd_compr_avail {
 	__u64 avail;
 	struct snd_compr_tstamp tstamp;
-};
+} __attribute__((packed));
 
 enum snd_compr_direction {
 	SND_COMPRESS_PLAYBACK = 0,
-- 
cgit v1.2.3-71-gd317


From 26204e048d2ee0c65e0539f7cc2b66f845a19a41 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 11 Jun 2014 23:59:14 +0000
Subject: ALSA: core: Use ktime_get_ts()

do_posix_clock_monotonic_gettime() is a leftover from the initial
posix timer implementation which maps to ktime_get_ts().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h | 2 +-
 sound/core/timer.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index b4d6697085fe..d854fb31c000 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -932,7 +932,7 @@ static inline void snd_pcm_gettime(struct snd_pcm_runtime *runtime,
 				   struct timespec *tv)
 {
 	if (runtime->tstamp_type == SNDRV_PCM_TSTAMP_TYPE_MONOTONIC)
-		do_posix_clock_monotonic_gettime(tv);
+		ktime_get_ts(tv);
 	else
 		getnstimeofday(tv);
 }
diff --git a/sound/core/timer.c b/sound/core/timer.c
index cfd455a8ac1a..777a45e08e53 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -390,7 +390,7 @@ static void snd_timer_notify1(struct snd_timer_instance *ti, int event)
 	struct timespec tstamp;
 
 	if (timer_tstamp_monotonic)
-		do_posix_clock_monotonic_gettime(&tstamp);
+		ktime_get_ts(&tstamp);
 	else
 		getnstimeofday(&tstamp);
 	if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_START ||
@@ -1203,7 +1203,7 @@ static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri,
 	}
 	if (tu->last_resolution != resolution || ticks > 0) {
 		if (timer_tstamp_monotonic)
-			do_posix_clock_monotonic_gettime(&tstamp);
+			ktime_get_ts(&tstamp);
 		else
 			getnstimeofday(&tstamp);
 	}
-- 
cgit v1.2.3-71-gd317


From 1a539d372edd9832444e7a3daa710c444c014dc9 Mon Sep 17 00:00:00 2001
From: Tomas Pop <Tomas.Pop@sensirion.com>
Date: Thu, 5 Jun 2014 15:24:19 -0700
Subject: hwmon: add support for Sensirion SHTC1 sensor

Add support for Sensirion SHTC1 and compatible temperature and humidity
sensors.

Signed-off-by: Tomas Pop <tomas.pop@sensirion.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/shtc1           |  43 ++++++
 drivers/hwmon/Kconfig               |  10 ++
 drivers/hwmon/Makefile              |   1 +
 drivers/hwmon/shtc1.c               | 251 ++++++++++++++++++++++++++++++++++++
 include/linux/platform_data/shtc1.h |  23 ++++
 5 files changed, 328 insertions(+)
 create mode 100644 Documentation/hwmon/shtc1
 create mode 100644 drivers/hwmon/shtc1.c
 create mode 100644 include/linux/platform_data/shtc1.h

(limited to 'include')

diff --git a/Documentation/hwmon/shtc1 b/Documentation/hwmon/shtc1
new file mode 100644
index 000000000000..6b1e05458f0f
--- /dev/null
+++ b/Documentation/hwmon/shtc1
@@ -0,0 +1,43 @@
+Kernel driver shtc1
+===================
+
+Supported chips:
+  * Sensirion SHTC1
+    Prefix: 'shtc1'
+    Addresses scanned: none
+    Datasheet: http://www.sensirion.com/file/datasheet_shtc1
+
+  * Sensirion SHTW1
+    Prefix: 'shtw1'
+    Addresses scanned: none
+    Datasheet: Not publicly available
+
+Author:
+  Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+
+Description
+-----------
+
+This driver implements support for the Sensirion SHTC1 chip, a humidity and
+temperature sensor. Temperature is measured in degrees celsius, relative
+humidity is expressed as a percentage. Driver can be used as well for SHTW1
+chip, which has the same electrical interface.
+
+The device communicates with the I2C protocol. All sensors are set to I2C
+address 0x70. See Documentation/i2c/instantiating-devices for methods to
+instantiate the device.
+
+There are two options configurable by means of shtc1_platform_data:
+1. blocking (pull the I2C clock line down while performing the measurement) or
+   non-blocking mode. Blocking mode will guarantee the fastest result but
+   the I2C bus will be busy during that time. By default, non-blocking mode
+   is used. Make sure clock-stretching works properly on your device if you
+   want to use blocking mode.
+2. high or low accuracy. High accuracy is used by default and using it is
+   strongly recommended.
+
+sysfs-Interface
+---------------
+
+temp1_input - temperature input
+humidity1_input - humidity input
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 00343166feb1..08531a128f53 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1124,6 +1124,16 @@ config SENSORS_SHT21
 	  This driver can also be built as a module.  If so, the module
 	  will be called sht21.
 
+config SENSORS_SHTC1
+	tristate "Sensiron humidity and temperature sensors. SHTC1 and compat."
+	depends on I2C
+	help
+	  If you say yes here you get support for the Sensiron SHTC1 and SHTW1
+	  humidity and temperature sensors.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called shtc1.
+
 config SENSORS_S3C
 	tristate "Samsung built-in ADC"
 	depends on S3C_ADC
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 11798ad7e801..3dc0f02f71d2 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -126,6 +126,7 @@ obj-$(CONFIG_SENSORS_SCH5627)	+= sch5627.o
 obj-$(CONFIG_SENSORS_SCH5636)	+= sch5636.o
 obj-$(CONFIG_SENSORS_SHT15)	+= sht15.o
 obj-$(CONFIG_SENSORS_SHT21)	+= sht21.o
+obj-$(CONFIG_SENSORS_SHTC1)	+= shtc1.o
 obj-$(CONFIG_SENSORS_SIS5595)	+= sis5595.o
 obj-$(CONFIG_SENSORS_SMM665)	+= smm665.o
 obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o
diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c
new file mode 100644
index 000000000000..decd7df995ab
--- /dev/null
+++ b/drivers/hwmon/shtc1.c
@@ -0,0 +1,251 @@
+/* Sensirion SHTC1 humidity and temperature sensor driver
+ *
+ * Copyright (C) 2014 Sensirion AG, Switzerland
+ * Author: Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/platform_data/shtc1.h>
+
+/* commands (high precision mode) */
+static const unsigned char shtc1_cmd_measure_blocking_hpm[]    = { 0x7C, 0xA2 };
+static const unsigned char shtc1_cmd_measure_nonblocking_hpm[] = { 0x78, 0x66 };
+
+/* commands (low precision mode) */
+static const unsigned char shtc1_cmd_measure_blocking_lpm[]    = { 0x64, 0x58 };
+static const unsigned char shtc1_cmd_measure_nonblocking_lpm[] = { 0x60, 0x9c };
+
+/* command for reading the ID register */
+static const unsigned char shtc1_cmd_read_id_reg[]	       = { 0xef, 0xc8 };
+
+/* constants for reading the ID register */
+#define SHTC1_ID	  0x07
+#define SHTC1_ID_REG_MASK 0x1f
+
+/* delays for non-blocking i2c commands, both in us */
+#define SHTC1_NONBLOCKING_WAIT_TIME_HPM  14400
+#define SHTC1_NONBLOCKING_WAIT_TIME_LPM   1000
+
+#define SHTC1_CMD_LENGTH      2
+#define SHTC1_RESPONSE_LENGTH 6
+
+struct shtc1_data {
+	struct i2c_client *client;
+	struct mutex update_lock;
+	bool valid;
+	unsigned long last_updated; /* in jiffies */
+
+	const unsigned char *command;
+	unsigned int nonblocking_wait_time; /* in us */
+
+	struct shtc1_platform_data setup;
+
+	int temperature; /* 1000 * temperature in dgr C */
+	int humidity; /* 1000 * relative humidity in %RH */
+};
+
+static int shtc1_update_values(struct i2c_client *client,
+			       struct shtc1_data *data,
+			       char *buf, int bufsize)
+{
+	int ret = i2c_master_send(client, data->command, SHTC1_CMD_LENGTH);
+	if (ret != SHTC1_CMD_LENGTH) {
+		dev_err(&client->dev, "failed to send command: %d\n", ret);
+		return ret < 0 ? ret : -EIO;
+	}
+
+	/*
+	 * In blocking mode (clock stretching mode) the I2C bus
+	 * is blocked for other traffic, thus the call to i2c_master_recv()
+	 * will wait until the data is ready. For non blocking mode, we
+	 * have to wait ourselves.
+	 */
+	if (!data->setup.blocking_io)
+		usleep_range(data->nonblocking_wait_time,
+			     data->nonblocking_wait_time + 1000);
+
+	ret = i2c_master_recv(client, buf, bufsize);
+	if (ret != bufsize) {
+		dev_err(&client->dev, "failed to read values: %d\n", ret);
+		return ret < 0 ? ret : -EIO;
+	}
+
+	return 0;
+}
+
+/* sysfs attributes */
+static struct shtc1_data *shtc1_update_client(struct device *dev)
+{
+	struct shtc1_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+	unsigned char buf[SHTC1_RESPONSE_LENGTH];
+	int val;
+	int ret = 0;
+
+	mutex_lock(&data->update_lock);
+
+	if (time_after(jiffies, data->last_updated + HZ / 10) || !data->valid) {
+		ret = shtc1_update_values(client, data, buf, sizeof(buf));
+		if (ret)
+			goto out;
+
+		/*
+		 * From datasheet:
+		 * T = -45 + 175 * ST / 2^16
+		 * RH = 100 * SRH / 2^16
+		 *
+		 * Adapted for integer fixed point (3 digit) arithmetic.
+		 */
+		val = be16_to_cpup((__be16 *)buf);
+		data->temperature = ((21875 * val) >> 13) - 45000;
+		val = be16_to_cpup((__be16 *)(buf + 3));
+		data->humidity = ((12500 * val) >> 13);
+
+		data->last_updated = jiffies;
+		data->valid = true;
+	}
+
+out:
+	mutex_unlock(&data->update_lock);
+
+	return ret == 0 ? data : ERR_PTR(ret);
+}
+
+static ssize_t temp1_input_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct shtc1_data *data = shtc1_update_client(dev);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	return sprintf(buf, "%d\n", data->temperature);
+}
+
+static ssize_t humidity1_input_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct shtc1_data *data = shtc1_update_client(dev);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	return sprintf(buf, "%d\n", data->humidity);
+}
+
+static DEVICE_ATTR_RO(temp1_input);
+static DEVICE_ATTR_RO(humidity1_input);
+
+static struct attribute *shtc1_attrs[] = {
+	&dev_attr_temp1_input.attr,
+	&dev_attr_humidity1_input.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(shtc1);
+
+static void shtc1_select_command(struct shtc1_data *data)
+{
+	if (data->setup.high_precision) {
+		data->command = data->setup.blocking_io ?
+				shtc1_cmd_measure_blocking_hpm :
+				shtc1_cmd_measure_nonblocking_hpm;
+		data->nonblocking_wait_time = SHTC1_NONBLOCKING_WAIT_TIME_HPM;
+
+	} else {
+		data->command = data->setup.blocking_io ?
+				shtc1_cmd_measure_blocking_lpm :
+				shtc1_cmd_measure_nonblocking_lpm;
+		data->nonblocking_wait_time = SHTC1_NONBLOCKING_WAIT_TIME_LPM;
+	}
+}
+
+static int shtc1_probe(struct i2c_client *client,
+		       const struct i2c_device_id *id)
+{
+	int ret;
+	char id_reg[2];
+	struct shtc1_data *data;
+	struct device *hwmon_dev;
+	struct i2c_adapter *adap = client->adapter;
+	struct device *dev = &client->dev;
+
+	if (!i2c_check_functionality(adap, I2C_FUNC_I2C)) {
+		dev_err(dev, "plain i2c transactions not supported\n");
+		return -ENODEV;
+	}
+
+	ret = i2c_master_send(client, shtc1_cmd_read_id_reg, SHTC1_CMD_LENGTH);
+	if (ret != SHTC1_CMD_LENGTH) {
+		dev_err(dev, "could not send read_id_reg command: %d\n", ret);
+		return ret < 0 ? ret : -ENODEV;
+	}
+	ret = i2c_master_recv(client, id_reg, sizeof(id_reg));
+	if (ret != sizeof(id_reg)) {
+		dev_err(dev, "could not read ID register: %d\n", ret);
+		return -ENODEV;
+	}
+	if ((id_reg[1] & SHTC1_ID_REG_MASK) != SHTC1_ID) {
+		dev_err(dev, "ID register doesn't match\n");
+		return -ENODEV;
+	}
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->setup.blocking_io = false;
+	data->setup.high_precision = true;
+	data->client = client;
+
+	if (client->dev.platform_data)
+		data->setup = *(struct shtc1_platform_data *)dev->platform_data;
+	shtc1_select_command(data);
+	mutex_init(&data->update_lock);
+
+	hwmon_dev = devm_hwmon_device_register_with_groups(dev,
+							   client->name,
+							   data,
+							   shtc1_groups);
+	if (IS_ERR(hwmon_dev))
+		dev_dbg(dev, "unable to register hwmon device\n");
+
+	return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+/* device ID table */
+static const struct i2c_device_id shtc1_id[] = {
+	{ "shtc1", 0 },
+	{ "shtw1", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, shtc1_id);
+
+static struct i2c_driver shtc1_i2c_driver = {
+	.driver.name  = "shtc1",
+	.probe        = shtc1_probe,
+	.id_table     = shtc1_id,
+};
+
+module_i2c_driver(shtc1_i2c_driver);
+
+MODULE_AUTHOR("Johannes Winkelmann <johannes.winkelmann@sensirion.com>");
+MODULE_DESCRIPTION("Sensirion SHTC1 humidity and temperature sensor driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/shtc1.h b/include/linux/platform_data/shtc1.h
new file mode 100644
index 000000000000..7b8c353f7dc8
--- /dev/null
+++ b/include/linux/platform_data/shtc1.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2014 Sensirion AG, Switzerland
+ * Author: Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __SHTC1_H_
+#define __SHTC1_H_
+
+struct shtc1_platform_data {
+	bool blocking_io;
+	bool high_precision;
+};
+#endif /* __SHTC1_H_ */
-- 
cgit v1.2.3-71-gd317


From f3db22feb5de6b98b7bae924c2d4b6c8d65bedae Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Wed, 11 Jun 2014 11:51:35 -0600
Subject: NVMe: Fix hot cpu notification dead lock

There is a potential dead lock if a cpu event occurs during nvme probe
since it registered with hot cpu notification. This fixes the race by
having the module register with notification outside of probe rather
than have each device register.

The actual work is done in a scheduled work queue instead of in the
notifier since assigning IO queues has the potential to block if the
driver creates additional queues.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme-core.c | 35 +++++++++++++++++++++++++----------
 include/linux/nvme.h      |  2 +-
 2 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 2af079e571fc..e0ac1210fe31 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -73,6 +73,7 @@ static LIST_HEAD(dev_list);
 static struct task_struct *nvme_thread;
 static struct workqueue_struct *nvme_workq;
 static wait_queue_head_t nvme_kthread_wait;
+static struct notifier_block nvme_nb;
 
 static void nvme_reset_failed_dev(struct work_struct *ws);
 
@@ -2115,14 +2116,25 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
 	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
 }
 
+static void nvme_cpu_workfn(struct work_struct *work)
+{
+	struct nvme_dev *dev = container_of(work, struct nvme_dev, cpu_work);
+	if (dev->initialized)
+		nvme_assign_io_queues(dev);
+}
+
 static int nvme_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
-	struct nvme_dev *dev = container_of(self, struct nvme_dev, nb);
+	struct nvme_dev *dev;
+
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DEAD:
-		nvme_assign_io_queues(dev);
+		spin_lock(&dev_list_lock);
+		list_for_each_entry(dev, &dev_list, node)
+			schedule_work(&dev->cpu_work);
+		spin_unlock(&dev_list_lock);
 		break;
 	}
 	return NOTIFY_OK;
@@ -2191,11 +2203,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	nvme_free_queues(dev, nr_io_queues + 1);
 	nvme_assign_io_queues(dev);
 
-	dev->nb.notifier_call = &nvme_cpu_notify;
-	result = register_hotcpu_notifier(&dev->nb);
-	if (result)
-		goto free_queues;
-
 	return 0;
 
  free_queues:
@@ -2495,8 +2502,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 	int i;
 
 	dev->initialized = 0;
-	unregister_hotcpu_notifier(&dev->nb);
-
 	nvme_dev_list_remove(dev);
 
 	if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
@@ -2767,6 +2772,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_LIST_HEAD(&dev->namespaces);
 	dev->reset_workfn = nvme_reset_failed_dev;
 	INIT_WORK(&dev->reset_work, nvme_reset_workfn);
+	INIT_WORK(&dev->cpu_work, nvme_cpu_workfn);
 	dev->pci_dev = pdev;
 	pci_set_drvdata(pdev, dev);
 	result = nvme_set_instance(dev);
@@ -2836,6 +2842,7 @@ static void nvme_remove(struct pci_dev *pdev)
 
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->reset_work);
+	flush_work(&dev->cpu_work);
 	misc_deregister(&dev->miscdev);
 	nvme_dev_remove(dev);
 	nvme_dev_shutdown(dev);
@@ -2923,11 +2930,18 @@ static int __init nvme_init(void)
 	else if (result > 0)
 		nvme_major = result;
 
-	result = pci_register_driver(&nvme_driver);
+	nvme_nb.notifier_call = &nvme_cpu_notify;
+	result = register_hotcpu_notifier(&nvme_nb);
 	if (result)
 		goto unregister_blkdev;
+
+	result = pci_register_driver(&nvme_driver);
+	if (result)
+		goto unregister_hotcpu;
 	return 0;
 
+ unregister_hotcpu:
+	unregister_hotcpu_notifier(&nvme_nb);
  unregister_blkdev:
 	unregister_blkdev(nvme_major, "nvme");
  kill_workq:
@@ -2938,6 +2952,7 @@ static int __init nvme_init(void)
 static void __exit nvme_exit(void)
 {
 	pci_unregister_driver(&nvme_driver);
+	unregister_hotcpu_notifier(&nvme_nb);
 	unregister_blkdev(nvme_major, "nvme");
 	destroy_workqueue(nvme_workq);
 	BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 8541dd920bb7..2bf403195c09 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -90,7 +90,7 @@ struct nvme_dev {
 	struct miscdevice miscdev;
 	work_func_t reset_workfn;
 	struct work_struct reset_work;
-	struct notifier_block nb;
+	struct work_struct cpu_work;
 	char name[12];
 	char serial[20];
 	char model[40];
-- 
cgit v1.2.3-71-gd317


From 3d69bb6e4699251102dc145b7800dd012ddec375 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Fri, 13 Jun 2014 10:53:49 -0400
Subject: NVMe: Define Log Page constants

Taken from the 1.1a version of the spec

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 include/uapi/linux/nvme.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index a6fb2a360577..29a7d8619d8d 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -306,6 +306,10 @@ enum {
 	NVME_FEAT_WRITE_ATOMIC	= 0x0a,
 	NVME_FEAT_ASYNC_EVENT	= 0x0b,
 	NVME_FEAT_SW_PROGRESS	= 0x0c,
+	NVME_LOG_ERROR		= 0x01,
+	NVME_LOG_SMART		= 0x02,
+	NVME_LOG_FW_SLOT	= 0x03,
+	NVME_LOG_RESERVATION	= 0x80,
 	NVME_FWACT_REPL		= (0 << 3),
 	NVME_FWACT_REPL_ACTV	= (1 << 3),
 	NVME_FWACT_ACTV		= (2 << 3),
-- 
cgit v1.2.3-71-gd317


From cc68a8a5a4330a4bb72922d0c7a7044ae13ee692 Mon Sep 17 00:00:00 2001
From: Gerhard Heift <gerhard@heift.name>
Date: Thu, 30 Jan 2014 16:24:03 +0100
Subject: btrfs: new ioctl TREE_SEARCH_V2

This new ioctl call allows the user to supply a buffer of varying size in which
a tree search can store its results. This is much more flexible if you want to
receive items which are larger than the current fixed buffer of 3992 bytes or
if you want to fetch more items at once. Items larger than this buffer are for
example some of the type EXTENT_CSUM.

Signed-off-by: Gerhard Heift <Gerhard@Heift.Name>
Signed-off-by: Chris Mason <clm@fb.com>
Acked-by: David Sterba <dsterba@suse.cz>
---
 fs/btrfs/ioctl.c           | 41 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/btrfs.h | 10 ++++++++++
 2 files changed, 51 insertions(+)

(limited to 'include')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 393a543a519e..6ea15469c63f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2172,6 +2172,45 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
 	return ret;
 }
 
+static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
+					       void __user *argp)
+{
+	struct btrfs_ioctl_search_args_v2 __user *uarg;
+	struct btrfs_ioctl_search_args_v2 args;
+	struct inode *inode;
+	int ret;
+	size_t buf_size;
+	const size_t buf_limit = 16 * 1024 * 1024;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* copy search header and buffer size */
+	uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp;
+	if (copy_from_user(&args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	buf_size = args.buf_size;
+
+	if (buf_size < sizeof(struct btrfs_ioctl_search_header))
+		return -EOVERFLOW;
+
+	/* limit result size to 16MB */
+	if (buf_size > buf_limit)
+		buf_size = buf_limit;
+
+	inode = file_inode(file);
+	ret = search_ioctl(inode, &args.key, &buf_size,
+			   (char *)(&uarg->buf[0]));
+	if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
+		ret = -EFAULT;
+	else if (ret == -EOVERFLOW &&
+		copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size)))
+		ret = -EFAULT;
+
+	return ret;
+}
+
 /*
  * Search INODE_REFs to identify path name of 'dirid' directory
  * in a 'tree_id' tree. and sets path name to 'name'.
@@ -5252,6 +5291,8 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_trans_end(file);
 	case BTRFS_IOC_TREE_SEARCH:
 		return btrfs_ioctl_tree_search(file, argp);
+	case BTRFS_IOC_TREE_SEARCH_V2:
+		return btrfs_ioctl_tree_search_v2(file, argp);
 	case BTRFS_IOC_INO_LOOKUP:
 		return btrfs_ioctl_ino_lookup(file, argp);
 	case BTRFS_IOC_INO_PATHS:
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 7554fd381a56..6f9c38ce45c7 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -306,6 +306,14 @@ struct btrfs_ioctl_search_args {
 	char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
 };
 
+struct btrfs_ioctl_search_args_v2 {
+	struct btrfs_ioctl_search_key key; /* in/out - search parameters */
+	__u64 buf_size;		   /* in - size of buffer
+					    * out - on EOVERFLOW: needed size
+					    *       to store item */
+	__u64 buf[0];                       /* out - found items */
+};
+
 struct btrfs_ioctl_clone_range_args {
   __s64 src_fd;
   __u64 src_offset, src_length;
@@ -558,6 +566,8 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
 				struct btrfs_ioctl_defrag_range_args)
 #define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
 				   struct btrfs_ioctl_search_args)
+#define BTRFS_IOC_TREE_SEARCH_V2 _IOWR(BTRFS_IOCTL_MAGIC, 17, \
+					   struct btrfs_ioctl_search_args_v2)
 #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
 				   struct btrfs_ioctl_ino_lookup_args)
 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)
-- 
cgit v1.2.3-71-gd317


From 4b28252cada3d0521ab59751f4240ecdfb9bba18 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sat, 14 Jun 2014 23:23:52 -0700
Subject: net: Fix GSO constants to match NETIF flags

Joseph Gasparakis reported that VXLAN GSO offload stopped working with
i40e device after recent UDP changes. The problem is that the
SKB_GSO_* bits are out of sync with the corresponding NETIF flags. This
patch fixes that. Also, we add BUILD_BUG_ONs in net_gso_ok for several
GSO constants that were missing to avoid the problem in the future.

Reported-by: Joseph Gasparakis <joseph.gasparakis@intel.com>
Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |  1 +
 include/linux/netdevice.h       |  7 +++++++
 include/linux/skbuff.h          | 11 ++++++-----
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index e5a589435e2b..d99800cbdcf3 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -117,6 +117,7 @@ enum {
 #define NETIF_F_GSO_IPIP	__NETIF_F(GSO_IPIP)
 #define NETIF_F_GSO_SIT		__NETIF_F(GSO_SIT)
 #define NETIF_F_GSO_UDP_TUNNEL	__NETIF_F(GSO_UDP_TUNNEL)
+#define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM)
 #define NETIF_F_GSO_MPLS	__NETIF_F(GSO_MPLS)
 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
 #define NETIF_F_HW_VLAN_STAG_RX	__NETIF_F(HW_VLAN_STAG_RX)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index abe3de1db932..66f9a04ec270 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3305,6 +3305,13 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 	BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_TCPV6   != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_FCOE    != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_GRE     != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_GRE_CSUM != (NETIF_F_GSO_GRE_CSUM >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_IPIP    != (NETIF_F_GSO_IPIP >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_SIT     != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_MPLS    != (NETIF_F_GSO_MPLS >> NETIF_F_GSO_SHIFT));
 
 	return (features & feature) == feature;
 }
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5b5cd3189c98..e13ed90be7c2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -338,17 +338,18 @@ enum {
 
 	SKB_GSO_GRE = 1 << 6,
 
-	SKB_GSO_IPIP = 1 << 7,
+	SKB_GSO_GRE_CSUM = 1 << 7,
 
-	SKB_GSO_SIT = 1 << 8,
+	SKB_GSO_IPIP = 1 << 8,
 
-	SKB_GSO_UDP_TUNNEL = 1 << 9,
+	SKB_GSO_SIT = 1 << 9,
 
-	SKB_GSO_MPLS = 1 << 10,
+	SKB_GSO_UDP_TUNNEL = 1 << 10,
 
 	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
 
-	SKB_GSO_GRE_CSUM = 1 << 12,
+	SKB_GSO_MPLS = 1 << 12,
+
 };
 
 #if BITS_PER_LONG > 32
-- 
cgit v1.2.3-71-gd317


From bbdff225ede6527f91184b2a7903df8aad803ace Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sat, 14 Jun 2014 23:24:20 -0700
Subject: udp: call __skb_checksum_complete when doing full checksum

In __udp_lib_checksum_complete check if checksum is being done over all
the data (len is equal to skb->len) and if it is call
__skb_checksum_complete instead of __skb_checksum_complete_head. This
allows checksum to be saved in checksum complete.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/udp.h b/include/net/udp.h
index 2ecfc6e15609..68a1fefe3dfe 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -111,7 +111,9 @@ struct sk_buff;
  */
 static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb)
 {
-	return __skb_checksum_complete_head(skb, UDP_SKB_CB(skb)->cscov);
+	return (UDP_SKB_CB(skb)->cscov == skb->len ?
+		__skb_checksum_complete(skb) :
+		__skb_checksum_complete_head(skb, UDP_SKB_CB(skb)->cscov));
 }
 
 static inline int udp_lib_checksum_complete(struct sk_buff *skb)
-- 
cgit v1.2.3-71-gd317


From e5eb4e30a51236079fb22bb9f75fcd31915b03c6 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sat, 14 Jun 2014 23:24:28 -0700
Subject: net: add skb_pop_rcv_encapsulation

This function is used by UDP encapsulation protocols in RX when
crossing encapsulation boundary. If ip_summed is set to
CHECKSUM_UNNECESSARY and encapsulation is not set, change to
CHECKSUM_NONE since the checksum has not been validated within the
encapsulation. Clears csum_valid by the same rationale.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e13ed90be7c2..ec89301ada41 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1854,6 +1854,18 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 	return pskb_may_pull(skb, skb_network_offset(skb) + len);
 }
 
+static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb)
+{
+	/* Only continue with checksum unnecessary if device indicated
+	 * it is valid across encapsulation (skb->encapsulation was set).
+	 */
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation)
+		skb->ip_summed = CHECKSUM_NONE;
+
+	skb->encapsulation = 0;
+	skb->csum_valid = 0;
+}
+
 /*
  * CPUs often take a performance hit when accessing unaligned memory
  * locations. The actual performance hit varies, it can be small if the
-- 
cgit v1.2.3-71-gd317


From 74b0c2d75fb4cc89173944e6d8f9eb47aca0c343 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 13 Jun 2014 15:14:34 +0200
Subject: drm/i915, HD-audio: Don't continue probing when nomodeset is given

When a machine is booted with nomodeset option, i915 driver skips the
whole initialization.  Meanwhile, HD-audio tries to bind wth i915 just
by request_symbol() without knowing that the initialization was
skipped, and eventually it hits WARN_ON() in i915_request_power_well()
and i915_release_power_well() wrongly but still continues probing,
even though it doesn't work at all.

In this patch, both functions are changed to return an error in case
of uninitialized state instead of WARN_ON(), so that HD-audio driver
can give up HDMI controller initialization at the right time.

Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: <stable@vger.kernel.org> [3.15]
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/gpu/drm/i915/intel_pm.c | 14 ++++++++------
 include/drm/i915_powerwell.h    |  4 ++--
 sound/pci/hda/hda_i915.c        | 12 ++++++------
 sound/pci/hda/hda_i915.h        |  4 ++--
 sound/pci/hda/hda_intel.c       |  7 ++++++-
 5 files changed, 24 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d1e53abec1b5..6463f0201cf2 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6024,30 +6024,32 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 static struct i915_power_domains *hsw_pwr;
 
 /* Display audio driver power well request */
-void i915_request_power_well(void)
+int i915_request_power_well(void)
 {
 	struct drm_i915_private *dev_priv;
 
-	if (WARN_ON(!hsw_pwr))
-		return;
+	if (!hsw_pwr)
+		return -ENODEV;
 
 	dev_priv = container_of(hsw_pwr, struct drm_i915_private,
 				power_domains);
 	intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(i915_request_power_well);
 
 /* Display audio driver power well release */
-void i915_release_power_well(void)
+int i915_release_power_well(void)
 {
 	struct drm_i915_private *dev_priv;
 
-	if (WARN_ON(!hsw_pwr))
-		return;
+	if (!hsw_pwr)
+		return -ENODEV;
 
 	dev_priv = container_of(hsw_pwr, struct drm_i915_private,
 				power_domains);
 	intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(i915_release_power_well);
 
diff --git a/include/drm/i915_powerwell.h b/include/drm/i915_powerwell.h
index cfdc884405b7..2baba9996094 100644
--- a/include/drm/i915_powerwell.h
+++ b/include/drm/i915_powerwell.h
@@ -30,7 +30,7 @@
 #define _I915_POWERWELL_H_
 
 /* For use by hda_i915 driver */
-extern void i915_request_power_well(void);
-extern void i915_release_power_well(void);
+extern int i915_request_power_well(void);
+extern int i915_release_power_well(void);
 
 #endif				/* _I915_POWERWELL_H_ */
diff --git a/sound/pci/hda/hda_i915.c b/sound/pci/hda/hda_i915.c
index 9d07e4edacdb..e9e8a4a4a9a1 100644
--- a/sound/pci/hda/hda_i915.c
+++ b/sound/pci/hda/hda_i915.c
@@ -22,20 +22,20 @@
 #include <drm/i915_powerwell.h>
 #include "hda_i915.h"
 
-static void (*get_power)(void);
-static void (*put_power)(void);
+static int (*get_power)(void);
+static int (*put_power)(void);
 
-void hda_display_power(bool enable)
+int hda_display_power(bool enable)
 {
 	if (!get_power || !put_power)
-		return;
+		return -ENODEV;
 
 	pr_debug("HDA display power %s \n",
 			enable ? "Enable" : "Disable");
 	if (enable)
-		get_power();
+		return get_power();
 	else
-		put_power();
+		return put_power();
 }
 
 int hda_i915_init(void)
diff --git a/sound/pci/hda/hda_i915.h b/sound/pci/hda/hda_i915.h
index 5a63da2c53e5..bfd835f8f1aa 100644
--- a/sound/pci/hda/hda_i915.h
+++ b/sound/pci/hda/hda_i915.h
@@ -17,11 +17,11 @@
 #define __SOUND_HDA_I915_H
 
 #ifdef CONFIG_SND_HDA_I915
-void hda_display_power(bool enable);
+int hda_display_power(bool enable);
 int hda_i915_init(void);
 int hda_i915_exit(void);
 #else
-static inline void hda_display_power(bool enable) {}
+static inline int hda_display_power(bool enable) { return 0; }
 static inline int hda_i915_init(void)
 {
 	return -ENODEV;
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index bb65a124e006..23fd6b9aecca 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1656,8 +1656,13 @@ static int azx_probe_continue(struct azx *chip)
 				"Error request power-well from i915\n");
 			goto out_free;
 		}
+		err = hda_display_power(true);
+		if (err < 0) {
+			dev_err(chip->card->dev,
+				"Cannot turn on display power on i915\n");
+			goto out_free;
+		}
 #endif
-		hda_display_power(true);
 	}
 
 	err = azx_first_init(chip);
-- 
cgit v1.2.3-71-gd317


From a0a7379e16b6e4c229d082f24c7e3ef9e812ed46 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 10 Jun 2014 10:53:01 +0200
Subject: netfilter: nf_tables: use u32 for chain use counter

Since 4fefee5 ("netfilter: nf_tables: allow to delete several objects
from a batch"), every new rule bumps the chain use counter. However,
this is limited to 16 bits, which means that it will overrun after
2^16 rules.

Use a u32 chain counter and check for overflows (just like we do for
table objects).

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 6 +++---
 net/netfilter/nf_tables_api.c     | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 7ee6ce6564ae..713b0b88bd5a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -503,9 +503,9 @@ enum nft_chain_flags {
  *	@net: net namespace that this chain belongs to
  *	@table: table that this chain belongs to
  *	@handle: chain handle
- *	@flags: bitmask of enum nft_chain_flags
  *	@use: number of jump references to this chain
  *	@level: length of longest path to this chain
+ *	@flags: bitmask of enum nft_chain_flags
  *	@name: name of the chain
  */
 struct nft_chain {
@@ -514,9 +514,9 @@ struct nft_chain {
 	struct net			*net;
 	struct nft_table		*table;
 	u64				handle;
-	u8				flags;
-	u16				use;
+	u32				use;
 	u16				level;
+	u8				flags;
 	char				name[NFT_CHAIN_MAXNAMELEN];
 };
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ba37c10e5139..5586426a6169 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1730,6 +1730,9 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 		if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EINVAL;
 		handle = nf_tables_alloc_handle(table);
+
+		if (chain->use == UINT_MAX)
+			return -EOVERFLOW;
 	}
 
 	if (nla[NFTA_RULE_POSITION]) {
-- 
cgit v1.2.3-71-gd317


From a6e15a39048ec3229b9a53425f4384f55f6cc1b3 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 13 Jun 2014 13:30:35 -0700
Subject: PM / hibernate: introduce "nohibernate" boot parameter

To support using kernel features that are not compatible with hibernation,
this creates the "nohibernate" kernel boot parameter to disable both
hibernation and resume. This allows hibernation support to be a boot-time
choice instead of only a compile-time choice.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/kernel-parameters.txt |  3 +++
 include/linux/suspend.h             |  2 ++
 kernel/power/hibernate.c            | 31 ++++++++++++++++++++++++++++++-
 kernel/power/main.c                 |  6 ++----
 kernel/power/user.c                 |  3 +++
 5 files changed, 40 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 6eaa9cdb7094..f8f0466b8b1d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2184,6 +2184,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			in certain environments such as networked servers or
 			real-time systems.
 
+	nohibernate	[HIBERNATION] Disable hibernation and resume.
+
 	nohz=		[KNL] Boottime enable/disable dynamic ticks
 			Valid arguments: on, off
 			Default: on
@@ -2980,6 +2982,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 		noresume	Don't check if there's a hibernation image
 				present during boot.
 		nocompress	Don't compress/decompress hibernation images.
+		no		Disable hibernation and resume.
 
 	retain_initrd	[RAM] Keep initrd memory after extraction
 
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index f76994b9396c..519064e0c943 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -327,6 +327,7 @@ extern unsigned long get_safe_page(gfp_t gfp_mask);
 extern void hibernation_set_ops(const struct platform_hibernation_ops *ops);
 extern int hibernate(void);
 extern bool system_entering_hibernation(void);
+extern bool hibernation_available(void);
 asmlinkage int swsusp_save(void);
 extern struct pbe *restore_pblist;
 #else /* CONFIG_HIBERNATION */
@@ -339,6 +340,7 @@ static inline void swsusp_unset_page_free(struct page *p) {}
 static inline void hibernation_set_ops(const struct platform_hibernation_ops *ops) {}
 static inline int hibernate(void) { return -ENOSYS; }
 static inline bool system_entering_hibernation(void) { return false; }
+static inline bool hibernation_available(void) { return false; }
 #endif /* CONFIG_HIBERNATION */
 
 /* Hibernation and suspend events */
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 49e0a20fd010..258f492f0347 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -35,6 +35,7 @@
 
 static int nocompress;
 static int noresume;
+static int nohibernate;
 static int resume_wait;
 static unsigned int resume_delay;
 static char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -62,6 +63,11 @@ bool freezer_test_done;
 
 static const struct platform_hibernation_ops *hibernation_ops;
 
+bool hibernation_available(void)
+{
+	return (nohibernate == 0);
+}
+
 /**
  * hibernation_set_ops - Set the global hibernate operations.
  * @ops: Hibernation operations to use in subsequent hibernation transitions.
@@ -642,6 +648,11 @@ int hibernate(void)
 {
 	int error;
 
+	if (!hibernation_available()) {
+		pr_debug("PM: Hibernation not available.\n");
+		return -EPERM;
+	}
+
 	lock_system_sleep();
 	/* The snapshot device should not be opened while we're running */
 	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
@@ -734,7 +745,7 @@ static int software_resume(void)
 	/*
 	 * If the user said "noresume".. bail out early.
 	 */
-	if (noresume)
+	if (noresume || !hibernation_available())
 		return 0;
 
 	/*
@@ -900,6 +911,9 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
 	int i;
 	char *start = buf;
 
+	if (!hibernation_available())
+		return sprintf(buf, "[disabled]\n");
+
 	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
 		if (!hibernation_modes[i])
 			continue;
@@ -934,6 +948,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
 	char *p;
 	int mode = HIBERNATION_INVALID;
 
+	if (!hibernation_available())
+		return -EPERM;
+
 	p = memchr(buf, '\n', n);
 	len = p ? p - buf : n;
 
@@ -1101,6 +1118,10 @@ static int __init hibernate_setup(char *str)
 		noresume = 1;
 	else if (!strncmp(str, "nocompress", 10))
 		nocompress = 1;
+	else if (!strncmp(str, "no", 2)) {
+		noresume = 1;
+		nohibernate = 1;
+	}
 	return 1;
 }
 
@@ -1125,9 +1146,17 @@ static int __init resumedelay_setup(char *str)
 	return 1;
 }
 
+static int __init nohibernate_setup(char *str)
+{
+	noresume = 1;
+	nohibernate = 1;
+	return 1;
+}
+
 __setup("noresume", noresume_setup);
 __setup("resume_offset=", resume_offset_setup);
 __setup("resume=", resume_setup);
 __setup("hibernate=", hibernate_setup);
 __setup("resumewait", resumewait_setup);
 __setup("resumedelay=", resumedelay_setup);
+__setup("nohibernate", nohibernate_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 573410d6647e..8e90f330f139 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -300,13 +300,11 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
 			s += sprintf(s,"%s ", pm_states[i].label);
 
 #endif
-#ifdef CONFIG_HIBERNATION
-	s += sprintf(s, "%s\n", "disk");
-#else
+	if (hibernation_available())
+		s += sprintf(s, "disk ");
 	if (s != buf)
 		/* convert the last space to a newline */
 		*(s-1) = '\n';
-#endif
 	return (s - buf);
 }
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 98d357584cd6..526e8911460a 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -49,6 +49,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 	struct snapshot_data *data;
 	int error;
 
+	if (!hibernation_available())
+		return -EPERM;
+
 	lock_system_sleep();
 
 	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
-- 
cgit v1.2.3-71-gd317


From 92c4d2ad3c85ed6ea2b6260a11df0013706b1462 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Sun, 1 Jun 2014 19:16:17 +0200
Subject: ACPI / processor replace __attribute__((packed)) by __packed

This patch fixes checkpatch warnings:

"WARNING: __packed is preferred over __attribute__((packed))"

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/processor.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 6eb1d3cb5104..9b9b6f29bbf3 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -53,7 +53,7 @@ struct acpi_power_register {
 	u8 bit_offset;
 	u8 access_size;
 	u64 address;
-} __attribute__ ((packed));
+} __packed;
 
 struct acpi_processor_cx {
 	u8 valid;
@@ -83,7 +83,7 @@ struct acpi_psd_package {
 	u64 domain;
 	u64 coord_type;
 	u64 num_processors;
-} __attribute__ ((packed));
+} __packed;
 
 struct acpi_pct_register {
 	u8 descriptor;
@@ -93,7 +93,7 @@ struct acpi_pct_register {
 	u8 bit_offset;
 	u8 reserved;
 	u64 address;
-} __attribute__ ((packed));
+} __packed;
 
 struct acpi_processor_px {
 	u64 core_frequency;	/* megahertz */
@@ -124,7 +124,7 @@ struct acpi_tsd_package {
 	u64 domain;
 	u64 coord_type;
 	u64 num_processors;
-} __attribute__ ((packed));
+} __packed;
 
 struct acpi_ptc_register {
 	u8 descriptor;
@@ -134,7 +134,7 @@ struct acpi_ptc_register {
 	u8 bit_offset;
 	u8 reserved;
 	u64 address;
-} __attribute__ ((packed));
+} __packed;
 
 struct acpi_processor_tx_tss {
 	u64 freqpercentage;	/* */
-- 
cgit v1.2.3-71-gd317


From 4ca2ad55553ef528c055761a9fa4d2c140f7318b Mon Sep 17 00:00:00 2001
From: Fugang Duan <b38611@freescale.com>
Date: Mon, 19 May 2014 15:46:41 +0800
Subject: ARM: imx6sl: add missing enet clock for imx6sl

There's a enet clock gate missing in clock tree, thus add it.

Signed-off-by: Fugang Duan <B38611@freescale.com>
Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 arch/arm/mach-imx/clk-imx6sl.c           | 1 +
 include/dt-bindings/clock/imx6sl-clock.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/arm/mach-imx/clk-imx6sl.c b/arch/arm/mach-imx/clk-imx6sl.c
index 21cf06cebade..5408ca70c8d6 100644
--- a/arch/arm/mach-imx/clk-imx6sl.c
+++ b/arch/arm/mach-imx/clk-imx6sl.c
@@ -312,6 +312,7 @@ static void __init imx6sl_clocks_init(struct device_node *ccm_node)
 	clks[IMX6SL_CLK_ECSPI2]       = imx_clk_gate2("ecspi2",       "ecspi_root",        base + 0x6c, 2);
 	clks[IMX6SL_CLK_ECSPI3]       = imx_clk_gate2("ecspi3",       "ecspi_root",        base + 0x6c, 4);
 	clks[IMX6SL_CLK_ECSPI4]       = imx_clk_gate2("ecspi4",       "ecspi_root",        base + 0x6c, 6);
+	clks[IMX6SL_CLK_ENET]         = imx_clk_gate2("enet",         "ipg",               base + 0x6c, 10);
 	clks[IMX6SL_CLK_EPIT1]        = imx_clk_gate2("epit1",        "perclk",            base + 0x6c, 12);
 	clks[IMX6SL_CLK_EPIT2]        = imx_clk_gate2("epit2",        "perclk",            base + 0x6c, 14);
 	clks[IMX6SL_CLK_EXTERN_AUDIO] = imx_clk_gate2("extern_audio", "extern_audio_podf", base + 0x6c, 16);
diff --git a/include/dt-bindings/clock/imx6sl-clock.h b/include/dt-bindings/clock/imx6sl-clock.h
index 7cf5c9969336..b91dd462ba85 100644
--- a/include/dt-bindings/clock/imx6sl-clock.h
+++ b/include/dt-bindings/clock/imx6sl-clock.h
@@ -145,6 +145,7 @@
 #define IMX6SL_CLK_USDHC4		132
 #define IMX6SL_CLK_PLL4_AUDIO_DIV	133
 #define IMX6SL_CLK_SPBA			134
-#define IMX6SL_CLK_END			135
+#define IMX6SL_CLK_ENET			135
+#define IMX6SL_CLK_END			136
 
 #endif /* __DT_BINDINGS_CLOCK_IMX6SL_H */
-- 
cgit v1.2.3-71-gd317


From 9796853e902447e53a17dae5df9eb609f0e31e6a Mon Sep 17 00:00:00 2001
From: Peter Griffin <peter.griffin@linaro.org>
Date: Mon, 16 Jun 2014 11:23:00 +0200
Subject: ARM: STi: DT: Properly define sti-ethclk & stmmaceth for stih415/6

This patch fixes two problems: -

1) The device tree isn't currently providing sti-ethclk which is
required by the dwmac glue code to correctly configure the ethernet
PHY clock speed.

This means depending on what the bootloader/jtag has
configured this clock to, and what switch/hub the board is plugged
into you most likely will NOT successfully negotiate a ethernet link.

2) The stmmaceth clock was associated with the wrong clock. It was
referencing the PHY clock rather than the interconnect clock which
clocks the IP.

This patch also brings us closer to not having to boot the upstream
kernel with the clk_ignore_unused parameter.

Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: Maxime Coquelin <maxime.coquelin@st.com>
---
 arch/arm/boot/dts/stih415.dtsi           | 8 ++++----
 arch/arm/boot/dts/stih416.dtsi           | 8 ++++----
 include/dt-bindings/clock/stih415-clks.h | 1 +
 include/dt-bindings/clock/stih416-clks.h | 1 +
 4 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/arch/arm/boot/dts/stih415.dtsi b/arch/arm/boot/dts/stih415.dtsi
index d6f254f302fe..a0f6f75fe3b5 100644
--- a/arch/arm/boot/dts/stih415.dtsi
+++ b/arch/arm/boot/dts/stih415.dtsi
@@ -169,8 +169,8 @@
 
 			pinctrl-names 	= "default";
 			pinctrl-0	= <&pinctrl_mii0>;
-			clock-names	= "stmmaceth";
-			clocks		= <&clk_s_a1_ls CLK_GMAC0_PHY>;
+			clock-names	= "stmmaceth", "sti-ethclk";
+			clocks		= <&clk_s_a1_ls CLK_ICN_IF_2>, <&clk_s_a1_ls CLK_GMAC0_PHY>;
 		};
 
 		ethernet1: dwmac@fef08000 {
@@ -192,8 +192,8 @@
 			reset-names		= "stmmaceth";
 			pinctrl-names 	= "default";
 			pinctrl-0	= <&pinctrl_mii1>;
-			clock-names	= "stmmaceth";
-			clocks		= <&clk_s_a0_ls CLK_ETH1_PHY>;
+			clock-names	= "stmmaceth", "sti-ethclk";
+			clocks		= <&clk_s_a0_ls CLK_ICN_REG>, <&clk_s_a0_ls CLK_ETH1_PHY>;
 		};
 
 		rc: rc@fe518000 {
diff --git a/arch/arm/boot/dts/stih416.dtsi b/arch/arm/boot/dts/stih416.dtsi
index 06473c5d9ea9..84758d76d064 100644
--- a/arch/arm/boot/dts/stih416.dtsi
+++ b/arch/arm/boot/dts/stih416.dtsi
@@ -175,8 +175,8 @@
 			reset-names		= "stmmaceth";
 			pinctrl-names 	= "default";
 			pinctrl-0	= <&pinctrl_mii0>;
-			clock-names	= "stmmaceth";
-			clocks		= <&clk_s_a1_ls CLK_GMAC0_PHY>;
+			clock-names	= "stmmaceth", "sti-ethclk";
+			clocks		= <&clk_s_a1_ls CLK_ICN_IF_2>, <&clk_s_a1_ls CLK_GMAC0_PHY>;
 		};
 
 		ethernet1: dwmac@fef08000 {
@@ -197,8 +197,8 @@
 			reset-names	= "stmmaceth";
 			pinctrl-names 	= "default";
 			pinctrl-0	= <&pinctrl_mii1>;
-			clock-names	= "stmmaceth";
-			clocks		= <&clk_s_a0_ls CLK_ETH1_PHY>;
+			clock-names	= "stmmaceth", "sti-ethclk";
+			clocks		= <&clk_s_a0_ls CLK_ICN_REG>, <&clk_s_a0_ls CLK_ETH1_PHY>;
 		};
 
 		rc: rc@fe518000 {
diff --git a/include/dt-bindings/clock/stih415-clks.h b/include/dt-bindings/clock/stih415-clks.h
index 0d2c7397e028..d80caa68aebd 100644
--- a/include/dt-bindings/clock/stih415-clks.h
+++ b/include/dt-bindings/clock/stih415-clks.h
@@ -10,6 +10,7 @@
 #define CLK_ETH1_PHY		4
 
 /* CLOCKGEN A1 */
+#define CLK_ICN_IF_2		0
 #define CLK_GMAC0_PHY		3
 
 #endif
diff --git a/include/dt-bindings/clock/stih416-clks.h b/include/dt-bindings/clock/stih416-clks.h
index 552c779eb6af..f9bdbd13568d 100644
--- a/include/dt-bindings/clock/stih416-clks.h
+++ b/include/dt-bindings/clock/stih416-clks.h
@@ -10,6 +10,7 @@
 #define CLK_ETH1_PHY		4
 
 /* CLOCKGEN A1 */
+#define CLK_ICN_IF_2		0
 #define CLK_GMAC0_PHY		3
 
 #endif
-- 
cgit v1.2.3-71-gd317


From 736ed4de766d4f0e8e6142dd4f9d73ef61835ed9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 17 Jun 2014 22:09:29 -0700
Subject: block: blk_max_size_offset() should check ->max_sectors

Commit 762380ad9322 inadvertently changed a check for max_sectors
to max_hw_sectors. Revert that part, so we still compare against
max_sectors.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 31e11051f1ba..713f8b62b435 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -920,7 +920,7 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q,
 					       sector_t offset)
 {
 	if (!q->limits.chunk_sectors)
-		return q->limits.max_hw_sectors;
+		return q->limits.max_sectors;
 
 	return q->limits.chunk_sectors -
 			(offset & (q->limits.chunk_sectors - 1));
-- 
cgit v1.2.3-71-gd317


From 8537b12034cf1fd3fab3da2c859d71f76846fae9 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Tue, 17 Jun 2014 22:12:35 -0700
Subject: blk-mq: bitmap tag: fix races on shared ::wake_index fields

Fix racy updates of shared blk_mq_bitmap_tags::wake_index
and blk_mq_hw_ctx::wake_index fields.

Cc: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-tag.c     | 32 +++++++++++++++++++++-----------
 block/blk-mq-tag.h     |  2 +-
 include/linux/blk-mq.h |  2 +-
 3 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 1aab39f71d95..6deb13055490 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -43,9 +43,16 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
 	return bt_has_free_tags(&tags->bitmap_tags);
 }
 
-static inline void bt_index_inc(unsigned int *index)
+static inline int bt_index_inc(int index)
 {
-	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
+	return (index + 1) & (BT_WAIT_QUEUES - 1);
+}
+
+static inline void bt_index_atomic_inc(atomic_t *index)
+{
+	int old = atomic_read(index);
+	int new = bt_index_inc(old);
+	atomic_cmpxchg(index, old, new);
 }
 
 /*
@@ -69,14 +76,14 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 	int i, wake_index;
 
 	bt = &tags->bitmap_tags;
-	wake_index = bt->wake_index;
+	wake_index = atomic_read(&bt->wake_index);
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
 		struct bt_wait_state *bs = &bt->bs[wake_index];
 
 		if (waitqueue_active(&bs->wait))
 			wake_up(&bs->wait);
 
-		bt_index_inc(&wake_index);
+		wake_index = bt_index_inc(wake_index);
 	}
 }
 
@@ -212,12 +219,14 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
 					 struct blk_mq_hw_ctx *hctx)
 {
 	struct bt_wait_state *bs;
+	int wait_index;
 
 	if (!hctx)
 		return &bt->bs[0];
 
-	bs = &bt->bs[hctx->wait_index];
-	bt_index_inc(&hctx->wait_index);
+	wait_index = atomic_read(&hctx->wait_index);
+	bs = &bt->bs[wait_index];
+	bt_index_atomic_inc(&hctx->wait_index);
 	return bs;
 }
 
@@ -313,18 +322,19 @@ static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
 {
 	int i, wake_index;
 
-	wake_index = bt->wake_index;
+	wake_index = atomic_read(&bt->wake_index);
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
 		struct bt_wait_state *bs = &bt->bs[wake_index];
 
 		if (waitqueue_active(&bs->wait)) {
-			if (wake_index != bt->wake_index)
-				bt->wake_index = wake_index;
+			int o = atomic_read(&bt->wake_index);
+			if (wake_index != o)
+				atomic_cmpxchg(&bt->wake_index, o, wake_index);
 
 			return bs;
 		}
 
-		bt_index_inc(&wake_index);
+		wake_index = bt_index_inc(wake_index);
 	}
 
 	return NULL;
@@ -344,7 +354,7 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
 	bs = bt_wake_ptr(bt);
 	if (bs && atomic_dec_and_test(&bs->wait_cnt)) {
 		atomic_set(&bs->wait_cnt, bt->wake_cnt);
-		bt_index_inc(&bt->wake_index);
+		bt_index_atomic_inc(&bt->wake_index);
 		wake_up(&bs->wait);
 	}
 }
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 98696a65d4d4..6206ed17ef76 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -24,7 +24,7 @@ struct blk_mq_bitmap_tags {
 	unsigned int map_nr;
 	struct blk_align_bitmap *map;
 
-	unsigned int wake_index;
+	atomic_t wake_index;
 	struct bt_wait_state *bs;
 };
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a002cf191427..eb726b9c5762 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -42,7 +42,7 @@ struct blk_mq_hw_ctx {
 	unsigned int		nr_ctx;
 	struct blk_mq_ctx	**ctxs;
 
-	unsigned int		wait_index;
+	atomic_t		wait_index;
 
 	struct blk_mq_tags	*tags;
 
-- 
cgit v1.2.3-71-gd317


From 07f4d9d74a04aa7c72c5dae0ef97565f28f17b92 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 18 Jun 2014 13:32:31 +0200
Subject: ALSA: control: Protect user controls against concurrent access

The user-control put and get handlers as well as the tlv do not protect against
concurrent access from multiple threads. Since the state of the control is not
updated atomically it is possible that either two write operations or a write
and a read operation race against each other. Both can lead to arbitrary memory
disclosure. This patch introduces a new lock that protects user-controls from
concurrent access. Since applications typically access controls sequentially
than in parallel a single lock per card should be fine.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Jaroslav Kysela <perex@perex.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/core.h |  2 ++
 sound/core/control.c | 31 +++++++++++++++++++++++++------
 sound/core/init.c    |  1 +
 3 files changed, 28 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/sound/core.h b/include/sound/core.h
index eedda2cdfe57..1df3f2fe5350 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -116,6 +116,8 @@ struct snd_card {
 	int user_ctl_count;		/* count of all user controls */
 	struct list_head controls;	/* all controls for this card */
 	struct list_head ctl_files;	/* active control files */
+	struct mutex user_ctl_lock;	/* protects user controls against
+					   concurrent access */
 
 	struct snd_info_entry *proc_root;	/* root for soundcard specific files */
 	struct snd_info_entry *proc_id;	/* the card id */
diff --git a/sound/core/control.c b/sound/core/control.c
index f038f5afafe2..00ab034f5fcb 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -991,6 +991,7 @@ static int snd_ctl_elem_unlock(struct snd_ctl_file *file,
 
 struct user_element {
 	struct snd_ctl_elem_info info;
+	struct snd_card *card;
 	void *elem_data;		/* element data */
 	unsigned long elem_data_size;	/* size of element data in bytes */
 	void *tlv_data;			/* TLV data */
@@ -1034,7 +1035,9 @@ static int snd_ctl_elem_user_get(struct snd_kcontrol *kcontrol,
 {
 	struct user_element *ue = kcontrol->private_data;
 
+	mutex_lock(&ue->card->user_ctl_lock);
 	memcpy(&ucontrol->value, ue->elem_data, ue->elem_data_size);
+	mutex_unlock(&ue->card->user_ctl_lock);
 	return 0;
 }
 
@@ -1043,10 +1046,12 @@ static int snd_ctl_elem_user_put(struct snd_kcontrol *kcontrol,
 {
 	int change;
 	struct user_element *ue = kcontrol->private_data;
-	
+
+	mutex_lock(&ue->card->user_ctl_lock);
 	change = memcmp(&ucontrol->value, ue->elem_data, ue->elem_data_size) != 0;
 	if (change)
 		memcpy(ue->elem_data, &ucontrol->value, ue->elem_data_size);
+	mutex_unlock(&ue->card->user_ctl_lock);
 	return change;
 }
 
@@ -1066,19 +1071,32 @@ static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol,
 		new_data = memdup_user(tlv, size);
 		if (IS_ERR(new_data))
 			return PTR_ERR(new_data);
+		mutex_lock(&ue->card->user_ctl_lock);
 		change = ue->tlv_data_size != size;
 		if (!change)
 			change = memcmp(ue->tlv_data, new_data, size);
 		kfree(ue->tlv_data);
 		ue->tlv_data = new_data;
 		ue->tlv_data_size = size;
+		mutex_unlock(&ue->card->user_ctl_lock);
 	} else {
-		if (! ue->tlv_data_size || ! ue->tlv_data)
-			return -ENXIO;
-		if (size < ue->tlv_data_size)
-			return -ENOSPC;
+		int ret = 0;
+
+		mutex_lock(&ue->card->user_ctl_lock);
+		if (!ue->tlv_data_size || !ue->tlv_data) {
+			ret = -ENXIO;
+			goto err_unlock;
+		}
+		if (size < ue->tlv_data_size) {
+			ret = -ENOSPC;
+			goto err_unlock;
+		}
 		if (copy_to_user(tlv, ue->tlv_data, ue->tlv_data_size))
-			return -EFAULT;
+			ret = -EFAULT;
+err_unlock:
+		mutex_unlock(&ue->card->user_ctl_lock);
+		if (ret)
+			return ret;
 	}
 	return change;
 }
@@ -1210,6 +1228,7 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
 	ue = kzalloc(sizeof(struct user_element) + private_size, GFP_KERNEL);
 	if (ue == NULL)
 		return -ENOMEM;
+	ue->card = card;
 	ue->info = *info;
 	ue->info.access = 0;
 	ue->elem_data = (char *)ue + sizeof(*ue);
diff --git a/sound/core/init.c b/sound/core/init.c
index 5ee83845c5de..7bdfd19e24a8 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -232,6 +232,7 @@ int snd_card_new(struct device *parent, int idx, const char *xid,
 	INIT_LIST_HEAD(&card->devices);
 	init_rwsem(&card->controls_rwsem);
 	rwlock_init(&card->ctl_files_rwlock);
+	mutex_init(&card->user_ctl_lock);
 	INIT_LIST_HEAD(&card->controls);
 	INIT_LIST_HEAD(&card->ctl_files);
 	spin_lock_init(&card->files_lock);
-- 
cgit v1.2.3-71-gd317


From f2af74123f8c5a735248547f4286a3adc28633c1 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Tue, 20 May 2014 09:38:03 -1000
Subject: tools: ffs-test: convert to new descriptor format fixing compilation
 error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit [ac8dde11: “usb: gadget: f_fs: Add flags to descriptors block”]
which introduced a new descriptor format for FunctionFS removed the
usb_functionfs_descs_head structure, which is still used by ffs-test.
tool.

Convert ffs-test by converting it to use the new header format.  For
testing kernels prior to 3.14 (when the new format was introduced) and
parsing of the legacy headers in the new kernels, provide a compilation
flag to make the tool use the old format.

Finally, include information as to when the legacy FunctionFS headers
format has been deprecated (which is also when the new one has been
introduced).

Reported-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/uapi/linux/usb/functionfs.h |  2 +-
 tools/usb/Makefile                  |  6 +++++-
 tools/usb/ffs-test.c                | 20 ++++++++++++++++++--
 3 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 2a4b4a72a4f9..ecb3a31f7ca6 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -53,7 +53,7 @@ struct usb_endpoint_descriptor_no_audio {
  * structure.  Any flags that are not recognised cause the whole block to be
  * rejected with -ENOSYS.
  *
- * Legacy descriptors format:
+ * Legacy descriptors format (deprecated as of 3.14):
  *
  * | off | name      | type         | description                          |
  * |-----+-----------+--------------+--------------------------------------|
diff --git a/tools/usb/Makefile b/tools/usb/Makefile
index acf2165c04e6..d576b3bac3cf 100644
--- a/tools/usb/Makefile
+++ b/tools/usb/Makefile
@@ -6,7 +6,11 @@ WARNINGS = -Wall -Wextra
 CFLAGS = $(WARNINGS) -g -I../include
 LDFLAGS = $(PTHREAD_LIBS)
 
-all: testusb ffs-test
+all: testusb ffs-test ffs-test-legacy
+
+ffs-test-legacy: ffs-test.c
+	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) -DUSE_LEGACY_DESC_HEAD
+
 %: %.c
 	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
 
diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c
index fe1e66b6ef40..74b353d9eb50 100644
--- a/tools/usb/ffs-test.c
+++ b/tools/usb/ffs-test.c
@@ -1,5 +1,5 @@
 /*
- * ffs-test.c.c -- user mode filesystem api for usb composite function
+ * ffs-test.c -- user mode filesystem api for usb composite function
  *
  * Copyright (C) 2010 Samsung Electronics
  *                    Author: Michal Nazarewicz <mina86@mina86.com>
@@ -21,6 +21,8 @@
 
 /* $(CROSS_COMPILE)cc -Wall -Wextra -g -o ffs-test ffs-test.c -lpthread */
 
+/* Uncomment to make the tool use legacy FFS descriptor headers. */
+/* #define USE_LEGACY_DESC_HEAD */
 
 #define _BSD_SOURCE /* for endian.h */
 
@@ -106,7 +108,15 @@ static void _msg(unsigned level, const char *fmt, ...)
 /******************** Descriptors and Strings *******************************/
 
 static const struct {
-	struct usb_functionfs_descs_head header;
+	struct {
+		__le32 magic;
+		__le32 length;
+#ifndef USE_LEGACY_DESC_HEAD
+		__le32 flags;
+#endif
+		__le32 fs_count;
+		__le32 hs_count;
+	} __attribute__((packed)) header;
 	struct {
 		struct usb_interface_descriptor intf;
 		struct usb_endpoint_descriptor_no_audio sink;
@@ -114,7 +124,13 @@ static const struct {
 	} __attribute__((packed)) fs_descs, hs_descs;
 } __attribute__((packed)) descriptors = {
 	.header = {
+#ifdef USE_LEGACY_DESC_HEAD
 		.magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC),
+#else
+		.magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2),
+		.flags = cpu_to_le32(FUNCTIONFS_HAS_FS_DESC |
+				     FUNCTIONFS_HAS_HS_DESC),
+#endif
 		.length = cpu_to_le32(sizeof descriptors),
 		.fs_count = 3,
 		.hs_count = 3,
-- 
cgit v1.2.3-71-gd317


From 4af4206be2bd1933cae20c2b6fb2058dbc887f7c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 13 Apr 2014 20:58:54 +0200
Subject: tracing: Fix syscall_*regfunc() vs copy_process() race

syscall_regfunc() and syscall_unregfunc() should set/clear
TIF_SYSCALL_TRACEPOINT system-wide, but do_each_thread() can race
with copy_process() and miss the new child which was not added to
the process/thread lists yet.

Change copy_process() to update the child's TIF_SYSCALL_TRACEPOINT
under tasklist.

Link: http://lkml.kernel.org/p/20140413185854.GB20668@redhat.com

Cc: stable@vger.kernel.org # 2.6.33
Fixes: a871bd33a6c0 "tracing: Add syscall tracepoints"
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/syscall.h | 15 +++++++++++++++
 kernel/fork.c           |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index fed853f3d7aa..9674145e2f6a 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -4,6 +4,7 @@
 #include <linux/tracepoint.h>
 #include <linux/unistd.h>
 #include <linux/ftrace_event.h>
+#include <linux/thread_info.h>
 
 #include <asm/ptrace.h>
 
@@ -32,4 +33,18 @@ struct syscall_metadata {
 	struct ftrace_event_call *exit_event;
 };
 
+#if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_HAVE_SYSCALL_TRACEPOINTS)
+static inline void syscall_tracepoint_update(struct task_struct *p)
+{
+	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+		set_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+	else
+		clear_tsk_thread_flag(p, TIF_SYSCALL_TRACEPOINT);
+}
+#else
+static inline void syscall_tracepoint_update(struct task_struct *p)
+{
+}
+#endif
+
 #endif /* _TRACE_SYSCALL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index d2799d1fc952..6a13c46cd87d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1487,7 +1487,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	total_forks++;
 	spin_unlock(&current->sighand->siglock);
+	syscall_tracepoint_update(p);
 	write_unlock_irq(&tasklist_lock);
+
 	proc_fork_connector(p);
 	cgroup_post_fork(p);
 	if (clone_flags & CLONE_THREAD)
-- 
cgit v1.2.3-71-gd317


From 4d4c9cc839a308be3289a361ccba4447ee140552 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 17 Jun 2014 08:59:16 -0400
Subject: tracing: Add __field_struct macro for TRACE_EVENT()

Currently the __field() macro in TRACE_EVENT is only good for primitive
values, such as integers and pointers, but it fails on complex data types
such as structures or unions. This is because the __field() macro
determines if the variable is signed or not with the test of:

  (((type)(-1)) < (type)1)

Unfortunately, that fails when type is a structure.

Since trace events should support structures as fields a new macro
is created for such a case called __field_struct() which acts exactly
the same as __field() does but it does not do the signed type check
and just uses a constant false for that answer.

Cc: Tony Luck <tony.luck@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/ftrace.h                     | 33 ++++++++++++++++++++++++++++++
 samples/trace_events/trace-events-sample.h |  3 ++-
 2 files changed, 35 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0fd06fef9fac..26b4f2e13275 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -44,6 +44,12 @@
 #undef __field_ext
 #define __field_ext(type, item, filter_type)	type	item;
 
+#undef __field_struct
+#define __field_struct(type, item)	type	item;
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)	type	item;
+
 #undef __array
 #define __array(type, item, len)	type	item[len];
 
@@ -122,6 +128,12 @@
 #undef __field_ext
 #define __field_ext(type, item, filter_type)
 
+#undef __field_struct
+#define __field_struct(type, item)
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)
+
 #undef __array
 #define __array(type, item, len)
 
@@ -315,9 +327,21 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = {	\
 	if (ret)							\
 		return ret;
 
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)			\
+	ret = trace_define_field(event_call, #type, #item,		\
+				 offsetof(typeof(field), item),		\
+				 sizeof(field.item),			\
+				 0, filter_type);			\
+	if (ret)							\
+		return ret;
+
 #undef __field
 #define __field(type, item)	__field_ext(type, item, FILTER_OTHER)
 
+#undef __field_struct
+#define __field_struct(type, item) __field_struct_ext(type, item, FILTER_OTHER)
+
 #undef __array
 #define __array(type, item, len)					\
 	do {								\
@@ -379,6 +403,12 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 #undef __field_ext
 #define __field_ext(type, item, filter_type)
 
+#undef __field_struct
+#define __field_struct(type, item)
+
+#undef __field_struct_ext
+#define __field_struct_ext(type, item, filter_type)
+
 #undef __array
 #define __array(type, item, len)
 
@@ -550,6 +580,9 @@ static inline notrace int ftrace_get_offsets_##call(			\
 #undef __field
 #define __field(type, item)
 
+#undef __field_struct
+#define __field_struct(type, item)
+
 #undef __array
 #define __array(type, item, len)
 
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 6af373236d73..4b0113f73ee9 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -56,7 +56,8 @@
  * struct:  This defines the way the data will be stored in the ring buffer.
  *    There are currently two types of elements. __field and __array.
  *    a __field is broken up into (type, name). Where type can be any
- *    type but an array.
+ *    primitive type (integer, long or pointer). __field_struct() can
+ *    be any static complex data value (struct, union, but not an array).
  *    For an array. there are three fields. (type, name, size). The
  *    type of elements in the array, the name of the field and the size
  *    of the array.
-- 
cgit v1.2.3-71-gd317


From 2b8f2a28eac1d35a432705d269f02bdaeba9be8f Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Wed, 18 Jun 2014 11:01:41 +0200
Subject: net: phylib: add link_change_notify callback to phy device

Add a notify callback to inform phy drivers when the core is about to
do its link adjustment. No change for drivers that do not implement
this callback.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 3 +++
 include/linux/phy.h   | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 3bc079a67a3d..f7c61812ea4a 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -720,6 +720,9 @@ void phy_state_machine(struct work_struct *work)
 
 	mutex_lock(&phydev->lock);
 
+	if (phydev->drv->link_change_notify)
+		phydev->drv->link_change_notify(phydev);
+
 	switch (phydev->state) {
 	case PHY_DOWN:
 	case PHY_STARTING:
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 864ddafad8cc..68041446c450 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -536,6 +536,15 @@ struct phy_driver {
 	/* See set_wol, but for checking whether Wake on LAN is enabled. */
 	void (*get_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol);
 
+	/*
+	 * Called to inform a PHY device driver when the core is about to
+	 * change the link state. This callback is supposed to be used as
+	 * fixup hook for drivers that need to take action when the link
+	 * state changes. Drivers are by no means allowed to mess with the
+	 * PHY device structure in their implementations.
+	 */
+	void (*link_change_notify)(struct phy_device *dev);
+
 	struct device_driver driver;
 };
 #define to_phy_driver(d) container_of(d, struct phy_driver, driver)
-- 
cgit v1.2.3-71-gd317


From e567bf7112518824830978d644dfb5a991e67d54 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Sun, 22 Jun 2014 16:32:48 -0600
Subject: Revert "block: add __init to elv_register"

This reverts commit b5097e956a4d2919ee248d6481e4204c5568ed5c.

The original commit is buggy, we do use the registration functions
at runtime, for instance when loading IO schedulers through sysfs.

Reported-by: Damien Wyart <damien.wyart@gmail.com>
---
 block/elevator.c         | 2 +-
 include/linux/elevator.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/block/elevator.c b/block/elevator.c
index 34bded18910e..24c28b659bb3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -825,7 +825,7 @@ void elv_unregister_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(elv_unregister_queue);
 
-int __init elv_register(struct elevator_type *e)
+int elv_register(struct elevator_type *e)
 {
 	char *def = "";
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index e2a6bd7fb133..45a91474487d 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -143,7 +143,7 @@ extern void elv_drain_elevator(struct request_queue *);
  * io scheduler registration
  */
 extern void __init load_default_elevator_module(void);
-extern int __init elv_register(struct elevator_type *);
+extern int elv_register(struct elevator_type *);
 extern void elv_unregister(struct elevator_type *);
 
 /*
-- 
cgit v1.2.3-71-gd317


From 2da38e0c9465b89518b29328daeb7da0ca1690b7 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vinod.koul@intel.com>
Date: Thu, 19 Jun 2014 14:41:00 +0530
Subject: ALSA: compress: fix the struct alignment to 4 bytes

In 64bit systems the compiler can default align to 8bytes causing mis-match with
32bit usermode. Avoid this is future by ensuring all the structures shared with
usermode are packed and aligned to 4 bytes irrespective of arch used

[coding style fixes by tiwai]

Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/uapi/sound/compress_offload.h | 14 +++++++-------
 include/uapi/sound/compress_params.h  | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/uapi/sound/compress_offload.h b/include/uapi/sound/compress_offload.h
index 21eed488783f..1964026b5e09 100644
--- a/include/uapi/sound/compress_offload.h
+++ b/include/uapi/sound/compress_offload.h
@@ -39,7 +39,7 @@
 struct snd_compressed_buffer {
 	__u32 fragment_size;
 	__u32 fragments;
-};
+} __attribute__((packed, aligned(4)));
 
 /**
  * struct snd_compr_params: compressed stream params
@@ -51,7 +51,7 @@ struct snd_compr_params {
 	struct snd_compressed_buffer buffer;
 	struct snd_codec codec;
 	__u8 no_wake_mode;
-};
+} __attribute__((packed, aligned(4)));
 
 /**
  * struct snd_compr_tstamp: timestamp descriptor
@@ -70,7 +70,7 @@ struct snd_compr_tstamp {
 	__u32 pcm_frames;
 	__u32 pcm_io_frames;
 	__u32 sampling_rate;
-};
+} __attribute__((packed, aligned(4)));
 
 /**
  * struct snd_compr_avail: avail descriptor
@@ -80,7 +80,7 @@ struct snd_compr_tstamp {
 struct snd_compr_avail {
 	__u64 avail;
 	struct snd_compr_tstamp tstamp;
-} __attribute__((packed));
+} __attribute__((packed, aligned(4)));
 
 enum snd_compr_direction {
 	SND_COMPRESS_PLAYBACK = 0,
@@ -107,7 +107,7 @@ struct snd_compr_caps {
 	__u32 max_fragments;
 	__u32 codecs[MAX_NUM_CODECS];
 	__u32 reserved[11];
-};
+} __attribute__((packed, aligned(4)));
 
 /**
  * struct snd_compr_codec_caps: query capability of codec
@@ -119,7 +119,7 @@ struct snd_compr_codec_caps {
 	__u32 codec;
 	__u32 num_descriptors;
 	struct snd_codec_desc descriptor[MAX_NUM_CODEC_DESCRIPTORS];
-};
+} __attribute__((packed, aligned(4)));
 
 /**
  * @SNDRV_COMPRESS_ENCODER_PADDING: no of samples appended by the encoder at the
@@ -140,7 +140,7 @@ enum {
 struct snd_compr_metadata {
 	 __u32 key;
 	 __u32 value[8];
-};
+} __attribute__((packed, aligned(4)));
 
 /**
  * compress path ioctl definitions
diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h
index 165e7059de75..d9bd9ca0d5b0 100644
--- a/include/uapi/sound/compress_params.h
+++ b/include/uapi/sound/compress_params.h
@@ -268,7 +268,7 @@ struct snd_enc_vorbis {
 	__u32 max_bit_rate;
 	__u32 min_bit_rate;
 	__u32 downmix;
-};
+} __attribute__((packed, aligned(4)));
 
 
 /**
@@ -284,7 +284,7 @@ struct snd_enc_real {
 	__u32 quant_bits;
 	__u32 start_region;
 	__u32 num_regions;
-};
+} __attribute__((packed, aligned(4)));
 
 /**
  * struct snd_enc_flac
@@ -308,12 +308,12 @@ struct snd_enc_real {
 struct snd_enc_flac {
 	__u32 num;
 	__u32 gain;
-};
+} __attribute__((packed, aligned(4)));
 
 struct snd_enc_generic {
 	__u32 bw;	/* encoder bandwidth */
 	__s32 reserved[15];
-};
+} __attribute__((packed, aligned(4)));
 
 union snd_codec_options {
 	struct snd_enc_wma wma;
@@ -321,7 +321,7 @@ union snd_codec_options {
 	struct snd_enc_real real;
 	struct snd_enc_flac flac;
 	struct snd_enc_generic generic;
-};
+} __attribute__((packed, aligned(4)));
 
 /** struct snd_codec_desc - description of codec capabilities
  * @max_ch: Maximum number of audio channels
@@ -358,7 +358,7 @@ struct snd_codec_desc {
 	__u32 formats;
 	__u32 min_buffer;
 	__u32 reserved[15];
-};
+} __attribute__((packed, aligned(4)));
 
 /** struct snd_codec
  * @id: Identifies the supported audio encoder/decoder.
@@ -399,6 +399,6 @@ struct snd_codec {
 	__u32 align;
 	union snd_codec_options options;
 	__u32 reserved[3];
-};
+} __attribute__((packed, aligned(4)));
 
 #endif
-- 
cgit v1.2.3-71-gd317


From b3acc56bfe1287c6b666e80edc70b89eea2a1a80 Mon Sep 17 00:00:00 2001
From: Petr Tesarik <ptesarik@suse.cz>
Date: Mon, 23 Jun 2014 13:22:03 -0700
Subject: kexec: save PG_head_mask in VMCOREINFO

To allow filtering of huge pages, makedumpfile must be able to identify
them in the dump.  This can be done by checking the appropriate page
flag, so communicate its value to makedumpfile through the VMCOREINFO
interface.

There's only one small catch.  Depending on how many page flags are
available on a given architecture, this bit can be called PG_head or
PG_compound.

I sent a similar patch back in 2012, but Eric Biederman did not like
using an #ifdef.  So, this time I'm adding a common symbol
(PG_head_mask) instead.

See https://lkml.org/lkml/2012/11/28/91 for the previous version.

Signed-off-by: Petr Tesarik <ptesarik@suse.cz>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 3 +++
 kernel/kexec.c             | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3c545b48aeab..8304959ad336 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -360,6 +360,9 @@ static inline void ClearPageCompound(struct page *page)
 	ClearPageHead(page);
 }
 #endif
+
+#define PG_head_mask ((1L << PG_head))
+
 #else
 /*
  * Reduce page flag use as much as possible by overlapping
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 6748688813d0..369f41a94124 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1617,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 #ifdef CONFIG_MEMORY_FAILURE
 	VMCOREINFO_NUMBER(PG_hwpoison);
 #endif
+	VMCOREINFO_NUMBER(PG_head_mask);
 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
 
 	arch_crash_save_vmcoreinfo();
-- 
cgit v1.2.3-71-gd317


From f3aca3d09525f87731ba6b892c9b010570bc54b4 Mon Sep 17 00:00:00 2001
From: Aaron Tomlin <atomlin@redhat.com>
Date: Mon, 23 Jun 2014 13:22:05 -0700
Subject: nmi: provide the option to issue an NMI back trace to every cpu but
 current

Sometimes it is preferred not to use the trigger_all_cpu_backtrace()
routine when one wants to avoid capturing a back trace for current.  For
instance if one was previously captured recently.

This patch provides a new routine namely
trigger_allbutself_cpu_backtrace() which offers the flexibility to issue
an NMI to every cpu but current and capture a back trace accordingly.

Patch x86 and sparc to support new routine.

[dzickus@redhat.com: add stub in #else clause]
[dzickus@redhat.com: don't print message in single processor case, wrap with get/put_cpu based on Oleg's suggestion]
[sfr@canb.auug.org.au: undo C99ism]
Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Mateusz Guzik <mguzik@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sparc/include/asm/irq_64.h |  2 +-
 arch/sparc/kernel/process_64.c  | 18 ++++++++++++------
 arch/x86/include/asm/irq.h      |  2 +-
 arch/x86/kernel/apic/hw_nmi.c   | 18 ++++++++++++++----
 include/linux/nmi.h             | 11 ++++++++++-
 5 files changed, 38 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
index 375cffcf7dbd..91d219381306 100644
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@@ -89,7 +89,7 @@ static inline unsigned long get_softint(void)
 	return retval;
 }
 
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 
 extern void *hardirq_stack[NR_CPUS];
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index b2988f25e230..027e09986194 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -239,7 +239,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
 	}
 }
 
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
 {
 	struct thread_info *tp = current_thread_info();
 	struct pt_regs *regs = get_irq_regs();
@@ -251,16 +251,22 @@ void arch_trigger_all_cpu_backtrace(void)
 
 	spin_lock_irqsave(&global_cpu_snapshot_lock, flags);
 
-	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
-
 	this_cpu = raw_smp_processor_id();
 
-	__global_reg_self(tp, regs, this_cpu);
+	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
+
+	if (include_self)
+		__global_reg_self(tp, regs, this_cpu);
 
 	smp_fetch_global_regs();
 
 	for_each_online_cpu(cpu) {
-		struct global_reg_snapshot *gp = &global_cpu_snapshot[cpu].reg;
+		struct global_reg_snapshot *gp;
+
+		if (!include_self && cpu == this_cpu)
+			continue;
+
+		gp = &global_cpu_snapshot[cpu].reg;
 
 		__global_reg_poll(gp);
 
@@ -292,7 +298,7 @@ void arch_trigger_all_cpu_backtrace(void)
 
 static void sysrq_handle_globreg(int key)
 {
-	arch_trigger_all_cpu_backtrace();
+	arch_trigger_all_cpu_backtrace(true);
 }
 
 static struct sysrq_key_op sparc_globalreg_op = {
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index cb6cfcd034cf..a80cbb88ea91 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -43,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
 extern void init_ISA_irqs(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 #endif
 
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c3fcb5de5083..6a1e71bde323 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -33,31 +33,41 @@ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 /* "in progress" flag of arch_trigger_all_cpu_backtrace */
 static unsigned long backtrace_flag;
 
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
 {
 	int i;
+	int cpu = get_cpu();
 
-	if (test_and_set_bit(0, &backtrace_flag))
+	if (test_and_set_bit(0, &backtrace_flag)) {
 		/*
 		 * If there is already a trigger_all_cpu_backtrace() in progress
 		 * (backtrace_flag == 1), don't output double cpu dump infos.
 		 */
+		put_cpu();
 		return;
+	}
 
 	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
+	if (!include_self)
+		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
 
-	printk(KERN_INFO "sending NMI to all CPUs:\n");
-	apic->send_IPI_all(NMI_VECTOR);
+	if (!cpumask_empty(to_cpumask(backtrace_mask))) {
+		pr_info("sending NMI to %s CPUs:\n",
+			(include_self ? "all" : "other"));
+		apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR);
+	}
 
 	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
 	for (i = 0; i < 10 * 1000; i++) {
 		if (cpumask_empty(to_cpumask(backtrace_mask)))
 			break;
 		mdelay(1);
+		touch_softlockup_watchdog();
 	}
 
 	clear_bit(0, &backtrace_flag);
 	smp_mb__after_atomic();
+	put_cpu();
 }
 
 static int
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 6a45fb583ff1..a17ab6398d7c 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -32,15 +32,24 @@ static inline void touch_nmi_watchdog(void)
 #ifdef arch_trigger_all_cpu_backtrace
 static inline bool trigger_all_cpu_backtrace(void)
 {
-	arch_trigger_all_cpu_backtrace();
+	arch_trigger_all_cpu_backtrace(true);
 
 	return true;
 }
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+	arch_trigger_all_cpu_backtrace(false);
+	return true;
+}
 #else
 static inline bool trigger_all_cpu_backtrace(void)
 {
 	return false;
 }
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+	return false;
+}
 #endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
-- 
cgit v1.2.3-71-gd317


From ed235875e2ca983197831337a986f0517074e1a0 Mon Sep 17 00:00:00 2001
From: Aaron Tomlin <atomlin@redhat.com>
Date: Mon, 23 Jun 2014 13:22:05 -0700
Subject: kernel/watchdog.c: print traces for all cpus on lockup detection

A 'softlockup' is defined as a bug that causes the kernel to loop in
kernel mode for more than a predefined period to time, without giving
other tasks a chance to run.

Currently, upon detection of this condition by the per-cpu watchdog
task, debug information (including a stack trace) is sent to the system
log.

On some occasions, we have observed that the "victim" rather than the
actual "culprit" (i.e.  the owner/holder of the contended resource) is
reported to the user.  Often this information has proven to be
insufficient to assist debugging efforts.

To avoid loss of useful debug information, for architectures which
support NMI, this patch makes it possible to improve soft lockup
reporting.  This is accomplished by issuing an NMI to each cpu to obtain
a stack trace.

If NMI is not supported we just revert back to the old method.  A sysctl
and boot-time parameter is available to toggle this feature.

[dzickus@redhat.com: add CONFIG_SMP in certain areas]
[akpm@linux-foundation.org: additional CONFIG_SMP=n optimisations]
[mq@suse.cz: fix warning]
Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Mateusz Guzik <mguzik@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Jan Moskyto Matejka <mq@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt |  5 +++++
 Documentation/sysctl/kernel.txt     | 17 ++++++++++++++++
 include/linux/nmi.h                 |  1 +
 kernel/sysctl.c                     | 11 +++++++++++
 kernel/watchdog.c                   | 39 +++++++++++++++++++++++++++++++++++++
 5 files changed, 73 insertions(+)

(limited to 'include')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 884904975d0b..c1b9aa8c5a52 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3130,6 +3130,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			[KNL] Should the soft-lockup detector generate panics.
 			Format: <integer>
 
+	softlockup_all_cpu_backtrace=
+			[KNL] Should the soft-lockup detector generate
+			backtraces on all cpus.
+			Format: <integer>
+
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
 			See Documentation/laptops/sonypi.txt
 
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 708bb7f1b7e0..c14374e71775 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -75,6 +75,7 @@ show up in /proc/sys/kernel:
 - shmall
 - shmmax                      [ sysv ipc ]
 - shmmni
+- softlockup_all_cpu_backtrace
 - stop-a                      [ SPARC only ]
 - sysrq                       ==> Documentation/sysrq.txt
 - sysctl_writes_strict
@@ -783,6 +784,22 @@ via the /proc/sys interface:
 
 ==============================================================
 
+softlockup_all_cpu_backtrace:
+
+This value controls the soft lockup detector thread's behavior
+when a soft lockup condition is detected as to whether or not
+to gather further debug information. If enabled, each cpu will
+be issued an NMI and instructed to capture stack trace.
+
+This feature is only applicable for architectures which support
+NMI.
+
+0: do nothing. This is the default behavior.
+
+1: on detection capture more debug information.
+
+==============================================================
+
 tainted:
 
 Non-zero if the kernel has been tainted.  Numeric values, which
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a17ab6398d7c..447775ee2c4b 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -57,6 +57,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *);
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
+extern int sysctl_softlockup_all_cpu_backtrace;
 struct ctl_table;
 extern int proc_dowatchdog(struct ctl_table *, int ,
 			   void __user *, size_t *, loff_t *);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 075d1903138f..75b22e22a72c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -860,6 +860,17 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#ifdef CONFIG_SMP
+	{
+		.procname	= "softlockup_all_cpu_backtrace",
+		.data		= &sysctl_softlockup_all_cpu_backtrace,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif /* CONFIG_SMP */
 	{
 		.procname       = "nmi_watchdog",
 		.data           = &watchdog_user_enabled,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 30e482240dae..c3319bd1b040 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,6 +31,12 @@
 
 int watchdog_user_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
+#ifdef CONFIG_SMP
+int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+#else
+#define sysctl_softlockup_all_cpu_backtrace 0
+#endif
+
 static int __read_mostly watchdog_running;
 static u64 __read_mostly sample_period;
 
@@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
+static unsigned long soft_lockup_nmi_warn;
 
 /* boot commands */
 /*
@@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str)
 }
 __setup("nosoftlockup", nosoftlockup_setup);
 /*  */
+#ifdef CONFIG_SMP
+static int __init softlockup_all_cpu_backtrace_setup(char *str)
+{
+	sysctl_softlockup_all_cpu_backtrace =
+		!!simple_strtol(str, NULL, 0);
+	return 1;
+}
+__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#endif
 
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
+	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
 
 	/* kick the hardlockup detector */
 	watchdog_interrupt_count();
@@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		if (__this_cpu_read(soft_watchdog_warn) == true)
 			return HRTIMER_RESTART;
 
+		if (softlockup_all_cpu_backtrace) {
+			/* Prevent multiple soft-lockup reports if one cpu is already
+			 * engaged in dumping cpu back traces
+			 */
+			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
+				/* Someone else will report us. Let's give up */
+				__this_cpu_write(soft_watchdog_warn, true);
+				return HRTIMER_RESTART;
+			}
+		}
+
 		printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 			smp_processor_id(), duration,
 			current->comm, task_pid_nr(current));
@@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		else
 			dump_stack();
 
+		if (softlockup_all_cpu_backtrace) {
+			/* Avoid generating two back traces for current
+			 * given that one is already made above
+			 */
+			trigger_allbutself_cpu_backtrace();
+
+			clear_bit(0, &soft_lockup_nmi_warn);
+			/* Barrier to sync with other cpus */
+			smp_mb__after_atomic();
+		}
+
 		if (softlockup_panic)
 			panic("softlockup: hung tasks");
 		__this_cpu_write(soft_watchdog_warn, true);
-- 
cgit v1.2.3-71-gd317


From fb7023e0e248a33cb00d0a9cdce0bcedaa1ad284 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 10 Jun 2014 10:09:52 -0700
Subject: drm/i915: BDW: Adding Reserved PCI IDs.

These PCI IDs are reserved on BSpec and can be used at any time in the future.
So let's add this now in order to avoid issues that we already faced on previous
platforms, like finding out about new ids when user reported accelaration weren't
enabled.

Cc: stable@vger.kernel.org
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 include/drm/i915_pciids.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 0572035673f3..a70d45647898 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -237,13 +237,21 @@
 #define INTEL_BDW_GT3D_IDS(info) \
 	_INTEL_BDW_D_IDS(3, info)
 
+#define INTEL_BDW_RSVDM_IDS(info) \
+	_INTEL_BDW_M_IDS(4, info)
+
+#define INTEL_BDW_RSVDD_IDS(info) \
+	_INTEL_BDW_D_IDS(4, info)
+
 #define INTEL_BDW_M_IDS(info) \
 	INTEL_BDW_GT12M_IDS(info), \
-	INTEL_BDW_GT3M_IDS(info)
+	INTEL_BDW_GT3M_IDS(info), \
+	INTEL_BDW_RSVDM_IDS(info)
 
 #define INTEL_BDW_D_IDS(info) \
 	INTEL_BDW_GT12D_IDS(info), \
-	INTEL_BDW_GT3D_IDS(info)
+	INTEL_BDW_GT3D_IDS(info), \
+	INTEL_BDW_RSVDD_IDS(info)
 
 #define INTEL_CHV_IDS(info) \
 	INTEL_VGA_DEVICE(0x22b0, info), \
-- 
cgit v1.2.3-71-gd317


From 3a4b0eda8e4b27e6aca86f9f4d327c1070815e30 Mon Sep 17 00:00:00 2001
From: Gu Zheng <guz.fnst@cn.fujitsu.com>
Date: Tue, 24 Jun 2014 18:10:26 +0800
Subject: bio: remove unused macro bip_vec_idx()

Macro bip_vec_idx() was used by bio integrity originally, but no longer
used now. So remove it.

Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/bio.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5a645769f020..f91decbca96b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -644,10 +644,6 @@ struct biovec_slab {
 
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 
-
-
-#define bip_vec_idx(bip, idx)	(&(bip->bip_vec[(idx)]))
-
 #define bip_for_each_vec(bvl, bip, iter)				\
 	for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter)
 
-- 
cgit v1.2.3-71-gd317


From 66cb45aa41315d1d9972cada354fbdf7870d7714 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 24 Jun 2014 16:22:24 -0600
Subject: block: add support for limiting gaps in SG lists

Another restriction inherited for NVMe - those devices don't support
SG lists that have "gaps" in them. Gaps refers to cases where the
previous SG entry doesn't end on a page boundary. For NVMe, all SG
entries must start at offset 0 (except the first) and end on a page
boundary (except the last).

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c            |  8 ++++++++
 block/blk-merge.c      | 10 ++++++++++
 include/linux/bio.h    |  9 +++++++++
 include/linux/blkdev.h |  1 +
 4 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/block/bio.c b/block/bio.c
index 8c2e55e39a1b..0ec61c9e536c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -746,6 +746,14 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 
 			goto done;
 		}
+
+		/*
+		 * If the queue doesn't support SG gaps and adding this
+		 * offset would create a gap, disallow it.
+		 */
+		if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) &&
+		    bvec_gap_to_prev(prev, offset))
+			return 0;
 	}
 
 	if (bio->bi_vcnt >= bio->bi_max_vecs)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index b3bf0df0f4c2..54535831f1e1 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -568,6 +568,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
 
 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 {
+	struct request_queue *q = rq->q;
+
 	if (!rq_mergeable(rq) || !bio_mergeable(bio))
 		return false;
 
@@ -591,6 +593,14 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 	    !blk_write_same_mergeable(rq->bio, bio))
 		return false;
 
+	if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) {
+		struct bio_vec *bprev;
+
+		bprev = &rq->biotail->bi_io_vec[bio->bi_vcnt - 1];
+		if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset))
+			return false;
+	}
+
 	return true;
 }
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f91decbca96b..d2633ee099d9 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -186,6 +186,15 @@ static inline void *bio_data(struct bio *bio)
 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
 	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
 
+/*
+ * Check if adding a bio_vec after bprv with offset would create a gap in
+ * the SG list. Most drivers don't care about this, but some do.
+ */
+static inline bool bvec_gap_to_prev(struct bio_vec *bprv, unsigned int offset)
+{
+	return offset || ((bprv->bv_offset + bprv->bv_len) & (PAGE_SIZE - 1));
+}
+
 #define bio_io_error(bio) bio_endio((bio), -EIO)
 
 /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 713f8b62b435..8699bcf5f099 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -512,6 +512,7 @@ struct request_queue {
 #define QUEUE_FLAG_DEAD        19	/* queue tear-down finished */
 #define QUEUE_FLAG_INIT_DONE   20	/* queue is initialized */
 #define QUEUE_FLAG_NO_SG_MERGE 21	/* don't attempt to merge SG segments*/
+#define QUEUE_FLAG_SG_GAPS     22	/* queue doesn't support SG gaps */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
-- 
cgit v1.2.3-71-gd317


From f88649721268999bdff09777847080a52004f691 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 24 Jun 2014 10:05:11 -0700
Subject: ipv4: fix dst race in sk_dst_get()

When IP route cache had been removed in linux-3.6, we broke assumption
that dst entries were all freed after rcu grace period. DST_NOCACHE
dst were supposed to be freed from dst_release(). But it appears
we want to keep such dst around, either in UDP sockets or tunnels.

In sk_dst_get() we need to make sure dst refcount is not 0
before incrementing it, or else we might end up freeing a dst
twice.

DST_NOCACHE set on a dst does not mean this dst can not be attached
to a socket or a tunnel.

Then, before actual freeing, we need to observe a rcu grace period
to make sure all other cpus can catch the fact the dst is no longer
usable.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dormando <dormando@rydia.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h   |  4 ++--
 net/core/dst.c       | 16 +++++++++++-----
 net/ipv4/ip_tunnel.c | 14 +++++---------
 3 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 07b7fcd60d80..173cae485de1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1730,8 +1730,8 @@ sk_dst_get(struct sock *sk)
 
 	rcu_read_lock();
 	dst = rcu_dereference(sk->sk_dst_cache);
-	if (dst)
-		dst_hold(dst);
+	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+		dst = NULL;
 	rcu_read_unlock();
 	return dst;
 }
diff --git a/net/core/dst.c b/net/core/dst.c
index 80d6286c8b62..a028409ee438 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -269,6 +269,15 @@ again:
 }
 EXPORT_SYMBOL(dst_destroy);
 
+static void dst_destroy_rcu(struct rcu_head *head)
+{
+	struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+
+	dst = dst_destroy(dst);
+	if (dst)
+		__dst_free(dst);
+}
+
 void dst_release(struct dst_entry *dst)
 {
 	if (dst) {
@@ -276,11 +285,8 @@ void dst_release(struct dst_entry *dst)
 
 		newrefcnt = atomic_dec_return(&dst->__refcnt);
 		WARN_ON(newrefcnt < 0);
-		if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
-			dst = dst_destroy(dst);
-			if (dst)
-				__dst_free(dst);
-		}
+		if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+			call_rcu(&dst->rcu_head, dst_destroy_rcu);
 	}
 }
 EXPORT_SYMBOL(dst_release);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 097b3e7c1e8f..54b6731dab55 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -73,12 +73,7 @@ static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
 {
 	struct dst_entry *old_dst;
 
-	if (dst) {
-		if (dst->flags & DST_NOCACHE)
-			dst = NULL;
-		else
-			dst_clone(dst);
-	}
+	dst_clone(dst);
 	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
 	dst_release(old_dst);
 }
@@ -108,13 +103,14 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
 
 	rcu_read_lock();
 	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
+	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+		dst = NULL;
 	if (dst) {
 		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
-			rcu_read_unlock();
 			tunnel_dst_reset(t);
-			return NULL;
+			dst_release(dst);
+			dst = NULL;
 		}
-		dst_hold(dst);
 	}
 	rcu_read_unlock();
 	return (struct rtable *)dst;
-- 
cgit v1.2.3-71-gd317


From 0b86dbf675e0170a191a9ca18e5e99fd39a678c0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 23 Jun 2014 08:44:40 +0100
Subject: Fix 32-bit regression in block device read(2)

blkdev_read_iter() wants to cap the iov_iter by the amount of data
remaining to the end of device.  That's what iov_iter_truncate() is for
(trim iter->count if it's above the given limit).  So far, so good, but
the argument of iov_iter_truncate() is size_t, so on 32bit boxen (in
case of a large device) we end up with that upper limit truncated down
to 32 bits *before* comparing it with iter->count.

Easily fixed by making iov_iter_truncate() take 64bit argument - it does
the right thing after such change (we only reach the assignment in there
when the current value of iter->count is greater than the limit, i.e.
for anything that would get truncated we don't reach the assignment at
all) and that argument is not the new value of iter->count - it's an
upper limit for such.

The overhead of passing u64 is not an issue - the thing is inlined, so
callers passing size_t won't pay any penalty.

Reported-and-tested-by: Theodore Tso <tytso@mit.edu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Tested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Tested-by: Bruno Wolff III <bruno@wolff.to>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/uio.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index e2231e47cec1..d54985e0705e 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -94,8 +94,20 @@ static inline size_t iov_iter_count(struct iov_iter *i)
 	return i->count;
 }
 
-static inline void iov_iter_truncate(struct iov_iter *i, size_t count)
+/*
+ * Cap the iov_iter by given limit; note that the second argument is
+ * *not* the new size - it's upper limit for such.  Passing it a value
+ * greater than the amount of data in iov_iter is fine - it'll just do
+ * nothing in that case.
+ */
+static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
 {
+	/*
+	 * count doesn't have to fit in size_t - comparison extends both
+	 * operands to u64 here and any value that would be truncated by
+	 * conversion in assignement is by definition greater than all
+	 * values of size_t, including old i->count.
+	 */
 	if (i->count > count)
 		i->count = count;
 }
-- 
cgit v1.2.3-71-gd317


From 9ad7860450ea65c6bbcbd52a9a25b54b07e35941 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Fri, 27 Jun 2014 10:41:00 -0500
Subject: Revert "tools: ffs-test: convert to new descriptor format fixing
 compilation error"

This reverts commit f2af74123f8c5a735248547f4286a3adc28633c1.

There is a better fix for this build error coming in a following
patch.

Signed-of-by: Felipe Balbi <balbi@ti.com>
---
 include/uapi/linux/usb/functionfs.h |  2 +-
 tools/usb/Makefile                  |  6 +-----
 tools/usb/ffs-test.c                | 20 ++------------------
 3 files changed, 4 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index ecb3a31f7ca6..2a4b4a72a4f9 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -53,7 +53,7 @@ struct usb_endpoint_descriptor_no_audio {
  * structure.  Any flags that are not recognised cause the whole block to be
  * rejected with -ENOSYS.
  *
- * Legacy descriptors format (deprecated as of 3.14):
+ * Legacy descriptors format:
  *
  * | off | name      | type         | description                          |
  * |-----+-----------+--------------+--------------------------------------|
diff --git a/tools/usb/Makefile b/tools/usb/Makefile
index d576b3bac3cf..acf2165c04e6 100644
--- a/tools/usb/Makefile
+++ b/tools/usb/Makefile
@@ -6,11 +6,7 @@ WARNINGS = -Wall -Wextra
 CFLAGS = $(WARNINGS) -g -I../include
 LDFLAGS = $(PTHREAD_LIBS)
 
-all: testusb ffs-test ffs-test-legacy
-
-ffs-test-legacy: ffs-test.c
-	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) -DUSE_LEGACY_DESC_HEAD
-
+all: testusb ffs-test
 %: %.c
 	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
 
diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c
index 74b353d9eb50..fe1e66b6ef40 100644
--- a/tools/usb/ffs-test.c
+++ b/tools/usb/ffs-test.c
@@ -1,5 +1,5 @@
 /*
- * ffs-test.c -- user mode filesystem api for usb composite function
+ * ffs-test.c.c -- user mode filesystem api for usb composite function
  *
  * Copyright (C) 2010 Samsung Electronics
  *                    Author: Michal Nazarewicz <mina86@mina86.com>
@@ -21,8 +21,6 @@
 
 /* $(CROSS_COMPILE)cc -Wall -Wextra -g -o ffs-test ffs-test.c -lpthread */
 
-/* Uncomment to make the tool use legacy FFS descriptor headers. */
-/* #define USE_LEGACY_DESC_HEAD */
 
 #define _BSD_SOURCE /* for endian.h */
 
@@ -108,15 +106,7 @@ static void _msg(unsigned level, const char *fmt, ...)
 /******************** Descriptors and Strings *******************************/
 
 static const struct {
-	struct {
-		__le32 magic;
-		__le32 length;
-#ifndef USE_LEGACY_DESC_HEAD
-		__le32 flags;
-#endif
-		__le32 fs_count;
-		__le32 hs_count;
-	} __attribute__((packed)) header;
+	struct usb_functionfs_descs_head header;
 	struct {
 		struct usb_interface_descriptor intf;
 		struct usb_endpoint_descriptor_no_audio sink;
@@ -124,13 +114,7 @@ static const struct {
 	} __attribute__((packed)) fs_descs, hs_descs;
 } __attribute__((packed)) descriptors = {
 	.header = {
-#ifdef USE_LEGACY_DESC_HEAD
 		.magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC),
-#else
-		.magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2),
-		.flags = cpu_to_le32(FUNCTIONFS_HAS_FS_DESC |
-				     FUNCTIONFS_HAS_HS_DESC),
-#endif
 		.length = cpu_to_le32(sizeof descriptors),
 		.fs_count = 3,
 		.hs_count = 3,
-- 
cgit v1.2.3-71-gd317


From 09122141785348bf9539762a5f5dbbae3761c783 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Fri, 13 Jun 2014 15:38:04 +0200
Subject: usb: gadget: f_fs: resurect usb_functionfs_descs_head structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even though usb_functionfs_descs_head structure is now deprecated,
it has been used by some user space tools.  Its removel in commit
[ac8dde1: “Add flags to descriptors block”] was an oversight
leading to build breakage for such tools.

Bring it back so that old user space tools can still be build
without problems on newer kernel versions.

Cc: <stable@vger.kernel.org>  # 3.14
Reported-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Reported-by: Krzysztof Opasiak <k.opasiak@samsung.com>
Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/uapi/linux/usb/functionfs.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 2a4b4a72a4f9..24b68c59dcf8 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -33,6 +33,13 @@ struct usb_endpoint_descriptor_no_audio {
 	__u8  bInterval;
 } __attribute__((packed));
 
+/* Legacy format, deprecated as of 3.14. */
+struct usb_functionfs_descs_head {
+	__le32 magic;
+	__le32 length;
+	__le32 fs_count;
+	__le32 hs_count;
+} __attribute__((packed, deprecated));
 
 /*
  * Descriptors format:
-- 
cgit v1.2.3-71-gd317


From ac5ccdba3a1659b3517e7e99ef7d35a6a2d77cf4 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 19 Jun 2014 21:22:56 +0300
Subject: iovec: move memcpy_from/toiovecend to lib/iovec.c

ERROR: "memcpy_fromiovecend" [drivers/vhost/vhost_scsi.ko] undefined!

commit 9f977ef7b671f6169eca78bf40f230fe84b7c7e5
    vhost-scsi: Include prot_bytes into expected data transfer length
in target-pending makes drivers/vhost/scsi.c call memcpy_fromiovecend().
This function is not available when CONFIG_NET is not enabled.

socket.h already includes uio.h, so no callers need updating.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/linux/socket.h |  4 ----
 include/linux/uio.h    |  5 ++++-
 lib/iovec.c            | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/core/iovec.c       | 55 --------------------------------------------------
 4 files changed, 59 insertions(+), 60 deletions(-)

(limited to 'include')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 8e98297f1388..ec538fc287a6 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -305,8 +305,6 @@ struct ucred {
 /* IPX options */
 #define IPX_TYPE	1
 
-extern int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
-			       int offset, int len);
 extern int csum_partial_copy_fromiovecend(unsigned char *kdata, 
 					  struct iovec *iov, 
 					  int offset, 
@@ -315,8 +313,6 @@ extern unsigned long iov_pages(const struct iovec *iov, int offset,
 			       unsigned long nr_segs);
 
 extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *address, int mode);
-extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
-			     int offset, int len);
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
 
diff --git a/include/linux/uio.h b/include/linux/uio.h
index e2231e47cec1..04c8c4bb4927 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -111,6 +111,9 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
-
+int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
+			int offset, int len);
+int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
+		      int offset, int len);
 
 #endif
diff --git a/lib/iovec.c b/lib/iovec.c
index 454baa88bf27..7a7c2da4cddf 100644
--- a/lib/iovec.c
+++ b/lib/iovec.c
@@ -51,3 +51,58 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
 	return 0;
 }
 EXPORT_SYMBOL(memcpy_toiovec);
+
+/*
+ *	Copy kernel to iovec. Returns -EFAULT on error.
+ */
+
+int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
+		      int offset, int len)
+{
+	int copy;
+	for (; len > 0; ++iov) {
+		/* Skip over the finished iovecs */
+		if (unlikely(offset >= iov->iov_len)) {
+			offset -= iov->iov_len;
+			continue;
+		}
+		copy = min_t(unsigned int, iov->iov_len - offset, len);
+		if (copy_to_user(iov->iov_base + offset, kdata, copy))
+			return -EFAULT;
+		offset = 0;
+		kdata += copy;
+		len -= copy;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_toiovecend);
+
+/*
+ *	Copy iovec to kernel. Returns -EFAULT on error.
+ */
+
+int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
+			int offset, int len)
+{
+	/* Skip over the finished iovecs */
+	while (offset >= iov->iov_len) {
+		offset -= iov->iov_len;
+		iov++;
+	}
+
+	while (len > 0) {
+		u8 __user *base = iov->iov_base + offset;
+		int copy = min_t(unsigned int, len, iov->iov_len - offset);
+
+		offset = 0;
+		if (copy_from_user(kdata, base, copy))
+			return -EFAULT;
+		len -= copy;
+		kdata += copy;
+		iov++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_fromiovecend);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index b61869429f4c..827dd6beb49c 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -74,61 +74,6 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
 	return err;
 }
 
-/*
- *	Copy kernel to iovec. Returns -EFAULT on error.
- */
-
-int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
-		      int offset, int len)
-{
-	int copy;
-	for (; len > 0; ++iov) {
-		/* Skip over the finished iovecs */
-		if (unlikely(offset >= iov->iov_len)) {
-			offset -= iov->iov_len;
-			continue;
-		}
-		copy = min_t(unsigned int, iov->iov_len - offset, len);
-		if (copy_to_user(iov->iov_base + offset, kdata, copy))
-			return -EFAULT;
-		offset = 0;
-		kdata += copy;
-		len -= copy;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(memcpy_toiovecend);
-
-/*
- *	Copy iovec to kernel. Returns -EFAULT on error.
- */
-
-int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
-			int offset, int len)
-{
-	/* Skip over the finished iovecs */
-	while (offset >= iov->iov_len) {
-		offset -= iov->iov_len;
-		iov++;
-	}
-
-	while (len > 0) {
-		u8 __user *base = iov->iov_base + offset;
-		int copy = min_t(unsigned int, len, iov->iov_len - offset);
-
-		offset = 0;
-		if (copy_from_user(kdata, base, copy))
-			return -EFAULT;
-		len -= copy;
-		kdata += copy;
-		iov++;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovecend);
-
 /*
  *	And now for the all-in-one: copy and checksum from a user iovec
  *	directly to a datagram
-- 
cgit v1.2.3-71-gd317


From b2373f255cacdc1ea4da25e75a5a78949ffd9d66 Mon Sep 17 00:00:00 2001
From: Anand Jain <Anand.Jain@oracle.com>
Date: Tue, 3 Jun 2014 11:36:03 +0800
Subject: btrfs: create sprout should rename fsid on the sysfs as well

Creating sprout will change the fsid of the mounted root.
do the same on the sysfs as well.

reproducer:
 mount /dev/sdb /btrfs (seed disk)
 btrfs dev add /dev/sdc /btrfs
 mount -o rw,remount /btrfs
 btrfs dev del /dev/sdb /btrfs
 mount /dev/sdb /btrfs

Error:
kobject_add_internal failed for fe350492-dc28-4051-a601-e017b17e6145 with -EEXIST, don't try to register things with the same name in the same directory.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/volumes.c         | 9 +++++++++
 include/uapi/linux/btrfs.h | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 19b67e969b52..6ca0d9cc46f9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2154,6 +2154,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
 	if (seeding_dev) {
+		char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
 		ret = init_first_rw_device(trans, root, device);
 		if (ret) {
 			btrfs_abort_transaction(trans, root, ret);
@@ -2164,6 +2165,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 			btrfs_abort_transaction(trans, root, ret);
 			goto error_trans;
 		}
+
+		/* Sprouting would change fsid of the mounted root,
+		 * so rename the fsid on the sysfs
+		 */
+		snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
+						root->fs_info->fsid);
+		if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
+			goto error_trans;
 	} else {
 		ret = btrfs_add_device(trans, root, device);
 		if (ret) {
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 6f9c38ce45c7..2f47824e7a36 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -38,6 +38,7 @@ struct btrfs_ioctl_vol_args {
 #define BTRFS_SUBVOL_QGROUP_INHERIT	(1ULL << 2)
 #define BTRFS_FSID_SIZE 16
 #define BTRFS_UUID_SIZE 16
+#define BTRFS_UUID_UNPARSED_SIZE	37
 
 #define BTRFS_QGROUP_INHERIT_SET_LIMITS	(1ULL << 0)
 
-- 
cgit v1.2.3-71-gd317


From 44ff0254b89079a8a95e652635e760d93196ac1f Mon Sep 17 00:00:00 2001
From: Doug Anderson <dianders@chromium.org>
Date: Thu, 5 Jun 2014 13:35:14 -0700
Subject: clk: exynos5420: Remove aclk66_peric from the clock tree description

The "aclk66_peric" clock is a gate clock with a whole bunch of gates
underneath it.  This big gate isn't very useful to include in our
clock tree.  If any of the children need to be turned on then the big
gate will need to be on anyway.  ...and there are plenty of other "big
gates" that aren't described in our clock tree, some of which shut off
collections of clocks that have no relationship in the hierarchy so
are hard to model.

"aclk66_peric" is causing earlyprintk problems since it gets disabled
as part of the boot process, so let's just remove it.

Strangely (and for no good reason) this clock is exported as part of
the common clock bindings.  Remove it since there are no in-kernel
device trees using it and no reason anyone out of tree should refer to
it either.

Signed-off-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Tomasz Figa <t.figa@samsung.com>
---
 drivers/clk/samsung/clk-exynos5420.c   | 85 ++++++++++++++++++++++------------
 include/dt-bindings/clock/exynos5420.h |  1 -
 2 files changed, 55 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c
index 9d7d7eed03fd..61eccf0dd72f 100644
--- a/drivers/clk/samsung/clk-exynos5420.c
+++ b/drivers/clk/samsung/clk-exynos5420.c
@@ -890,8 +890,6 @@ static struct samsung_gate_clock exynos5x_gate_clks[] __initdata = {
 			GATE_BUS_TOP, 9, CLK_IGNORE_UNUSED, 0),
 	GATE(0, "aclk66_psgen", "mout_user_aclk66_psgen",
 			GATE_BUS_TOP, 10, CLK_IGNORE_UNUSED, 0),
-	GATE(CLK_ACLK66_PERIC, "aclk66_peric", "mout_user_aclk66_peric",
-			GATE_BUS_TOP, 11, CLK_IGNORE_UNUSED, 0),
 	GATE(0, "aclk266_isp", "mout_user_aclk266_isp",
 			GATE_BUS_TOP, 13, 0, 0),
 	GATE(0, "aclk166", "mout_user_aclk166",
@@ -994,34 +992,61 @@ static struct samsung_gate_clock exynos5x_gate_clks[] __initdata = {
 			SRC_MASK_FSYS, 24, CLK_SET_RATE_PARENT, 0),
 
 	/* PERIC Block */
-	GATE(CLK_UART0, "uart0", "aclk66_peric", GATE_IP_PERIC, 0, 0, 0),
-	GATE(CLK_UART1, "uart1", "aclk66_peric", GATE_IP_PERIC, 1, 0, 0),
-	GATE(CLK_UART2, "uart2", "aclk66_peric", GATE_IP_PERIC, 2, 0, 0),
-	GATE(CLK_UART3, "uart3", "aclk66_peric", GATE_IP_PERIC, 3, 0, 0),
-	GATE(CLK_I2C0, "i2c0", "aclk66_peric", GATE_IP_PERIC, 6, 0, 0),
-	GATE(CLK_I2C1, "i2c1", "aclk66_peric", GATE_IP_PERIC, 7, 0, 0),
-	GATE(CLK_I2C2, "i2c2", "aclk66_peric", GATE_IP_PERIC, 8, 0, 0),
-	GATE(CLK_I2C3, "i2c3", "aclk66_peric", GATE_IP_PERIC, 9, 0, 0),
-	GATE(CLK_USI0, "usi0", "aclk66_peric", GATE_IP_PERIC, 10, 0, 0),
-	GATE(CLK_USI1, "usi1", "aclk66_peric", GATE_IP_PERIC, 11, 0, 0),
-	GATE(CLK_USI2, "usi2", "aclk66_peric", GATE_IP_PERIC, 12, 0, 0),
-	GATE(CLK_USI3, "usi3", "aclk66_peric", GATE_IP_PERIC, 13, 0, 0),
-	GATE(CLK_I2C_HDMI, "i2c_hdmi", "aclk66_peric", GATE_IP_PERIC, 14, 0, 0),
-	GATE(CLK_TSADC, "tsadc", "aclk66_peric", GATE_IP_PERIC, 15, 0, 0),
-	GATE(CLK_SPI0, "spi0", "aclk66_peric", GATE_IP_PERIC, 16, 0, 0),
-	GATE(CLK_SPI1, "spi1", "aclk66_peric", GATE_IP_PERIC, 17, 0, 0),
-	GATE(CLK_SPI2, "spi2", "aclk66_peric", GATE_IP_PERIC, 18, 0, 0),
-	GATE(CLK_I2S1, "i2s1", "aclk66_peric", GATE_IP_PERIC, 20, 0, 0),
-	GATE(CLK_I2S2, "i2s2", "aclk66_peric", GATE_IP_PERIC, 21, 0, 0),
-	GATE(CLK_PCM1, "pcm1", "aclk66_peric", GATE_IP_PERIC, 22, 0, 0),
-	GATE(CLK_PCM2, "pcm2", "aclk66_peric", GATE_IP_PERIC, 23, 0, 0),
-	GATE(CLK_PWM, "pwm", "aclk66_peric", GATE_IP_PERIC, 24, 0, 0),
-	GATE(CLK_SPDIF, "spdif", "aclk66_peric", GATE_IP_PERIC, 26, 0, 0),
-	GATE(CLK_USI4, "usi4", "aclk66_peric", GATE_IP_PERIC, 28, 0, 0),
-	GATE(CLK_USI5, "usi5", "aclk66_peric", GATE_IP_PERIC, 30, 0, 0),
-	GATE(CLK_USI6, "usi6", "aclk66_peric", GATE_IP_PERIC, 31, 0, 0),
-
-	GATE(CLK_KEYIF, "keyif", "aclk66_peric", GATE_BUS_PERIC, 22, 0, 0),
+	GATE(CLK_UART0, "uart0", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 0, 0, 0),
+	GATE(CLK_UART1, "uart1", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 1, 0, 0),
+	GATE(CLK_UART2, "uart2", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 2, 0, 0),
+	GATE(CLK_UART3, "uart3", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 3, 0, 0),
+	GATE(CLK_I2C0, "i2c0", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 6, 0, 0),
+	GATE(CLK_I2C1, "i2c1", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 7, 0, 0),
+	GATE(CLK_I2C2, "i2c2", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 8, 0, 0),
+	GATE(CLK_I2C3, "i2c3", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 9, 0, 0),
+	GATE(CLK_USI0, "usi0", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 10, 0, 0),
+	GATE(CLK_USI1, "usi1", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 11, 0, 0),
+	GATE(CLK_USI2, "usi2", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 12, 0, 0),
+	GATE(CLK_USI3, "usi3", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 13, 0, 0),
+	GATE(CLK_I2C_HDMI, "i2c_hdmi", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 14, 0, 0),
+	GATE(CLK_TSADC, "tsadc", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 15, 0, 0),
+	GATE(CLK_SPI0, "spi0", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 16, 0, 0),
+	GATE(CLK_SPI1, "spi1", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 17, 0, 0),
+	GATE(CLK_SPI2, "spi2", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 18, 0, 0),
+	GATE(CLK_I2S1, "i2s1", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 20, 0, 0),
+	GATE(CLK_I2S2, "i2s2", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 21, 0, 0),
+	GATE(CLK_PCM1, "pcm1", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 22, 0, 0),
+	GATE(CLK_PCM2, "pcm2", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 23, 0, 0),
+	GATE(CLK_PWM, "pwm", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 24, 0, 0),
+	GATE(CLK_SPDIF, "spdif", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 26, 0, 0),
+	GATE(CLK_USI4, "usi4", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 28, 0, 0),
+	GATE(CLK_USI5, "usi5", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 30, 0, 0),
+	GATE(CLK_USI6, "usi6", "mout_user_aclk66_peric",
+			GATE_IP_PERIC, 31, 0, 0),
+
+	GATE(CLK_KEYIF, "keyif", "mout_user_aclk66_peric",
+			GATE_BUS_PERIC, 22, 0, 0),
 
 	/* PERIS Block */
 	GATE(CLK_CHIPID, "chipid", "aclk66_psgen",
diff --git a/include/dt-bindings/clock/exynos5420.h b/include/dt-bindings/clock/exynos5420.h
index 97dcb89d37d3..14e1c8f9640c 100644
--- a/include/dt-bindings/clock/exynos5420.h
+++ b/include/dt-bindings/clock/exynos5420.h
@@ -63,7 +63,6 @@
 #define CLK_SCLK_MPHY_IXTAL24	161
 
 /* gate clocks */
-#define CLK_ACLK66_PERIC	256
 #define CLK_UART0		257
 #define CLK_UART1		258
 #define CLK_UART2		259
-- 
cgit v1.2.3-71-gd317


From 4e26445faad366d67d7723622bf6a60a6f0f5993 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizefan@huawei.com>
Date: Mon, 30 Jun 2014 11:50:28 +0800
Subject: kernfs: introduce kernfs_pin_sb()

kernfs_pin_sb() tries to get a refcnt of the superblock.

This will be used by cgroupfs.

v2:
- make kernfs_pin_sb() return the superblock.
- drop kernfs_drop_sb().

tj: Updated the comment a bit.

[ This is a prerequisite for a bugfix. ]
Cc: <stable@vger.kernel.org> # 3.15
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/kernfs/mount.c      | 30 ++++++++++++++++++++++++++++++
 include/linux/kernfs.h |  1 +
 2 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d171b98a6cdd..f973ae9b05f1 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -211,6 +211,36 @@ void kernfs_kill_sb(struct super_block *sb)
 	kernfs_put(root_kn);
 }
 
+/**
+ * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root
+ * @kernfs_root: the kernfs_root in question
+ * @ns: the namespace tag
+ *
+ * Pin the superblock so the superblock won't be destroyed in subsequent
+ * operations.  This can be used to block ->kill_sb() which may be useful
+ * for kernfs users which dynamically manage superblocks.
+ *
+ * Returns NULL if there's no superblock associated to this kernfs_root, or
+ * -EINVAL if the superblock is being freed.
+ */
+struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
+{
+	struct kernfs_super_info *info;
+	struct super_block *sb = NULL;
+
+	mutex_lock(&kernfs_mutex);
+	list_for_each_entry(info, &root->supers, node) {
+		if (info->ns == ns) {
+			sb = info->sb;
+			if (!atomic_inc_not_zero(&info->sb->s_active))
+				sb = ERR_PTR(-EINVAL);
+			break;
+		}
+	}
+	mutex_unlock(&kernfs_mutex);
+	return sb;
+}
+
 void __init kernfs_init(void)
 {
 	kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 17aa1cce6f8e..20f493564917 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -304,6 +304,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
 			       struct kernfs_root *root, unsigned long magic,
 			       bool *new_sb_created, const void *ns);
 void kernfs_kill_sb(struct super_block *sb);
+struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns);
 
 void kernfs_init(void);
 
-- 
cgit v1.2.3-71-gd317


From b14bf2d0c0358140041d1c1805a674376964d0e0 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 30 Jun 2014 11:04:21 -0400
Subject: usb-storage/SCSI: Add broken_fua blacklist flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some buggy JMicron USB-ATA bridges don't know how to translate the FUA
bit in READs or WRITEs.  This patch adds an entry in unusual_devs.h
and a blacklist flag to tell the sd driver not to use FUA.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: Michael Büsch <m@bues.ch>
Tested-by: Michael Büsch <m@bues.ch>
Acked-by: James Bottomley <James.Bottomley@HansenPartnership.com>
CC: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
CC: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sd.c                  | 5 ++++-
 drivers/usb/storage/scsiglue.c     | 4 ++++
 drivers/usb/storage/unusual_devs.h | 7 +++++++
 include/linux/usb_usual.h          | 4 +++-
 include/scsi/scsi_device.h         | 1 +
 5 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e9689d57ccb6..6825eda1114a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2441,7 +2441,10 @@ sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer)
 		}
 
 		sdkp->DPOFUA = (data.device_specific & 0x10) != 0;
-		if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) {
+		if (sdp->broken_fua) {
+			sd_first_printk(KERN_NOTICE, sdkp, "Disabling FUA\n");
+			sdkp->DPOFUA = 0;
+		} else if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) {
 			sd_first_printk(KERN_NOTICE, sdkp,
 				  "Uses READ/WRITE(6), disabling FUA\n");
 			sdkp->DPOFUA = 0;
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 9d38ddc8da49..866b5df36ed1 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -256,6 +256,10 @@ static int slave_configure(struct scsi_device *sdev)
 		if (us->fflags & US_FL_WRITE_CACHE)
 			sdev->wce_default_on = 1;
 
+		/* A few buggy USB-ATA bridges don't understand FUA */
+		if (us->fflags & US_FL_BROKEN_FUA)
+			sdev->broken_fua = 1;
+
 	} else {
 
 		/* Non-disk-type devices don't need to blacklist any pages
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 174a447868cd..80a5b366255f 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1936,6 +1936,13 @@ UNUSUAL_DEV(  0x14cd, 0x6600, 0x0201, 0x0201,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_IGNORE_RESIDUE ),
 
+/* Reported by Michael Büsch <m@bues.ch> */
+UNUSUAL_DEV(  0x152d, 0x0567, 0x0114, 0x0114,
+		"JMicron",
+		"USB to ATA/ATAPI Bridge",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_BROKEN_FUA ),
+
 /* Reported by Alexandre Oliva <oliva@lsd.ic.unicamp.br>
  * JMicron responds to USN and several other SCSI ioctls with a
  * residue that causes subsequent I/O requests to fail.  */
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index 1a64b26046ed..9b7de1b46437 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -70,7 +70,9 @@
 	US_FLAG(NEEDS_CAP16,	0x00400000)			\
 		/* cannot handle READ_CAPACITY_10 */		\
 	US_FLAG(IGNORE_UAS,	0x00800000)			\
-		/* Device advertises UAS but it is broken */
+		/* Device advertises UAS but it is broken */	\
+	US_FLAG(BROKEN_FUA,	0x01000000)			\
+		/* Cannot handle FUA in WRITE or READ CDBs */	\
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 5853c913d2b0..27ab31017f09 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -173,6 +173,7 @@ struct scsi_device {
 	unsigned is_visible:1;	/* is the device visible in sysfs */
 	unsigned wce_default_on:1;	/* Cache is ON by default */
 	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
+	unsigned broken_fua:1;		/* Don't set FUA bit */
 
 	atomic_t disk_events_disable_depth; /* disable depth for disk events */
 
-- 
cgit v1.2.3-71-gd317


From 330d282216d6e4d845a21b72572dc4df4122e8fa Mon Sep 17 00:00:00 2001
From: Zhengyu He <hzy@google.com>
Date: Tue, 1 Jul 2014 12:11:47 -0700
Subject: core: fix typo in percpu read_mostly section

This fixes a typo that named the read_mostly section of percpu as
readmostly. It works fine with SMP because the linker script specifies
.data..percpu..readmostly. However, UP kernel builds don't have percpu
sections defined and the non-percpu version of the section is called
data..read_mostly, so .data..readmostly will float around and may break
things unexpectedly.

Looking at the original change that introduced data..percpu..readmostly
(commit c957ef2c59e952803766ddc22e89981ab534606f), it looks like this
was the original intention.

Tested: Built UP kernel and confirmed the sections got merged.

- Before the patch:
$ objdump -h vmlinux.o  | grep '\.data\.\.read.*mostly'
38 .data..read_mostly 00004418  0000000000000000  0000000000000000  00431ac0  2**6
50 .data..readmostly 00000014  0000000000000000  0000000000000000  00444000  2**3

- After the patch:
$ objdump -h vmlinux.o  | grep '\.data\.\.read.*mostly'
38 .data..read_mostly 00004438  0000000000000000  0000000000000000  00431ac0  2**6

Signed-off-by: Zhengyu He <hzy@google.com>
Signed-off-by: Filipe Brandenburger <filbranden@google.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 2 +-
 include/linux/percpu-defs.h       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 471ba48c7ae4..c1c0b0cf39b4 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -693,7 +693,7 @@
 	. = ALIGN(PAGE_SIZE);						\
 	*(.data..percpu..page_aligned)					\
 	. = ALIGN(cacheline);						\
-	*(.data..percpu..readmostly)					\
+	*(.data..percpu..read_mostly)					\
 	. = ALIGN(cacheline);						\
 	*(.data..percpu)						\
 	*(.data..percpu..shared_aligned)				\
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index a5fc7d01aad6..dec01d6c3f80 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -146,10 +146,10 @@
  * Declaration/definition used for per-CPU variables that must be read mostly.
  */
 #define DECLARE_PER_CPU_READ_MOSTLY(type, name)			\
-	DECLARE_PER_CPU_SECTION(type, name, "..readmostly")
+	DECLARE_PER_CPU_SECTION(type, name, "..read_mostly")
 
 #define DEFINE_PER_CPU_READ_MOSTLY(type, name)				\
-	DEFINE_PER_CPU_SECTION(type, name, "..readmostly")
+	DEFINE_PER_CPU_SECTION(type, name, "..read_mostly")
 
 /*
  * Intermodule exports for per-CPU variables.  sparse forgets about
-- 
cgit v1.2.3-71-gd317


From ecca47ce8294843045e7465d76fee84dbf07a004 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 1 Jul 2014 16:41:03 -0400
Subject: kernfs: kernfs_notify() must be useable from non-sleepable contexts

d911d9874801 ("kernfs: make kernfs_notify() trigger inotify events
too") added fsnotify triggering to kernfs_notify() which requires a
sleepable context.  There are already existing users of
kernfs_notify() which invoke it from an atomic context and in general
it's silly to require a sleepable context for triggering a
notification.

The following is an invalid context bug triggerd by md invoking
sysfs_notify() from IO completion path.

 BUG: sleeping function called from invalid context at kernel/locking/mutex.c:586
 in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1
 2 locks held by swapper/1/0:
  #0:  (&(&vblk->vq_lock)->rlock){-.-...}, at: [<ffffffffa0039042>] virtblk_done+0x42/0xe0 [virtio_blk]
  #1:  (&(&bitmap->counts.lock)->rlock){-.....}, at: [<ffffffff81633718>] bitmap_endwrite+0x68/0x240
 irq event stamp: 33518
 hardirqs last  enabled at (33515): [<ffffffff8102544f>] default_idle+0x1f/0x230
 hardirqs last disabled at (33516): [<ffffffff818122ed>] common_interrupt+0x6d/0x72
 softirqs last  enabled at (33518): [<ffffffff810a1272>] _local_bh_enable+0x22/0x50
 softirqs last disabled at (33517): [<ffffffff810a29e0>] irq_enter+0x60/0x80
 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.16.0-0.rc2.git2.1.fc21.x86_64 #1
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  0000000000000000 f90db13964f4ee05 ffff88007d403b80 ffffffff81807b4c
  0000000000000000 ffff88007d403ba8 ffffffff810d4f14 0000000000000000
  0000000000441800 ffff880078fa1780 ffff88007d403c38 ffffffff8180caf2
 Call Trace:
  <IRQ>  [<ffffffff81807b4c>] dump_stack+0x4d/0x66
  [<ffffffff810d4f14>] __might_sleep+0x184/0x240
  [<ffffffff8180caf2>] mutex_lock_nested+0x42/0x440
  [<ffffffff812d76a0>] kernfs_notify+0x90/0x150
  [<ffffffff8163377c>] bitmap_endwrite+0xcc/0x240
  [<ffffffffa00de863>] close_write+0x93/0xb0 [raid1]
  [<ffffffffa00df029>] r1_bio_write_done+0x29/0x50 [raid1]
  [<ffffffffa00e0474>] raid1_end_write_request+0xe4/0x260 [raid1]
  [<ffffffff813acb8b>] bio_endio+0x6b/0xa0
  [<ffffffff813b46c4>] blk_update_request+0x94/0x420
  [<ffffffff813bf0ea>] blk_mq_end_io+0x1a/0x70
  [<ffffffffa00392c2>] virtblk_request_done+0x32/0x80 [virtio_blk]
  [<ffffffff813c0648>] __blk_mq_complete_request+0x88/0x120
  [<ffffffff813c070a>] blk_mq_complete_request+0x2a/0x30
  [<ffffffffa0039066>] virtblk_done+0x66/0xe0 [virtio_blk]
  [<ffffffffa002535a>] vring_interrupt+0x3a/0xa0 [virtio_ring]
  [<ffffffff81116177>] handle_irq_event_percpu+0x77/0x340
  [<ffffffff8111647d>] handle_irq_event+0x3d/0x60
  [<ffffffff81119436>] handle_edge_irq+0x66/0x130
  [<ffffffff8101c3e4>] handle_irq+0x84/0x150
  [<ffffffff818146ad>] do_IRQ+0x4d/0xe0
  [<ffffffff818122f2>] common_interrupt+0x72/0x72
  <EOI>  [<ffffffff8105f706>] ? native_safe_halt+0x6/0x10
  [<ffffffff81025454>] default_idle+0x24/0x230
  [<ffffffff81025f9f>] arch_cpu_idle+0xf/0x20
  [<ffffffff810f5adc>] cpu_startup_entry+0x37c/0x7b0
  [<ffffffff8104df1b>] start_secondary+0x25b/0x300

This patch fixes it by punting the notification delivery through a
work item.  This ends up adding an extra pointer to kernfs_elem_attr
enlarging kernfs_node by a pointer, which is not ideal but not a very
big deal either.  If this turns out to be an actual issue, we can move
kernfs_elem_attr->size to kernfs_node->iattr later.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Josh Boyer <jwboyer@fedoraproject.org>
Cc: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/kernfs/file.c       | 69 ++++++++++++++++++++++++++++++++++++++++----------
 include/linux/kernfs.h |  1 +
 2 files changed, 56 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e3d37f607f97..d895b4b7b661 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -39,6 +39,19 @@ struct kernfs_open_node {
 	struct list_head	files; /* goes through kernfs_open_file.list */
 };
 
+/*
+ * kernfs_notify() may be called from any context and bounces notifications
+ * through a work item.  To minimize space overhead in kernfs_node, the
+ * pending queue is implemented as a singly linked list of kernfs_nodes.
+ * The list is terminated with the self pointer so that whether a
+ * kernfs_node is on the list or not can be determined by testing the next
+ * pointer for NULL.
+ */
+#define KERNFS_NOTIFY_EOL			((void *)&kernfs_notify_list)
+
+static DEFINE_SPINLOCK(kernfs_notify_lock);
+static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
+
 static struct kernfs_open_file *kernfs_of(struct file *file)
 {
 	return ((struct seq_file *)file->private_data)->private;
@@ -783,24 +796,25 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
 	return DEFAULT_POLLMASK|POLLERR|POLLPRI;
 }
 
-/**
- * kernfs_notify - notify a kernfs file
- * @kn: file to notify
- *
- * Notify @kn such that poll(2) on @kn wakes up.
- */
-void kernfs_notify(struct kernfs_node *kn)
+static void kernfs_notify_workfn(struct work_struct *work)
 {
-	struct kernfs_root *root = kernfs_root(kn);
+	struct kernfs_node *kn;
 	struct kernfs_open_node *on;
 	struct kernfs_super_info *info;
-	unsigned long flags;
-
-	if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
+repeat:
+	/* pop one off the notify_list */
+	spin_lock_irq(&kernfs_notify_lock);
+	kn = kernfs_notify_list;
+	if (kn == KERNFS_NOTIFY_EOL) {
+		spin_unlock_irq(&kernfs_notify_lock);
 		return;
+	}
+	kernfs_notify_list = kn->attr.notify_next;
+	kn->attr.notify_next = NULL;
+	spin_unlock_irq(&kernfs_notify_lock);
 
 	/* kick poll */
-	spin_lock_irqsave(&kernfs_open_node_lock, flags);
+	spin_lock_irq(&kernfs_open_node_lock);
 
 	on = kn->attr.open;
 	if (on) {
@@ -808,12 +822,12 @@ void kernfs_notify(struct kernfs_node *kn)
 		wake_up_interruptible(&on->poll);
 	}
 
-	spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
+	spin_unlock_irq(&kernfs_open_node_lock);
 
 	/* kick fsnotify */
 	mutex_lock(&kernfs_mutex);
 
-	list_for_each_entry(info, &root->supers, node) {
+	list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
 		struct inode *inode;
 		struct dentry *dentry;
 
@@ -833,6 +847,33 @@ void kernfs_notify(struct kernfs_node *kn)
 	}
 
 	mutex_unlock(&kernfs_mutex);
+	kernfs_put(kn);
+	goto repeat;
+}
+
+/**
+ * kernfs_notify - notify a kernfs file
+ * @kn: file to notify
+ *
+ * Notify @kn such that poll(2) on @kn wakes up.  Maybe be called from any
+ * context.
+ */
+void kernfs_notify(struct kernfs_node *kn)
+{
+	static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
+	unsigned long flags;
+
+	if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
+		return;
+
+	spin_lock_irqsave(&kernfs_notify_lock, flags);
+	if (!kn->attr.notify_next) {
+		kernfs_get(kn);
+		kn->attr.notify_next = kernfs_notify_list;
+		kernfs_notify_list = kn;
+		schedule_work(&kernfs_notify_work);
+	}
+	spin_unlock_irqrestore(&kernfs_notify_lock, flags);
 }
 EXPORT_SYMBOL_GPL(kernfs_notify);
 
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 17aa1cce6f8e..145375ea0bd9 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -91,6 +91,7 @@ struct kernfs_elem_attr {
 	const struct kernfs_ops	*ops;
 	struct kernfs_open_node	*open;
 	loff_t			size;
+	struct kernfs_node	*notify_next;	/* for kernfs_notify() */
 };
 
 /*
-- 
cgit v1.2.3-71-gd317


From 5616b0a46ed82eb9a093f752fc4d7bd3cc688583 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 24 Jun 2014 16:59:35 +0200
Subject: [SCSI] use the scsi data buffer length to extract transfer size

Commit 8846bab180fa introduced a helper that can be used to query the
wire transfer size for a SCSI command taking protection information into
account.

However, some commands do not have a 1:1 mapping between the block range
they work on and the payload size (discard, write same). After the
scatterlist has been set up these requests use __data_len to store the
number of bytes to report completion on. This means that callers of
scsi_transfer_length() would get the wrong byte count for these types of
requests.

To overcome this we make scsi_transfer_length() use the scatterlist
length in the scsi_data_buffer as basis for the wire transfer
calculation instead of __data_len.

Reported-by: Christoph Hellwig <hch@infradead.org>
Debugged-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Fixes: d77e65350f2d82dfa0557707d505711f5a43c8fd
Cc: stable@vger.kernel.org
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
---
 include/scsi/scsi_cmnd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 42ed789ebafc..e0ae71098144 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -318,7 +318,7 @@ static inline void set_driver_byte(struct scsi_cmnd *cmd, char status)
 
 static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd)
 {
-	unsigned int xfer_len = blk_rq_bytes(scmd->request);
+	unsigned int xfer_len = scsi_out(scmd)->length;
 	unsigned int prot_op = scsi_get_prot_op(scmd);
 	unsigned int sector_size = scmd->device->sector_size;
 
-- 
cgit v1.2.3-71-gd317


From b9cd18de4db3c9ffa7e17b0dc0ca99ed5aa4d43a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 3 Jul 2014 15:43:15 -0400
Subject: ptrace,x86: force IRET path after a ptrace_stop()

The 'sysret' fastpath does not correctly restore even all regular
registers, much less any segment registers or reflags values.  That is
very much part of why it's faster than 'iret'.

Normally that isn't a problem, because the normal ptrace() interface
catches the process using the signal handler infrastructure, which
always returns with an iret.

However, some paths can get caught using ptrace_event() instead of the
signal path, and for those we need to make sure that we aren't going to
return to user space using 'sysret'.  Otherwise the modifications that
may have been done to the register set by the tracer wouldn't
necessarily take effect.

Fix it by forcing IRET path by setting TIF_NOTIFY_RESUME from
arch_ptrace_stop_needed() which is invoked from ptrace_stop().

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/ptrace.h | 16 ++++++++++++++++
 include/linux/ptrace.h        |  3 +++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 14fd6fd75a19..6205f0c434db 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -231,6 +231,22 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
 
 #define ARCH_HAS_USER_SINGLE_STEP_INFO
 
+/*
+ * When hitting ptrace_stop(), we cannot return using SYSRET because
+ * that does not restore the full CPU state, only a minimal set.  The
+ * ptracer can change arbitrary register values, which is usually okay
+ * because the usual ptrace stops run off the signal delivery path which
+ * forces IRET; however, ptrace_event() stops happen in arbitrary places
+ * in the kernel and don't force IRET path.
+ *
+ * So force IRET path after a ptrace stop.
+ */
+#define arch_ptrace_stop_needed(code, info)				\
+({									\
+	set_thread_flag(TIF_NOTIFY_RESUME);				\
+	false;								\
+})
+
 struct user_desc;
 extern int do_get_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 077904c8b70d..cc79eff4a1ad 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -334,6 +334,9 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
  * calling arch_ptrace_stop() when it would be superfluous.  For example,
  * if the thread has not been back to user mode since the last stop, the
  * thread state might indicate that nothing needs to be done.
+ *
+ * This is guaranteed to be invoked once before a task stops for ptrace and
+ * may include arch-specific operations necessary prior to a ptrace stop.
  */
 #define arch_ptrace_stop_needed(code, info)	(0)
 #endif
-- 
cgit v1.2.3-71-gd317


From c149dcb5c60bfea8871f16dfcc0690255eeb825f Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 4 Jul 2014 10:00:37 +0800
Subject: drm/i915: provide interface for audio driver to query cdclk

For Haswell and Broadwell, if the display power well has been disabled,
the display audio controller divider values EM4 M VALUE and EM5 N VALUE
will have been lost. The CDCLK frequency is required for reprogramming them
to generate 24MHz HD-A link BCLK. So provide a private interface for the
audio driver to query CDCLK.

This is a stopgap solution until a more generic interface between audio
and display drivers has been implemented.

Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Mengdong Lin <mengdong.lin@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/gpu/drm/i915/intel_pm.c | 21 +++++++++++++++++++++
 include/drm/i915_powerwell.h    |  1 +
 2 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 6463f0201cf2..409d62676854 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6053,6 +6053,27 @@ int i915_release_power_well(void)
 }
 EXPORT_SYMBOL_GPL(i915_release_power_well);
 
+/*
+ * Private interface for the audio driver to get CDCLK in kHz.
+ *
+ * Caller must request power well using i915_request_power_well() prior to
+ * making the call.
+ */
+int i915_get_cdclk_freq(void)
+{
+	struct drm_i915_private *dev_priv;
+
+	if (!hsw_pwr)
+		return -ENODEV;
+
+	dev_priv = container_of(hsw_pwr, struct drm_i915_private,
+				power_domains);
+
+	return intel_ddi_get_cdclk_freq(dev_priv);
+}
+EXPORT_SYMBOL_GPL(i915_get_cdclk_freq);
+
+
 #define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1)
 
 #define HSW_ALWAYS_ON_POWER_DOMAINS (			\
diff --git a/include/drm/i915_powerwell.h b/include/drm/i915_powerwell.h
index 2baba9996094..baa6f11b1837 100644
--- a/include/drm/i915_powerwell.h
+++ b/include/drm/i915_powerwell.h
@@ -32,5 +32,6 @@
 /* For use by hda_i915 driver */
 extern int i915_request_power_well(void);
 extern int i915_release_power_well(void);
+extern int i915_get_cdclk_freq(void);
 
 #endif				/* _I915_POWERWELL_H_ */
-- 
cgit v1.2.3-71-gd317


From 0b9f7d93ca6109048a4eb06332b666b6e29df4fe Mon Sep 17 00:00:00 2001
From: Aaron Lu <aaron.lu@intel.com>
Date: Mon, 7 Jul 2014 15:43:51 +0800
Subject: ACPI / i915: ignore firmware requests for backlight change

Some Thinkpad laptops' firmware will initiate a backlight level change
request through operation region on the events of AC plug/unplug, but
since we are not using firmware's interface to do the backlight setting
on these affected laptops, we do not want the firmware to use some
arbitrary value from its ASL variable to set the backlight level on
AC plug/unplug either.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=76491
Link: https://bugzilla.kernel.org/show_bug.cgi?id=77091
Reported-and-tested-by: Igor Gnatenko <i.gnatenko.brain@gmail.com>
Reported-and-tested-by: Anton Gubarkov <anton.gubarkov@gmail.com>
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/video.c                  | 3 ++-
 drivers/gpu/drm/i915/intel_opregion.c | 9 +++++++++
 include/acpi/video.h                  | 2 ++
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index fb9ffe9adc64..5bb1278e9324 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -241,13 +241,14 @@ static bool acpi_video_use_native_backlight(void)
 		return use_native_backlight_dmi;
 }
 
-static bool acpi_video_verify_backlight_support(void)
+bool acpi_video_verify_backlight_support(void)
 {
 	if (acpi_osi_is_win8() && acpi_video_use_native_backlight() &&
 	    backlight_device_registered(BACKLIGHT_RAW))
 		return false;
 	return acpi_video_backlight_support();
 }
+EXPORT_SYMBOL_GPL(acpi_video_verify_backlight_support);
 
 /* backlight device sysfs support */
 static int acpi_video_get_brightness(struct backlight_device *bd)
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index 2e2c71fcc9ed..4f6b53998d79 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -403,6 +403,15 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
 
 	DRM_DEBUG_DRIVER("bclp = 0x%08x\n", bclp);
 
+	/*
+	 * If the acpi_video interface is not supposed to be used, don't
+	 * bother processing backlight level change requests from firmware.
+	 */
+	if (!acpi_video_verify_backlight_support()) {
+		DRM_DEBUG_KMS("opregion backlight request ignored\n");
+		return 0;
+	}
+
 	if (!(bclp & ASLE_BCLP_VALID))
 		return ASLC_BACKLIGHT_FAILED;
 
diff --git a/include/acpi/video.h b/include/acpi/video.h
index ea4c7bbded4d..843ef1adfbfa 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -22,6 +22,7 @@ extern void acpi_video_unregister(void);
 extern void acpi_video_unregister_backlight(void);
 extern int acpi_video_get_edid(struct acpi_device *device, int type,
 			       int device_id, void **edid);
+extern bool acpi_video_verify_backlight_support(void);
 #else
 static inline int acpi_video_register(void) { return 0; }
 static inline void acpi_video_unregister(void) { return; }
@@ -31,6 +32,7 @@ static inline int acpi_video_get_edid(struct acpi_device *device, int type,
 {
 	return -ENODEV;
 }
+static inline bool acpi_video_verify_backlight_support(void) { return false; }
 #endif
 
 #endif
-- 
cgit v1.2.3-71-gd317


From c0fb262bf226a5c943e4309662353d5fb905310a Mon Sep 17 00:00:00 2001
From: Arun Kumar K <arun.kk@samsung.com>
Date: Fri, 11 Jul 2014 08:03:59 +0900
Subject: clk: exynos5420: Add IDs for clocks used in PD mfc

Adds IDs for MUX clocks to be used by power domain for MFC
for doing re-parenting while pd on/off.

Signed-off-by: Arun Kumar K <arun.kk@samsung.com>
Signed-off-by: Shaik Ameer Basha <shaik.ameer@samsung.com>
Acked-by: Tomasz Figa <t.figa@samsung.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 drivers/clk/samsung/clk-exynos5420.c   | 6 ++++--
 include/dt-bindings/clock/exynos5420.h | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c
index 9d7d7eed03fd..f74f882f7f29 100644
--- a/drivers/clk/samsung/clk-exynos5420.c
+++ b/drivers/clk/samsung/clk-exynos5420.c
@@ -631,7 +631,8 @@ static struct samsung_mux_clock exynos5x_mux_clks[] __initdata = {
 			SRC_TOP4, 16, 1),
 	MUX(0, "mout_user_aclk266", mout_user_aclk266_p, SRC_TOP4, 20, 1),
 	MUX(0, "mout_user_aclk166", mout_user_aclk166_p, SRC_TOP4, 24, 1),
-	MUX(0, "mout_user_aclk333", mout_user_aclk333_p, SRC_TOP4, 28, 1),
+	MUX(CLK_MOUT_USER_ACLK333, "mout_user_aclk333", mout_user_aclk333_p,
+			SRC_TOP4, 28, 1),
 
 	MUX(0, "mout_user_aclk400_disp1", mout_user_aclk400_disp1_p,
 			SRC_TOP5, 0, 1),
@@ -684,7 +685,8 @@ static struct samsung_mux_clock exynos5x_mux_clks[] __initdata = {
 			SRC_TOP11, 12, 1),
 	MUX(0, "mout_sw_aclk266", mout_sw_aclk266_p, SRC_TOP11, 20, 1),
 	MUX(0, "mout_sw_aclk166", mout_sw_aclk166_p, SRC_TOP11, 24, 1),
-	MUX(0, "mout_sw_aclk333", mout_sw_aclk333_p, SRC_TOP11, 28, 1),
+	MUX(CLK_MOUT_SW_ACLK333, "mout_sw_aclk333", mout_sw_aclk333_p,
+			SRC_TOP11, 28, 1),
 
 	MUX(0, "mout_sw_aclk400_disp1", mout_sw_aclk400_disp1_p,
 			SRC_TOP12, 4, 1),
diff --git a/include/dt-bindings/clock/exynos5420.h b/include/dt-bindings/clock/exynos5420.h
index 97dcb89d37d3..3fc08ff24fa9 100644
--- a/include/dt-bindings/clock/exynos5420.h
+++ b/include/dt-bindings/clock/exynos5420.h
@@ -203,6 +203,8 @@
 #define CLK_MOUT_G3D		641
 #define CLK_MOUT_VPLL		642
 #define CLK_MOUT_MAUDIO0	643
+#define CLK_MOUT_USER_ACLK333	644
+#define CLK_MOUT_SW_ACLK333	645
 
 /* divider clocks */
 #define CLK_DOUT_PIXEL		768
-- 
cgit v1.2.3-71-gd317