From ad30ca2c03cecfb1b0749874bdceead269542de6 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Mon, 15 Dec 2014 19:25:59 +0200
Subject: cfg80211: allow usermode to query wiphy specific regdom

If a wiphy-idx is specified, the kernel will return the wiphy specific
regdomain, if such exists. Otherwise return the global regdom.

When no wiphy-idx is specified, return the global regdomain as well as
all wiphy-specific regulatory domains in the system, via a new nested
list of attributes.

Add a new attribute for each wiphy-specific regdomain, for usermode to
identify it as such.

Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index b37bd5a1cb82..2d384d041224 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -252,7 +252,15 @@
  *	%NL80211_ATTR_IFINDEX.
  *
  * @NL80211_CMD_GET_REG: ask the wireless core to send us its currently set
- * 	regulatory domain.
+ *	regulatory domain. If %NL80211_ATTR_WIPHY is specified and the device
+ *	has a private regulatory domain, it will be returned. Otherwise, the
+ *	global regdomain will be returned.
+ *	A device will have a private regulatory domain if it uses the
+ *	regulatory_hint() API. Even when a private regdomain is used the channel
+ *	information will still be mended according to further hints from
+ *	the regulatory core to help with compliance. A dump version of this API
+ *	is now available which will returns the global regdomain as well as
+ *	all private regdomains of present wiphys (for those that have it).
  * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command
  *	after being queried by the kernel. CRDA replies by sending a regulatory
  *	domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our
-- 
cgit v1.2.3-71-gd317


From b0d7aa59592b4270531de5ce65dcf18338a2d98c Mon Sep 17 00:00:00 2001
From: Jonathan Doron <jond@wizery.com>
Date: Mon, 15 Dec 2014 19:26:00 +0200
Subject: cfg80211: allow wiphy specific regdomain management

Add a new regulatory flag that allows a driver to manage regdomain
changes/updates for its own wiphy.
A self-managed wiphys only employs regulatory information obtained from
the FW and driver and does not use other cfg80211 sources like
beacon-hints, country-code IEs and hints from other devices on the same
system. Conversely, a self-managed wiphy does not share its regulatory
hints with other devices in the system. If a system contains several
devices, one or more of which are self-managed, there might be
contradictory regulatory settings between them. Usage of flag is
generally discouraged. Only use it if the FW/driver is incompatible
with non-locally originated hints.

A new API lets the driver send a complete regdomain, to be applied on
its wiphy only.

After a wiphy-specific regdomain change takes place, usermode will get
a new type of change notification. The regulatory core also takes care
enforce regulatory restrictions, in case some interfaces are on
forbidden channels.

Signed-off-by: Jonathan Doron <jonathanx.doron@intel.com>
Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Reviewed-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 14 +++++++
 include/net/regulatory.h     | 19 ++++++++++
 include/uapi/linux/nl80211.h |  6 +++
 net/wireless/core.c          |  8 ++++
 net/wireless/core.h          |  7 ++++
 net/wireless/nl80211.c       | 49 +++++++++++++++---------
 net/wireless/nl80211.h       | 16 +++++++-
 net/wireless/reg.c           | 88 ++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 188 insertions(+), 19 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4ebb816241fa..4bc1fc9971a5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3807,6 +3807,20 @@ const u8 *cfg80211_find_vendor_ie(unsigned int oui, u8 oui_type,
  */
 int regulatory_hint(struct wiphy *wiphy, const char *alpha2);
 
+/**
+ * regulatory_set_wiphy_regd - set regdom info for self managed drivers
+ * @wiphy: the wireless device we want to process the regulatory domain on
+ * @rd: the regulatory domain informatoin to use for this wiphy
+ *
+ * Set the regulatory domain information for self-managed wiphys, only they
+ * may use this function. See %REGULATORY_WIPHY_SELF_MANAGED for more
+ * information.
+ *
+ * Return: 0 on success. -EINVAL, -EPERM
+ */
+int regulatory_set_wiphy_regd(struct wiphy *wiphy,
+			      struct ieee80211_regdomain *rd);
+
 /**
  * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain
  * @wiphy: the wireless device we want to process the regulatory domain on
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index b776d72d84be..ebc5a2ed8631 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -147,6 +147,24 @@ struct regulatory_request {
  *	NL80211_IFTYPE_P2P_CLIENT, NL80211_IFTYPE_P2P_GO,
  *	NL80211_IFTYPE_P2P_DEVICE. The flag will be set by default if a device
  *	includes any modes unsupported for enforcement checking.
+ * @REGULATORY_WIPHY_SELF_MANAGED: for devices that employ wiphy-specific
+ *	regdom management. These devices will ignore all regdom changes not
+ *	originating from their own wiphy.
+ *	A self-managed wiphys only employs regulatory information obtained from
+ *	the FW and driver and does not use other cfg80211 sources like
+ *	beacon-hints, country-code IEs and hints from other devices on the same
+ *	system. Conversely, a self-managed wiphy does not share its regulatory
+ *	hints with other devices in the system. If a system contains several
+ *	devices, one or more of which are self-managed, there might be
+ *	contradictory regulatory settings between them. Usage of flag is
+ *	generally discouraged. Only use it if the FW/driver is incompatible
+ *	with non-locally originated hints.
+ *	This flag is incompatible with the flags: %REGULATORY_CUSTOM_REG,
+ *	%REGULATORY_STRICT_REG, %REGULATORY_COUNTRY_IE_FOLLOW_POWER,
+ *	%REGULATORY_COUNTRY_IE_IGNORE and %REGULATORY_DISABLE_BEACON_HINTS.
+ *	Mixing any of the above flags with this flag will result in a failure
+ *	to register the wiphy. This flag implies
+ *	%REGULATORY_DISABLE_BEACON_HINTS and %REGULATORY_COUNTRY_IE_IGNORE.
  */
 enum ieee80211_regulatory_flags {
 	REGULATORY_CUSTOM_REG			= BIT(0),
@@ -156,6 +174,7 @@ enum ieee80211_regulatory_flags {
 	REGULATORY_COUNTRY_IE_IGNORE		= BIT(4),
 	REGULATORY_ENABLE_RELAX_NO_IR           = BIT(5),
 	REGULATORY_IGNORE_STALE_KICKOFF         = BIT(6),
+	REGULATORY_WIPHY_SELF_MANAGED		= BIT(7),
 };
 
 struct ieee80211_freq_range {
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 2d384d041224..fb58e654f523 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -782,6 +782,10 @@
  *	peer given by %NL80211_ATTR_MAC. Both peers must be on the base channel
  *	when this command completes.
  *
+ * @NL80211_CMD_WIPHY_REG_CHANGE: Similar to %NL80211_CMD_REG_CHANGE, but used
+ *	as an event to indicate changes for devices with wiphy-specific regdom
+ *	management.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -966,6 +970,8 @@ enum nl80211_commands {
 	NL80211_CMD_TDLS_CHANNEL_SWITCH,
 	NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH,
 
+	NL80211_CMD_WIPHY_REG_CHANGE,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 4910758baab1..b661fcce7e3c 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -561,6 +561,14 @@ int wiphy_register(struct wiphy *wiphy)
 				       BIT(NL80211_IFTYPE_MONITOR)))
 		wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF;
 
+	if (WARN_ON((wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) &&
+		    (wiphy->regulatory_flags &
+					(REGULATORY_CUSTOM_REG |
+					 REGULATORY_STRICT_REG |
+					 REGULATORY_COUNTRY_IE_FOLLOW_POWER |
+					 REGULATORY_COUNTRY_IE_IGNORE))))
+		return -EINVAL;
+
 	if (WARN_ON(wiphy->coalesce &&
 		    (!wiphy->coalesce->n_rules ||
 		     !wiphy->coalesce->n_patterns) &&
diff --git a/net/wireless/core.h b/net/wireless/core.h
index faa5b1609aae..e87cae57a83b 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -36,6 +36,13 @@ struct cfg80211_registered_device {
 	 * the country on the country IE changed. */
 	char country_ie_alpha2[2];
 
+	/*
+	 * the driver requests the regulatory core to set this regulatory
+	 * domain as the wiphy's. Only used for %REGULATORY_WIPHY_SELF_MANAGED
+	 * devices using the regulatory_set_wiphy_regd() API
+	 */
+	const struct ieee80211_regdomain *requested_regd;
+
 	/* If a Country IE has been received this tells us the environment
 	 * which its telling us its in. This defaults to ENVIRON_ANY */
 	enum environment_cap env;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2d5dc428c5ab..eebb7e422989 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -11042,25 +11042,9 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
 				NL80211_MCGRP_SCAN, GFP_KERNEL);
 }
 
-/*
- * This can happen on global regulatory changes or device specific settings
- * based on custom world regulatory domains.
- */
-void nl80211_send_reg_change_event(struct regulatory_request *request)
+static bool nl80211_reg_change_event_fill(struct sk_buff *msg,
+					  struct regulatory_request *request)
 {
-	struct sk_buff *msg;
-	void *hdr;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return;
-
-	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_CHANGE);
-	if (!hdr) {
-		nlmsg_free(msg);
-		return;
-	}
-
 	/* Userspace can always count this one always being set */
 	if (nla_put_u8(msg, NL80211_ATTR_REG_INITIATOR, request->initiator))
 		goto nla_put_failure;
@@ -11094,6 +11078,35 @@ void nl80211_send_reg_change_event(struct regulatory_request *request)
 			goto nla_put_failure;
 	}
 
+	return true;
+
+nla_put_failure:
+	return false;
+}
+
+/*
+ * This can happen on global regulatory changes or device specific settings
+ * based on custom regulatory domains.
+ */
+void nl80211_common_reg_change_event(enum nl80211_commands cmd_id,
+				     struct regulatory_request *request)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd_id);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	if (nl80211_reg_change_event_fill(msg, request) == false)
+		goto nla_put_failure;
+
 	genlmsg_end(msg, hdr);
 
 	rcu_read_lock();
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 7ad70d6f0cc6..84d4edf1d545 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -17,7 +17,21 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev,
 			     struct net_device *netdev, u32 cmd);
 void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev,
 				     struct net_device *netdev);
-void nl80211_send_reg_change_event(struct regulatory_request *request);
+void nl80211_common_reg_change_event(enum nl80211_commands cmd_id,
+				     struct regulatory_request *request);
+
+static inline void
+nl80211_send_reg_change_event(struct regulatory_request *request)
+{
+	nl80211_common_reg_change_event(NL80211_CMD_REG_CHANGE, request);
+}
+
+static inline void
+nl80211_send_wiphy_reg_change_event(struct regulatory_request *request)
+{
+	nl80211_common_reg_change_event(NL80211_CMD_WIPHY_REG_CHANGE, request);
+}
+
 void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
 			  struct net_device *netdev,
 			  const u8 *buf, size_t len, gfp_t gfp);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2d9760084b74..c040f8a0f1ed 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1307,6 +1307,9 @@ static bool ignore_reg_update(struct wiphy *wiphy,
 {
 	struct regulatory_request *lr = get_last_request();
 
+	if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED)
+		return true;
+
 	if (!lr) {
 		REG_DBG_PRINT("Ignoring regulatory request set by %s "
 			      "since last_request is not set\n",
@@ -2147,11 +2150,52 @@ static void reg_process_pending_beacon_hints(void)
 	spin_unlock_bh(&reg_pending_beacons_lock);
 }
 
+static void reg_process_self_managed_hints(void)
+{
+	struct cfg80211_registered_device *rdev;
+	struct wiphy *wiphy;
+	const struct ieee80211_regdomain *tmp;
+	const struct ieee80211_regdomain *regd;
+	enum ieee80211_band band;
+	struct regulatory_request request = {};
+
+	list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+		wiphy = &rdev->wiphy;
+
+		spin_lock(&reg_requests_lock);
+		regd = rdev->requested_regd;
+		rdev->requested_regd = NULL;
+		spin_unlock(&reg_requests_lock);
+
+		if (regd == NULL)
+			continue;
+
+		tmp = get_wiphy_regdom(wiphy);
+		rcu_assign_pointer(wiphy->regd, regd);
+		rcu_free_regdom(tmp);
+
+		for (band = 0; band < IEEE80211_NUM_BANDS; band++)
+			handle_band_custom(wiphy, wiphy->bands[band], regd);
+
+		reg_process_ht_flags(wiphy);
+
+		request.wiphy_idx = get_wiphy_idx(wiphy);
+		request.alpha2[0] = regd->alpha2[0];
+		request.alpha2[1] = regd->alpha2[1];
+		request.initiator = NL80211_REGDOM_SET_BY_DRIVER;
+
+		nl80211_send_wiphy_reg_change_event(&request);
+	}
+
+	reg_check_channels();
+}
+
 static void reg_todo(struct work_struct *work)
 {
 	rtnl_lock();
 	reg_process_pending_hints();
 	reg_process_pending_beacon_hints();
+	reg_process_self_managed_hints();
 	rtnl_unlock();
 }
 
@@ -2432,6 +2476,8 @@ static void restore_regulatory_settings(bool reset_user)
 	world_alpha2[1] = cfg80211_world_regdom->alpha2[1];
 
 	list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
+		if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED)
+			continue;
 		if (rdev->wiphy.regulatory_flags & REGULATORY_CUSTOM_REG)
 			restore_custom_reg_settings(&rdev->wiphy);
 	}
@@ -2835,10 +2881,52 @@ int set_regdom(const struct ieee80211_regdomain *rd)
 	return 0;
 }
 
+int regulatory_set_wiphy_regd(struct wiphy *wiphy,
+			      struct ieee80211_regdomain *rd)
+{
+	const struct ieee80211_regdomain *regd;
+	const struct ieee80211_regdomain *prev_regd;
+	struct cfg80211_registered_device *rdev;
+
+	if (WARN_ON(!wiphy || !rd))
+		return -EINVAL;
+
+	if (WARN(!(wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED),
+		 "wiphy should have REGULATORY_WIPHY_SELF_MANAGED\n"))
+		return -EPERM;
+
+	if (WARN(!is_valid_rd(rd), "Invalid regulatory domain detected\n")) {
+		print_regdomain_info(rd);
+		return -EINVAL;
+	}
+
+	regd = reg_copy_regd(rd);
+	if (IS_ERR(regd))
+		return PTR_ERR(regd);
+
+	rdev = wiphy_to_rdev(wiphy);
+
+	spin_lock(&reg_requests_lock);
+	prev_regd = rdev->requested_regd;
+	rdev->requested_regd = regd;
+	spin_unlock(&reg_requests_lock);
+
+	kfree(prev_regd);
+
+	schedule_work(&reg_work);
+	return 0;
+}
+EXPORT_SYMBOL(regulatory_set_wiphy_regd);
+
 void wiphy_regulatory_register(struct wiphy *wiphy)
 {
 	struct regulatory_request *lr;
 
+	/* self-managed devices ignore external hints */
+	if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED)
+		wiphy->regulatory_flags |= REGULATORY_DISABLE_BEACON_HINTS |
+					   REGULATORY_COUNTRY_IE_IGNORE;
+
 	if (!reg_dev_ignore_cell_hint(wiphy))
 		reg_num_devs_support_basehint++;
 
-- 
cgit v1.2.3-71-gd317


From 1bdd716cbccabc8127fbbaaa663c3090302ef78b Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Mon, 15 Dec 2014 19:26:01 +0200
Subject: cfg80211: return private regdom for self-managed devices

If a device has self-managed regulatory, insist on returning the wiphy
specific regdomain if a wiphy-idx is specified. The global regdomain is
meaningless for such devices.

Also add an attribute for self-managed devices, so usermode can
distinguish them as such.

Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Reviewed-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 10 ++++++++++
 net/wireless/nl80211.c       | 24 ++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index fb58e654f523..b3ada0b3a276 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -261,6 +261,9 @@
  *	the regulatory core to help with compliance. A dump version of this API
  *	is now available which will returns the global regdomain as well as
  *	all private regdomains of present wiphys (for those that have it).
+ *	If a wiphy is self-managed (%NL80211_ATTR_WIPHY_SELF_MANAGED_REG), then
+ *	its private regdomain is the only valid one for it. The regulatory
+ *	core is not used to help with compliance in this case.
  * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command
  *	after being queried by the kernel. CRDA replies by sending a regulatory
  *	domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our
@@ -1702,6 +1705,11 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_MAC_MASK: MAC address mask
  *
+ * @NL80211_ATTR_WIPHY_SELF_MANAGED_REG: flag attribute indicating this device
+ *	is self-managing its regulatory information and any regulatory domain
+ *	obtained from it is coming from the device's wiphy and not the global
+ *	cfg80211 regdomain.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2059,6 +2067,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_MAC_MASK,
 
+	NL80211_ATTR_WIPHY_SELF_MANAGED_REG,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index eebb7e422989..5b1907f4c181 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -396,6 +396,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
 	[NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 },
 	[NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN },
+	[NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -1701,6 +1702,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
 			       rdev->wiphy.max_num_csa_counters))
 			goto nla_put_failure;
 
+		if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED &&
+		    nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG))
+			goto nla_put_failure;
+
 		/* done */
 		state->split_start = 0;
 		break;
@@ -5406,6 +5411,8 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
 		goto put_failure;
 
 	if (info->attrs[NL80211_ATTR_WIPHY]) {
+		bool self_managed;
+
 		rdev = cfg80211_get_dev_from_info(genl_info_net(info), info);
 		if (IS_ERR(rdev)) {
 			nlmsg_free(msg);
@@ -5413,8 +5420,16 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
 		}
 
 		wiphy = &rdev->wiphy;
+		self_managed = wiphy->regulatory_flags &
+			       REGULATORY_WIPHY_SELF_MANAGED;
 		regdom = get_wiphy_regdom(wiphy);
 
+		/* a self-managed-reg device must have a private regdom */
+		if (WARN_ON(!regdom && self_managed)) {
+			nlmsg_free(msg);
+			return -EINVAL;
+		}
+
 		if (regdom &&
 		    nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy)))
 			goto nla_put_failure;
@@ -5471,6 +5486,10 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb,
 	    nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy)))
 		goto nla_put_failure;
 
+	if (wiphy && wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED &&
+	    nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG))
+		goto nla_put_failure;
+
 	return genlmsg_end(msg, hdr);
 
 nla_put_failure:
@@ -11076,6 +11095,11 @@ static bool nl80211_reg_change_event_fill(struct sk_buff *msg,
 		if (wiphy &&
 		    nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx))
 			goto nla_put_failure;
+
+		if (wiphy &&
+		    wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED &&
+		    nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG))
+			goto nla_put_failure;
 	}
 
 	return true;
-- 
cgit v1.2.3-71-gd317


From 93a1e86ce10e4898f9ca9cd09d659a8a7780ee5e Mon Sep 17 00:00:00 2001
From: Jukka Rissanen <jukka.rissanen@linux.intel.com>
Date: Mon, 15 Dec 2014 13:25:39 +0200
Subject: nl80211: Stop scheduled scan if netlink client disappears

An attribute NL80211_ATTR_SOCKET_OWNER can be set by the scan initiator.
If present, the attribute will cause the scan to be stopped if the client
dies.

Signed-off-by: Jukka Rissanen <jukka.rissanen@linux.intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  3 +++
 include/uapi/linux/nl80211.h |  3 +++
 net/wireless/core.c          | 16 ++++++++++++++++
 net/wireless/core.h          |  2 ++
 net/wireless/nl80211.c       | 16 ++++++++++++++++
 5 files changed, 40 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 45d4d7292e53..bd672ea08c9a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1517,6 +1517,8 @@ struct cfg80211_match_set {
  *	are 0 in the mask should be randomised, bits that are 1 should
  *	be taken from the @mac_addr
  * @rcu_head: RCU callback used to free the struct
+ * @owner_nlportid: netlink portid of owner (if this should is a request
+ *	owned by a particular socket)
  */
 struct cfg80211_sched_scan_request {
 	struct cfg80211_ssid *ssids;
@@ -1539,6 +1541,7 @@ struct cfg80211_sched_scan_request {
 	struct net_device *dev;
 	unsigned long scan_start;
 	struct rcu_head rcu_head;
+	u32 owner_nlportid;
 
 	/* keep last */
 	struct ieee80211_channel *channels[0];
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index b3ada0b3a276..c0383e983544 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1672,6 +1672,9 @@ enum nl80211_commands {
  * @NL80211_ATTR_SOCKET_OWNER: Flag attribute, if set during interface
  *	creation then the new interface will be owned by the netlink socket
  *	that created it and will be destroyed when the socket is closed.
+ *	If set during scheduled scan start then the new scan req will be
+ *	owned by the netlink socket that created it and the scheduled scan will
+ *	be stopped when the socket is closed.
  *
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 0743449405ca..456e4c38c279 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -321,6 +321,20 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work)
 	rtnl_unlock();
 }
 
+static void cfg80211_sched_scan_stop_wk(struct work_struct *work)
+{
+	struct cfg80211_registered_device *rdev;
+
+	rdev = container_of(work, struct cfg80211_registered_device,
+			   sched_scan_stop_wk);
+
+	rtnl_lock();
+
+	__cfg80211_stop_sched_scan(rdev, false);
+
+	rtnl_unlock();
+}
+
 /* exported functions */
 
 struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
@@ -407,6 +421,7 @@ use_default_name:
 	INIT_LIST_HEAD(&rdev->destroy_list);
 	spin_lock_init(&rdev->destroy_list_lock);
 	INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk);
+	INIT_WORK(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk);
 
 #ifdef CONFIG_CFG80211_DEFAULT_PS
 	rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
@@ -787,6 +802,7 @@ void wiphy_unregister(struct wiphy *wiphy)
 	flush_work(&rdev->event_work);
 	cancel_delayed_work_sync(&rdev->dfs_update_channels_wk);
 	flush_work(&rdev->destroy_work);
+	flush_work(&rdev->sched_scan_stop_wk);
 
 #ifdef CONFIG_PM
 	if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
diff --git a/net/wireless/core.h b/net/wireless/core.h
index e82030c32311..801cd49c5a0c 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -91,6 +91,8 @@ struct cfg80211_registered_device {
 	struct list_head destroy_list;
 	struct work_struct destroy_work;
 
+	struct work_struct sched_scan_stop_wk;
+
 	/* must be last because of the way we do wiphy_priv(),
 	 * and it should at least be aligned to NETDEV_ALIGN */
 	struct wiphy wiphy __aligned(NETDEV_ALIGN);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index bacdf22fa472..702920134b34 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6214,6 +6214,9 @@ static int nl80211_start_sched_scan(struct sk_buff *skb,
 	sched_scan_req->dev = dev;
 	sched_scan_req->wiphy = &rdev->wiphy;
 
+	if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
+		sched_scan_req->owner_nlportid = info->snd_portid;
+
 	rcu_assign_pointer(rdev->sched_scan_req, sched_scan_req);
 
 	nl80211_send_sched_scan(rdev, dev,
@@ -12618,6 +12621,13 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 
 	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
 		bool schedule_destroy_work = false;
+		bool schedule_scan_stop = false;
+		struct cfg80211_sched_scan_request *sched_scan_req =
+			rcu_dereference(rdev->sched_scan_req);
+
+		if (sched_scan_req && notify->portid &&
+		    sched_scan_req->owner_nlportid == notify->portid)
+			schedule_scan_stop = true;
 
 		list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) {
 			cfg80211_mlme_unregister_socket(wdev, notify->portid);
@@ -12648,6 +12658,12 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 				spin_unlock(&rdev->destroy_list_lock);
 				schedule_work(&rdev->destroy_work);
 			}
+		} else if (schedule_scan_stop) {
+			sched_scan_req->owner_nlportid = 0;
+
+			if (rdev->ops->sched_scan_stop &&
+			    rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+				schedule_work(&rdev->sched_scan_stop_wk);
 		}
 	}
 
-- 
cgit v1.2.3-71-gd317


From 79f241b41b5f581c6be66785ab8b9c8e3b1651c7 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Wed, 17 Dec 2014 18:00:44 +0200
Subject: nl80211: increase the max number of rules in regdomain

Some network cards (Intel) produce per-channel regdomains and rely on
cfg80211 to merge rules as needed. This hits the max rules limit and
fails.

Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c0383e983544..18cb0aa06351 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2112,7 +2112,7 @@ enum nl80211_attrs {
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_HT_RATES		77
-#define NL80211_MAX_SUPP_REG_RULES		32
+#define NL80211_MAX_SUPP_REG_RULES		64
 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY	0
 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY	16
 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY	24
-- 
cgit v1.2.3-71-gd317


From cbb77bc281d697764dbb035157b643a8cbc9df30 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@iki.fi>
Date: Sun, 7 Dec 2014 20:17:49 -0300
Subject: [media] DocBook: v4l: Rearrange raw bayer format definitions, remove
 bad comment

Rearrange 12-bit raw bayer format definitions after 10-bit ones. Also remove
the comment related to 16-bit bayer formats, it was simply wrong.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/videodev2.h | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index d279c1b75cf7..f0b94b8fcdfb 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -463,10 +463,6 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_SGBRG10 v4l2_fourcc('G', 'B', '1', '0') /* 10  GBGB.. RGRG.. */
 #define V4L2_PIX_FMT_SGRBG10 v4l2_fourcc('B', 'A', '1', '0') /* 10  GRGR.. BGBG.. */
 #define V4L2_PIX_FMT_SRGGB10 v4l2_fourcc('R', 'G', '1', '0') /* 10  RGRG.. GBGB.. */
-#define V4L2_PIX_FMT_SBGGR12 v4l2_fourcc('B', 'G', '1', '2') /* 12  BGBG.. GRGR.. */
-#define V4L2_PIX_FMT_SGBRG12 v4l2_fourcc('G', 'B', '1', '2') /* 12  GBGB.. RGRG.. */
-#define V4L2_PIX_FMT_SGRBG12 v4l2_fourcc('B', 'A', '1', '2') /* 12  GRGR.. BGBG.. */
-#define V4L2_PIX_FMT_SRGGB12 v4l2_fourcc('R', 'G', '1', '2') /* 12  RGRG.. GBGB.. */
 	/* 10bit raw bayer a-law compressed to 8 bits */
 #define V4L2_PIX_FMT_SBGGR10ALAW8 v4l2_fourcc('a', 'B', 'A', '8')
 #define V4L2_PIX_FMT_SGBRG10ALAW8 v4l2_fourcc('a', 'G', 'A', '8')
@@ -477,10 +473,10 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_SGBRG10DPCM8 v4l2_fourcc('b', 'G', 'A', '8')
 #define V4L2_PIX_FMT_SGRBG10DPCM8 v4l2_fourcc('B', 'D', '1', '0')
 #define V4L2_PIX_FMT_SRGGB10DPCM8 v4l2_fourcc('b', 'R', 'A', '8')
-	/*
-	 * 10bit raw bayer, expanded to 16 bits
-	 * xxxxrrrrrrrrrrxxxxgggggggggg xxxxggggggggggxxxxbbbbbbbbbb...
-	 */
+#define V4L2_PIX_FMT_SBGGR12 v4l2_fourcc('B', 'G', '1', '2') /* 12  BGBG.. GRGR.. */
+#define V4L2_PIX_FMT_SGBRG12 v4l2_fourcc('G', 'B', '1', '2') /* 12  GBGB.. RGRG.. */
+#define V4L2_PIX_FMT_SGRBG12 v4l2_fourcc('B', 'A', '1', '2') /* 12  GRGR.. BGBG.. */
+#define V4L2_PIX_FMT_SRGGB12 v4l2_fourcc('R', 'G', '1', '2') /* 12  RGRG.. GBGB.. */
 #define V4L2_PIX_FMT_SBGGR16 v4l2_fourcc('B', 'Y', 'R', '2') /* 16  BGBG.. GRGR.. */
 
 /* compressed formats */
-- 
cgit v1.2.3-71-gd317


From 4353e36ee84d936859eb6d65ecd9d3076edd11bc Mon Sep 17 00:00:00 2001
From: Aviv Greenberg <aviv.d.greenberg@intel.com>
Date: Wed, 3 Dec 2014 08:14:09 -0300
Subject: [media] v4l: Add packed Bayer raw10 pixel formats

These formats are just like 10-bit raw bayer formats that exist already, but
the pixels are not padded to byte boundaries. Instead, the eight high order
bits of four consecutive pixels are stored in four bytes, followed by a byte
of two low order bits of each of the four pixels.

Signed-off-by: Aviv Greenberg <aviv.d.greenberg@intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 .../DocBook/media/v4l/pixfmt-srggb10p.xml          | 99 ++++++++++++++++++++++
 Documentation/DocBook/media/v4l/pixfmt.xml         |  1 +
 include/uapi/linux/videodev2.h                     |  5 ++
 3 files changed, 105 insertions(+)
 create mode 100644 Documentation/DocBook/media/v4l/pixfmt-srggb10p.xml

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/pixfmt-srggb10p.xml b/Documentation/DocBook/media/v4l/pixfmt-srggb10p.xml
new file mode 100644
index 000000000000..30aa63581fe3
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-srggb10p.xml
@@ -0,0 +1,99 @@
+    <refentry id="pixfmt-srggb10p">
+      <refmeta>
+	<refentrytitle>V4L2_PIX_FMT_SRGGB10P ('pRAA'),
+	 V4L2_PIX_FMT_SGRBG10P ('pgAA'),
+	 V4L2_PIX_FMT_SGBRG10P ('pGAA'),
+	 V4L2_PIX_FMT_SBGGR10P ('pBAA'),
+	 </refentrytitle>
+	&manvol;
+      </refmeta>
+      <refnamediv>
+	<refname id="V4L2-PIX-FMT-SRGGB10P"><constant>V4L2_PIX_FMT_SRGGB10P</constant></refname>
+	<refname id="V4L2-PIX-FMT-SGRBG10P"><constant>V4L2_PIX_FMT_SGRBG10P</constant></refname>
+	<refname id="V4L2-PIX-FMT-SGBRG10P"><constant>V4L2_PIX_FMT_SGBRG10P</constant></refname>
+	<refname id="V4L2-PIX-FMT-SBGGR10P"><constant>V4L2_PIX_FMT_SBGGR10P</constant></refname>
+	<refpurpose>10-bit packed Bayer formats</refpurpose>
+      </refnamediv>
+      <refsect1>
+	<title>Description</title>
+
+	<para>These four pixel formats are packed raw sRGB /
+	Bayer formats with 10 bits per colour. Every four consecutive
+	colour components are packed into 5 bytes. Each of the first 4
+	bytes contain the 8 high order bits of the pixels, and the
+	fifth byte contains the two least significants bits of each
+	pixel, in the same order.</para>
+
+	<para>Each n-pixel row contains n/2 green samples and n/2 blue
+	or red samples, with alternating green-red and green-blue
+	rows. They are conventionally described as GRGR... BGBG...,
+	RGRG... GBGB..., etc. Below is an example of one of these
+	formats:</para>
+
+    <example>
+      <title><constant>V4L2_PIX_FMT_SBGGR10P</constant> 4 &times; 4
+      pixel image</title>
+
+      <formalpara>
+	<title>Byte Order.</title>
+	<para>Each cell is one byte.
+	  <informaltable frame="topbot" colsep="1" rowsep="1">
+	    <tgroup cols="5" align="center" border="1">
+	      <colspec align="left" colwidth="2*" />
+	      <tbody valign="top">
+		<row>
+		  <entry>start&nbsp;+&nbsp;0:</entry>
+		  <entry>B<subscript>00high</subscript></entry>
+		  <entry>G<subscript>01high</subscript></entry>
+		  <entry>B<subscript>02high</subscript></entry>
+		  <entry>G<subscript>03high</subscript></entry>
+		  <entry>B<subscript>00low</subscript>(bits 7--6)
+			 G<subscript>01low</subscript>(bits 5--4)
+			 B<subscript>02low</subscript>(bits 3--2)
+			 G<subscript>03low</subscript>(bits 1--0)
+		  </entry>
+		</row>
+		<row>
+		  <entry>start&nbsp;+&nbsp;5:</entry>
+		  <entry>G<subscript>10high</subscript></entry>
+		  <entry>R<subscript>11high</subscript></entry>
+		  <entry>G<subscript>12high</subscript></entry>
+		  <entry>R<subscript>13high</subscript></entry>
+		  <entry>G<subscript>10low</subscript>(bits 7--6)
+			 R<subscript>11low</subscript>(bits 5--4)
+			 G<subscript>12low</subscript>(bits 3--2)
+			 R<subscript>13low</subscript>(bits 1--0)
+		  </entry>
+		</row>
+		<row>
+		  <entry>start&nbsp;+&nbsp;10:</entry>
+		  <entry>B<subscript>20high</subscript></entry>
+		  <entry>G<subscript>21high</subscript></entry>
+		  <entry>B<subscript>22high</subscript></entry>
+		  <entry>G<subscript>23high</subscript></entry>
+		  <entry>B<subscript>20low</subscript>(bits 7--6)
+			 G<subscript>21low</subscript>(bits 5--4)
+			 B<subscript>22low</subscript>(bits 3--2)
+			 G<subscript>23low</subscript>(bits 1--0)
+		  </entry>
+		</row>
+		<row>
+		  <entry>start&nbsp;+&nbsp;15:</entry>
+		  <entry>G<subscript>30high</subscript></entry>
+		  <entry>R<subscript>31high</subscript></entry>
+		  <entry>G<subscript>32high</subscript></entry>
+		  <entry>R<subscript>33high</subscript></entry>
+		  <entry>G<subscript>30low</subscript>(bits 7--6)
+			 R<subscript>31low</subscript>(bits 5--4)
+			 G<subscript>32low</subscript>(bits 3--2)
+			 R<subscript>33low</subscript>(bits 1--0)
+		  </entry>
+		</row>
+	      </tbody>
+	    </tgroup>
+	  </informaltable>
+	</para>
+      </formalpara>
+    </example>
+  </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml
index d5eca4b8f74b..5e0352c50324 100644
--- a/Documentation/DocBook/media/v4l/pixfmt.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml
@@ -1405,6 +1405,7 @@ access the palette, this must be done with ioctls of the Linux framebuffer API.<
     &sub-srggb8;
     &sub-sbggr16;
     &sub-srggb10;
+    &sub-srggb10p;
     &sub-srggb10alaw8;
     &sub-srggb10dpcm8;
     &sub-srggb12;
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index f0b94b8fcdfb..fbdc3602ee27 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -463,6 +463,11 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_SGBRG10 v4l2_fourcc('G', 'B', '1', '0') /* 10  GBGB.. RGRG.. */
 #define V4L2_PIX_FMT_SGRBG10 v4l2_fourcc('B', 'A', '1', '0') /* 10  GRGR.. BGBG.. */
 #define V4L2_PIX_FMT_SRGGB10 v4l2_fourcc('R', 'G', '1', '0') /* 10  RGRG.. GBGB.. */
+	/* 10bit raw bayer packed, 5 bytes for every 4 pixels */
+#define V4L2_PIX_FMT_SBGGR10P v4l2_fourcc('p', 'B', 'A', 'A')
+#define V4L2_PIX_FMT_SGBRG10P v4l2_fourcc('p', 'G', 'A', 'A')
+#define V4L2_PIX_FMT_SGRBG10P v4l2_fourcc('p', 'g', 'A', 'A')
+#define V4L2_PIX_FMT_SRGGB10P v4l2_fourcc('p', 'R', 'A', 'A')
 	/* 10bit raw bayer a-law compressed to 8 bits */
 #define V4L2_PIX_FMT_SBGGR10ALAW8 v4l2_fourcc('a', 'B', 'A', '8')
 #define V4L2_PIX_FMT_SGBRG10ALAW8 v4l2_fourcc('a', 'G', 'A', '8')
-- 
cgit v1.2.3-71-gd317


From 417d2e507edcb5cf15eb344f86bd3dd28737f24e Mon Sep 17 00:00:00 2001
From: Benoit Parrot <bparrot@ti.com>
Date: Tue, 9 Dec 2014 16:43:44 -0300
Subject: [media] media: platform: add VPFE capture driver support for AM437X

This patch adds Video Processing Front End (VPFE) driver for
AM437X family of devices
Driver supports the following:
- V4L2 API using MMAP buffer access based on videobuf2 api
- Asynchronous sensor/decoder sub device registration
- DT support

Signed-off-by: Benoit Parrot <bparrot@ti.com>
Signed-off-by: Darren Etheridge <detheridge@ti.com>
Signed-off-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
[hans.verkuil@cisco.com: swapped two lines to fix vpfe_release() & add pinctrl include]
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 .../devicetree/bindings/media/ti-am437x-vpfe.txt   |   61 +
 MAINTAINERS                                        |    9 +
 drivers/media/platform/Kconfig                     |    1 +
 drivers/media/platform/Makefile                    |    2 +
 drivers/media/platform/am437x/Kconfig              |   11 +
 drivers/media/platform/am437x/Makefile             |    3 +
 drivers/media/platform/am437x/am437x-vpfe.c        | 2778 ++++++++++++++++++++
 drivers/media/platform/am437x/am437x-vpfe.h        |  283 ++
 drivers/media/platform/am437x/am437x-vpfe_regs.h   |  140 +
 include/uapi/linux/Kbuild                          |    1 +
 include/uapi/linux/am437x-vpfe.h                   |  122 +
 11 files changed, 3411 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/ti-am437x-vpfe.txt
 create mode 100644 drivers/media/platform/am437x/Kconfig
 create mode 100644 drivers/media/platform/am437x/Makefile
 create mode 100644 drivers/media/platform/am437x/am437x-vpfe.c
 create mode 100644 drivers/media/platform/am437x/am437x-vpfe.h
 create mode 100644 drivers/media/platform/am437x/am437x-vpfe_regs.h
 create mode 100644 include/uapi/linux/am437x-vpfe.h

(limited to 'include/uapi/linux')

diff --git a/Documentation/devicetree/bindings/media/ti-am437x-vpfe.txt b/Documentation/devicetree/bindings/media/ti-am437x-vpfe.txt
new file mode 100644
index 000000000000..3932e766553a
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/ti-am437x-vpfe.txt
@@ -0,0 +1,61 @@
+Texas Instruments AM437x CAMERA (VPFE)
+--------------------------------------
+
+The Video Processing Front End (VPFE) is a key component for image capture
+applications. The capture module provides the system interface and the
+processing capability to connect RAW image-sensor modules and video decoders
+to the AM437x device.
+
+Required properties:
+- compatible: must be "ti,am437x-vpfe"
+- reg: physical base address and length of the registers set for the device;
+- interrupts: should contain IRQ line for the VPFE;
+- ti,am437x-vpfe-interface: can be one of the following,
+	0 - Raw Bayer Interface.
+	1 - 8 Bit BT656 Interface.
+	2 - 10 Bit BT656 Interface.
+	3 - YCbCr 8 Bit Interface.
+	4 - YCbCr 16 Bit Interface.
+
+VPFE supports a single port node with parallel bus. It should contain one
+'port' child node with child 'endpoint' node. Please refer to the bindings
+defined in Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Example:
+	vpfe: vpfe@f0034000 {
+		compatible = "ti,am437x-vpfe";
+		reg = <0x48328000 0x2000>;
+		interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
+
+		pinctrl-names = "default", "sleep";
+		pinctrl-0 = <&vpfe_pins_default>;
+		pinctrl-1 = <&vpfe_pins_sleep>;
+
+		port {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			vpfe0_ep: endpoint {
+				remote-endpoint = <&ov2659_1>;
+				ti,am437x-vpfe-interface = <0>;
+				bus-width = <8>;
+				hsync-active = <0>;
+				vsync-active = <0>;
+			};
+		};
+	};
+
+	i2c1: i2c@4802a000 {
+
+		ov2659@30 {
+			compatible = "ti,ov2659";
+			reg = <0x30>;
+
+			port {
+				ov2659_1: endpoint {
+					remote-endpoint = <&vpfe0_ep>;
+					bus-width = <8>;
+					mclk-frequency = <12000000>;
+				};
+			};
+	};
diff --git a/MAINTAINERS b/MAINTAINERS
index dc2d91252d8b..4318f348dbd8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8745,6 +8745,15 @@ S:	Maintained
 F:	drivers/media/platform/davinci/
 F:	include/media/davinci/
 
+TI AM437X VPFE DRIVER
+M:	Lad, Prabhakar <prabhakar.csengg@gmail.com>
+L:	linux-media@vger.kernel.org
+W:	http://linuxtv.org/
+Q:	http://patchwork.linuxtv.org/project/linux-media/list/
+T:	git git://linuxtv.org/mhadli/v4l-dvb-davinci_devices.git
+S:	Maintained
+F:	drivers/media/platform/am437x/
+
 SIS 190 ETHERNET DRIVER
 M:	Francois Romieu <romieu@fr.zoreil.com>
 L:	netdev@vger.kernel.org
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index 480a174832a6..71e8873ceb94 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -118,6 +118,7 @@ config VIDEO_S3C_CAMIF
 source "drivers/media/platform/soc_camera/Kconfig"
 source "drivers/media/platform/exynos4-is/Kconfig"
 source "drivers/media/platform/s5p-tv/Kconfig"
+source "drivers/media/platform/am437x/Kconfig"
 
 endif # V4L_PLATFORM_DRIVERS
 
diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile
index a49936b8ce8a..3ec154742083 100644
--- a/drivers/media/platform/Makefile
+++ b/drivers/media/platform/Makefile
@@ -46,4 +46,6 @@ obj-$(CONFIG_VIDEO_RENESAS_VSP1)	+= vsp1/
 
 obj-y	+= omap/
 
+obj-$(CONFIG_VIDEO_AM437X_VPFE)		+= am437x/
+
 ccflags-y += -I$(srctree)/drivers/media/i2c
diff --git a/drivers/media/platform/am437x/Kconfig b/drivers/media/platform/am437x/Kconfig
new file mode 100644
index 000000000000..7b023a76e32e
--- /dev/null
+++ b/drivers/media/platform/am437x/Kconfig
@@ -0,0 +1,11 @@
+config VIDEO_AM437X_VPFE
+	tristate "TI AM437x VPFE video capture driver"
+	depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API
+	depends on SOC_AM43XX || COMPILE_TEST
+	select VIDEOBUF2_DMA_CONTIG
+	help
+	   Support for AM437x Video Processing Front End based Video
+	   Capture Driver.
+
+	   To compile this driver as a module, choose M here. The module
+	   will be called am437x-vpfe.
diff --git a/drivers/media/platform/am437x/Makefile b/drivers/media/platform/am437x/Makefile
new file mode 100644
index 000000000000..d11fff16f260
--- /dev/null
+++ b/drivers/media/platform/am437x/Makefile
@@ -0,0 +1,3 @@
+# Makefile for AM437x VPFE driver
+
+obj-$(CONFIG_VIDEO_AM437X_VPFE) += am437x-vpfe.o
diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c
new file mode 100644
index 000000000000..e01ac22d6244
--- /dev/null
+++ b/drivers/media/platform/am437x/am437x-vpfe.c
@@ -0,0 +1,2778 @@
+/*
+ * TI VPFE capture Driver
+ *
+ * Copyright (C) 2013 - 2014 Texas Instruments, Inc.
+ *
+ * Benoit Parrot <bparrot@ti.com>
+ * Lad, Prabhakar <prabhakar.csengg@gmail.com>
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/videodev2.h>
+
+#include <media/v4l2-common.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-of.h>
+
+#include "am437x-vpfe.h"
+
+#define VPFE_MODULE_NAME	"vpfe"
+#define VPFE_VERSION		"0.1.0"
+
+static int debug;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "Debug level 0-8");
+
+#define vpfe_dbg(level, dev, fmt, arg...)	\
+		v4l2_dbg(level, debug, &dev->v4l2_dev, fmt, ##arg)
+#define vpfe_info(dev, fmt, arg...)	\
+		v4l2_info(&dev->v4l2_dev, fmt, ##arg)
+#define vpfe_err(dev, fmt, arg...)	\
+		v4l2_err(&dev->v4l2_dev, fmt, ##arg)
+
+/* standard information */
+struct vpfe_standard {
+	v4l2_std_id std_id;
+	unsigned int width;
+	unsigned int height;
+	struct v4l2_fract pixelaspect;
+	int frame_format;
+};
+
+const struct vpfe_standard vpfe_standards[] = {
+	{V4L2_STD_525_60, 720, 480, {11, 10}, 1},
+	{V4L2_STD_625_50, 720, 576, {54, 59}, 1},
+};
+
+struct bus_format {
+	unsigned int width;
+	unsigned int bpp;
+};
+
+/*
+ * struct vpfe_fmt - VPFE media bus format information
+ * @name: V4L2 format description
+ * @code: V4L2 media bus format code
+ * @shifted: V4L2 media bus format code for the same pixel layout but
+ *	shifted to be 8 bits per pixel. =0 if format is not shiftable.
+ * @pixelformat: V4L2 pixel format FCC identifier
+ * @width: Bits per pixel (when transferred over a bus)
+ * @bpp: Bytes per pixel (when stored in memory)
+ * @supported: Indicates format supported by subdev
+ */
+struct vpfe_fmt {
+	const char *name;
+	u32 fourcc;
+	u32 code;
+	struct bus_format l;
+	struct bus_format s;
+	bool supported;
+	u32 index;
+};
+
+static struct vpfe_fmt formats[] = {
+	{
+		.name		= "YUV 4:2:2 packed, YCbYCr",
+		.fourcc		= V4L2_PIX_FMT_YUYV,
+		.code		= MEDIA_BUS_FMT_YUYV8_2X8,
+		.l.width	= 10,
+		.l.bpp		= 4,
+		.s.width	= 8,
+		.s.bpp		= 2,
+		.supported	= false,
+	}, {
+		.name		= "YUV 4:2:2 packed, CbYCrY",
+		.fourcc		= V4L2_PIX_FMT_UYVY,
+		.code		= MEDIA_BUS_FMT_UYVY8_2X8,
+		.l.width	= 10,
+		.l.bpp		= 4,
+		.s.width	= 8,
+		.s.bpp		= 2,
+		.supported	= false,
+	}, {
+		.name		= "YUV 4:2:2 packed, YCrYCb",
+		.fourcc		= V4L2_PIX_FMT_YVYU,
+		.code		= MEDIA_BUS_FMT_YVYU8_2X8,
+		.l.width	= 10,
+		.l.bpp		= 4,
+		.s.width	= 8,
+		.s.bpp		= 2,
+		.supported	= false,
+	}, {
+		.name		= "YUV 4:2:2 packed, CrYCbY",
+		.fourcc		= V4L2_PIX_FMT_VYUY,
+		.code		= MEDIA_BUS_FMT_VYUY8_2X8,
+		.l.width	= 10,
+		.l.bpp		= 4,
+		.s.width	= 8,
+		.s.bpp		= 2,
+		.supported	= false,
+	}, {
+		.name		= "RAW8 BGGR",
+		.fourcc		= V4L2_PIX_FMT_SBGGR8,
+		.code		= MEDIA_BUS_FMT_SBGGR8_1X8,
+		.l.width	= 10,
+		.l.bpp		= 2,
+		.s.width	= 8,
+		.s.bpp		= 1,
+		.supported	= false,
+	}, {
+		.name		= "RAW8 GBRG",
+		.fourcc		= V4L2_PIX_FMT_SGBRG8,
+		.code		= MEDIA_BUS_FMT_SGBRG8_1X8,
+		.l.width	= 10,
+		.l.bpp		= 2,
+		.s.width	= 8,
+		.s.bpp		= 1,
+		.supported	= false,
+	}, {
+		.name		= "RAW8 GRBG",
+		.fourcc		= V4L2_PIX_FMT_SGRBG8,
+		.code		= MEDIA_BUS_FMT_SGRBG8_1X8,
+		.l.width	= 10,
+		.l.bpp		= 2,
+		.s.width	= 8,
+		.s.bpp		= 1,
+		.supported	= false,
+	}, {
+		.name		= "RAW8 RGGB",
+		.fourcc		= V4L2_PIX_FMT_SRGGB8,
+		.code		= MEDIA_BUS_FMT_SRGGB8_1X8,
+		.l.width	= 10,
+		.l.bpp		= 2,
+		.s.width	= 8,
+		.s.bpp		= 1,
+		.supported	= false,
+	}, {
+		.name		= "RGB565 (LE)",
+		.fourcc		= V4L2_PIX_FMT_RGB565,
+		.code		= MEDIA_BUS_FMT_RGB565_2X8_LE,
+		.l.width	= 10,
+		.l.bpp		= 4,
+		.s.width	= 8,
+		.s.bpp		= 2,
+		.supported	= false,
+	}, {
+		.name		= "RGB565 (BE)",
+		.fourcc		= V4L2_PIX_FMT_RGB565X,
+		.code		= MEDIA_BUS_FMT_RGB565_2X8_BE,
+		.l.width	= 10,
+		.l.bpp		= 4,
+		.s.width	= 8,
+		.s.bpp		= 2,
+		.supported	= false,
+	},
+};
+
+static int
+__vpfe_get_format(struct vpfe_device *vpfe,
+		  struct v4l2_format *format, unsigned int *bpp);
+
+static struct vpfe_fmt *find_format_by_code(unsigned int code)
+{
+	struct vpfe_fmt *fmt;
+	unsigned int k;
+
+	for (k = 0; k < ARRAY_SIZE(formats); k++) {
+		fmt = &formats[k];
+		if (fmt->code == code)
+			return fmt;
+	}
+
+	return NULL;
+}
+
+static struct vpfe_fmt *find_format_by_pix(unsigned int pixelformat)
+{
+	struct vpfe_fmt *fmt;
+	unsigned int k;
+
+	for (k = 0; k < ARRAY_SIZE(formats); k++) {
+		fmt = &formats[k];
+		if (fmt->fourcc == pixelformat)
+			return fmt;
+	}
+
+	return NULL;
+}
+
+static void
+mbus_to_pix(struct vpfe_device *vpfe,
+	    const struct v4l2_mbus_framefmt *mbus,
+	    struct v4l2_pix_format *pix, unsigned int *bpp)
+{
+	struct vpfe_subdev_info *sdinfo = vpfe->current_subdev;
+	unsigned int bus_width = sdinfo->vpfe_param.bus_width;
+	struct vpfe_fmt *fmt;
+
+	fmt = find_format_by_code(mbus->code);
+	if (WARN_ON(fmt == NULL)) {
+		pr_err("Invalid mbus code set\n");
+		*bpp = 1;
+		return;
+	}
+
+	memset(pix, 0, sizeof(*pix));
+	v4l2_fill_pix_format(pix, mbus);
+	pix->pixelformat = fmt->fourcc;
+	*bpp = (bus_width == 10) ?  fmt->l.bpp : fmt->s.bpp;
+
+	/* pitch should be 32 bytes aligned */
+	pix->bytesperline = ALIGN(pix->width * *bpp, 32);
+	pix->sizeimage = pix->bytesperline * pix->height;
+}
+
+static void pix_to_mbus(struct vpfe_device *vpfe,
+			struct v4l2_pix_format *pix_fmt,
+			struct v4l2_mbus_framefmt *mbus_fmt)
+{
+	struct vpfe_fmt *fmt;
+
+	fmt = find_format_by_pix(pix_fmt->pixelformat);
+	if (!fmt) {
+		/* default to first entry */
+		vpfe_dbg(3, vpfe, "Invalid pixel code: %x, default used instead\n",
+			pix_fmt->pixelformat);
+		fmt = &formats[0];
+	}
+
+	memset(mbus_fmt, 0, sizeof(*mbus_fmt));
+	v4l2_fill_mbus_format(mbus_fmt, pix_fmt, fmt->code);
+}
+
+/*  Print Four-character-code (FOURCC) */
+static char *print_fourcc(u32 fmt)
+{
+	static char code[5];
+
+	code[0] = (unsigned char)(fmt & 0xff);
+	code[1] = (unsigned char)((fmt >> 8) & 0xff);
+	code[2] = (unsigned char)((fmt >> 16) & 0xff);
+	code[3] = (unsigned char)((fmt >> 24) & 0xff);
+	code[4] = '\0';
+
+	return code;
+}
+
+static int
+cmp_v4l2_format(const struct v4l2_format *lhs, const struct v4l2_format *rhs)
+{
+	return lhs->type == rhs->type &&
+		lhs->fmt.pix.width == rhs->fmt.pix.width &&
+		lhs->fmt.pix.height == rhs->fmt.pix.height &&
+		lhs->fmt.pix.pixelformat == rhs->fmt.pix.pixelformat &&
+		lhs->fmt.pix.field == rhs->fmt.pix.field &&
+		lhs->fmt.pix.colorspace == rhs->fmt.pix.colorspace &&
+		lhs->fmt.pix.ycbcr_enc == rhs->fmt.pix.ycbcr_enc &&
+		lhs->fmt.pix.quantization == rhs->fmt.pix.quantization;
+}
+
+static inline u32 vpfe_reg_read(struct vpfe_ccdc *ccdc, u32 offset)
+{
+	return ioread32(ccdc->ccdc_cfg.base_addr + offset);
+}
+
+static inline void vpfe_reg_write(struct vpfe_ccdc *ccdc, u32 val, u32 offset)
+{
+	iowrite32(val, ccdc->ccdc_cfg.base_addr + offset);
+}
+
+static inline struct vpfe_device *to_vpfe(struct vpfe_ccdc *ccdc)
+{
+	return container_of(ccdc, struct vpfe_device, ccdc);
+}
+
+static inline struct vpfe_cap_buffer *to_vpfe_buffer(struct vb2_buffer *vb)
+{
+	return container_of(vb, struct vpfe_cap_buffer, vb);
+}
+
+static inline void vpfe_pcr_enable(struct vpfe_ccdc *ccdc, int flag)
+{
+	vpfe_reg_write(ccdc, !!flag, VPFE_PCR);
+}
+
+static void vpfe_config_enable(struct vpfe_ccdc *ccdc, int flag)
+{
+	unsigned int cfg;
+
+	if (!flag) {
+		cfg = vpfe_reg_read(ccdc, VPFE_CONFIG);
+		cfg &= ~(VPFE_CONFIG_EN_ENABLE << VPFE_CONFIG_EN_SHIFT);
+	} else {
+		cfg = VPFE_CONFIG_EN_ENABLE << VPFE_CONFIG_EN_SHIFT;
+	}
+
+	vpfe_reg_write(ccdc, cfg, VPFE_CONFIG);
+}
+
+static void vpfe_ccdc_setwin(struct vpfe_ccdc *ccdc,
+			     struct v4l2_rect *image_win,
+			     enum ccdc_frmfmt frm_fmt,
+			     int bpp)
+{
+	int horz_start, horz_nr_pixels;
+	int vert_start, vert_nr_lines;
+	int val, mid_img;
+
+	/*
+	 * ppc - per pixel count. indicates how many pixels per cell
+	 * output to SDRAM. example, for ycbcr, it is one y and one c, so 2.
+	 * raw capture this is 1
+	 */
+	horz_start = image_win->left * bpp;
+	horz_nr_pixels = (image_win->width * bpp) - 1;
+	vpfe_reg_write(ccdc, (horz_start << VPFE_HORZ_INFO_SPH_SHIFT) |
+				horz_nr_pixels, VPFE_HORZ_INFO);
+
+	vert_start = image_win->top;
+
+	if (frm_fmt == CCDC_FRMFMT_INTERLACED) {
+		vert_nr_lines = (image_win->height >> 1) - 1;
+		vert_start >>= 1;
+		/* Since first line doesn't have any data */
+		vert_start += 1;
+		/* configure VDINT0 */
+		val = (vert_start << VPFE_VDINT_VDINT0_SHIFT);
+	} else {
+		/* Since first line doesn't have any data */
+		vert_start += 1;
+		vert_nr_lines = image_win->height - 1;
+		/*
+		 * configure VDINT0 and VDINT1. VDINT1 will be at half
+		 * of image height
+		 */
+		mid_img = vert_start + (image_win->height / 2);
+		val = (vert_start << VPFE_VDINT_VDINT0_SHIFT) |
+				(mid_img & VPFE_VDINT_VDINT1_MASK);
+	}
+
+	vpfe_reg_write(ccdc, val, VPFE_VDINT);
+
+	vpfe_reg_write(ccdc, (vert_start << VPFE_VERT_START_SLV0_SHIFT) |
+				vert_start, VPFE_VERT_START);
+	vpfe_reg_write(ccdc, vert_nr_lines, VPFE_VERT_LINES);
+}
+
+static void vpfe_reg_dump(struct vpfe_ccdc *ccdc)
+{
+	struct vpfe_device *vpfe = to_vpfe(ccdc);
+
+	vpfe_dbg(3, vpfe, "ALAW: 0x%x\n", vpfe_reg_read(ccdc, VPFE_ALAW));
+	vpfe_dbg(3, vpfe, "CLAMP: 0x%x\n", vpfe_reg_read(ccdc, VPFE_CLAMP));
+	vpfe_dbg(3, vpfe, "DCSUB: 0x%x\n", vpfe_reg_read(ccdc, VPFE_DCSUB));
+	vpfe_dbg(3, vpfe, "BLKCMP: 0x%x\n", vpfe_reg_read(ccdc, VPFE_BLKCMP));
+	vpfe_dbg(3, vpfe, "COLPTN: 0x%x\n", vpfe_reg_read(ccdc, VPFE_COLPTN));
+	vpfe_dbg(3, vpfe, "SDOFST: 0x%x\n", vpfe_reg_read(ccdc, VPFE_SDOFST));
+	vpfe_dbg(3, vpfe, "SYN_MODE: 0x%x\n",
+		 vpfe_reg_read(ccdc, VPFE_SYNMODE));
+	vpfe_dbg(3, vpfe, "HSIZE_OFF: 0x%x\n",
+		 vpfe_reg_read(ccdc, VPFE_HSIZE_OFF));
+	vpfe_dbg(3, vpfe, "HORZ_INFO: 0x%x\n",
+		 vpfe_reg_read(ccdc, VPFE_HORZ_INFO));
+	vpfe_dbg(3, vpfe, "VERT_START: 0x%x\n",
+		 vpfe_reg_read(ccdc, VPFE_VERT_START));
+	vpfe_dbg(3, vpfe, "VERT_LINES: 0x%x\n",
+		 vpfe_reg_read(ccdc, VPFE_VERT_LINES));
+}
+
+static int
+vpfe_ccdc_validate_param(struct vpfe_ccdc *ccdc,
+			 struct vpfe_ccdc_config_params_raw *ccdcparam)
+{
+	struct vpfe_device *vpfe = to_vpfe(ccdc);
+	u8 max_gamma, max_data;
+
+	if (!ccdcparam->alaw.enable)
+		return 0;
+
+	max_gamma = ccdc_gamma_width_max_bit(ccdcparam->alaw.gamma_wd);
+	max_data = ccdc_data_size_max_bit(ccdcparam->data_sz);
+
+	if (ccdcparam->alaw.gamma_wd > VPFE_CCDC_GAMMA_BITS_09_0 ||
+	    ccdcparam->alaw.gamma_wd < VPFE_CCDC_GAMMA_BITS_15_6 ||
+	    max_gamma > max_data) {
+		vpfe_dbg(1, vpfe, "Invalid data line select\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void
+vpfe_ccdc_update_raw_params(struct vpfe_ccdc *ccdc,
+			    struct vpfe_ccdc_config_params_raw *raw_params)
+{
+	struct vpfe_ccdc_config_params_raw *config_params =
+				&ccdc->ccdc_cfg.bayer.config_params;
+
+	config_params = raw_params;
+}
+
+/*
+ * vpfe_ccdc_restore_defaults()
+ * This function will write defaults to all CCDC registers
+ */
+static void vpfe_ccdc_restore_defaults(struct vpfe_ccdc *ccdc)
+{
+	int i;
+
+	/* Disable CCDC */
+	vpfe_pcr_enable(ccdc, 0);
+
+	/* set all registers to default value */
+	for (i = 4; i <= 0x94; i += 4)
+		vpfe_reg_write(ccdc, 0,  i);
+
+	vpfe_reg_write(ccdc, VPFE_NO_CULLING, VPFE_CULLING);
+	vpfe_reg_write(ccdc, VPFE_CCDC_GAMMA_BITS_11_2, VPFE_ALAW);
+}
+
+static int vpfe_ccdc_close(struct vpfe_ccdc *ccdc, struct device *dev)
+{
+	int dma_cntl, i, pcr;
+
+	/* If the CCDC module is still busy wait for it to be done */
+	for (i = 0; i < 10; i++) {
+		usleep_range(5000, 6000);
+		pcr = vpfe_reg_read(ccdc, VPFE_PCR);
+		if (!pcr)
+			break;
+
+		/* make sure it it is disabled */
+		vpfe_pcr_enable(ccdc, 0);
+	}
+
+	/* Disable CCDC by resetting all register to default POR values */
+	vpfe_ccdc_restore_defaults(ccdc);
+
+	/* if DMA_CNTL overflow bit is set. Clear it
+	 *  It appears to take a while for this to become quiescent ~20ms
+	 */
+	for (i = 0; i < 10; i++) {
+		dma_cntl = vpfe_reg_read(ccdc, VPFE_DMA_CNTL);
+		if (!(dma_cntl & VPFE_DMA_CNTL_OVERFLOW))
+			break;
+
+		/* Clear the overflow bit */
+		vpfe_reg_write(ccdc, dma_cntl, VPFE_DMA_CNTL);
+		usleep_range(5000, 6000);
+	}
+
+	/* Disabled the module at the CONFIG level */
+	vpfe_config_enable(ccdc, 0);
+
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int vpfe_ccdc_set_params(struct vpfe_ccdc *ccdc, void __user *params)
+{
+	struct vpfe_device *vpfe = container_of(ccdc, struct vpfe_device, ccdc);
+	struct vpfe_ccdc_config_params_raw raw_params;
+	int x;
+
+	if (ccdc->ccdc_cfg.if_type != VPFE_RAW_BAYER)
+		return -EINVAL;
+
+	x = copy_from_user(&raw_params, params, sizeof(raw_params));
+	if (x) {
+		vpfe_dbg(1, vpfe,
+			"vpfe_ccdc_set_params: error in copying ccdc params, %d\n",
+			x);
+		return -EFAULT;
+	}
+
+	if (!vpfe_ccdc_validate_param(ccdc, &raw_params)) {
+		vpfe_ccdc_update_raw_params(ccdc, &raw_params);
+			return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * vpfe_ccdc_config_ycbcr()
+ * This function will configure CCDC for YCbCr video capture
+ */
+static void vpfe_ccdc_config_ycbcr(struct vpfe_ccdc *ccdc)
+{
+	struct vpfe_device *vpfe = container_of(ccdc, struct vpfe_device, ccdc);
+	struct ccdc_params_ycbcr *params = &ccdc->ccdc_cfg.ycbcr;
+	u32 syn_mode;
+
+	vpfe_dbg(3, vpfe, "vpfe_ccdc_config_ycbcr:\n");
+	/*
+	 * first restore the CCDC registers to default values
+	 * This is important since we assume default values to be set in
+	 * a lot of registers that we didn't touch
+	 */
+	vpfe_ccdc_restore_defaults(ccdc);
+
+	/*
+	 * configure pixel format, frame format, configure video frame
+	 * format, enable output to SDRAM, enable internal timing generator
+	 * and 8bit pack mode
+	 */
+	syn_mode = (((params->pix_fmt & VPFE_SYN_MODE_INPMOD_MASK) <<
+		    VPFE_SYN_MODE_INPMOD_SHIFT) |
+		    ((params->frm_fmt & VPFE_SYN_FLDMODE_MASK) <<
+		    VPFE_SYN_FLDMODE_SHIFT) | VPFE_VDHDEN_ENABLE |
+		    VPFE_WEN_ENABLE | VPFE_DATA_PACK_ENABLE);
+
+	/* setup BT.656 sync mode */
+	if (params->bt656_enable) {
+		vpfe_reg_write(ccdc, VPFE_REC656IF_BT656_EN, VPFE_REC656IF);
+
+		/*
+		 * configure the FID, VD, HD pin polarity,
+		 * fld,hd pol positive, vd negative, 8-bit data
+		 */
+		syn_mode |= VPFE_SYN_MODE_VD_POL_NEGATIVE;
+		if (ccdc->ccdc_cfg.if_type == VPFE_BT656_10BIT)
+			syn_mode |= VPFE_SYN_MODE_10BITS;
+		else
+			syn_mode |= VPFE_SYN_MODE_8BITS;
+	} else {
+		/* y/c external sync mode */
+		syn_mode |= (((params->fid_pol & VPFE_FID_POL_MASK) <<
+			     VPFE_FID_POL_SHIFT) |
+			     ((params->hd_pol & VPFE_HD_POL_MASK) <<
+			     VPFE_HD_POL_SHIFT) |
+			     ((params->vd_pol & VPFE_VD_POL_MASK) <<
+			     VPFE_VD_POL_SHIFT));
+	}
+	vpfe_reg_write(ccdc, syn_mode, VPFE_SYNMODE);
+
+	/* configure video window */
+	vpfe_ccdc_setwin(ccdc, &params->win,
+			 params->frm_fmt, params->bytesperpixel);
+
+	/*
+	 * configure the order of y cb cr in SDRAM, and disable latch
+	 * internal register on vsync
+	 */
+	if (ccdc->ccdc_cfg.if_type == VPFE_BT656_10BIT)
+		vpfe_reg_write(ccdc,
+			       (params->pix_order << VPFE_CCDCFG_Y8POS_SHIFT) |
+			       VPFE_LATCH_ON_VSYNC_DISABLE |
+			       VPFE_CCDCFG_BW656_10BIT, VPFE_CCDCFG);
+	else
+		vpfe_reg_write(ccdc,
+			       (params->pix_order << VPFE_CCDCFG_Y8POS_SHIFT) |
+			       VPFE_LATCH_ON_VSYNC_DISABLE, VPFE_CCDCFG);
+
+	/*
+	 * configure the horizontal line offset. This should be a
+	 * on 32 byte boundary. So clear LSB 5 bits
+	 */
+	vpfe_reg_write(ccdc, params->bytesperline, VPFE_HSIZE_OFF);
+
+	/* configure the memory line offset */
+	if (params->buf_type == CCDC_BUFTYPE_FLD_INTERLEAVED)
+		/* two fields are interleaved in memory */
+		vpfe_reg_write(ccdc, VPFE_SDOFST_FIELD_INTERLEAVED,
+			       VPFE_SDOFST);
+}
+
+static void
+vpfe_ccdc_config_black_clamp(struct vpfe_ccdc *ccdc,
+			     struct vpfe_ccdc_black_clamp *bclamp)
+{
+	u32 val;
+
+	if (!bclamp->enable) {
+		/* configure DCSub */
+		val = (bclamp->dc_sub) & VPFE_BLK_DC_SUB_MASK;
+		vpfe_reg_write(ccdc, val, VPFE_DCSUB);
+		vpfe_reg_write(ccdc, VPFE_CLAMP_DEFAULT_VAL, VPFE_CLAMP);
+		return;
+	}
+	/*
+	 * Configure gain,  Start pixel, No of line to be avg,
+	 * No of pixel/line to be avg, & Enable the Black clamping
+	 */
+	val = ((bclamp->sgain & VPFE_BLK_SGAIN_MASK) |
+	       ((bclamp->start_pixel & VPFE_BLK_ST_PXL_MASK) <<
+		VPFE_BLK_ST_PXL_SHIFT) |
+	       ((bclamp->sample_ln & VPFE_BLK_SAMPLE_LINE_MASK) <<
+		VPFE_BLK_SAMPLE_LINE_SHIFT) |
+	       ((bclamp->sample_pixel & VPFE_BLK_SAMPLE_LN_MASK) <<
+		VPFE_BLK_SAMPLE_LN_SHIFT) | VPFE_BLK_CLAMP_ENABLE);
+	vpfe_reg_write(ccdc, val, VPFE_CLAMP);
+	/* If Black clamping is enable then make dcsub 0 */
+	vpfe_reg_write(ccdc, VPFE_DCSUB_DEFAULT_VAL, VPFE_DCSUB);
+}
+
+static void
+vpfe_ccdc_config_black_compense(struct vpfe_ccdc *ccdc,
+				struct vpfe_ccdc_black_compensation *bcomp)
+{
+	u32 val;
+
+	val = ((bcomp->b & VPFE_BLK_COMP_MASK) |
+	      ((bcomp->gb & VPFE_BLK_COMP_MASK) <<
+	       VPFE_BLK_COMP_GB_COMP_SHIFT) |
+	      ((bcomp->gr & VPFE_BLK_COMP_MASK) <<
+	       VPFE_BLK_COMP_GR_COMP_SHIFT) |
+	      ((bcomp->r & VPFE_BLK_COMP_MASK) <<
+	       VPFE_BLK_COMP_R_COMP_SHIFT));
+	vpfe_reg_write(ccdc, val, VPFE_BLKCMP);
+}
+
+/*
+ * vpfe_ccdc_config_raw()
+ * This function will configure CCDC for Raw capture mode
+ */
+static void vpfe_ccdc_config_raw(struct vpfe_ccdc *ccdc)
+{
+	struct vpfe_device *vpfe = container_of(ccdc, struct vpfe_device, ccdc);
+	struct vpfe_ccdc_config_params_raw *config_params =
+				&ccdc->ccdc_cfg.bayer.config_params;
+	struct ccdc_params_raw *params = &ccdc->ccdc_cfg.bayer;
+	unsigned int syn_mode;
+	unsigned int val;
+
+	vpfe_dbg(3, vpfe, "vpfe_ccdc_config_raw:\n");
+
+	/* Reset CCDC */
+	vpfe_ccdc_restore_defaults(ccdc);
+
+	/* Disable latching function registers on VSYNC  */
+	vpfe_reg_write(ccdc, VPFE_LATCH_ON_VSYNC_DISABLE, VPFE_CCDCFG);
+
+	/*
+	 * Configure the vertical sync polarity(SYN_MODE.VDPOL),
+	 * horizontal sync polarity (SYN_MODE.HDPOL), frame id polarity
+	 * (SYN_MODE.FLDPOL), frame format(progressive or interlace),
+	 * data size(SYNMODE.DATSIZ), &pixel format (Input mode), output
+	 * SDRAM, enable internal timing generator
+	 */
+	syn_mode = (((params->vd_pol & VPFE_VD_POL_MASK) << VPFE_VD_POL_SHIFT) |
+		   ((params->hd_pol & VPFE_HD_POL_MASK) << VPFE_HD_POL_SHIFT) |
+		   ((params->fid_pol & VPFE_FID_POL_MASK) <<
+		   VPFE_FID_POL_SHIFT) | ((params->frm_fmt &
+		   VPFE_FRM_FMT_MASK) << VPFE_FRM_FMT_SHIFT) |
+		   ((config_params->data_sz & VPFE_DATA_SZ_MASK) <<
+		   VPFE_DATA_SZ_SHIFT) | ((params->pix_fmt &
+		   VPFE_PIX_FMT_MASK) << VPFE_PIX_FMT_SHIFT) |
+		   VPFE_WEN_ENABLE | VPFE_VDHDEN_ENABLE);
+
+	/* Enable and configure aLaw register if needed */
+	if (config_params->alaw.enable) {
+		val = ((config_params->alaw.gamma_wd &
+		      VPFE_ALAW_GAMMA_WD_MASK) | VPFE_ALAW_ENABLE);
+		vpfe_reg_write(ccdc, val, VPFE_ALAW);
+		vpfe_dbg(3, vpfe, "\nWriting 0x%x to ALAW...\n", val);
+	}
+
+	/* Configure video window */
+	vpfe_ccdc_setwin(ccdc, &params->win, params->frm_fmt,
+			 params->bytesperpixel);
+
+	/* Configure Black Clamp */
+	vpfe_ccdc_config_black_clamp(ccdc, &config_params->blk_clamp);
+
+	/* Configure Black level compensation */
+	vpfe_ccdc_config_black_compense(ccdc, &config_params->blk_comp);
+
+	/* If data size is 8 bit then pack the data */
+	if ((config_params->data_sz == VPFE_CCDC_DATA_8BITS) ||
+	    config_params->alaw.enable)
+		syn_mode |= VPFE_DATA_PACK_ENABLE;
+
+	/*
+	 * Configure Horizontal offset register. If pack 8 is enabled then
+	 * 1 pixel will take 1 byte
+	 */
+	vpfe_reg_write(ccdc, params->bytesperline, VPFE_HSIZE_OFF);
+
+	vpfe_dbg(3, vpfe, "Writing %d (%x) to HSIZE_OFF\n",
+		params->bytesperline, params->bytesperline);
+
+	/* Set value for SDOFST */
+	if (params->frm_fmt == CCDC_FRMFMT_INTERLACED) {
+		if (params->image_invert_enable) {
+			/* For interlace inverse mode */
+			vpfe_reg_write(ccdc, VPFE_INTERLACED_IMAGE_INVERT,
+				   VPFE_SDOFST);
+		} else {
+			/* For interlace non inverse mode */
+			vpfe_reg_write(ccdc, VPFE_INTERLACED_NO_IMAGE_INVERT,
+				   VPFE_SDOFST);
+		}
+	} else if (params->frm_fmt == CCDC_FRMFMT_PROGRESSIVE) {
+		vpfe_reg_write(ccdc, VPFE_PROGRESSIVE_NO_IMAGE_INVERT,
+			   VPFE_SDOFST);
+	}
+
+	vpfe_reg_write(ccdc, syn_mode, VPFE_SYNMODE);
+
+	vpfe_reg_dump(ccdc);
+}
+
+static inline int
+vpfe_ccdc_set_buftype(struct vpfe_ccdc *ccdc,
+		      enum ccdc_buftype buf_type)
+{
+	if (ccdc->ccdc_cfg.if_type == VPFE_RAW_BAYER)
+		ccdc->ccdc_cfg.bayer.buf_type = buf_type;
+	else
+		ccdc->ccdc_cfg.ycbcr.buf_type = buf_type;
+
+	return 0;
+}
+
+static inline enum ccdc_buftype vpfe_ccdc_get_buftype(struct vpfe_ccdc *ccdc)
+{
+	if (ccdc->ccdc_cfg.if_type == VPFE_RAW_BAYER)
+		return ccdc->ccdc_cfg.bayer.buf_type;
+
+	return ccdc->ccdc_cfg.ycbcr.buf_type;
+}
+
+static int vpfe_ccdc_set_pixel_format(struct vpfe_ccdc *ccdc, u32 pixfmt)
+{
+	struct vpfe_device *vpfe = container_of(ccdc, struct vpfe_device, ccdc);
+
+	vpfe_dbg(1, vpfe, "vpfe_ccdc_set_pixel_format: if_type: %d, pixfmt:%s\n",
+		 ccdc->ccdc_cfg.if_type, print_fourcc(pixfmt));
+
+	if (ccdc->ccdc_cfg.if_type == VPFE_RAW_BAYER) {
+		ccdc->ccdc_cfg.bayer.pix_fmt = CCDC_PIXFMT_RAW;
+		/*
+		 * Need to clear it in case it was left on
+		 * after the last capture.
+		 */
+		ccdc->ccdc_cfg.bayer.config_params.alaw.enable = 0;
+
+		switch (pixfmt) {
+		case V4L2_PIX_FMT_SBGGR8:
+			ccdc->ccdc_cfg.bayer.config_params.alaw.enable = 1;
+			break;
+
+		case V4L2_PIX_FMT_YUYV:
+		case V4L2_PIX_FMT_UYVY:
+		case V4L2_PIX_FMT_YUV420:
+		case V4L2_PIX_FMT_NV12:
+		case V4L2_PIX_FMT_RGB565X:
+			break;
+
+		case V4L2_PIX_FMT_SBGGR16:
+		default:
+			return -EINVAL;
+		}
+	} else {
+		switch (pixfmt) {
+		case V4L2_PIX_FMT_YUYV:
+			ccdc->ccdc_cfg.ycbcr.pix_order = CCDC_PIXORDER_YCBYCR;
+			break;
+
+		case V4L2_PIX_FMT_UYVY:
+			ccdc->ccdc_cfg.ycbcr.pix_order = CCDC_PIXORDER_CBYCRY;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static u32 vpfe_ccdc_get_pixel_format(struct vpfe_ccdc *ccdc)
+{
+	u32 pixfmt;
+
+	if (ccdc->ccdc_cfg.if_type == VPFE_RAW_BAYER) {
+		pixfmt = V4L2_PIX_FMT_YUYV;
+	} else {
+		if (ccdc->ccdc_cfg.ycbcr.pix_order == CCDC_PIXORDER_YCBYCR)
+			pixfmt = V4L2_PIX_FMT_YUYV;
+		else
+			pixfmt = V4L2_PIX_FMT_UYVY;
+	}
+
+	return pixfmt;
+}
+
+static int
+vpfe_ccdc_set_image_window(struct vpfe_ccdc *ccdc,
+			   struct v4l2_rect *win, unsigned int bpp)
+{
+	if (ccdc->ccdc_cfg.if_type == VPFE_RAW_BAYER) {
+		ccdc->ccdc_cfg.bayer.win = *win;
+		ccdc->ccdc_cfg.bayer.bytesperpixel = bpp;
+		ccdc->ccdc_cfg.bayer.bytesperline = ALIGN(win->width * bpp, 32);
+	} else {
+		ccdc->ccdc_cfg.ycbcr.win = *win;
+		ccdc->ccdc_cfg.ycbcr.bytesperpixel = bpp;
+		ccdc->ccdc_cfg.ycbcr.bytesperline = ALIGN(win->width * bpp, 32);
+	}
+
+	return 0;
+}
+
+static inline void
+vpfe_ccdc_get_image_window(struct vpfe_ccdc *ccdc,
+			   struct v4l2_rect *win)
+{
+	if (ccdc->ccdc_cfg.if_type == VPFE_RAW_BAYER)
+		*win = ccdc->ccdc_cfg.bayer.win;
+	else
+		*win = ccdc->ccdc_cfg.ycbcr.win;
+}
+
+static inline unsigned int vpfe_ccdc_get_line_length(struct vpfe_ccdc *ccdc)
+{
+	if (ccdc->ccdc_cfg.if_type == VPFE_RAW_BAYER)
+		return ccdc->ccdc_cfg.bayer.bytesperline;
+
+	return ccdc->ccdc_cfg.ycbcr.bytesperline;
+}
+
+static inline int
+vpfe_ccdc_set_frame_format(struct vpfe_ccdc *ccdc,
+			   enum ccdc_frmfmt frm_fmt)
+{
+	if (ccdc->ccdc_cfg.if_type == VPFE_RAW_BAYER)
+		ccdc->ccdc_cfg.bayer.frm_fmt = frm_fmt;
+	else
+		ccdc->ccdc_cfg.ycbcr.frm_fmt = frm_fmt;
+
+	return 0;
+}
+
+static inline enum ccdc_frmfmt
+vpfe_ccdc_get_frame_format(struct vpfe_ccdc *ccdc)
+{
+	if (ccdc->ccdc_cfg.if_type == VPFE_RAW_BAYER)
+		return ccdc->ccdc_cfg.bayer.frm_fmt;
+
+	return ccdc->ccdc_cfg.ycbcr.frm_fmt;
+}
+
+static inline int vpfe_ccdc_getfid(struct vpfe_ccdc *ccdc)
+{
+	return (vpfe_reg_read(ccdc, VPFE_SYNMODE) >> 15) & 1;
+}
+
+static inline void vpfe_set_sdr_addr(struct vpfe_ccdc *ccdc, unsigned long addr)
+{
+	vpfe_reg_write(ccdc, addr & 0xffffffe0, VPFE_SDR_ADDR);
+}
+
+static int vpfe_ccdc_set_hw_if_params(struct vpfe_ccdc *ccdc,
+				      struct vpfe_hw_if_param *params)
+{
+	struct vpfe_device *vpfe = container_of(ccdc, struct vpfe_device, ccdc);
+
+	ccdc->ccdc_cfg.if_type = params->if_type;
+
+	switch (params->if_type) {
+	case VPFE_BT656:
+	case VPFE_YCBCR_SYNC_16:
+	case VPFE_YCBCR_SYNC_8:
+	case VPFE_BT656_10BIT:
+		ccdc->ccdc_cfg.ycbcr.vd_pol = params->vdpol;
+		ccdc->ccdc_cfg.ycbcr.hd_pol = params->hdpol;
+		break;
+
+	case VPFE_RAW_BAYER:
+		ccdc->ccdc_cfg.bayer.vd_pol = params->vdpol;
+		ccdc->ccdc_cfg.bayer.hd_pol = params->hdpol;
+		if (params->bus_width == 10)
+			ccdc->ccdc_cfg.bayer.config_params.data_sz =
+				VPFE_CCDC_DATA_10BITS;
+		else
+			ccdc->ccdc_cfg.bayer.config_params.data_sz =
+				VPFE_CCDC_DATA_8BITS;
+		vpfe_dbg(1, vpfe, "params.bus_width: %d\n",
+			params->bus_width);
+		vpfe_dbg(1, vpfe, "config_params.data_sz: %d\n",
+			ccdc->ccdc_cfg.bayer.config_params.data_sz);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void vpfe_clear_intr(struct vpfe_ccdc *ccdc, int vdint)
+{
+	unsigned int vpfe_int_status;
+
+	vpfe_int_status = vpfe_reg_read(ccdc, VPFE_IRQ_STS);
+
+	switch (vdint) {
+	/* VD0 interrupt */
+	case VPFE_VDINT0:
+		vpfe_int_status &= ~VPFE_VDINT0;
+		vpfe_int_status |= VPFE_VDINT0;
+		break;
+
+	/* VD1 interrupt */
+	case VPFE_VDINT1:
+		vpfe_int_status &= ~VPFE_VDINT1;
+		vpfe_int_status |= VPFE_VDINT1;
+		break;
+
+	/* VD2 interrupt */
+	case VPFE_VDINT2:
+		vpfe_int_status &= ~VPFE_VDINT2;
+		vpfe_int_status |= VPFE_VDINT2;
+		break;
+
+	/* Clear all interrupts */
+	default:
+		vpfe_int_status &= ~(VPFE_VDINT0 |
+				VPFE_VDINT1 |
+				VPFE_VDINT2);
+		vpfe_int_status |= (VPFE_VDINT0 |
+				VPFE_VDINT1 |
+				VPFE_VDINT2);
+		break;
+	}
+	/* Clear specific VDINT from the status register */
+	vpfe_reg_write(ccdc, vpfe_int_status, VPFE_IRQ_STS);
+
+	vpfe_int_status = vpfe_reg_read(ccdc, VPFE_IRQ_STS);
+
+	/* Acknowledge that we are done with all interrupts */
+	vpfe_reg_write(ccdc, 1, VPFE_IRQ_EOI);
+}
+
+static void vpfe_ccdc_config_defaults(struct vpfe_ccdc *ccdc)
+{
+	ccdc->ccdc_cfg.if_type = VPFE_RAW_BAYER;
+
+	ccdc->ccdc_cfg.ycbcr.pix_fmt = CCDC_PIXFMT_YCBCR_8BIT;
+	ccdc->ccdc_cfg.ycbcr.frm_fmt = CCDC_FRMFMT_INTERLACED;
+	ccdc->ccdc_cfg.ycbcr.fid_pol = VPFE_PINPOL_POSITIVE;
+	ccdc->ccdc_cfg.ycbcr.vd_pol = VPFE_PINPOL_POSITIVE;
+	ccdc->ccdc_cfg.ycbcr.hd_pol = VPFE_PINPOL_POSITIVE;
+	ccdc->ccdc_cfg.ycbcr.pix_order = CCDC_PIXORDER_CBYCRY;
+	ccdc->ccdc_cfg.ycbcr.buf_type = CCDC_BUFTYPE_FLD_INTERLEAVED;
+
+	ccdc->ccdc_cfg.ycbcr.win.left = 0;
+	ccdc->ccdc_cfg.ycbcr.win.top = 0;
+	ccdc->ccdc_cfg.ycbcr.win.width = 720;
+	ccdc->ccdc_cfg.ycbcr.win.height = 576;
+	ccdc->ccdc_cfg.ycbcr.bt656_enable = 1;
+
+	ccdc->ccdc_cfg.bayer.pix_fmt = CCDC_PIXFMT_RAW;
+	ccdc->ccdc_cfg.bayer.frm_fmt = CCDC_FRMFMT_PROGRESSIVE;
+	ccdc->ccdc_cfg.bayer.fid_pol = VPFE_PINPOL_POSITIVE;
+	ccdc->ccdc_cfg.bayer.vd_pol = VPFE_PINPOL_POSITIVE;
+	ccdc->ccdc_cfg.bayer.hd_pol = VPFE_PINPOL_POSITIVE;
+
+	ccdc->ccdc_cfg.bayer.win.left = 0;
+	ccdc->ccdc_cfg.bayer.win.top = 0;
+	ccdc->ccdc_cfg.bayer.win.width = 800;
+	ccdc->ccdc_cfg.bayer.win.height = 600;
+	ccdc->ccdc_cfg.bayer.config_params.data_sz = VPFE_CCDC_DATA_8BITS;
+	ccdc->ccdc_cfg.bayer.config_params.alaw.gamma_wd =
+						VPFE_CCDC_GAMMA_BITS_09_0;
+}
+
+/*
+ * vpfe_get_ccdc_image_format - Get image parameters based on CCDC settings
+ */
+static int vpfe_get_ccdc_image_format(struct vpfe_device *vpfe,
+				      struct v4l2_format *f)
+{
+	struct v4l2_rect image_win;
+	enum ccdc_buftype buf_type;
+	enum ccdc_frmfmt frm_fmt;
+
+	memset(f, 0, sizeof(*f));
+	f->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+	vpfe_ccdc_get_image_window(&vpfe->ccdc, &image_win);
+	f->fmt.pix.width = image_win.width;
+	f->fmt.pix.height = image_win.height;
+	f->fmt.pix.bytesperline = vpfe_ccdc_get_line_length(&vpfe->ccdc);
+	f->fmt.pix.sizeimage = f->fmt.pix.bytesperline *
+				f->fmt.pix.height;
+	buf_type = vpfe_ccdc_get_buftype(&vpfe->ccdc);
+	f->fmt.pix.pixelformat = vpfe_ccdc_get_pixel_format(&vpfe->ccdc);
+	frm_fmt = vpfe_ccdc_get_frame_format(&vpfe->ccdc);
+
+	if (frm_fmt == CCDC_FRMFMT_PROGRESSIVE) {
+		f->fmt.pix.field = V4L2_FIELD_NONE;
+	} else if (frm_fmt == CCDC_FRMFMT_INTERLACED) {
+		if (buf_type == CCDC_BUFTYPE_FLD_INTERLEAVED) {
+			f->fmt.pix.field = V4L2_FIELD_INTERLACED;
+		 } else if (buf_type == CCDC_BUFTYPE_FLD_SEPARATED) {
+			f->fmt.pix.field = V4L2_FIELD_SEQ_TB;
+		} else {
+			vpfe_err(vpfe, "Invalid buf_type\n");
+			return -EINVAL;
+		}
+	} else {
+		vpfe_err(vpfe, "Invalid frm_fmt\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int vpfe_config_ccdc_image_format(struct vpfe_device *vpfe)
+{
+	enum ccdc_frmfmt frm_fmt = CCDC_FRMFMT_INTERLACED;
+	int ret;
+
+	vpfe_dbg(2, vpfe, "vpfe_config_ccdc_image_format\n");
+
+	vpfe_dbg(1, vpfe, "pixelformat: %s\n",
+		print_fourcc(vpfe->fmt.fmt.pix.pixelformat));
+
+	if (vpfe_ccdc_set_pixel_format(&vpfe->ccdc,
+			vpfe->fmt.fmt.pix.pixelformat) < 0) {
+		vpfe_err(vpfe, "couldn't set pix format in ccdc\n");
+		return -EINVAL;
+	}
+
+	/* configure the image window */
+	vpfe_ccdc_set_image_window(&vpfe->ccdc, &vpfe->crop, vpfe->bpp);
+
+	switch (vpfe->fmt.fmt.pix.field) {
+	case V4L2_FIELD_INTERLACED:
+		/* do nothing, since it is default */
+		ret = vpfe_ccdc_set_buftype(
+				&vpfe->ccdc,
+				CCDC_BUFTYPE_FLD_INTERLEAVED);
+		break;
+
+	case V4L2_FIELD_NONE:
+		frm_fmt = CCDC_FRMFMT_PROGRESSIVE;
+		/* buffer type only applicable for interlaced scan */
+		break;
+
+	case V4L2_FIELD_SEQ_TB:
+		ret = vpfe_ccdc_set_buftype(
+				&vpfe->ccdc,
+				CCDC_BUFTYPE_FLD_SEPARATED);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (ret)
+		return ret;
+
+	return vpfe_ccdc_set_frame_format(&vpfe->ccdc, frm_fmt);
+}
+
+/*
+ * vpfe_config_image_format()
+ * For a given standard, this functions sets up the default
+ * pix format & crop values in the vpfe device and ccdc.  It first
+ * starts with defaults based values from the standard table.
+ * It then checks if sub device support g_mbus_fmt and then override the
+ * values based on that.Sets crop values to match with scan resolution
+ * starting at 0,0. It calls vpfe_config_ccdc_image_format() set the
+ * values in ccdc
+ */
+static int vpfe_config_image_format(struct vpfe_device *vpfe,
+				    v4l2_std_id std_id)
+{
+	struct v4l2_pix_format *pix = &vpfe->fmt.fmt.pix;
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(vpfe_standards); i++) {
+		if (vpfe_standards[i].std_id & std_id) {
+			vpfe->std_info.active_pixels =
+					vpfe_standards[i].width;
+			vpfe->std_info.active_lines =
+					vpfe_standards[i].height;
+			vpfe->std_info.frame_format =
+					vpfe_standards[i].frame_format;
+			vpfe->std_index = i;
+
+			break;
+		}
+	}
+
+	if (i ==  ARRAY_SIZE(vpfe_standards)) {
+		vpfe_err(vpfe, "standard not supported\n");
+		return -EINVAL;
+	}
+
+	vpfe->crop.top = vpfe->crop.left = 0;
+	vpfe->crop.width = vpfe->std_info.active_pixels;
+	vpfe->crop.height = vpfe->std_info.active_lines;
+	pix->width = vpfe->crop.width;
+	pix->height = vpfe->crop.height;
+	pix->pixelformat = V4L2_PIX_FMT_YUYV;
+
+	/* first field and frame format based on standard frame format */
+	if (vpfe->std_info.frame_format)
+		pix->field = V4L2_FIELD_INTERLACED;
+	else
+		pix->field = V4L2_FIELD_NONE;
+
+	ret = __vpfe_get_format(vpfe, &vpfe->fmt, &vpfe->bpp);
+	if (ret)
+		return ret;
+
+	/* Update the crop window based on found values */
+	vpfe->crop.width = pix->width;
+	vpfe->crop.height = pix->height;
+
+	return vpfe_config_ccdc_image_format(vpfe);
+}
+
+static int vpfe_initialize_device(struct vpfe_device *vpfe)
+{
+	struct vpfe_subdev_info *sdinfo;
+	int ret;
+
+	sdinfo = &vpfe->cfg->sub_devs[0];
+	sdinfo->sd = vpfe->sd[0];
+	vpfe->current_input = 0;
+	vpfe->std_index = 0;
+	/* Configure the default format information */
+	ret = vpfe_config_image_format(vpfe,
+				       vpfe_standards[vpfe->std_index].std_id);
+	if (ret)
+		return ret;
+
+	pm_runtime_get_sync(vpfe->pdev);
+
+	vpfe_config_enable(&vpfe->ccdc, 1);
+
+	vpfe_ccdc_restore_defaults(&vpfe->ccdc);
+
+	/* Clear all VPFE interrupts */
+	vpfe_clear_intr(&vpfe->ccdc, -1);
+
+	return ret;
+}
+
+/*
+ * vpfe_release : This function is based on the vb2_fop_release
+ * helper function.
+ * It has been augmented to handle module power management,
+ * by disabling/enabling h/w module fcntl clock when necessary.
+ */
+static int vpfe_release(struct file *file)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	int ret;
+
+	mutex_lock(&vpfe->lock);
+
+	if (v4l2_fh_is_singular_file(file))
+		vpfe_ccdc_close(&vpfe->ccdc, vpfe->pdev);
+	ret = _vb2_fop_release(file, NULL);
+
+	mutex_unlock(&vpfe->lock);
+
+	return ret;
+}
+
+/*
+ * vpfe_open : This function is based on the v4l2_fh_open helper function.
+ * It has been augmented to handle module power management,
+ * by disabling/enabling h/w module fcntl clock when necessary.
+ */
+static int vpfe_open(struct file *file)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	int ret;
+
+	mutex_lock(&vpfe->lock);
+
+	ret = v4l2_fh_open(file);
+	if (ret) {
+		vpfe_err(vpfe, "v4l2_fh_open failed\n");
+		goto unlock;
+	}
+
+	if (!v4l2_fh_is_singular_file(file))
+		goto unlock;
+
+	if (vpfe_initialize_device(vpfe)) {
+		v4l2_fh_release(file);
+		ret = -ENODEV;
+	}
+
+unlock:
+	mutex_unlock(&vpfe->lock);
+	return ret;
+}
+
+/**
+ * vpfe_schedule_next_buffer: set next buffer address for capture
+ * @vpfe : ptr to vpfe device
+ *
+ * This function will get next buffer from the dma queue and
+ * set the buffer address in the vpfe register for capture.
+ * the buffer is marked active
+ *
+ * Assumes caller is holding vpfe->dma_queue_lock already
+ */
+static inline void vpfe_schedule_next_buffer(struct vpfe_device *vpfe)
+{
+	vpfe->next_frm = list_entry(vpfe->dma_queue.next,
+				    struct vpfe_cap_buffer, list);
+	list_del(&vpfe->next_frm->list);
+
+	vpfe_set_sdr_addr(&vpfe->ccdc,
+		       vb2_dma_contig_plane_dma_addr(&vpfe->next_frm->vb, 0));
+}
+
+static inline void vpfe_schedule_bottom_field(struct vpfe_device *vpfe)
+{
+	unsigned long addr;
+
+	addr = vb2_dma_contig_plane_dma_addr(&vpfe->next_frm->vb, 0) +
+					vpfe->field_off;
+
+	vpfe_set_sdr_addr(&vpfe->ccdc, addr);
+}
+
+/*
+ * vpfe_process_buffer_complete: process a completed buffer
+ * @vpfe : ptr to vpfe device
+ *
+ * This function time stamp the buffer and mark it as DONE. It also
+ * wake up any process waiting on the QUEUE and set the next buffer
+ * as current
+ */
+static inline void vpfe_process_buffer_complete(struct vpfe_device *vpfe)
+{
+	v4l2_get_timestamp(&vpfe->cur_frm->vb.v4l2_buf.timestamp);
+	vpfe->cur_frm->vb.v4l2_buf.field = vpfe->fmt.fmt.pix.field;
+	vpfe->cur_frm->vb.v4l2_buf.sequence = vpfe->sequence++;
+	vb2_buffer_done(&vpfe->cur_frm->vb, VB2_BUF_STATE_DONE);
+	vpfe->cur_frm = vpfe->next_frm;
+}
+
+/*
+ * vpfe_isr : ISR handler for vpfe capture (VINT0)
+ * @irq: irq number
+ * @dev_id: dev_id ptr
+ *
+ * It changes status of the captured buffer, takes next buffer from the queue
+ * and sets its address in VPFE registers
+ */
+static irqreturn_t vpfe_isr(int irq, void *dev)
+{
+	struct vpfe_device *vpfe = (struct vpfe_device *)dev;
+	enum v4l2_field field;
+	int intr_status;
+	int fid;
+
+	intr_status = vpfe_reg_read(&vpfe->ccdc, VPFE_IRQ_STS);
+
+	if (intr_status & VPFE_VDINT0) {
+		field = vpfe->fmt.fmt.pix.field;
+
+		if (field == V4L2_FIELD_NONE) {
+			/* handle progressive frame capture */
+			if (vpfe->cur_frm != vpfe->next_frm)
+				vpfe_process_buffer_complete(vpfe);
+			goto next_intr;
+		}
+
+		/* interlaced or TB capture check which field
+		   we are in hardware */
+		fid = vpfe_ccdc_getfid(&vpfe->ccdc);
+
+		/* switch the software maintained field id */
+		vpfe->field ^= 1;
+		if (fid == vpfe->field) {
+			/* we are in-sync here,continue */
+			if (fid == 0) {
+				/*
+				 * One frame is just being captured. If the
+				 * next frame is available, release the
+				 * current frame and move on
+				 */
+				if (vpfe->cur_frm != vpfe->next_frm)
+					vpfe_process_buffer_complete(vpfe);
+				/*
+				 * based on whether the two fields are stored
+				 * interleave or separately in memory,
+				 * reconfigure the CCDC memory address
+				 */
+				if (field == V4L2_FIELD_SEQ_TB)
+					vpfe_schedule_bottom_field(vpfe);
+
+				goto next_intr;
+			}
+			/*
+			 * if one field is just being captured configure
+			 * the next frame get the next frame from the empty
+			 * queue if no frame is available hold on to the
+			 * current buffer
+			 */
+			spin_lock(&vpfe->dma_queue_lock);
+			if (!list_empty(&vpfe->dma_queue) &&
+			    vpfe->cur_frm == vpfe->next_frm)
+				vpfe_schedule_next_buffer(vpfe);
+			spin_unlock(&vpfe->dma_queue_lock);
+		} else if (fid == 0) {
+			/*
+			 * out of sync. Recover from any hardware out-of-sync.
+			 * May loose one frame
+			 */
+			vpfe->field = fid;
+		}
+	}
+
+next_intr:
+	if (intr_status & VPFE_VDINT1) {
+		spin_lock(&vpfe->dma_queue_lock);
+		if (vpfe->fmt.fmt.pix.field == V4L2_FIELD_NONE &&
+		    !list_empty(&vpfe->dma_queue) &&
+		    vpfe->cur_frm == vpfe->next_frm)
+			vpfe_schedule_next_buffer(vpfe);
+		spin_unlock(&vpfe->dma_queue_lock);
+	}
+
+	vpfe_clear_intr(&vpfe->ccdc, intr_status);
+
+	return IRQ_HANDLED;
+}
+
+static inline void vpfe_detach_irq(struct vpfe_device *vpfe)
+{
+	unsigned int intr = VPFE_VDINT0;
+	enum ccdc_frmfmt frame_format;
+
+	frame_format = vpfe_ccdc_get_frame_format(&vpfe->ccdc);
+	if (frame_format == CCDC_FRMFMT_PROGRESSIVE)
+		intr |= VPFE_VDINT1;
+
+	vpfe_reg_write(&vpfe->ccdc, intr, VPFE_IRQ_EN_CLR);
+}
+
+static inline void vpfe_attach_irq(struct vpfe_device *vpfe)
+{
+	unsigned int intr = VPFE_VDINT0;
+	enum ccdc_frmfmt frame_format;
+
+	frame_format = vpfe_ccdc_get_frame_format(&vpfe->ccdc);
+	if (frame_format == CCDC_FRMFMT_PROGRESSIVE)
+		intr |= VPFE_VDINT1;
+
+	vpfe_reg_write(&vpfe->ccdc, intr, VPFE_IRQ_EN_SET);
+}
+
+static int vpfe_querycap(struct file *file, void  *priv,
+			 struct v4l2_capability *cap)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+
+	vpfe_dbg(2, vpfe, "vpfe_querycap\n");
+
+	strlcpy(cap->driver, VPFE_MODULE_NAME, sizeof(cap->driver));
+	strlcpy(cap->card, "TI AM437x VPFE", sizeof(cap->card));
+	snprintf(cap->bus_info, sizeof(cap->bus_info),
+			"platform:%s", vpfe->v4l2_dev.name);
+	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING |
+			    V4L2_CAP_READWRITE;
+	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
+
+	return 0;
+}
+
+/* get the format set at output pad of the adjacent subdev */
+static int __vpfe_get_format(struct vpfe_device *vpfe,
+			     struct v4l2_format *format, unsigned int *bpp)
+{
+	struct v4l2_mbus_framefmt mbus_fmt;
+	struct vpfe_subdev_info *sdinfo;
+	struct v4l2_subdev_format fmt;
+	int ret;
+
+	sdinfo = vpfe->current_subdev;
+	if (!sdinfo->sd)
+		return -EINVAL;
+
+	fmt.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+	fmt.pad = 0;
+
+	ret = v4l2_subdev_call(sdinfo->sd, pad, get_fmt, NULL, &fmt);
+	if (ret && ret != -ENOIOCTLCMD && ret != -ENODEV)
+		return ret;
+
+	if (!ret) {
+		v4l2_fill_pix_format(&format->fmt.pix, &fmt.format);
+		mbus_to_pix(vpfe, &fmt.format, &format->fmt.pix, bpp);
+	} else {
+		ret = v4l2_device_call_until_err(&vpfe->v4l2_dev,
+						 sdinfo->grp_id,
+						 video, g_mbus_fmt,
+						 &mbus_fmt);
+		if (ret && ret != -ENOIOCTLCMD && ret != -ENODEV)
+			return ret;
+		v4l2_fill_pix_format(&format->fmt.pix, &mbus_fmt);
+		mbus_to_pix(vpfe, &mbus_fmt, &format->fmt.pix, bpp);
+	}
+
+	format->type = vpfe->fmt.type;
+
+	vpfe_dbg(1, vpfe,
+		 "%s size %dx%d (%s) bytesperline = %d, size = %d, bpp = %d\n",
+		 __func__, format->fmt.pix.width, format->fmt.pix.height,
+		 print_fourcc(format->fmt.pix.pixelformat),
+		 format->fmt.pix.bytesperline, format->fmt.pix.sizeimage, *bpp);
+
+	return 0;
+}
+
+/* set the format at output pad of the adjacent subdev */
+static int __vpfe_set_format(struct vpfe_device *vpfe,
+			     struct v4l2_format *format, unsigned int *bpp)
+{
+	struct v4l2_mbus_framefmt mbus_fmt;
+	struct vpfe_subdev_info *sdinfo;
+	struct v4l2_subdev_format fmt;
+	int ret;
+
+	vpfe_dbg(2, vpfe, "__vpfe_set_format\n");
+
+	sdinfo = vpfe->current_subdev;
+	if (!sdinfo->sd)
+		return -EINVAL;
+
+	fmt.which = V4L2_SUBDEV_FORMAT_ACTIVE;
+	fmt.pad = 0;
+
+	pix_to_mbus(vpfe, &format->fmt.pix, &fmt.format);
+
+	ret = v4l2_subdev_call(sdinfo->sd, pad, set_fmt, NULL, &fmt);
+	if (ret && ret != -ENOIOCTLCMD && ret != -ENODEV)
+		return ret;
+
+	if (!ret) {
+		v4l2_fill_pix_format(&format->fmt.pix, &fmt.format);
+		mbus_to_pix(vpfe, &fmt.format, &format->fmt.pix, bpp);
+	} else {
+		ret = v4l2_device_call_until_err(&vpfe->v4l2_dev,
+						 sdinfo->grp_id,
+						 video, s_mbus_fmt,
+						 &mbus_fmt);
+		if (ret && ret != -ENOIOCTLCMD && ret != -ENODEV)
+			return ret;
+
+		v4l2_fill_pix_format(&format->fmt.pix, &mbus_fmt);
+		mbus_to_pix(vpfe, &mbus_fmt, &format->fmt.pix, bpp);
+	}
+
+	format->type = vpfe->fmt.type;
+
+	vpfe_dbg(1, vpfe,
+		 "%s size %dx%d (%s) bytesperline = %d, size = %d, bpp = %d\n",
+		 __func__,  format->fmt.pix.width, format->fmt.pix.height,
+		 print_fourcc(format->fmt.pix.pixelformat),
+		 format->fmt.pix.bytesperline, format->fmt.pix.sizeimage, *bpp);
+
+	return 0;
+}
+
+static int vpfe_g_fmt(struct file *file, void *priv,
+		      struct v4l2_format *fmt)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+
+	vpfe_dbg(2, vpfe, "vpfe_g_fmt\n");
+
+	*fmt = vpfe->fmt;
+
+	return 0;
+}
+
+static int vpfe_enum_fmt(struct file *file, void  *priv,
+			 struct v4l2_fmtdesc *f)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	struct vpfe_subdev_info *sdinfo;
+	struct vpfe_fmt *fmt = NULL;
+	unsigned int k;
+
+	vpfe_dbg(2, vpfe, "vpfe_enum_format index:%d\n",
+		f->index);
+
+	sdinfo = vpfe->current_subdev;
+	if (!sdinfo->sd)
+		return -EINVAL;
+
+	if (f->index > ARRAY_SIZE(formats))
+		return -EINVAL;
+
+	for (k = 0; k < ARRAY_SIZE(formats); k++) {
+		if (formats[k].index == f->index) {
+			fmt = &formats[k];
+			break;
+		}
+	}
+	if (!fmt)
+		return -EINVAL;
+
+	strncpy(f->description, fmt->name, sizeof(f->description) - 1);
+	f->pixelformat = fmt->fourcc;
+	f->type = vpfe->fmt.type;
+
+	vpfe_dbg(1, vpfe, "vpfe_enum_format: mbus index: %d code: %x pixelformat: %s [%s]\n",
+		f->index, fmt->code, print_fourcc(fmt->fourcc), fmt->name);
+
+	return 0;
+}
+
+static int vpfe_try_fmt(struct file *file, void *priv,
+			struct v4l2_format *fmt)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	unsigned int bpp;
+
+	vpfe_dbg(2, vpfe, "vpfe_try_fmt\n");
+
+	return __vpfe_get_format(vpfe, fmt, &bpp);
+}
+
+static int vpfe_s_fmt(struct file *file, void *priv,
+		      struct v4l2_format *fmt)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	struct v4l2_format format;
+	unsigned int bpp;
+	int ret;
+
+	vpfe_dbg(2, vpfe, "vpfe_s_fmt\n");
+
+	/* If streaming is started, return error */
+	if (vb2_is_busy(&vpfe->buffer_queue)) {
+		vpfe_err(vpfe, "%s device busy\n", __func__);
+		return -EBUSY;
+	}
+
+	ret = vpfe_try_fmt(file, priv, fmt);
+	if (ret)
+		return ret;
+
+
+	if (!cmp_v4l2_format(fmt, &format)) {
+		/* Sensor format is different from the requested format
+		 * so we need to change it
+		 */
+		ret = __vpfe_set_format(vpfe, fmt, &bpp);
+		if (ret)
+			return ret;
+	} else /* Just make sure all of the fields are consistent */
+		*fmt = format;
+
+	/* First detach any IRQ if currently attached */
+	vpfe_detach_irq(vpfe);
+	vpfe->fmt = *fmt;
+	vpfe->bpp = bpp;
+
+	/* Update the crop window based on found values */
+	vpfe->crop.width = fmt->fmt.pix.width;
+	vpfe->crop.height = fmt->fmt.pix.height;
+
+	/* set image capture parameters in the ccdc */
+	return vpfe_config_ccdc_image_format(vpfe);
+}
+
+static int vpfe_enum_size(struct file *file, void  *priv,
+			  struct v4l2_frmsizeenum *fsize)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	struct v4l2_subdev_frame_size_enum fse;
+	struct vpfe_subdev_info *sdinfo;
+	struct v4l2_mbus_framefmt mbus;
+	struct v4l2_pix_format pix;
+	struct vpfe_fmt *fmt;
+	int ret;
+
+	vpfe_dbg(2, vpfe, "vpfe_enum_size\n");
+
+	/* check for valid format */
+	fmt = find_format_by_pix(fsize->pixel_format);
+	if (!fmt) {
+		vpfe_dbg(3, vpfe, "Invalid pixel code: %x, default used instead\n",
+			fsize->pixel_format);
+		return -EINVAL;
+	}
+
+	memset(fsize->reserved, 0x0, sizeof(fsize->reserved));
+
+	sdinfo = vpfe->current_subdev;
+	if (!sdinfo->sd)
+		return -EINVAL;
+
+	memset(&pix, 0x0, sizeof(pix));
+	/* Construct pix from parameter and use default for the rest */
+	pix.pixelformat = fsize->pixel_format;
+	pix.width = 640;
+	pix.height = 480;
+	pix.colorspace = V4L2_COLORSPACE_SRGB;
+	pix.field = V4L2_FIELD_NONE;
+	pix_to_mbus(vpfe, &pix, &mbus);
+
+	memset(&fse, 0x0, sizeof(fse));
+	fse.index = fsize->index;
+	fse.pad = 0;
+	fse.code = mbus.code;
+	ret = v4l2_subdev_call(sdinfo->sd, pad, enum_frame_size, NULL, &fse);
+	if (ret)
+		return -EINVAL;
+
+	vpfe_dbg(1, vpfe, "vpfe_enum_size: index: %d code: %x W:[%d,%d] H:[%d,%d]\n",
+		fse.index, fse.code, fse.min_width, fse.max_width,
+		fse.min_height, fse.max_height);
+
+	fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE;
+	fsize->discrete.width = fse.max_width;
+	fsize->discrete.height = fse.max_height;
+
+	vpfe_dbg(1, vpfe, "vpfe_enum_size: index: %d pixformat: %s size: %dx%d\n",
+		fsize->index, print_fourcc(fsize->pixel_format),
+		fsize->discrete.width, fsize->discrete.height);
+
+	return 0;
+}
+
+/*
+ * vpfe_get_subdev_input_index - Get subdev index and subdev input index for a
+ * given app input index
+ */
+static int
+vpfe_get_subdev_input_index(struct vpfe_device *vpfe,
+			    int *subdev_index,
+			    int *subdev_input_index,
+			    int app_input_index)
+{
+	struct vpfe_config *cfg = vpfe->cfg;
+	struct vpfe_subdev_info *sdinfo;
+	int i, j = 0;
+
+	for (i = 0; i < ARRAY_SIZE(vpfe->cfg->asd); i++) {
+		sdinfo = &cfg->sub_devs[i];
+		if (app_input_index < (j + 1)) {
+			*subdev_index = i;
+			*subdev_input_index = app_input_index - j;
+			return 0;
+		}
+		j++;
+	}
+	return -EINVAL;
+}
+
+/*
+ * vpfe_get_app_input - Get app input index for a given subdev input index
+ * driver stores the input index of the current sub device and translate it
+ * when application request the current input
+ */
+static int vpfe_get_app_input_index(struct vpfe_device *vpfe,
+				    int *app_input_index)
+{
+	struct vpfe_config *cfg = vpfe->cfg;
+	struct vpfe_subdev_info *sdinfo;
+	int i, j = 0;
+
+	for (i = 0; i < ARRAY_SIZE(vpfe->cfg->asd); i++) {
+		sdinfo = &cfg->sub_devs[i];
+		if (!strcmp(sdinfo->name, vpfe->current_subdev->name)) {
+			if (vpfe->current_input >= 1)
+				return -1;
+			*app_input_index = j + vpfe->current_input;
+			return 0;
+		}
+		j++;
+	}
+	return -EINVAL;
+}
+
+static int vpfe_enum_input(struct file *file, void *priv,
+			   struct v4l2_input *inp)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	struct vpfe_subdev_info *sdinfo;
+	int subdev, index;
+
+	vpfe_dbg(2, vpfe, "vpfe_enum_input\n");
+
+	if (vpfe_get_subdev_input_index(vpfe, &subdev, &index,
+					inp->index) < 0) {
+		vpfe_dbg(1, vpfe,
+			"input information not found for the subdev\n");
+		return -EINVAL;
+	}
+	sdinfo = &vpfe->cfg->sub_devs[subdev];
+	*inp = sdinfo->inputs[index];
+
+	return 0;
+}
+
+static int vpfe_g_input(struct file *file, void *priv, unsigned int *index)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+
+	vpfe_dbg(2, vpfe, "vpfe_g_input\n");
+
+	return vpfe_get_app_input_index(vpfe, index);
+}
+
+/* Assumes caller is holding vpfe_dev->lock */
+static int vpfe_set_input(struct vpfe_device *vpfe, unsigned int index)
+{
+	int subdev_index = 0, inp_index = 0;
+	struct vpfe_subdev_info *sdinfo;
+	struct vpfe_route *route;
+	u32 input, output;
+	int ret;
+
+	vpfe_dbg(2, vpfe, "vpfe_set_input: index: %d\n", index);
+
+	/* If streaming is started, return error */
+	if (vb2_is_busy(&vpfe->buffer_queue)) {
+		vpfe_err(vpfe, "%s device busy\n", __func__);
+		return -EBUSY;
+	}
+	ret = vpfe_get_subdev_input_index(vpfe,
+					  &subdev_index,
+					  &inp_index,
+					  index);
+	if (ret < 0) {
+		vpfe_err(vpfe, "invalid input index: %d\n", index);
+		goto get_out;
+	}
+
+	sdinfo = &vpfe->cfg->sub_devs[subdev_index];
+	sdinfo->sd = vpfe->sd[subdev_index];
+	route = &sdinfo->routes[inp_index];
+	if (route && sdinfo->can_route) {
+		input = route->input;
+		output = route->output;
+		if (sdinfo->sd) {
+			ret = v4l2_subdev_call(sdinfo->sd, video,
+					s_routing, input, output, 0);
+			if (ret) {
+				vpfe_err(vpfe, "s_routing failed\n");
+				ret = -EINVAL;
+				goto get_out;
+			}
+		}
+
+	}
+
+	vpfe->current_subdev = sdinfo;
+	if (sdinfo->sd)
+		vpfe->v4l2_dev.ctrl_handler = sdinfo->sd->ctrl_handler;
+	vpfe->current_input = index;
+	vpfe->std_index = 0;
+
+	/* set the bus/interface parameter for the sub device in ccdc */
+	ret = vpfe_ccdc_set_hw_if_params(&vpfe->ccdc, &sdinfo->vpfe_param);
+	if (ret)
+		return ret;
+
+	/* set the default image parameters in the device */
+	return vpfe_config_image_format(vpfe,
+					vpfe_standards[vpfe->std_index].std_id);
+
+get_out:
+	return ret;
+}
+
+static int vpfe_s_input(struct file *file, void *priv, unsigned int index)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+
+	vpfe_dbg(2, vpfe,
+		"vpfe_s_input: index: %d\n", index);
+
+	return vpfe_set_input(vpfe, index);
+}
+
+static int vpfe_querystd(struct file *file, void *priv, v4l2_std_id *std_id)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	struct vpfe_subdev_info *sdinfo;
+
+	vpfe_dbg(2, vpfe, "vpfe_querystd\n");
+
+	sdinfo = vpfe->current_subdev;
+	if (!(sdinfo->inputs[0].capabilities & V4L2_IN_CAP_STD))
+		return -ENODATA;
+
+	/* Call querystd function of decoder device */
+	return v4l2_device_call_until_err(&vpfe->v4l2_dev, sdinfo->grp_id,
+					 video, querystd, std_id);
+}
+
+static int vpfe_s_std(struct file *file, void *priv, v4l2_std_id std_id)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	struct vpfe_subdev_info *sdinfo;
+	int ret;
+
+	vpfe_dbg(2, vpfe, "vpfe_s_std\n");
+
+	sdinfo = vpfe->current_subdev;
+	if (!(sdinfo->inputs[0].capabilities & V4L2_IN_CAP_STD))
+		return -ENODATA;
+
+	/* If streaming is started, return error */
+	if (vb2_is_busy(&vpfe->buffer_queue)) {
+		vpfe_err(vpfe, "%s device busy\n", __func__);
+		ret = -EBUSY;
+		return ret;
+	}
+
+	ret = v4l2_device_call_until_err(&vpfe->v4l2_dev, sdinfo->grp_id,
+					 video, s_std, std_id);
+	if (ret < 0) {
+		vpfe_err(vpfe, "Failed to set standard\n");
+		return ret;
+	}
+	ret = vpfe_config_image_format(vpfe, std_id);
+
+	return ret;
+}
+
+static int vpfe_g_std(struct file *file, void *priv, v4l2_std_id *std_id)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	struct vpfe_subdev_info *sdinfo;
+
+	vpfe_dbg(2, vpfe, "vpfe_g_std\n");
+
+	sdinfo = vpfe->current_subdev;
+	if (sdinfo->inputs[0].capabilities != V4L2_IN_CAP_STD)
+		return -ENODATA;
+
+	*std_id = vpfe_standards[vpfe->std_index].std_id;
+
+	return 0;
+}
+
+/*
+ * vpfe_calculate_offsets : This function calculates buffers offset
+ * for top and bottom field
+ */
+static void vpfe_calculate_offsets(struct vpfe_device *vpfe)
+{
+	struct v4l2_rect image_win;
+
+	vpfe_dbg(2, vpfe, "vpfe_calculate_offsets\n");
+
+	vpfe_ccdc_get_image_window(&vpfe->ccdc, &image_win);
+	vpfe->field_off = image_win.height * image_win.width;
+}
+
+/*
+ * vpfe_queue_setup - Callback function for buffer setup.
+ * @vq: vb2_queue ptr
+ * @fmt: v4l2 format
+ * @nbuffers: ptr to number of buffers requested by application
+ * @nplanes:: contains number of distinct video planes needed to hold a frame
+ * @sizes[]: contains the size (in bytes) of each plane.
+ * @alloc_ctxs: ptr to allocation context
+ *
+ * This callback function is called when reqbuf() is called to adjust
+ * the buffer count and buffer size
+ */
+static int vpfe_queue_setup(struct vb2_queue *vq,
+			    const struct v4l2_format *fmt,
+			    unsigned int *nbuffers, unsigned int *nplanes,
+			    unsigned int sizes[], void *alloc_ctxs[])
+{
+	struct vpfe_device *vpfe = vb2_get_drv_priv(vq);
+
+	if (fmt && fmt->fmt.pix.sizeimage < vpfe->fmt.fmt.pix.sizeimage)
+		return -EINVAL;
+
+	if (vq->num_buffers + *nbuffers < 3)
+		*nbuffers = 3 - vq->num_buffers;
+
+	*nplanes = 1;
+	sizes[0] = fmt ? fmt->fmt.pix.sizeimage : vpfe->fmt.fmt.pix.sizeimage;
+	alloc_ctxs[0] = vpfe->alloc_ctx;
+
+	vpfe_dbg(1, vpfe,
+		"nbuffers=%d, size=%u\n", *nbuffers, sizes[0]);
+
+	/* Calculate field offset */
+	vpfe_calculate_offsets(vpfe);
+
+	return 0;
+}
+
+/*
+ * vpfe_buffer_prepare :  callback function for buffer prepare
+ * @vb: ptr to vb2_buffer
+ *
+ * This is the callback function for buffer prepare when vb2_qbuf()
+ * function is called. The buffer is prepared and user space virtual address
+ * or user address is converted into  physical address
+ */
+static int vpfe_buffer_prepare(struct vb2_buffer *vb)
+{
+	struct vpfe_device *vpfe = vb2_get_drv_priv(vb->vb2_queue);
+
+	vb2_set_plane_payload(vb, 0, vpfe->fmt.fmt.pix.sizeimage);
+
+	if (vb2_get_plane_payload(vb, 0) > vb2_plane_size(vb, 0))
+		return -EINVAL;
+
+	vb->v4l2_buf.field = vpfe->fmt.fmt.pix.field;
+
+	return 0;
+}
+
+/*
+ * vpfe_buffer_queue : Callback function to add buffer to DMA queue
+ * @vb: ptr to vb2_buffer
+ */
+static void vpfe_buffer_queue(struct vb2_buffer *vb)
+{
+	struct vpfe_device *vpfe = vb2_get_drv_priv(vb->vb2_queue);
+	struct vpfe_cap_buffer *buf = to_vpfe_buffer(vb);
+	unsigned long flags = 0;
+
+	/* add the buffer to the DMA queue */
+	spin_lock_irqsave(&vpfe->dma_queue_lock, flags);
+	list_add_tail(&buf->list, &vpfe->dma_queue);
+	spin_unlock_irqrestore(&vpfe->dma_queue_lock, flags);
+}
+
+/*
+ * vpfe_start_streaming : Starts the DMA engine for streaming
+ * @vb: ptr to vb2_buffer
+ * @count: number of buffers
+ */
+static int vpfe_start_streaming(struct vb2_queue *vq, unsigned int count)
+{
+	struct vpfe_device *vpfe = vb2_get_drv_priv(vq);
+	struct vpfe_cap_buffer *buf, *tmp;
+	struct vpfe_subdev_info *sdinfo;
+	unsigned long flags;
+	unsigned long addr;
+	int ret;
+
+	spin_lock_irqsave(&vpfe->dma_queue_lock, flags);
+
+	vpfe->field = 0;
+	vpfe->sequence = 0;
+
+	sdinfo = vpfe->current_subdev;
+
+	vpfe_attach_irq(vpfe);
+
+	if (vpfe->ccdc.ccdc_cfg.if_type == VPFE_RAW_BAYER)
+		vpfe_ccdc_config_raw(&vpfe->ccdc);
+	else
+		vpfe_ccdc_config_ycbcr(&vpfe->ccdc);
+
+	/* Get the next frame from the buffer queue */
+	vpfe->next_frm = list_entry(vpfe->dma_queue.next,
+				    struct vpfe_cap_buffer, list);
+	vpfe->cur_frm = vpfe->next_frm;
+	/* Remove buffer from the buffer queue */
+	list_del(&vpfe->cur_frm->list);
+	spin_unlock_irqrestore(&vpfe->dma_queue_lock, flags);
+
+	addr = vb2_dma_contig_plane_dma_addr(&vpfe->cur_frm->vb, 0);
+
+	vpfe_set_sdr_addr(&vpfe->ccdc, (unsigned long)(addr));
+
+	vpfe_pcr_enable(&vpfe->ccdc, 1);
+
+	ret = v4l2_subdev_call(sdinfo->sd, video, s_stream, 1);
+	if (ret < 0) {
+		vpfe_err(vpfe, "Error in attaching interrupt handle\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	list_for_each_entry_safe(buf, tmp, &vpfe->dma_queue, list) {
+		list_del(&buf->list);
+		vb2_buffer_done(&buf->vb, VB2_BUF_STATE_QUEUED);
+	}
+	spin_unlock_irqrestore(&vpfe->dma_queue_lock, flags);
+
+	return ret;
+}
+
+/*
+ * vpfe_stop_streaming : Stop the DMA engine
+ * @vq: ptr to vb2_queue
+ *
+ * This callback stops the DMA engine and any remaining buffers
+ * in the DMA queue are released.
+ */
+static void vpfe_stop_streaming(struct vb2_queue *vq)
+{
+	struct vpfe_device *vpfe = vb2_get_drv_priv(vq);
+	struct vpfe_subdev_info *sdinfo;
+	unsigned long flags;
+	int ret;
+
+	vpfe_pcr_enable(&vpfe->ccdc, 0);
+
+	vpfe_detach_irq(vpfe);
+
+	sdinfo = vpfe->current_subdev;
+	ret = v4l2_subdev_call(sdinfo->sd, video, s_stream, 0);
+	if (ret && ret != -ENOIOCTLCMD && ret != -ENODEV)
+		vpfe_dbg(1, vpfe, "stream off failed in subdev\n");
+
+	/* release all active buffers */
+	spin_lock_irqsave(&vpfe->dma_queue_lock, flags);
+	if (vpfe->cur_frm == vpfe->next_frm) {
+		vb2_buffer_done(&vpfe->cur_frm->vb, VB2_BUF_STATE_ERROR);
+	} else {
+		if (vpfe->cur_frm != NULL)
+			vb2_buffer_done(&vpfe->cur_frm->vb,
+					VB2_BUF_STATE_ERROR);
+		if (vpfe->next_frm != NULL)
+			vb2_buffer_done(&vpfe->next_frm->vb,
+					VB2_BUF_STATE_ERROR);
+	}
+
+	while (!list_empty(&vpfe->dma_queue)) {
+		vpfe->next_frm = list_entry(vpfe->dma_queue.next,
+						struct vpfe_cap_buffer, list);
+		list_del(&vpfe->next_frm->list);
+		vb2_buffer_done(&vpfe->next_frm->vb, VB2_BUF_STATE_ERROR);
+	}
+	spin_unlock_irqrestore(&vpfe->dma_queue_lock, flags);
+}
+
+static int vpfe_cropcap(struct file *file, void *priv,
+			struct v4l2_cropcap *crop)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+
+	vpfe_dbg(2, vpfe, "vpfe_cropcap\n");
+
+	if (vpfe->std_index >= ARRAY_SIZE(vpfe_standards))
+		return -EINVAL;
+
+	memset(crop, 0, sizeof(struct v4l2_cropcap));
+
+	crop->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+	crop->defrect.width = vpfe_standards[vpfe->std_index].width;
+	crop->bounds.width = crop->defrect.width;
+	crop->defrect.height = vpfe_standards[vpfe->std_index].height;
+	crop->bounds.height = crop->defrect.height;
+	crop->pixelaspect = vpfe_standards[vpfe->std_index].pixelaspect;
+
+	return 0;
+}
+
+static int
+vpfe_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+
+	switch (s->target) {
+	case V4L2_SEL_TGT_CROP_BOUNDS:
+	case V4L2_SEL_TGT_CROP_DEFAULT:
+		s->r.left = s->r.top = 0;
+		s->r.width = vpfe->crop.width;
+		s->r.height = vpfe->crop.height;
+		break;
+
+	case V4L2_SEL_TGT_CROP:
+		s->r = vpfe->crop;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int enclosed_rectangle(struct v4l2_rect *a, struct v4l2_rect *b)
+{
+	if (a->left < b->left || a->top < b->top)
+		return 0;
+
+	if (a->left + a->width > b->left + b->width)
+		return 0;
+
+	if (a->top + a->height > b->top + b->height)
+		return 0;
+
+	return 1;
+}
+
+static int
+vpfe_s_selection(struct file *file, void *fh, struct v4l2_selection *s)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	struct v4l2_rect cr = vpfe->crop;
+	struct v4l2_rect r = s->r;
+
+	/* If streaming is started, return error */
+	if (vb2_is_busy(&vpfe->buffer_queue)) {
+		vpfe_err(vpfe, "%s device busy\n", __func__);
+		return -EBUSY;
+	}
+
+	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
+			s->target != V4L2_SEL_TGT_CROP)
+		return -EINVAL;
+
+	v4l_bound_align_image(&r.width, 0, cr.width, 0,
+			      &r.height, 0, cr.height, 0, 0);
+
+	r.left = clamp_t(unsigned int, r.left, 0, cr.width - r.width);
+	r.top  = clamp_t(unsigned int, r.top, 0, cr.height - r.height);
+
+	if (s->flags & V4L2_SEL_FLAG_LE && !enclosed_rectangle(&r, &s->r))
+		return -ERANGE;
+
+	if (s->flags & V4L2_SEL_FLAG_GE && !enclosed_rectangle(&s->r, &r))
+		return -ERANGE;
+
+	s->r = vpfe->crop = r;
+
+	vpfe_ccdc_set_image_window(&vpfe->ccdc, &r, vpfe->bpp);
+	vpfe->fmt.fmt.pix.width = r.width;
+	vpfe->fmt.fmt.pix.height = r.height;
+	vpfe->fmt.fmt.pix.bytesperline = vpfe_ccdc_get_line_length(&vpfe->ccdc);
+	vpfe->fmt.fmt.pix.sizeimage = vpfe->fmt.fmt.pix.bytesperline *
+						vpfe->fmt.fmt.pix.height;
+
+	vpfe_dbg(1, vpfe, "cropped (%d,%d)/%dx%d of %dx%d\n",
+		 r.left, r.top, r.width, r.height, cr.width, cr.height);
+
+	return 0;
+}
+
+static long vpfe_ioctl_default(struct file *file, void *priv,
+			       bool valid_prio, unsigned int cmd, void *param)
+{
+	struct vpfe_device *vpfe = video_drvdata(file);
+	int ret;
+
+	vpfe_dbg(2, vpfe, "vpfe_ioctl_default\n");
+
+	if (!valid_prio) {
+		vpfe_err(vpfe, "%s device busy\n", __func__);
+		return -EBUSY;
+	}
+
+	/* If streaming is started, return error */
+	if (vb2_is_busy(&vpfe->buffer_queue)) {
+		vpfe_err(vpfe, "%s device busy\n", __func__);
+		return -EBUSY;
+	}
+
+	switch (cmd) {
+	case VIDIOC_AM437X_CCDC_CFG:
+		ret = vpfe_ccdc_set_params(&vpfe->ccdc, param);
+		if (ret) {
+			vpfe_dbg(2, vpfe,
+				"Error setting parameters in CCDC\n");
+			return ret;
+		}
+		ret = vpfe_get_ccdc_image_format(vpfe,
+						 &vpfe->fmt);
+		if (ret < 0) {
+			vpfe_dbg(2, vpfe,
+				"Invalid image format at CCDC\n");
+			return ret;
+		}
+		break;
+
+	default:
+		ret = -ENOTTY;
+		break;
+	}
+
+	return ret;
+}
+
+static const struct vb2_ops vpfe_video_qops = {
+	.wait_prepare		= vb2_ops_wait_prepare,
+	.wait_finish		= vb2_ops_wait_finish,
+	.queue_setup		= vpfe_queue_setup,
+	.buf_prepare		= vpfe_buffer_prepare,
+	.buf_queue		= vpfe_buffer_queue,
+	.start_streaming	= vpfe_start_streaming,
+	.stop_streaming		= vpfe_stop_streaming,
+};
+
+/* vpfe capture driver file operations */
+static const struct v4l2_file_operations vpfe_fops = {
+	.owner		= THIS_MODULE,
+	.open		= vpfe_open,
+	.release	= vpfe_release,
+	.read		= vb2_fop_read,
+	.poll		= vb2_fop_poll,
+	.unlocked_ioctl	= video_ioctl2,
+	.mmap		= vb2_fop_mmap,
+};
+
+/* vpfe capture ioctl operations */
+static const struct v4l2_ioctl_ops vpfe_ioctl_ops = {
+	.vidioc_querycap		= vpfe_querycap,
+	.vidioc_enum_fmt_vid_cap	= vpfe_enum_fmt,
+	.vidioc_g_fmt_vid_cap		= vpfe_g_fmt,
+	.vidioc_s_fmt_vid_cap		= vpfe_s_fmt,
+	.vidioc_try_fmt_vid_cap		= vpfe_try_fmt,
+
+	.vidioc_enum_framesizes		= vpfe_enum_size,
+
+	.vidioc_enum_input		= vpfe_enum_input,
+	.vidioc_g_input			= vpfe_g_input,
+	.vidioc_s_input			= vpfe_s_input,
+
+	.vidioc_querystd		= vpfe_querystd,
+	.vidioc_s_std			= vpfe_s_std,
+	.vidioc_g_std			= vpfe_g_std,
+
+	.vidioc_reqbufs			= vb2_ioctl_reqbufs,
+	.vidioc_create_bufs		= vb2_ioctl_create_bufs,
+	.vidioc_prepare_buf		= vb2_ioctl_prepare_buf,
+	.vidioc_querybuf		= vb2_ioctl_querybuf,
+	.vidioc_qbuf			= vb2_ioctl_qbuf,
+	.vidioc_dqbuf			= vb2_ioctl_dqbuf,
+	.vidioc_expbuf			= vb2_ioctl_expbuf,
+	.vidioc_streamon		= vb2_ioctl_streamon,
+	.vidioc_streamoff		= vb2_ioctl_streamoff,
+
+	.vidioc_log_status		= v4l2_ctrl_log_status,
+	.vidioc_subscribe_event		= v4l2_ctrl_subscribe_event,
+	.vidioc_unsubscribe_event	= v4l2_event_unsubscribe,
+
+	.vidioc_cropcap			= vpfe_cropcap,
+	.vidioc_g_selection		= vpfe_g_selection,
+	.vidioc_s_selection		= vpfe_s_selection,
+
+	.vidioc_default			= vpfe_ioctl_default,
+};
+
+static int
+vpfe_async_bound(struct v4l2_async_notifier *notifier,
+		 struct v4l2_subdev *subdev,
+		 struct v4l2_async_subdev *asd)
+{
+	struct vpfe_device *vpfe = container_of(notifier->v4l2_dev,
+					       struct vpfe_device, v4l2_dev);
+	struct v4l2_subdev_mbus_code_enum mbus_code;
+	struct vpfe_subdev_info *sdinfo;
+	bool found = false;
+	int i, j;
+
+	vpfe_dbg(1, vpfe, "vpfe_async_bound\n");
+
+	for (i = 0; i < ARRAY_SIZE(vpfe->cfg->asd); i++) {
+		sdinfo = &vpfe->cfg->sub_devs[i];
+
+		if (!strcmp(sdinfo->name, subdev->name)) {
+			vpfe->sd[i] = subdev;
+			vpfe_info(vpfe,
+				 "v4l2 sub device %s registered\n",
+				 subdev->name);
+			vpfe->sd[i]->grp_id =
+					sdinfo->grp_id;
+			/* update tvnorms from the sub devices */
+			for (j = 0; j < 1; j++)
+				vpfe->video_dev->tvnorms |=
+					sdinfo->inputs[j].std;
+
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		vpfe_info(vpfe, "sub device (%s) not matched\n", subdev->name);
+		return -EINVAL;
+	}
+
+	/* setup the supported formats & indexes */
+	for (j = 0, i = 0; ; ++j) {
+		struct vpfe_fmt *fmt;
+		int ret;
+
+		memset(&mbus_code, 0, sizeof(mbus_code));
+		mbus_code.index = j;
+		ret = v4l2_subdev_call(subdev, pad, enum_mbus_code,
+			       NULL, &mbus_code);
+		if (ret)
+			break;
+
+		fmt = find_format_by_code(mbus_code.code);
+		if (!fmt)
+			continue;
+
+		fmt->supported = true;
+		fmt->index = i++;
+	}
+
+	return 0;
+}
+
+static int vpfe_probe_complete(struct vpfe_device *vpfe)
+{
+	struct video_device *vdev;
+	struct vb2_queue *q;
+	int err;
+
+	spin_lock_init(&vpfe->dma_queue_lock);
+	mutex_init(&vpfe->lock);
+
+	vpfe->fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+	/* set first sub device as current one */
+	vpfe->current_subdev = &vpfe->cfg->sub_devs[0];
+	vpfe->v4l2_dev.ctrl_handler = vpfe->sd[0]->ctrl_handler;
+
+	err = vpfe_set_input(vpfe, 0);
+	if (err)
+		goto probe_out;
+
+	/* Initialize videobuf2 queue as per the buffer type */
+	vpfe->alloc_ctx = vb2_dma_contig_init_ctx(vpfe->pdev);
+	if (IS_ERR(vpfe->alloc_ctx)) {
+		vpfe_err(vpfe, "Failed to get the context\n");
+		err = PTR_ERR(vpfe->alloc_ctx);
+		goto probe_out;
+	}
+
+	q = &vpfe->buffer_queue;
+	q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+	q->io_modes = VB2_MMAP | VB2_DMABUF | VB2_READ;
+	q->drv_priv = vpfe;
+	q->ops = &vpfe_video_qops;
+	q->mem_ops = &vb2_dma_contig_memops;
+	q->buf_struct_size = sizeof(struct vpfe_cap_buffer);
+	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
+	q->lock = &vpfe->lock;
+	q->min_buffers_needed = 1;
+
+	err = vb2_queue_init(q);
+	if (err) {
+		vpfe_err(vpfe, "vb2_queue_init() failed\n");
+		vb2_dma_contig_cleanup_ctx(vpfe->alloc_ctx);
+		goto probe_out;
+	}
+
+	INIT_LIST_HEAD(&vpfe->dma_queue);
+
+	vdev = vpfe->video_dev;
+	strlcpy(vdev->name, VPFE_MODULE_NAME, sizeof(vdev->name));
+	vdev->release = video_device_release;
+	vdev->fops = &vpfe_fops;
+	vdev->ioctl_ops = &vpfe_ioctl_ops;
+	vdev->v4l2_dev = &vpfe->v4l2_dev;
+	vdev->vfl_dir = VFL_DIR_RX;
+	vdev->queue = q;
+	vdev->lock = &vpfe->lock;
+	video_set_drvdata(vdev, vpfe);
+	err = video_register_device(vpfe->video_dev, VFL_TYPE_GRABBER, -1);
+	if (err) {
+		vpfe_err(vpfe,
+			"Unable to register video device.\n");
+		goto probe_out;
+	}
+
+	return 0;
+
+probe_out:
+	v4l2_device_unregister(&vpfe->v4l2_dev);
+	return err;
+}
+
+static int vpfe_async_complete(struct v4l2_async_notifier *notifier)
+{
+	struct vpfe_device *vpfe = container_of(notifier->v4l2_dev,
+					struct vpfe_device, v4l2_dev);
+
+	return vpfe_probe_complete(vpfe);
+}
+
+static struct vpfe_config *
+vpfe_get_pdata(struct platform_device *pdev)
+{
+	struct device_node *endpoint = NULL, *rem = NULL;
+	struct v4l2_of_endpoint bus_cfg;
+	struct vpfe_subdev_info *sdinfo;
+	struct vpfe_config *pdata;
+	unsigned int flags;
+	unsigned int i;
+	int err;
+
+	dev_dbg(&pdev->dev, "vpfe_get_pdata\n");
+
+	if (!IS_ENABLED(CONFIG_OF) || !pdev->dev.of_node)
+		return pdev->dev.platform_data;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return NULL;
+
+	for (i = 0; ; i++) {
+		endpoint = of_graph_get_next_endpoint(pdev->dev.of_node,
+						      endpoint);
+		if (!endpoint)
+			break;
+
+		sdinfo = &pdata->sub_devs[i];
+		sdinfo->grp_id = 0;
+
+		/* we only support camera */
+		sdinfo->inputs[0].index = i;
+		strcpy(sdinfo->inputs[0].name, "Camera");
+		sdinfo->inputs[0].type = V4L2_INPUT_TYPE_CAMERA;
+		sdinfo->inputs[0].std = V4L2_STD_ALL;
+		sdinfo->inputs[0].capabilities = V4L2_IN_CAP_STD;
+
+		sdinfo->can_route = 0;
+		sdinfo->routes = NULL;
+
+		of_property_read_u32(endpoint, "ti,am437x-vpfe-interface",
+				     &sdinfo->vpfe_param.if_type);
+		if (sdinfo->vpfe_param.if_type < 0 ||
+			sdinfo->vpfe_param.if_type > 4) {
+			sdinfo->vpfe_param.if_type = VPFE_RAW_BAYER;
+		}
+
+		err = v4l2_of_parse_endpoint(endpoint, &bus_cfg);
+		if (err) {
+			dev_err(&pdev->dev, "Could not parse the endpoint\n");
+			goto done;
+		}
+
+		sdinfo->vpfe_param.bus_width = bus_cfg.bus.parallel.bus_width;
+
+		if (sdinfo->vpfe_param.bus_width < 8 ||
+			sdinfo->vpfe_param.bus_width > 16) {
+			dev_err(&pdev->dev, "Invalid bus width.\n");
+			goto done;
+		}
+
+		flags = bus_cfg.bus.parallel.flags;
+
+		if (flags & V4L2_MBUS_HSYNC_ACTIVE_HIGH)
+			sdinfo->vpfe_param.hdpol = 1;
+
+		if (flags & V4L2_MBUS_VSYNC_ACTIVE_HIGH)
+			sdinfo->vpfe_param.vdpol = 1;
+
+		rem = of_graph_get_remote_port_parent(endpoint);
+		if (!rem) {
+			dev_err(&pdev->dev, "Remote device at %s not found\n",
+				endpoint->full_name);
+			goto done;
+		}
+
+		strncpy(sdinfo->name, rem->name, sizeof(sdinfo->name));
+
+		pdata->asd[i] = devm_kzalloc(&pdev->dev,
+					     sizeof(struct v4l2_async_subdev),
+					     GFP_KERNEL);
+		pdata->asd[i]->match_type = V4L2_ASYNC_MATCH_OF;
+		pdata->asd[i]->match.of.node = rem;
+		of_node_put(endpoint);
+		of_node_put(rem);
+	}
+
+	of_node_put(endpoint);
+	return pdata;
+
+done:
+	of_node_put(endpoint);
+	of_node_put(rem);
+	return NULL;
+}
+
+/*
+ * vpfe_probe : This function creates device entries by register
+ * itself to the V4L2 driver and initializes fields of each
+ * device objects
+ */
+static int vpfe_probe(struct platform_device *pdev)
+{
+	struct vpfe_config *vpfe_cfg = vpfe_get_pdata(pdev);
+	struct vpfe_device *vpfe;
+	struct vpfe_ccdc *ccdc;
+	struct resource	*res;
+	int ret;
+
+	if (!vpfe_cfg) {
+		dev_err(&pdev->dev, "No platform data\n");
+		return -EINVAL;
+	}
+
+	vpfe = devm_kzalloc(&pdev->dev, sizeof(*vpfe), GFP_KERNEL);
+	if (!vpfe)
+		return -ENOMEM;
+
+	vpfe->pdev = &pdev->dev;
+	vpfe->cfg = vpfe_cfg;
+	ccdc = &vpfe->ccdc;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	ccdc->ccdc_cfg.base_addr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(ccdc->ccdc_cfg.base_addr))
+		return PTR_ERR(ccdc->ccdc_cfg.base_addr);
+
+	vpfe->irq = platform_get_irq(pdev, 0);
+	if (vpfe->irq <= 0) {
+		dev_err(&pdev->dev, "No IRQ resource\n");
+		return -ENODEV;
+	}
+
+	ret = devm_request_irq(vpfe->pdev, vpfe->irq, vpfe_isr, 0,
+			       "vpfe_capture0", vpfe);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to request interrupt\n");
+		return -EINVAL;
+	}
+
+	vpfe->video_dev = video_device_alloc();
+	if (!vpfe->video_dev) {
+		dev_err(&pdev->dev, "Unable to allocate video device\n");
+		return -ENOMEM;
+	}
+
+	ret = v4l2_device_register(&pdev->dev, &vpfe->v4l2_dev);
+	if (ret) {
+		vpfe_err(vpfe,
+			"Unable to register v4l2 device.\n");
+		goto probe_out_video_release;
+	}
+
+	/* set the driver data in platform device */
+	platform_set_drvdata(pdev, vpfe);
+	/* Enabling module functional clock */
+	pm_runtime_enable(&pdev->dev);
+
+	/* for now just enable it here instead of waiting for the open */
+	pm_runtime_get_sync(&pdev->dev);
+
+	vpfe_ccdc_config_defaults(ccdc);
+
+	pm_runtime_put_sync(&pdev->dev);
+
+	vpfe->sd = devm_kzalloc(&pdev->dev, sizeof(struct v4l2_subdev *) *
+				ARRAY_SIZE(vpfe->cfg->asd), GFP_KERNEL);
+	if (!vpfe->sd) {
+		ret = -ENOMEM;
+		goto probe_out_v4l2_unregister;
+	}
+
+	vpfe->notifier.subdevs = vpfe->cfg->asd;
+	vpfe->notifier.num_subdevs = ARRAY_SIZE(vpfe->cfg->asd);
+	vpfe->notifier.bound = vpfe_async_bound;
+	vpfe->notifier.complete = vpfe_async_complete;
+	ret = v4l2_async_notifier_register(&vpfe->v4l2_dev,
+						&vpfe->notifier);
+	if (ret) {
+		vpfe_err(vpfe, "Error registering async notifier\n");
+		ret = -EINVAL;
+		goto probe_out_v4l2_unregister;
+	}
+
+	return 0;
+
+probe_out_v4l2_unregister:
+	v4l2_device_unregister(&vpfe->v4l2_dev);
+probe_out_video_release:
+	if (!video_is_registered(vpfe->video_dev))
+		video_device_release(vpfe->video_dev);
+	return ret;
+}
+
+/*
+ * vpfe_remove : It un-register device from V4L2 driver
+ */
+static int vpfe_remove(struct platform_device *pdev)
+{
+	struct vpfe_device *vpfe = platform_get_drvdata(pdev);
+
+	vpfe_dbg(2, vpfe, "vpfe_remove\n");
+
+	pm_runtime_disable(&pdev->dev);
+
+	v4l2_async_notifier_unregister(&vpfe->notifier);
+	v4l2_device_unregister(&vpfe->v4l2_dev);
+	video_unregister_device(vpfe->video_dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+static void vpfe_save_context(struct vpfe_ccdc *ccdc)
+{
+	ccdc->ccdc_ctx[VPFE_PCR >> 2] = vpfe_reg_read(ccdc, VPFE_PCR);
+	ccdc->ccdc_ctx[VPFE_SYNMODE >> 2] = vpfe_reg_read(ccdc, VPFE_SYNMODE);
+	ccdc->ccdc_ctx[VPFE_SDOFST >> 2] = vpfe_reg_read(ccdc, VPFE_SDOFST);
+	ccdc->ccdc_ctx[VPFE_SDR_ADDR >> 2] = vpfe_reg_read(ccdc, VPFE_SDR_ADDR);
+	ccdc->ccdc_ctx[VPFE_CLAMP >> 2] = vpfe_reg_read(ccdc, VPFE_CLAMP);
+	ccdc->ccdc_ctx[VPFE_DCSUB >> 2] = vpfe_reg_read(ccdc, VPFE_DCSUB);
+	ccdc->ccdc_ctx[VPFE_COLPTN >> 2] = vpfe_reg_read(ccdc, VPFE_COLPTN);
+	ccdc->ccdc_ctx[VPFE_BLKCMP >> 2] = vpfe_reg_read(ccdc, VPFE_BLKCMP);
+	ccdc->ccdc_ctx[VPFE_VDINT >> 2] = vpfe_reg_read(ccdc, VPFE_VDINT);
+	ccdc->ccdc_ctx[VPFE_ALAW >> 2] = vpfe_reg_read(ccdc, VPFE_ALAW);
+	ccdc->ccdc_ctx[VPFE_REC656IF >> 2] = vpfe_reg_read(ccdc, VPFE_REC656IF);
+	ccdc->ccdc_ctx[VPFE_CCDCFG >> 2] = vpfe_reg_read(ccdc, VPFE_CCDCFG);
+	ccdc->ccdc_ctx[VPFE_CULLING >> 2] = vpfe_reg_read(ccdc, VPFE_CULLING);
+	ccdc->ccdc_ctx[VPFE_HD_VD_WID >> 2] = vpfe_reg_read(ccdc,
+							    VPFE_HD_VD_WID);
+	ccdc->ccdc_ctx[VPFE_PIX_LINES >> 2] = vpfe_reg_read(ccdc,
+							    VPFE_PIX_LINES);
+	ccdc->ccdc_ctx[VPFE_HORZ_INFO >> 2] = vpfe_reg_read(ccdc,
+							    VPFE_HORZ_INFO);
+	ccdc->ccdc_ctx[VPFE_VERT_START >> 2] = vpfe_reg_read(ccdc,
+							     VPFE_VERT_START);
+	ccdc->ccdc_ctx[VPFE_VERT_LINES >> 2] = vpfe_reg_read(ccdc,
+							     VPFE_VERT_LINES);
+	ccdc->ccdc_ctx[VPFE_HSIZE_OFF >> 2] = vpfe_reg_read(ccdc,
+							    VPFE_HSIZE_OFF);
+}
+
+static int vpfe_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct vpfe_device *vpfe = platform_get_drvdata(pdev);
+	struct vpfe_ccdc *ccdc = &vpfe->ccdc;
+
+	/* if streaming has not started we don't care */
+	if (!vb2_start_streaming_called(&vpfe->buffer_queue))
+		return 0;
+
+	pm_runtime_get_sync(dev);
+	vpfe_config_enable(ccdc, 1);
+
+	/* Save VPFE context */
+	vpfe_save_context(ccdc);
+
+	/* Disable CCDC */
+	vpfe_pcr_enable(ccdc, 0);
+	vpfe_config_enable(ccdc, 0);
+
+	/* Disable both master and slave clock */
+	pm_runtime_put_sync(dev);
+
+	/* Select sleep pin state */
+	pinctrl_pm_select_sleep_state(dev);
+
+	return 0;
+}
+
+static void vpfe_restore_context(struct vpfe_ccdc *ccdc)
+{
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_SYNMODE >> 2], VPFE_SYNMODE);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_CULLING >> 2], VPFE_CULLING);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_SDOFST >> 2], VPFE_SDOFST);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_SDR_ADDR >> 2], VPFE_SDR_ADDR);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_CLAMP >> 2], VPFE_CLAMP);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_DCSUB >> 2], VPFE_DCSUB);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_COLPTN >> 2], VPFE_COLPTN);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_BLKCMP >> 2], VPFE_BLKCMP);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_VDINT >> 2], VPFE_VDINT);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_ALAW >> 2], VPFE_ALAW);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_REC656IF >> 2], VPFE_REC656IF);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_CCDCFG >> 2], VPFE_CCDCFG);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_PCR >> 2], VPFE_PCR);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_HD_VD_WID >> 2],
+						VPFE_HD_VD_WID);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_PIX_LINES >> 2],
+						VPFE_PIX_LINES);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_HORZ_INFO >> 2],
+						VPFE_HORZ_INFO);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_VERT_START >> 2],
+						VPFE_VERT_START);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_VERT_LINES >> 2],
+						VPFE_VERT_LINES);
+	vpfe_reg_write(ccdc, ccdc->ccdc_ctx[VPFE_HSIZE_OFF >> 2],
+						VPFE_HSIZE_OFF);
+}
+
+static int vpfe_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct vpfe_device *vpfe = platform_get_drvdata(pdev);
+	struct vpfe_ccdc *ccdc = &vpfe->ccdc;
+
+	/* if streaming has not started we don't care */
+	if (!vb2_start_streaming_called(&vpfe->buffer_queue))
+		return 0;
+
+	/* Enable both master and slave clock */
+	pm_runtime_get_sync(dev);
+	vpfe_config_enable(ccdc, 1);
+
+	/* Restore VPFE context */
+	vpfe_restore_context(ccdc);
+
+	vpfe_config_enable(ccdc, 0);
+	pm_runtime_put_sync(dev);
+
+	/* Select default pin state */
+	pinctrl_pm_select_default_state(dev);
+
+	return 0;
+}
+
+#endif
+
+static SIMPLE_DEV_PM_OPS(vpfe_pm_ops, vpfe_suspend, vpfe_resume);
+
+static const struct of_device_id vpfe_of_match[] = {
+	{ .compatible = "ti,am437x-vpfe", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, vpfe_of_match);
+
+static struct platform_driver vpfe_driver = {
+	.probe		= vpfe_probe,
+	.remove		= vpfe_remove,
+	.driver = {
+		.name	= VPFE_MODULE_NAME,
+		.owner	= THIS_MODULE,
+		.pm	= &vpfe_pm_ops,
+		.of_match_table = of_match_ptr(vpfe_of_match),
+	},
+};
+
+module_platform_driver(vpfe_driver);
+
+MODULE_AUTHOR("Texas Instruments");
+MODULE_DESCRIPTION("TI AM437x VPFE driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(VPFE_VERSION);
diff --git a/drivers/media/platform/am437x/am437x-vpfe.h b/drivers/media/platform/am437x/am437x-vpfe.h
new file mode 100644
index 000000000000..0f557352313d
--- /dev/null
+++ b/drivers/media/platform/am437x/am437x-vpfe.h
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) 2013 - 2014 Texas Instruments, Inc.
+ *
+ * Benoit Parrot <bparrot@ti.com>
+ * Lad, Prabhakar <prabhakar.csengg@gmail.com>
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef AM437X_VPFE_H
+#define AM437X_VPFE_H
+
+#include <linux/am437x-vpfe.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/i2c.h>
+#include <linux/videodev2.h>
+
+#include <media/v4l2-dev.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-ioctl.h>
+#include <media/videobuf2-dma-contig.h>
+
+#include "am437x-vpfe_regs.h"
+
+enum vpfe_pin_pol {
+	VPFE_PINPOL_POSITIVE = 0,
+	VPFE_PINPOL_NEGATIVE,
+};
+
+enum vpfe_hw_if_type {
+	/* Raw Bayer */
+	VPFE_RAW_BAYER = 0,
+	/* BT656 - 8 bit */
+	VPFE_BT656,
+	/* BT656 - 10 bit */
+	VPFE_BT656_10BIT,
+	/* YCbCr - 8 bit with external sync */
+	VPFE_YCBCR_SYNC_8,
+	/* YCbCr - 16 bit with external sync */
+	VPFE_YCBCR_SYNC_16,
+};
+
+/* interface description */
+struct vpfe_hw_if_param {
+	enum vpfe_hw_if_type if_type;
+	enum vpfe_pin_pol hdpol;
+	enum vpfe_pin_pol vdpol;
+	unsigned int bus_width;
+};
+
+#define VPFE_MAX_SUBDEV		1
+#define VPFE_MAX_INPUTS		1
+
+struct vpfe_pixel_format {
+	struct v4l2_fmtdesc fmtdesc;
+	/* bytes per pixel */
+	int bpp;
+};
+
+struct vpfe_std_info {
+	int active_pixels;
+	int active_lines;
+	/* current frame format */
+	int frame_format;
+};
+
+struct vpfe_route {
+	u32 input;
+	u32 output;
+};
+
+struct vpfe_subdev_info {
+	 char name[32];
+	/* Sub device group id */
+	int grp_id;
+	/* inputs available at the sub device */
+	struct v4l2_input inputs[VPFE_MAX_INPUTS];
+	/* Sub dev routing information for each input */
+	struct vpfe_route *routes;
+	/* check if sub dev supports routing */
+	int can_route;
+	/* ccdc bus/interface configuration */
+	struct vpfe_hw_if_param vpfe_param;
+	struct v4l2_subdev *sd;
+};
+
+struct vpfe_config {
+	/* information about each subdev */
+	struct vpfe_subdev_info sub_devs[VPFE_MAX_SUBDEV];
+	/* Flat array, arranged in groups */
+	struct v4l2_async_subdev *asd[VPFE_MAX_SUBDEV];
+};
+
+struct vpfe_cap_buffer {
+	struct vb2_buffer vb;
+	struct list_head list;
+};
+
+enum ccdc_pixfmt {
+	CCDC_PIXFMT_RAW = 0,
+	CCDC_PIXFMT_YCBCR_16BIT,
+	CCDC_PIXFMT_YCBCR_8BIT,
+};
+
+enum ccdc_frmfmt {
+	CCDC_FRMFMT_PROGRESSIVE = 0,
+	CCDC_FRMFMT_INTERLACED,
+};
+
+/* PIXEL ORDER IN MEMORY from LSB to MSB */
+/* only applicable for 8-bit input mode  */
+enum ccdc_pixorder {
+	CCDC_PIXORDER_YCBYCR,
+	CCDC_PIXORDER_CBYCRY,
+};
+
+enum ccdc_buftype {
+	CCDC_BUFTYPE_FLD_INTERLEAVED,
+	CCDC_BUFTYPE_FLD_SEPARATED
+};
+
+
+/* returns the highest bit used for the gamma */
+static inline u8 ccdc_gamma_width_max_bit(enum vpfe_ccdc_gamma_width width)
+{
+	return 15 - width;
+}
+
+/* returns the highest bit used for this data size */
+static inline u8 ccdc_data_size_max_bit(enum vpfe_ccdc_data_size sz)
+{
+	return sz == VPFE_CCDC_DATA_8BITS ? 7 : 15 - sz;
+}
+
+/* Structure for CCDC configuration parameters for raw capture mode */
+struct ccdc_params_raw {
+	/* pixel format */
+	enum ccdc_pixfmt pix_fmt;
+	/* progressive or interlaced frame */
+	enum ccdc_frmfmt frm_fmt;
+	struct v4l2_rect win;
+	/* Current Format Bytes Per Pixels */
+	unsigned int bytesperpixel;
+	/* Current Format Bytes per Lines
+	 * (Aligned to 32 bytes) used for HORZ_INFO
+	 */
+	unsigned int bytesperline;
+	/* field id polarity */
+	enum vpfe_pin_pol fid_pol;
+	/* vertical sync polarity */
+	enum vpfe_pin_pol vd_pol;
+	/* horizontal sync polarity */
+	enum vpfe_pin_pol hd_pol;
+	/* interleaved or separated fields */
+	enum ccdc_buftype buf_type;
+	/*
+	 * enable to store the image in inverse
+	 * order in memory(bottom to top)
+	 */
+	unsigned char image_invert_enable;
+	/* configurable parameters */
+	struct vpfe_ccdc_config_params_raw config_params;
+};
+
+struct ccdc_params_ycbcr {
+	/* pixel format */
+	enum ccdc_pixfmt pix_fmt;
+	/* progressive or interlaced frame */
+	enum ccdc_frmfmt frm_fmt;
+	struct v4l2_rect win;
+	/* Current Format Bytes Per Pixels */
+	unsigned int bytesperpixel;
+	/* Current Format Bytes per Lines
+	 * (Aligned to 32 bytes) used for HORZ_INFO
+	 */
+	unsigned int bytesperline;
+	/* field id polarity */
+	enum vpfe_pin_pol fid_pol;
+	/* vertical sync polarity */
+	enum vpfe_pin_pol vd_pol;
+	/* horizontal sync polarity */
+	enum vpfe_pin_pol hd_pol;
+	/* enable BT.656 embedded sync mode */
+	int bt656_enable;
+	/* cb:y:cr:y or y:cb:y:cr in memory */
+	enum ccdc_pixorder pix_order;
+	/* interleaved or separated fields  */
+	enum ccdc_buftype buf_type;
+};
+
+/*
+ * CCDC operational configuration
+ */
+struct ccdc_config {
+	/* CCDC interface type */
+	enum vpfe_hw_if_type if_type;
+	/* Raw Bayer configuration */
+	struct ccdc_params_raw bayer;
+	/* YCbCr configuration */
+	struct ccdc_params_ycbcr ycbcr;
+	/* ccdc base address */
+	void __iomem *base_addr;
+};
+
+struct vpfe_ccdc {
+	struct ccdc_config ccdc_cfg;
+	u32 ccdc_ctx[VPFE_REG_END / sizeof(u32)];
+};
+
+struct vpfe_device {
+	/* V4l2 specific parameters */
+	/* Identifies video device for this channel */
+	struct video_device *video_dev;
+	/* sub devices */
+	struct v4l2_subdev **sd;
+	/* vpfe cfg */
+	struct vpfe_config *cfg;
+	/* V4l2 device */
+	struct v4l2_device v4l2_dev;
+	/* parent device */
+	struct device *pdev;
+	/* subdevice async Notifier */
+	struct v4l2_async_notifier notifier;
+	/* Indicates id of the field which is being displayed */
+	unsigned field;
+	unsigned sequence;
+	/* current interface type */
+	struct vpfe_hw_if_param vpfe_if_params;
+	/* ptr to currently selected sub device */
+	struct vpfe_subdev_info *current_subdev;
+	/* current input at the sub device */
+	int current_input;
+	/* Keeps track of the information about the standard */
+	struct vpfe_std_info std_info;
+	/* std index into std table */
+	int std_index;
+	/* IRQs used when CCDC output to SDRAM */
+	unsigned int irq;
+	/* Pointer pointing to current v4l2_buffer */
+	struct vpfe_cap_buffer *cur_frm;
+	/* Pointer pointing to next v4l2_buffer */
+	struct vpfe_cap_buffer *next_frm;
+	/* Used to store pixel format */
+	struct v4l2_format fmt;
+	/* Used to store current bytes per pixel based on current format */
+	unsigned int bpp;
+	/*
+	 * used when IMP is chained to store the crop window which
+	 * is different from the image window
+	 */
+	struct v4l2_rect crop;
+	/* Buffer queue used in video-buf */
+	struct vb2_queue buffer_queue;
+	/* Allocator-specific contexts for each plane */
+	struct vb2_alloc_ctx *alloc_ctx;
+	/* Queue of filled frames */
+	struct list_head dma_queue;
+	/* IRQ lock for DMA queue */
+	spinlock_t dma_queue_lock;
+	/* lock used to access this structure */
+	struct mutex lock;
+	/*
+	 * offset where second field starts from the starting of the
+	 * buffer for field separated YCbCr formats
+	 */
+	u32 field_off;
+	struct vpfe_ccdc ccdc;
+};
+
+#endif	/* AM437X_VPFE_H */
diff --git a/drivers/media/platform/am437x/am437x-vpfe_regs.h b/drivers/media/platform/am437x/am437x-vpfe_regs.h
new file mode 100644
index 000000000000..4a0ed29723e8
--- /dev/null
+++ b/drivers/media/platform/am437x/am437x-vpfe_regs.h
@@ -0,0 +1,140 @@
+/*
+ * TI AM437x Image Sensor Interface Registers
+ *
+ * Copyright (C) 2013 - 2014 Texas Instruments, Inc.
+ *
+ * Benoit Parrot <bparrot@ti.com>
+ * Lad, Prabhakar <prabhakar.csengg@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef AM437X_VPFE_REGS_H
+#define AM437X_VPFE_REGS_H
+
+/* VPFE module register offset */
+#define VPFE_REVISION				0x0
+#define VPFE_PCR				0x4
+#define VPFE_SYNMODE				0x8
+#define VPFE_HD_VD_WID				0xc
+#define VPFE_PIX_LINES				0x10
+#define VPFE_HORZ_INFO				0x14
+#define VPFE_VERT_START				0x18
+#define VPFE_VERT_LINES				0x1c
+#define VPFE_CULLING				0x20
+#define VPFE_HSIZE_OFF				0x24
+#define VPFE_SDOFST				0x28
+#define VPFE_SDR_ADDR				0x2c
+#define VPFE_CLAMP				0x30
+#define VPFE_DCSUB				0x34
+#define VPFE_COLPTN				0x38
+#define VPFE_BLKCMP				0x3c
+#define VPFE_VDINT				0x48
+#define VPFE_ALAW				0x4c
+#define VPFE_REC656IF				0x50
+#define VPFE_CCDCFG				0x54
+#define VPFE_DMA_CNTL				0x98
+#define VPFE_SYSCONFIG				0x104
+#define VPFE_CONFIG				0x108
+#define VPFE_IRQ_EOI				0x110
+#define VPFE_IRQ_STS_RAW			0x114
+#define VPFE_IRQ_STS				0x118
+#define VPFE_IRQ_EN_SET				0x11c
+#define VPFE_IRQ_EN_CLR				0x120
+#define VPFE_REG_END				0x124
+
+/* Define bit fields within selected registers */
+#define VPFE_FID_POL_MASK			1
+#define VPFE_FID_POL_SHIFT			4
+#define VPFE_HD_POL_MASK			1
+#define VPFE_HD_POL_SHIFT			3
+#define VPFE_VD_POL_MASK			1
+#define VPFE_VD_POL_SHIFT			2
+#define VPFE_HSIZE_OFF_MASK			0xffffffe0
+#define VPFE_32BYTE_ALIGN_VAL			31
+#define VPFE_FRM_FMT_MASK			0x1
+#define VPFE_FRM_FMT_SHIFT			7
+#define VPFE_DATA_SZ_MASK			7
+#define VPFE_DATA_SZ_SHIFT			8
+#define VPFE_PIX_FMT_MASK			3
+#define VPFE_PIX_FMT_SHIFT			12
+#define VPFE_VP2SDR_DISABLE			0xfffbffff
+#define VPFE_WEN_ENABLE				(1 << 17)
+#define VPFE_SDR2RSZ_DISABLE			0xfff7ffff
+#define VPFE_VDHDEN_ENABLE			(1 << 16)
+#define VPFE_LPF_ENABLE				(1 << 14)
+#define VPFE_ALAW_ENABLE			(1 << 3)
+#define VPFE_ALAW_GAMMA_WD_MASK			7
+#define VPFE_BLK_CLAMP_ENABLE			(1 << 31)
+#define VPFE_BLK_SGAIN_MASK			0x1f
+#define VPFE_BLK_ST_PXL_MASK			0x7fff
+#define VPFE_BLK_ST_PXL_SHIFT			10
+#define VPFE_BLK_SAMPLE_LN_MASK			7
+#define VPFE_BLK_SAMPLE_LN_SHIFT		28
+#define VPFE_BLK_SAMPLE_LINE_MASK		7
+#define VPFE_BLK_SAMPLE_LINE_SHIFT		25
+#define VPFE_BLK_DC_SUB_MASK			0x03fff
+#define VPFE_BLK_COMP_MASK			0xff
+#define VPFE_BLK_COMP_GB_COMP_SHIFT		8
+#define VPFE_BLK_COMP_GR_COMP_SHIFT		16
+#define VPFE_BLK_COMP_R_COMP_SHIFT		24
+#define VPFE_LATCH_ON_VSYNC_DISABLE		(1 << 15)
+#define VPFE_DATA_PACK_ENABLE			(1 << 11)
+#define VPFE_HORZ_INFO_SPH_SHIFT		16
+#define VPFE_VERT_START_SLV0_SHIFT		16
+#define VPFE_VDINT_VDINT0_SHIFT			16
+#define VPFE_VDINT_VDINT1_MASK			0xffff
+#define VPFE_PPC_RAW				1
+#define VPFE_DCSUB_DEFAULT_VAL			0
+#define VPFE_CLAMP_DEFAULT_VAL			0
+#define VPFE_COLPTN_VAL				0xbb11bb11
+#define VPFE_TWO_BYTES_PER_PIXEL		2
+#define VPFE_INTERLACED_IMAGE_INVERT		0x4b6d
+#define VPFE_INTERLACED_NO_IMAGE_INVERT		0x0249
+#define VPFE_PROGRESSIVE_IMAGE_INVERT		0x4000
+#define VPFE_PROGRESSIVE_NO_IMAGE_INVERT	0
+#define VPFE_INTERLACED_HEIGHT_SHIFT		1
+#define VPFE_SYN_MODE_INPMOD_SHIFT		12
+#define VPFE_SYN_MODE_INPMOD_MASK		3
+#define VPFE_SYN_MODE_8BITS			(7 << 8)
+#define VPFE_SYN_MODE_10BITS			(6 << 8)
+#define VPFE_SYN_MODE_11BITS			(5 << 8)
+#define VPFE_SYN_MODE_12BITS			(4 << 8)
+#define VPFE_SYN_MODE_13BITS			(3 << 8)
+#define VPFE_SYN_MODE_14BITS			(2 << 8)
+#define VPFE_SYN_MODE_15BITS			(1 << 8)
+#define VPFE_SYN_MODE_16BITS			(0 << 8)
+#define VPFE_SYN_FLDMODE_MASK			1
+#define VPFE_SYN_FLDMODE_SHIFT			7
+#define VPFE_REC656IF_BT656_EN			3
+#define VPFE_SYN_MODE_VD_POL_NEGATIVE		(1 << 2)
+#define VPFE_CCDCFG_Y8POS_SHIFT			11
+#define VPFE_CCDCFG_BW656_10BIT			(1 << 5)
+#define VPFE_SDOFST_FIELD_INTERLEAVED		0x249
+#define VPFE_NO_CULLING				0xffff00ff
+#define VPFE_VDINT0				(1 << 0)
+#define VPFE_VDINT1				(1 << 1)
+#define VPFE_VDINT2				(1 << 2)
+#define VPFE_DMA_CNTL_OVERFLOW			(1 << 31)
+
+#define VPFE_CONFIG_PCLK_INV_SHIFT		0
+#define VPFE_CONFIG_PCLK_INV_MASK		1
+#define VPFE_CONFIG_PCLK_INV_NOT_INV		0
+#define VPFE_CONFIG_PCLK_INV_INV		1
+#define VPFE_CONFIG_EN_SHIFT			1
+#define VPFE_CONFIG_EN_MASK			2
+#define VPFE_CONFIG_EN_DISABLE			0
+#define VPFE_CONFIG_EN_ENABLE			1
+#define VPFE_CONFIG_ST_SHIFT			2
+#define VPFE_CONFIG_ST_MASK			4
+#define VPFE_CONFIG_ST_OCP_ACTIVE		0
+#define VPFE_CONFIG_ST_OCP_STANDBY		1
+
+#endif		/* AM437X_VPFE_REGS_H */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 00b100023c47..9312d5806541 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -35,6 +35,7 @@ header-y += adfs_fs.h
 header-y += affs_hardblocks.h
 header-y += agpgart.h
 header-y += aio_abi.h
+header-y += am437x-vpfe.h
 header-y += apm_bios.h
 header-y += arcfb.h
 header-y += atalk.h
diff --git a/include/uapi/linux/am437x-vpfe.h b/include/uapi/linux/am437x-vpfe.h
new file mode 100644
index 000000000000..9b03033f9cd6
--- /dev/null
+++ b/include/uapi/linux/am437x-vpfe.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2013 - 2014 Texas Instruments, Inc.
+ *
+ * Benoit Parrot <bparrot@ti.com>
+ * Lad, Prabhakar <prabhakar.csengg@gmail.com>
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef AM437X_VPFE_USER_H
+#define AM437X_VPFE_USER_H
+
+enum vpfe_ccdc_data_size {
+	VPFE_CCDC_DATA_16BITS = 0,
+	VPFE_CCDC_DATA_15BITS,
+	VPFE_CCDC_DATA_14BITS,
+	VPFE_CCDC_DATA_13BITS,
+	VPFE_CCDC_DATA_12BITS,
+	VPFE_CCDC_DATA_11BITS,
+	VPFE_CCDC_DATA_10BITS,
+	VPFE_CCDC_DATA_8BITS,
+};
+
+/* enum for No of pixel per line to be avg. in Black Clamping*/
+enum vpfe_ccdc_sample_length {
+	VPFE_CCDC_SAMPLE_1PIXELS = 0,
+	VPFE_CCDC_SAMPLE_2PIXELS,
+	VPFE_CCDC_SAMPLE_4PIXELS,
+	VPFE_CCDC_SAMPLE_8PIXELS,
+	VPFE_CCDC_SAMPLE_16PIXELS,
+};
+
+/* enum for No of lines in Black Clamping */
+enum vpfe_ccdc_sample_line {
+	VPFE_CCDC_SAMPLE_1LINES = 0,
+	VPFE_CCDC_SAMPLE_2LINES,
+	VPFE_CCDC_SAMPLE_4LINES,
+	VPFE_CCDC_SAMPLE_8LINES,
+	VPFE_CCDC_SAMPLE_16LINES,
+};
+
+/* enum for Alaw gamma width */
+enum vpfe_ccdc_gamma_width {
+	VPFE_CCDC_GAMMA_BITS_15_6 = 0,	/* use bits 15-6 for gamma */
+	VPFE_CCDC_GAMMA_BITS_14_5,
+	VPFE_CCDC_GAMMA_BITS_13_4,
+	VPFE_CCDC_GAMMA_BITS_12_3,
+	VPFE_CCDC_GAMMA_BITS_11_2,
+	VPFE_CCDC_GAMMA_BITS_10_1,
+	VPFE_CCDC_GAMMA_BITS_09_0,	/* use bits 9-0 for gamma */
+};
+
+/* structure for ALaw */
+struct vpfe_ccdc_a_law {
+	/* Enable/disable A-Law */
+	unsigned char enable;
+	/* Gamma Width Input */
+	enum vpfe_ccdc_gamma_width gamma_wd;
+};
+
+/* structure for Black Clamping */
+struct vpfe_ccdc_black_clamp {
+	unsigned char enable;
+	/* only if bClampEnable is TRUE */
+	enum vpfe_ccdc_sample_length sample_pixel;
+	/* only if bClampEnable is TRUE */
+	enum vpfe_ccdc_sample_line sample_ln;
+	/* only if bClampEnable is TRUE */
+	unsigned short start_pixel;
+	/* only if bClampEnable is TRUE */
+	unsigned short sgain;
+	/* only if bClampEnable is FALSE */
+	unsigned short dc_sub;
+};
+
+/* structure for Black Level Compensation */
+struct vpfe_ccdc_black_compensation {
+	/* Constant value to subtract from Red component */
+	char r;
+	/* Constant value to subtract from Gr component */
+	char gr;
+	/* Constant value to subtract from Blue component */
+	char b;
+	/* Constant value to subtract from Gb component */
+	char gb;
+};
+
+/* Structure for CCDC configuration parameters for raw capture mode passed
+ * by application
+ */
+struct vpfe_ccdc_config_params_raw {
+	/* data size value from 8 to 16 bits */
+	enum vpfe_ccdc_data_size data_sz;
+	/* Structure for Optional A-Law */
+	struct vpfe_ccdc_a_law alaw;
+	/* Structure for Optical Black Clamp */
+	struct vpfe_ccdc_black_clamp blk_clamp;
+	/* Structure for Black Compensation */
+	struct vpfe_ccdc_black_compensation blk_comp;
+};
+
+/*
+ *  Private IOCTL
+ * VIDIOC_AM437X_CCDC_CFG - Set CCDC configuration for raw capture
+ * This is an experimental ioctl that will change in future kernels. So use
+ * this ioctl with care !
+ **/
+#define VIDIOC_AM437X_CCDC_CFG \
+	_IOW('V', BASE_VIDIOC_PRIVATE + 1, void *)
+
+#endif		/* AM437X_VPFE_USER_H */
-- 
cgit v1.2.3-71-gd317


From 33f72e6f0c67f673fd0c63a8182dbd9ffb8cf50b Mon Sep 17 00:00:00 2001
From: Bill Hong <bhong@brocade.com>
Date: Sat, 27 Dec 2014 10:12:39 -0800
Subject: l2tp : multicast notification to the registered listeners

Previously l2tp module did not provide any means for the user space to
get notified when tunnels/sessions are added/modified/deleted.
This change contains the following
- create a multicast group for the listeners to register.
- notify the registered listeners when the tunnels/sessions are
  created/modified/deleted.

Signed-off-by: Bill Hong <bhong@brocade.com>
Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
Reviewed-by: Sven-Thorsten Dietrich <sven@brocade.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/l2tp.h |   1 +
 net/l2tp/l2tp_netlink.c   | 101 +++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 92 insertions(+), 10 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 21caa2631c20..347ef22a964e 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -178,5 +178,6 @@ enum l2tp_seqmode {
  */
 #define L2TP_GENL_NAME		"l2tp"
 #define L2TP_GENL_VERSION	0x1
+#define L2TP_GENL_MCGROUP       "l2tp"
 
 #endif /* _UAPI_LINUX_L2TP_H_ */
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 0ac907adb2f4..6b16598f31d5 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -40,6 +40,18 @@ static struct genl_family l2tp_nl_family = {
 	.netnsok	= true,
 };
 
+static const struct genl_multicast_group l2tp_multicast_group[] = {
+	{
+		.name = L2TP_GENL_MCGROUP,
+	},
+};
+
+static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq,
+			       int flags, struct l2tp_tunnel *tunnel, u8 cmd);
+static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq,
+				int flags, struct l2tp_session *session,
+				u8 cmd);
+
 /* Accessed under genl lock */
 static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
 
@@ -97,6 +109,52 @@ out:
 	return ret;
 }
 
+static int l2tp_tunnel_notify(struct genl_family *family,
+			      struct genl_info *info,
+			      struct l2tp_tunnel *tunnel,
+			      u8 cmd)
+{
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
+				  NLM_F_ACK, tunnel, cmd);
+
+	if (ret >= 0)
+		return genlmsg_multicast_allns(family, msg, 0,	0, GFP_ATOMIC);
+
+	nlmsg_free(msg);
+
+	return ret;
+}
+
+static int l2tp_session_notify(struct genl_family *family,
+			       struct genl_info *info,
+			       struct l2tp_session *session,
+			       u8 cmd)
+{
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
+				   NLM_F_ACK, session, cmd);
+
+	if (ret >= 0)
+		return genlmsg_multicast_allns(family, msg, 0,	0, GFP_ATOMIC);
+
+	nlmsg_free(msg);
+
+	return ret;
+}
+
 static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info)
 {
 	u32 tunnel_id;
@@ -188,6 +246,9 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
 		break;
 	}
 
+	if (ret >= 0)
+		ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
+					 tunnel, L2TP_CMD_TUNNEL_CREATE);
 out:
 	return ret;
 }
@@ -211,6 +272,9 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
 		goto out;
 	}
 
+	l2tp_tunnel_notify(&l2tp_nl_family, info,
+			   tunnel, L2TP_CMD_TUNNEL_DELETE);
+
 	(void) l2tp_tunnel_delete(tunnel);
 
 out:
@@ -239,12 +303,15 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
 	if (info->attrs[L2TP_ATTR_DEBUG])
 		tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
 
+	ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
+				 tunnel, L2TP_CMD_TUNNEL_MODIFY);
+
 out:
 	return ret;
 }
 
 static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags,
-			       struct l2tp_tunnel *tunnel)
+			       struct l2tp_tunnel *tunnel, u8 cmd)
 {
 	void *hdr;
 	struct nlattr *nest;
@@ -254,8 +321,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 	struct ipv6_pinfo *np = NULL;
 #endif
 
-	hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags,
-			  L2TP_CMD_TUNNEL_GET);
+	hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd);
 	if (!hdr)
 		return -EMSGSIZE;
 
@@ -359,7 +425,7 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
-				  NLM_F_ACK, tunnel);
+				  NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET);
 	if (ret < 0)
 		goto err_out;
 
@@ -385,7 +451,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback
 
 		if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid,
 					cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					tunnel) <= 0)
+					tunnel, L2TP_CMD_TUNNEL_GET) <= 0)
 			goto out;
 
 		ti++;
@@ -539,6 +605,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 		ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id,
 			session_id, peer_session_id, &cfg);
 
+	if (ret >= 0) {
+		session = l2tp_session_find(net, tunnel, session_id);
+		if (session)
+			ret = l2tp_session_notify(&l2tp_nl_family, info, session,
+						  L2TP_CMD_SESSION_CREATE);
+	}
+
 out:
 	return ret;
 }
@@ -555,6 +628,9 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
 		goto out;
 	}
 
+	l2tp_session_notify(&l2tp_nl_family, info,
+			    session, L2TP_CMD_SESSION_DELETE);
+
 	pw_type = session->pwtype;
 	if (pw_type < __L2TP_PWTYPE_MAX)
 		if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
@@ -601,12 +677,15 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
 	if (info->attrs[L2TP_ATTR_MRU])
 		session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
 
+	ret = l2tp_session_notify(&l2tp_nl_family, info,
+				  session, L2TP_CMD_SESSION_MODIFY);
+
 out:
 	return ret;
 }
 
 static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags,
-				struct l2tp_session *session)
+				struct l2tp_session *session, u8 cmd)
 {
 	void *hdr;
 	struct nlattr *nest;
@@ -615,7 +694,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
 
 	sk = tunnel->sock;
 
-	hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET);
+	hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd);
 	if (!hdr)
 		return -EMSGSIZE;
 
@@ -699,7 +778,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
-				   0, session);
+				   0, session, L2TP_CMD_SESSION_GET);
 	if (ret < 0)
 		goto err_out;
 
@@ -737,7 +816,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
 
 		if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					 session) <= 0)
+					 session, L2TP_CMD_SESSION_GET) <= 0)
 			break;
 
 		si++;
@@ -896,7 +975,9 @@ EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
 static int l2tp_nl_init(void)
 {
 	pr_info("L2TP netlink interface\n");
-	return genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops);
+	return genl_register_family_with_ops_groups(&l2tp_nl_family,
+						    l2tp_nl_ops,
+						    l2tp_multicast_group);
 }
 
 static void l2tp_nl_cleanup(void)
-- 
cgit v1.2.3-71-gd317


From 1803f594cbf9bb2e662ac945038113d0d0cc5e89 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 5 Jan 2015 11:16:42 +0100
Subject: nl80211: document NL80211_BSS_STATUS_AUTHENTICATED isn't used

The flag is no longer used (and hasn't been for a long time)
since trying to track authentication (and make decisions based
on state) was just causing issues all over - see commit
95de817b9034d50860319f6033ec85d25024694c.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 18cb0aa06351..54f391141351 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3265,6 +3265,9 @@ enum nl80211_bss {
 /**
  * enum nl80211_bss_status - BSS "status"
  * @NL80211_BSS_STATUS_AUTHENTICATED: Authenticated with this BSS.
+ *	Note that this is no longer used since cfg80211 no longer
+ *	keeps track of whether or not authentication was done with
+ *	a given BSS.
  * @NL80211_BSS_STATUS_ASSOCIATED: Associated with this BSS.
  * @NL80211_BSS_STATUS_IBSS_JOINED: Joined to this IBSS.
  *
-- 
cgit v1.2.3-71-gd317


From ad6f939ab193750cc94a265f58e007fb598c97b7 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 5 Jan 2015 13:56:17 -0800
Subject: ip: Add offset parameter to ip_cmsg_recv

Add ip_cmsg_recv_offset function which takes an offset argument
that indicates the starting offset in skb where data is being received
from. This will be useful in the case of UDP and provided checksum
to user space.

ip_cmsg_recv is an inline call to ip_cmsg_recv_offset with offset of
zero.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h |  1 +
 include/uapi/linux/in.h |  1 +
 net/ipv4/ip_sockglue.c  | 41 ++++++++++++++++++++++++++++++++++++++++-
 net/ipv4/udp.c          |  2 +-
 4 files changed, 43 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 605ca421d5ab..eb16c7beed1e 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -203,6 +203,7 @@ struct inet_sock {
 #define IP_CMSG_RETOPTS		BIT(4)
 #define IP_CMSG_PASSSEC		BIT(5)
 #define IP_CMSG_ORIGDSTADDR	BIT(6)
+#define IP_CMSG_CHECKSUM	BIT(7)
 
 static inline struct inet_sock *inet_sk(const struct sock *sk)
 {
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index c33a65e3d62c..589ced069e8a 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -109,6 +109,7 @@ struct in_addr {
 
 #define IP_MINTTL       21
 #define IP_NODEFRAG     22
+#define IP_CHECKSUM	23
 
 /* IP_MTU_DISCOVER values */
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 513d506ffebb..a317797b3cd0 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -37,6 +37,7 @@
 #include <net/route.h>
 #include <net/xfrm.h>
 #include <net/compat.h>
+#include <net/checksum.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/transp_v6.h>
 #endif
@@ -96,6 +97,20 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
 	put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
 }
 
+static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
+				  int offset)
+{
+	__wsum csum = skb->csum;
+
+	if (skb->ip_summed != CHECKSUM_COMPLETE)
+		return;
+
+	if (offset != 0)
+		csum = csum_sub(csum, csum_partial(skb->data, offset, 0));
+
+	put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
+}
+
 static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
 {
 	char *secdata;
@@ -191,9 +206,16 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
 			return;
 	}
 
-	if (flags & IP_CMSG_ORIGDSTADDR)
+	if (flags & IP_CMSG_ORIGDSTADDR) {
 		ip_cmsg_recv_dstaddr(msg, skb);
 
+		flags &= ~IP_CMSG_ORIGDSTADDR;
+		if (!flags)
+			return;
+	}
+
+	if (flags & IP_CMSG_CHECKSUM)
+		ip_cmsg_recv_checksum(msg, skb, offset);
 }
 EXPORT_SYMBOL(ip_cmsg_recv_offset);
 
@@ -533,6 +555,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 	case IP_MULTICAST_ALL:
 	case IP_MULTICAST_LOOP:
 	case IP_RECVORIGDSTADDR:
+	case IP_CHECKSUM:
 		if (optlen >= sizeof(int)) {
 			if (get_user(val, (int __user *) optval))
 				return -EFAULT;
@@ -630,6 +653,19 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		else
 			inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
 		break;
+	case IP_CHECKSUM:
+		if (val) {
+			if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
+				inet_inc_convert_csum(sk);
+				inet->cmsg_flags |= IP_CMSG_CHECKSUM;
+			}
+		} else {
+			if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
+				inet_dec_convert_csum(sk);
+				inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
+			}
+		}
+		break;
 	case IP_TOS:	/* This sets both TOS and Precedence */
 		if (sk->sk_type == SOCK_STREAM) {
 			val &= ~INET_ECN_MASK;
@@ -1233,6 +1269,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_RECVORIGDSTADDR:
 		val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
 		break;
+	case IP_CHECKSUM:
+		val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
+		break;
 	case IP_TOS:
 		val = inet->tos;
 		break;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 53358d88f110..97ef1f8b7be8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1329,7 +1329,7 @@ try_again:
 		*addr_len = sizeof(*sin);
 	}
 	if (inet->cmsg_flags)
-		ip_cmsg_recv(msg, skb);
+		ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr));
 
 	err = copied;
 	if (flags & MSG_TRUNC)
-- 
cgit v1.2.3-71-gd317


From ea697639992d96da98016b8934e68a73876a2264 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 5 Jan 2015 23:57:47 +0100
Subject: net: tcp: add RTAX_CC_ALGO fib handling

This patch adds the minimum necessary for the RTAX_CC_ALGO congestion
control metric to be set up and dumped back to user space.

While the internal representation of RTAX_CC_ALGO is handled as a u32
key, we avoided to expose this implementation detail to user space, thus
instead, we chose the netlink attribute that is being exchanged between
user space to be the actual congestion control algorithm name, similarly
as in the setsockopt(2) API in order to allow for maximum flexibility,
even for 3rd party modules.

It is a bit unfortunate that RTAX_QUICKACK used up a whole RTAX slot as
it should have been stored in RTAX_FEATURES instead, we first thought
about reusing it for the congestion control key, but it brings more
complications and/or confusion than worth it.

Joint work with Florian Westphal.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h              |  7 +++++++
 include/uapi/linux/rtnetlink.h |  2 ++
 net/core/rtnetlink.c           | 15 +++++++++++++--
 net/decnet/dn_fib.c            |  3 ++-
 net/decnet/dn_table.c          |  4 +++-
 net/ipv4/fib_semantics.c       | 14 ++++++++++++--
 net/ipv6/route.c               | 17 +++++++++++++++--
 7 files changed, 54 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 135b70c9a734..95bb237152e0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -846,7 +846,14 @@ extern struct tcp_congestion_ops tcp_reno;
 
 struct tcp_congestion_ops *tcp_ca_find_key(u32 key);
 u32 tcp_ca_get_key_by_name(const char *name);
+#ifdef CONFIG_INET
 char *tcp_ca_get_name_by_key(u32 key, char *buffer);
+#else
+static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer)
+{
+	return NULL;
+}
+#endif
 
 static inline bool tcp_ca_needs_ecn(const struct sock *sk)
 {
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 9c9b8b4480cd..d81f22d5b390 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -389,6 +389,8 @@ enum {
 #define RTAX_INITRWND RTAX_INITRWND
 	RTAX_QUICKACK,
 #define RTAX_QUICKACK RTAX_QUICKACK
+	RTAX_CC_ALGO,
+#define RTAX_CC_ALGO RTAX_CC_ALGO
 	__RTAX_MAX
 };
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index da983d4bac02..6a6cdade1676 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -50,6 +50,7 @@
 #include <net/arp.h>
 #include <net/route.h>
 #include <net/udp.h>
+#include <net/tcp.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/fib_rules.h>
@@ -669,9 +670,19 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 
 	for (i = 0; i < RTAX_MAX; i++) {
 		if (metrics[i]) {
+			if (i == RTAX_CC_ALGO - 1) {
+				char tmp[TCP_CA_NAME_MAX], *name;
+
+				name = tcp_ca_get_name_by_key(metrics[i], tmp);
+				if (!name)
+					continue;
+				if (nla_put_string(skb, i + 1, name))
+					goto nla_put_failure;
+			} else {
+				if (nla_put_u32(skb, i + 1, metrics[i]))
+					goto nla_put_failure;
+			}
 			valid++;
-			if (nla_put_u32(skb, i+1, metrics[i]))
-				goto nla_put_failure;
 		}
 	}
 
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index d332aefb0846..df4803437888 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -298,7 +298,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *att
 			int type = nla_type(attr);
 
 			if (type) {
-				if (type > RTAX_MAX || nla_len(attr) < 4)
+				if (type > RTAX_MAX || type == RTAX_CC_ALGO ||
+				    nla_len(attr) < 4)
 					goto err_inval;
 
 				fi->fib_metrics[type-1] = nla_get_u32(attr);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 86e3807052e9..3f19fcbf126d 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -29,6 +29,7 @@
 #include <linux/route.h> /* RTF_xxx */
 #include <net/neighbour.h>
 #include <net/netlink.h>
+#include <net/tcp.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/fib_rules.h>
@@ -273,7 +274,8 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi)
 	size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
 			 + nla_total_size(4) /* RTA_TABLE */
 			 + nla_total_size(2) /* RTA_DST */
-			 + nla_total_size(4); /* RTA_PRIORITY */
+			 + nla_total_size(4) /* RTA_PRIORITY */
+			 + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
 
 	/* space for nested metrics */
 	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f99f41bd15b8..d2b7b5521b1b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -360,7 +360,8 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
 			 + nla_total_size(4) /* RTA_TABLE */
 			 + nla_total_size(4) /* RTA_DST */
 			 + nla_total_size(4) /* RTA_PRIORITY */
-			 + nla_total_size(4); /* RTA_PREFSRC */
+			 + nla_total_size(4) /* RTA_PREFSRC */
+			 + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
 
 	/* space for nested metrics */
 	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
@@ -859,7 +860,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 
 				if (type > RTAX_MAX)
 					goto err_inval;
-				val = nla_get_u32(nla);
+				if (type == RTAX_CC_ALGO) {
+					char tmp[TCP_CA_NAME_MAX];
+
+					nla_strlcpy(tmp, nla, sizeof(tmp));
+					val = tcp_ca_get_key_by_name(tmp);
+					if (val == TCP_CA_UNSPEC)
+						goto err_inval;
+				} else {
+					val = nla_get_u32(nla);
+				}
 				if (type == RTAX_ADVMSS && val > 65535 - 40)
 					val = 65535 - 40;
 				if (type == RTAX_MTU && val > 65535 - 15)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 454771d20b21..34dcbb59df75 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1488,10 +1488,22 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
 		int type = nla_type(nla);
 
 		if (type) {
+			u32 val;
+
 			if (unlikely(type > RTAX_MAX))
 				goto err;
+			if (type == RTAX_CC_ALGO) {
+				char tmp[TCP_CA_NAME_MAX];
+
+				nla_strlcpy(tmp, nla, sizeof(tmp));
+				val = tcp_ca_get_key_by_name(tmp);
+				if (val == TCP_CA_UNSPEC)
+					goto err;
+			} else {
+				val = nla_get_u32(nla);
+			}
 
-			mp[type - 1] = nla_get_u32(nla);
+			mp[type - 1] = val;
 			__set_bit(type - 1, mxc->mx_valid);
 		}
 	}
@@ -2571,7 +2583,8 @@ static inline size_t rt6_nlmsg_size(void)
 	       + nla_total_size(4) /* RTA_OIF */
 	       + nla_total_size(4) /* RTA_PRIORITY */
 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
-	       + nla_total_size(sizeof(struct rta_cacheinfo));
+	       + nla_total_size(sizeof(struct rta_cacheinfo))
+	       + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
 }
 
 static int rt6_fill_node(struct net *net,
-- 
cgit v1.2.3-71-gd317


From d75bb06b61cb69ee6223d791d3bb230e68623b20 Mon Sep 17 00:00:00 2001
From: Gautam Kumar Shukla <gautams@broadcom.com>
Date: Tue, 23 Dec 2014 16:55:19 +0100
Subject: cfg80211: add extensible feature flag attribute

With the wiphy::features flag being used up this patch adds a
new field wiphy::ext_features. Considering extensibility this
new field is declared as a byte array. This extensible flag is
exposed to user-space by NL80211_ATTR_EXT_FEATURES.

Cc: Avinash Patil <patila@marvell.com>
Signed-off-by: Gautam (Gautam Kumar) Shukla <gautams@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 39 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/nl80211.h | 22 ++++++++++++++++++++++
 net/wireless/nl80211.c       |  5 +++++
 3 files changed, 66 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index bd672ea08c9a..f38645fb83b9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3016,6 +3016,8 @@ struct wiphy_vendor_command {
  * @regulatory_flags: wiphy regulatory flags, see
  *	&enum ieee80211_regulatory_flags
  * @features: features advertised to nl80211, see &enum nl80211_feature_flags.
+ * @ext_features: extended features advertised to nl80211, see
+ *	&enum nl80211_ext_feature_index.
  * @bss_priv_size: each BSS struct has private data allocated with it,
  *	this variable determines its size
  * @max_scan_ssids: maximum number of SSIDs the device can scan for in
@@ -3125,6 +3127,7 @@ struct wiphy {
 	u16 max_acl_mac_addrs;
 
 	u32 flags, regulatory_flags, features;
+	u8 ext_features[DIV_ROUND_UP(NUM_NL80211_EXT_FEATURES, 8)];
 
 	u32 ap_sme_capa;
 
@@ -5052,6 +5055,42 @@ void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev,
  */
 void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy);
 
+/**
+ * wiphy_ext_feature_set - set the extended feature flag
+ *
+ * @wiphy: the wiphy to modify.
+ * @ftidx: extended feature bit index.
+ *
+ * The extended features are flagged in multiple bytes (see
+ * &struct wiphy.@ext_features)
+ */
+static inline void wiphy_ext_feature_set(struct wiphy *wiphy,
+					 enum nl80211_ext_feature_index ftidx)
+{
+	u8 *ft_byte;
+
+	ft_byte = &wiphy->ext_features[ftidx / 8];
+	*ft_byte |= BIT(ftidx % 8);
+}
+
+/**
+ * wiphy_ext_feature_isset - check the extended feature flag
+ *
+ * @wiphy: the wiphy to modify.
+ * @ftidx: extended feature bit index.
+ *
+ * The extended features are flagged in multiple bytes (see
+ * &struct wiphy.@ext_features)
+ */
+static inline bool
+wiphy_ext_feature_isset(struct wiphy *wiphy,
+			enum nl80211_ext_feature_index ftidx)
+{
+	u8 ft_byte;
+
+	ft_byte = wiphy->ext_features[ftidx / 8];
+	return (ft_byte & BIT(ftidx % 8)) != 0;
+}
 
 /* ethtool helper */
 void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info);
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 54f391141351..f95d35483086 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1713,6 +1713,13 @@ enum nl80211_commands {
  *	obtained from it is coming from the device's wiphy and not the global
  *	cfg80211 regdomain.
  *
+ * @NL80211_ATTR_EXT_FEATURES: extended feature flags contained in a byte
+ *	array. The feature flags are identified by their bit index (see &enum
+ *	nl80211_ext_feature_index). The bit index is ordered starting at the
+ *	least-significant bit of the first byte in the array, ie. bit index 0
+ *	is located at bit 0 of byte 0. bit index 25 would be located at bit 1
+ *	of byte 3 (u8 array).
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2072,6 +2079,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_WIPHY_SELF_MANAGED_REG,
 
+	NL80211_ATTR_EXT_FEATURES,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -4223,6 +4232,19 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_ND_RANDOM_MAC_ADDR		= 1 << 31,
 };
 
+/**
+ * enum nl80211_ext_feature_index - bit index of extended features.
+ *
+ * @NUM_NL80211_EXT_FEATURES: number of extended features.
+ * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
+ */
+enum nl80211_ext_feature_index {
+
+	/* add new features before the definition below */
+	NUM_NL80211_EXT_FEATURES,
+	MAX_NL80211_EXT_FEATURES = NUM_NL80211_EXT_FEATURES - 1
+};
+
 /**
  * enum nl80211_probe_resp_offload_support_attr - optional supported
  *	protocols for probe-response offloading by the driver/FW.
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 702920134b34..689e1a8fd60a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1706,6 +1706,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
 		    nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG))
 			goto nla_put_failure;
 
+		if (nla_put(msg, NL80211_ATTR_EXT_FEATURES,
+			    sizeof(rdev->wiphy.ext_features),
+			    rdev->wiphy.ext_features))
+			goto nla_put_failure;
+
 		/* done */
 		state->split_start = 0;
 		break;
-- 
cgit v1.2.3-71-gd317


From 71b836eca7f380fbd4c025f8c4371f9a071bc909 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 23 Dec 2014 17:17:38 +0100
Subject: nl80211: define multicast group names in header

Put the group names into the userspace API header file so that
userspace clients can use symbolic names from there instead of
hardcoding the actual names. This doesn't really change much,
but seems somewhat cleaner.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  7 +++++++
 net/wireless/nl80211.c       | 12 ++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f95d35483086..7ba9404b290d 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -29,6 +29,13 @@
 
 #define NL80211_GENL_NAME "nl80211"
 
+#define NL80211_MULTICAST_GROUP_CONFIG		"config"
+#define NL80211_MULTICAST_GROUP_SCAN		"scan"
+#define NL80211_MULTICAST_GROUP_REG		"regulatory"
+#define NL80211_MULTICAST_GROUP_MLME		"mlme"
+#define NL80211_MULTICAST_GROUP_VENDOR		"vendor"
+#define NL80211_MULTICAST_GROUP_TESTMODE	"testmode"
+
 /**
  * DOC: Station handling
  *
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 689e1a8fd60a..049f505e5660 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -59,13 +59,13 @@ enum nl80211_multicast_groups {
 };
 
 static const struct genl_multicast_group nl80211_mcgrps[] = {
-	[NL80211_MCGRP_CONFIG] = { .name = "config", },
-	[NL80211_MCGRP_SCAN] = { .name = "scan", },
-	[NL80211_MCGRP_REGULATORY] = { .name = "regulatory", },
-	[NL80211_MCGRP_MLME] = { .name = "mlme", },
-	[NL80211_MCGRP_VENDOR] = { .name = "vendor", },
+	[NL80211_MCGRP_CONFIG] = { .name = NL80211_MULTICAST_GROUP_CONFIG },
+	[NL80211_MCGRP_SCAN] = { .name = NL80211_MULTICAST_GROUP_SCAN },
+	[NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG },
+	[NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME },
+	[NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR },
 #ifdef CONFIG_NL80211_TESTMODE
-	[NL80211_MCGRP_TESTMODE] = { .name = "testmode", }
+	[NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE }
 #endif
 };
 
-- 
cgit v1.2.3-71-gd317


From 4ed20bebf51578229a1986efcf46344075ec8447 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 14 Nov 2014 16:35:34 +0100
Subject: cfg80211: remove "channel" from survey names

All of the survey data is (currently) per channel anyway,
so having the word "channel" in the name does nothing. In
the next patch I'll introduce global data to the survey,
where the word "channel" is actually confusing.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/wmi.c         |  8 ++---
 drivers/net/wireless/ath/ath5k/mac80211-ops.c | 16 +++++-----
 drivers/net/wireless/ath/ath9k/link.c         | 16 +++++-----
 drivers/net/wireless/ath/carl9170/cmd.c       | 12 ++++----
 drivers/net/wireless/ath/carl9170/main.c      |  6 ++--
 drivers/net/wireless/mwifiex/cfg80211.c       |  7 +++--
 drivers/net/wireless/mwl8k.c                  | 12 ++++----
 drivers/net/wireless/p54/eeprom.c             |  6 ++--
 drivers/net/wireless/p54/main.c               |  8 ++---
 drivers/net/wireless/p54/txrx.c               | 12 ++++----
 drivers/net/wireless/rt2x00/rt2800lib.c       | 12 ++++----
 include/net/cfg80211.h                        | 44 +++++++++++++--------------
 include/uapi/linux/nl80211.h                  | 27 ++++++++++------
 net/mac80211/ethtool.c                        | 20 ++++++------
 net/wireless/nl80211.c                        | 30 +++++++++---------
 net/wireless/trace.h                          | 26 ++++++++--------
 16 files changed, 135 insertions(+), 127 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index c0f3e4d09263..721631c3dd3e 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -1344,11 +1344,11 @@ static void ath10k_wmi_event_chan_info(struct ath10k *ar, struct sk_buff *skb)
 		rx_clear_count -= ar->survey_last_rx_clear_count;
 
 		survey = &ar->survey[idx];
-		survey->channel_time = WMI_CHAN_INFO_MSEC(cycle_count);
-		survey->channel_time_rx = WMI_CHAN_INFO_MSEC(rx_clear_count);
+		survey->time = WMI_CHAN_INFO_MSEC(cycle_count);
+		survey->time_rx = WMI_CHAN_INFO_MSEC(rx_clear_count);
 		survey->noise = noise_floor;
-		survey->filled = SURVEY_INFO_CHANNEL_TIME |
-				 SURVEY_INFO_CHANNEL_TIME_RX |
+		survey->filled = SURVEY_INFO_TIME |
+				 SURVEY_INFO_TIME_RX |
 				 SURVEY_INFO_NOISE_DBM;
 	}
 
diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c
index 19eab2a69ad5..3b4a6463d87a 100644
--- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c
+++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c
@@ -672,10 +672,10 @@ ath5k_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey)
 	spin_lock_bh(&common->cc_lock);
 	ath_hw_cycle_counters_update(common);
 	if (cc->cycles > 0) {
-		ah->survey.channel_time += cc->cycles / div;
-		ah->survey.channel_time_busy += cc->rx_busy / div;
-		ah->survey.channel_time_rx += cc->rx_frame / div;
-		ah->survey.channel_time_tx += cc->tx_frame / div;
+		ah->survey.time += cc->cycles / div;
+		ah->survey.time_busy += cc->rx_busy / div;
+		ah->survey.time_rx += cc->rx_frame / div;
+		ah->survey.time_tx += cc->tx_frame / div;
 	}
 	memset(cc, 0, sizeof(*cc));
 	spin_unlock_bh(&common->cc_lock);
@@ -686,10 +686,10 @@ ath5k_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey)
 	survey->noise = ah->ah_noise_floor;
 	survey->filled = SURVEY_INFO_NOISE_DBM |
 			SURVEY_INFO_IN_USE |
-			SURVEY_INFO_CHANNEL_TIME |
-			SURVEY_INFO_CHANNEL_TIME_BUSY |
-			SURVEY_INFO_CHANNEL_TIME_RX |
-			SURVEY_INFO_CHANNEL_TIME_TX;
+			SURVEY_INFO_TIME |
+			SURVEY_INFO_TIME_BUSY |
+			SURVEY_INFO_TIME_RX |
+			SURVEY_INFO_TIME_TX;
 
 	return 0;
 }
diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c
index b829263e3d0a..90631d768a60 100644
--- a/drivers/net/wireless/ath/ath9k/link.c
+++ b/drivers/net/wireless/ath/ath9k/link.c
@@ -516,14 +516,14 @@ int ath_update_survey_stats(struct ath_softc *sc)
 		ath_hw_cycle_counters_update(common);
 
 	if (cc->cycles > 0) {
-		survey->filled |= SURVEY_INFO_CHANNEL_TIME |
-			SURVEY_INFO_CHANNEL_TIME_BUSY |
-			SURVEY_INFO_CHANNEL_TIME_RX |
-			SURVEY_INFO_CHANNEL_TIME_TX;
-		survey->channel_time += cc->cycles / div;
-		survey->channel_time_busy += cc->rx_busy / div;
-		survey->channel_time_rx += cc->rx_frame / div;
-		survey->channel_time_tx += cc->tx_frame / div;
+		survey->filled |= SURVEY_INFO_TIME |
+			SURVEY_INFO_TIME_BUSY |
+			SURVEY_INFO_TIME_RX |
+			SURVEY_INFO_TIME_TX;
+		survey->time += cc->cycles / div;
+		survey->time_busy += cc->rx_busy / div;
+		survey->time_rx += cc->rx_frame / div;
+		survey->time_tx += cc->tx_frame / div;
 	}
 
 	if (cc->cycles < div)
diff --git a/drivers/net/wireless/ath/carl9170/cmd.c b/drivers/net/wireless/ath/carl9170/cmd.c
index 39a63874b275..f2b4f537e4c1 100644
--- a/drivers/net/wireless/ath/carl9170/cmd.c
+++ b/drivers/net/wireless/ath/carl9170/cmd.c
@@ -188,12 +188,12 @@ int carl9170_collect_tally(struct ar9170 *ar)
 
 		if (ar->channel) {
 			info = &ar->survey[ar->channel->hw_value];
-			info->channel_time = ar->tally.active;
-			info->channel_time_busy = ar->tally.cca;
-			info->channel_time_tx = ar->tally.tx_time;
-			do_div(info->channel_time, 1000);
-			do_div(info->channel_time_busy, 1000);
-			do_div(info->channel_time_tx, 1000);
+			info->time = ar->tally.active;
+			info->time_busy = ar->tally.cca;
+			info->time_tx = ar->tally.tx_time;
+			do_div(info->time, 1000);
+			do_div(info->time_busy, 1000);
+			do_div(info->time_tx, 1000);
 		}
 	}
 	return 0;
diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c
index ef5b6dc7b7f1..f1455a04cb62 100644
--- a/drivers/net/wireless/ath/carl9170/main.c
+++ b/drivers/net/wireless/ath/carl9170/main.c
@@ -1690,9 +1690,9 @@ found:
 		survey->filled |= SURVEY_INFO_IN_USE;
 
 	if (ar->fw.hw_counters) {
-		survey->filled |= SURVEY_INFO_CHANNEL_TIME |
-				  SURVEY_INFO_CHANNEL_TIME_BUSY |
-				  SURVEY_INFO_CHANNEL_TIME_TX;
+		survey->filled |= SURVEY_INFO_TIME |
+				  SURVEY_INFO_TIME_BUSY |
+				  SURVEY_INFO_TIME_TX;
 	}
 
 	return 0;
diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index 4a66a6555366..8bd446b69658 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -1037,10 +1037,11 @@ mwifiex_cfg80211_dump_survey(struct wiphy *wiphy, struct net_device *dev,
 	survey->channel = ieee80211_get_channel(wiphy,
 	    ieee80211_channel_to_frequency(pchan_stats[idx].chan_num, band));
 	survey->filled = SURVEY_INFO_NOISE_DBM |
-		SURVEY_INFO_CHANNEL_TIME | SURVEY_INFO_CHANNEL_TIME_BUSY;
+			 SURVEY_INFO_TIME |
+			 SURVEY_INFO_TIME_BUSY;
 	survey->noise = pchan_stats[idx].noise;
-	survey->channel_time = pchan_stats[idx].cca_scan_dur;
-	survey->channel_time_busy = pchan_stats[idx].cca_busy_dur;
+	survey->time = pchan_stats[idx].cca_scan_dur;
+	survey->time_busy = pchan_stats[idx].cca_busy_dur;
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c
index b8d1e04aa9b9..f9b1218c761a 100644
--- a/drivers/net/wireless/mwl8k.c
+++ b/drivers/net/wireless/mwl8k.c
@@ -3098,14 +3098,14 @@ static void mwl8k_update_survey(struct mwl8k_priv *priv,
 
 	cca_cnt = ioread32(priv->regs + NOK_CCA_CNT_REG);
 	cca_cnt /= 1000; /* uSecs to mSecs */
-	survey->channel_time_busy = (u64) cca_cnt;
+	survey->time_busy = (u64) cca_cnt;
 
 	rx_rdy = ioread32(priv->regs + BBU_RXRDY_CNT_REG);
 	rx_rdy /= 1000; /* uSecs to mSecs */
-	survey->channel_time_rx = (u64) rx_rdy;
+	survey->time_rx = (u64) rx_rdy;
 
 	priv->channel_time = jiffies - priv->channel_time;
-	survey->channel_time = jiffies_to_msecs(priv->channel_time);
+	survey->time = jiffies_to_msecs(priv->channel_time);
 
 	survey->channel = channel;
 
@@ -3115,9 +3115,9 @@ static void mwl8k_update_survey(struct mwl8k_priv *priv,
 	survey->noise = nf * -1;
 
 	survey->filled = SURVEY_INFO_NOISE_DBM |
-			 SURVEY_INFO_CHANNEL_TIME |
-			 SURVEY_INFO_CHANNEL_TIME_BUSY |
-			 SURVEY_INFO_CHANNEL_TIME_RX;
+			 SURVEY_INFO_TIME |
+			 SURVEY_INFO_TIME_BUSY |
+			 SURVEY_INFO_TIME_RX;
 }
 
 /*
diff --git a/drivers/net/wireless/p54/eeprom.c b/drivers/net/wireless/p54/eeprom.c
index 0fe67d2da208..2fe713eda7ad 100644
--- a/drivers/net/wireless/p54/eeprom.c
+++ b/drivers/net/wireless/p54/eeprom.c
@@ -196,9 +196,9 @@ static int p54_generate_band(struct ieee80211_hw *dev,
 		dest->max_power = chan->max_power;
 		priv->survey[*chan_num].channel = &tmp->channels[j];
 		priv->survey[*chan_num].filled = SURVEY_INFO_NOISE_DBM |
-			SURVEY_INFO_CHANNEL_TIME |
-			SURVEY_INFO_CHANNEL_TIME_BUSY |
-			SURVEY_INFO_CHANNEL_TIME_TX;
+			SURVEY_INFO_TIME |
+			SURVEY_INFO_TIME_BUSY |
+			SURVEY_INFO_TIME_TX;
 		dest->hw_value = (*chan_num);
 		j++;
 		(*chan_num)++;
diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c
index 13a30c4a27f2..b9250d75d253 100644
--- a/drivers/net/wireless/p54/main.c
+++ b/drivers/net/wireless/p54/main.c
@@ -305,9 +305,9 @@ static void p54_reset_stats(struct p54_common *priv)
 		struct survey_info *info = &priv->survey[chan->hw_value];
 
 		/* only reset channel statistics, don't touch .filled, etc. */
-		info->channel_time = 0;
-		info->channel_time_busy = 0;
-		info->channel_time_tx = 0;
+		info->time = 0;
+		info->time_busy = 0;
+		info->time_tx = 0;
 	}
 
 	priv->update_stats = true;
@@ -636,7 +636,7 @@ static int p54_get_survey(struct ieee80211_hw *dev, int idx,
 
 		if (in_use) {
 			/* test if the reported statistics are valid. */
-			if  (survey->channel_time != 0) {
+			if  (survey->time != 0) {
 				survey->filled |= SURVEY_INFO_IN_USE;
 			} else {
 				/*
diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c
index 153c61539ec8..24e5ff9a9272 100644
--- a/drivers/net/wireless/p54/txrx.c
+++ b/drivers/net/wireless/p54/txrx.c
@@ -587,13 +587,13 @@ static void p54_rx_stats(struct p54_common *priv, struct sk_buff *skb)
 	if (chan) {
 		struct survey_info *survey = &priv->survey[chan->hw_value];
 		survey->noise = clamp(priv->noise, -128, 127);
-		survey->channel_time = priv->survey_raw.active;
-		survey->channel_time_tx = priv->survey_raw.tx;
-		survey->channel_time_busy = priv->survey_raw.tx +
+		survey->time = priv->survey_raw.active;
+		survey->time_tx = priv->survey_raw.tx;
+		survey->time_busy = priv->survey_raw.tx +
 			priv->survey_raw.cca;
-		do_div(survey->channel_time, 1024);
-		do_div(survey->channel_time_tx, 1024);
-		do_div(survey->channel_time_busy, 1024);
+		do_div(survey->time, 1024);
+		do_div(survey->time_tx, 1024);
+		do_div(survey->time_busy, 1024);
 	}
 
 	tmp = p54_find_and_unlink_skb(priv, hdr->req_id);
diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c
index 81ee481487cf..be2d54f257b1 100644
--- a/drivers/net/wireless/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/rt2x00/rt2800lib.c
@@ -8020,13 +8020,13 @@ int rt2800_get_survey(struct ieee80211_hw *hw, int idx,
 	rt2800_register_read(rt2x00dev, CH_BUSY_STA_SEC, &busy_ext);
 
 	if (idle || busy) {
-		survey->filled = SURVEY_INFO_CHANNEL_TIME |
-				 SURVEY_INFO_CHANNEL_TIME_BUSY |
-				 SURVEY_INFO_CHANNEL_TIME_EXT_BUSY;
+		survey->filled = SURVEY_INFO_TIME |
+				 SURVEY_INFO_TIME_BUSY |
+				 SURVEY_INFO_TIME_EXT_BUSY;
 
-		survey->channel_time = (idle + busy) / 1000;
-		survey->channel_time_busy = busy / 1000;
-		survey->channel_time_ext_busy = busy_ext / 1000;
+		survey->time = (idle + busy) / 1000;
+		survey->time_busy = busy / 1000;
+		survey->time_ext_busy = busy_ext / 1000;
 	}
 
 	if (!(hw->conf.flags & IEEE80211_CONF_OFFCHANNEL))
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f38645fb83b9..3b489f8fc4cd 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -520,23 +520,23 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef)
  *
  * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in
  * @SURVEY_INFO_IN_USE: channel is currently being used
- * @SURVEY_INFO_CHANNEL_TIME: channel active time (in ms) was filled in
- * @SURVEY_INFO_CHANNEL_TIME_BUSY: channel busy time was filled in
- * @SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: extension channel busy time was filled in
- * @SURVEY_INFO_CHANNEL_TIME_RX: channel receive time was filled in
- * @SURVEY_INFO_CHANNEL_TIME_TX: channel transmit time was filled in
+ * @SURVEY_INFO_TIME: active time (in ms) was filled in
+ * @SURVEY_INFO_TIME_BUSY: busy time was filled in
+ * @SURVEY_INFO_TIME_EXT_BUSY: extension channel busy time was filled in
+ * @SURVEY_INFO_TIME_RX: receive time was filled in
+ * @SURVEY_INFO_TIME_TX: transmit time was filled in
  *
  * Used by the driver to indicate which info in &struct survey_info
  * it has filled in during the get_survey().
  */
 enum survey_info_flags {
-	SURVEY_INFO_NOISE_DBM = 1<<0,
-	SURVEY_INFO_IN_USE = 1<<1,
-	SURVEY_INFO_CHANNEL_TIME = 1<<2,
-	SURVEY_INFO_CHANNEL_TIME_BUSY = 1<<3,
-	SURVEY_INFO_CHANNEL_TIME_EXT_BUSY = 1<<4,
-	SURVEY_INFO_CHANNEL_TIME_RX = 1<<5,
-	SURVEY_INFO_CHANNEL_TIME_TX = 1<<6,
+	SURVEY_INFO_NOISE_DBM		= BIT(0),
+	SURVEY_INFO_IN_USE		= BIT(1),
+	SURVEY_INFO_TIME		= BIT(2),
+	SURVEY_INFO_TIME_BUSY		= BIT(3),
+	SURVEY_INFO_TIME_EXT_BUSY	= BIT(4),
+	SURVEY_INFO_TIME_RX		= BIT(5),
+	SURVEY_INFO_TIME_TX		= BIT(6),
 };
 
 /**
@@ -546,11 +546,11 @@ enum survey_info_flags {
  * @filled: bitflag of flags from &enum survey_info_flags
  * @noise: channel noise in dBm. This and all following fields are
  *	optional
- * @channel_time: amount of time in ms the radio spent on the channel
- * @channel_time_busy: amount of time the primary channel was sensed busy
- * @channel_time_ext_busy: amount of time the extension channel was sensed busy
- * @channel_time_rx: amount of time the radio spent receiving data
- * @channel_time_tx: amount of time the radio spent transmitting data
+ * @time: amount of time in ms the radio was turn on (on the channel)
+ * @time_busy: amount of time the primary channel was sensed busy
+ * @time_ext_busy: amount of time the extension channel was sensed busy
+ * @time_rx: amount of time the radio spent receiving data
+ * @time_tx: amount of time the radio spent transmitting data
  *
  * Used by dump_survey() to report back per-channel survey information.
  *
@@ -559,11 +559,11 @@ enum survey_info_flags {
  */
 struct survey_info {
 	struct ieee80211_channel *channel;
-	u64 channel_time;
-	u64 channel_time_busy;
-	u64 channel_time_ext_busy;
-	u64 channel_time_rx;
-	u64 channel_time_tx;
+	u64 time;
+	u64 time_busy;
+	u64 time_ext_busy;
+	u64 time_rx;
+	u64 time_tx;
 	u32 filled;
 	s8 noise;
 };
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 7ba9404b290d..1a5acc80ab88 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2815,15 +2815,15 @@ enum nl80211_user_reg_hint_type {
  * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel
  * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm)
  * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used
- * @NL80211_SURVEY_INFO_CHANNEL_TIME: amount of time (in ms) that the radio
+ * @NL80211_SURVEY_INFO_TIME: amount of time (in ms) that the radio
  *	spent on this channel
- * @NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY: amount of the time the primary
+ * @NL80211_SURVEY_INFO_TIME_BUSY: amount of the time the primary
  *	channel was sensed busy (either due to activity or energy detect)
- * @NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: amount of time the extension
+ * @NL80211_SURVEY_INFO_TIME_EXT_BUSY: amount of time the extension
  *	channel was sensed busy
- * @NL80211_SURVEY_INFO_CHANNEL_TIME_RX: amount of time the radio spent
+ * @NL80211_SURVEY_INFO_TIME_RX: amount of time the radio spent
  *	receiving data
- * @NL80211_SURVEY_INFO_CHANNEL_TIME_TX: amount of time the radio spent
+ * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent
  *	transmitting data
  * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number
  *	currently defined
@@ -2834,17 +2834,24 @@ enum nl80211_survey_info {
 	NL80211_SURVEY_INFO_FREQUENCY,
 	NL80211_SURVEY_INFO_NOISE,
 	NL80211_SURVEY_INFO_IN_USE,
-	NL80211_SURVEY_INFO_CHANNEL_TIME,
-	NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY,
-	NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY,
-	NL80211_SURVEY_INFO_CHANNEL_TIME_RX,
-	NL80211_SURVEY_INFO_CHANNEL_TIME_TX,
+	NL80211_SURVEY_INFO_TIME,
+	NL80211_SURVEY_INFO_TIME_BUSY,
+	NL80211_SURVEY_INFO_TIME_EXT_BUSY,
+	NL80211_SURVEY_INFO_TIME_RX,
+	NL80211_SURVEY_INFO_TIME_TX,
 
 	/* keep last */
 	__NL80211_SURVEY_INFO_AFTER_LAST,
 	NL80211_SURVEY_INFO_MAX = __NL80211_SURVEY_INFO_AFTER_LAST - 1
 };
 
+/* keep old names for compatibility */
+#define NL80211_SURVEY_INFO_CHANNEL_TIME		NL80211_SURVEY_INFO_TIME
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY		NL80211_SURVEY_INFO_TIME_BUSY
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY	NL80211_SURVEY_INFO_TIME_EXT_BUSY
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_RX		NL80211_SURVEY_INFO_TIME_RX
+#define NL80211_SURVEY_INFO_CHANNEL_TIME_TX		NL80211_SURVEY_INFO_TIME_TX
+
 /**
  * enum nl80211_mntr_flags - monitor configuration flags
  *
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
index ebfc8091557b..eea742710c0a 100644
--- a/net/mac80211/ethtool.c
+++ b/net/mac80211/ethtool.c
@@ -175,24 +175,24 @@ do_survey:
 		data[i++] = (u8)survey.noise;
 	else
 		data[i++] = -1LL;
-	if (survey.filled & SURVEY_INFO_CHANNEL_TIME)
-		data[i++] = survey.channel_time;
+	if (survey.filled & SURVEY_INFO_TIME)
+		data[i++] = survey.time;
 	else
 		data[i++] = -1LL;
-	if (survey.filled & SURVEY_INFO_CHANNEL_TIME_BUSY)
-		data[i++] = survey.channel_time_busy;
+	if (survey.filled & SURVEY_INFO_TIME_BUSY)
+		data[i++] = survey.time_busy;
 	else
 		data[i++] = -1LL;
-	if (survey.filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY)
-		data[i++] = survey.channel_time_ext_busy;
+	if (survey.filled & SURVEY_INFO_TIME_EXT_BUSY)
+		data[i++] = survey.time_ext_busy;
 	else
 		data[i++] = -1LL;
-	if (survey.filled & SURVEY_INFO_CHANNEL_TIME_RX)
-		data[i++] = survey.channel_time_rx;
+	if (survey.filled & SURVEY_INFO_TIME_RX)
+		data[i++] = survey.time_rx;
 	else
 		data[i++] = -1LL;
-	if (survey.filled & SURVEY_INFO_CHANNEL_TIME_TX)
-		data[i++] = survey.channel_time_tx;
+	if (survey.filled & SURVEY_INFO_TIME_TX)
+		data[i++] = survey.time_tx;
 	else
 		data[i++] = -1LL;
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ad3e294acabe..94ab2014fefe 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6641,25 +6641,25 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
 	if ((survey->filled & SURVEY_INFO_IN_USE) &&
 	    nla_put_flag(msg, NL80211_SURVEY_INFO_IN_USE))
 		goto nla_put_failure;
-	if ((survey->filled & SURVEY_INFO_CHANNEL_TIME) &&
-	    nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME,
-			survey->channel_time))
+	if ((survey->filled & SURVEY_INFO_TIME) &&
+	    nla_put_u64(msg, NL80211_SURVEY_INFO_TIME,
+			survey->time))
 		goto nla_put_failure;
-	if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_BUSY) &&
-	    nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY,
-			survey->channel_time_busy))
+	if ((survey->filled & SURVEY_INFO_TIME_BUSY) &&
+	    nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_BUSY,
+			survey->time_busy))
 		goto nla_put_failure;
-	if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) &&
-	    nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY,
-			survey->channel_time_ext_busy))
+	if ((survey->filled & SURVEY_INFO_TIME_EXT_BUSY) &&
+	    nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_EXT_BUSY,
+			survey->time_ext_busy))
 		goto nla_put_failure;
-	if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_RX) &&
-	    nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_RX,
-			survey->channel_time_rx))
+	if ((survey->filled & SURVEY_INFO_TIME_RX) &&
+	    nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_RX,
+			survey->time_rx))
 		goto nla_put_failure;
-	if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_TX) &&
-	    nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_TX,
-			survey->channel_time_tx))
+	if ((survey->filled & SURVEY_INFO_TIME_TX) &&
+	    nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_TX,
+			survey->time_tx))
 		goto nla_put_failure;
 
 	nla_nest_end(msg, infoattr);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index ad38910f7036..bbb7afc264af 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1604,11 +1604,11 @@ TRACE_EVENT(rdev_return_int_survey_info,
 		WIPHY_ENTRY
 		CHAN_ENTRY
 		__field(int, ret)
-		__field(u64, channel_time)
-		__field(u64, channel_time_busy)
-		__field(u64, channel_time_ext_busy)
-		__field(u64, channel_time_rx)
-		__field(u64, channel_time_tx)
+		__field(u64, time)
+		__field(u64, time_busy)
+		__field(u64, time_ext_busy)
+		__field(u64, time_rx)
+		__field(u64, time_tx)
 		__field(u32, filled)
 		__field(s8, noise)
 	),
@@ -1616,11 +1616,11 @@ TRACE_EVENT(rdev_return_int_survey_info,
 		WIPHY_ASSIGN;
 		CHAN_ASSIGN(info->channel);
 		__entry->ret = ret;
-		__entry->channel_time = info->channel_time;
-		__entry->channel_time_busy = info->channel_time_busy;
-		__entry->channel_time_ext_busy = info->channel_time_ext_busy;
-		__entry->channel_time_rx = info->channel_time_rx;
-		__entry->channel_time_tx = info->channel_time_tx;
+		__entry->time = info->time;
+		__entry->time_busy = info->time_busy;
+		__entry->time_ext_busy = info->time_ext_busy;
+		__entry->time_rx = info->time_rx;
+		__entry->time_tx = info->time_tx;
 		__entry->filled = info->filled;
 		__entry->noise = info->noise;
 	),
@@ -1629,9 +1629,9 @@ TRACE_EVENT(rdev_return_int_survey_info,
 		  "channel time extension busy: %llu, channel time rx: %llu, "
 		  "channel time tx: %llu, filled: %u, noise: %d",
 		  WIPHY_PR_ARG, __entry->ret, CHAN_PR_ARG,
-		  __entry->channel_time, __entry->channel_time_busy,
-		  __entry->channel_time_ext_busy, __entry->channel_time_rx,
-		  __entry->channel_time_tx, __entry->filled, __entry->noise)
+		  __entry->time, __entry->time_busy,
+		  __entry->time_ext_busy, __entry->time_rx,
+		  __entry->time_tx, __entry->filled, __entry->noise)
 );
 
 TRACE_EVENT(rdev_tdls_oper,
-- 
cgit v1.2.3-71-gd317


From 11f78ac32b06648c1dde9371b70323168b51a83e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 14 Nov 2014 16:43:50 +0100
Subject: cfg80211: allow survey data to return global data

Not all devices are able to report survey data (particularly
time spent for various operations) per channel. As all these
statistics already exist in survey data, allow such devices
to report them (if userspace requested it)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  3 ++-
 include/uapi/linux/nl80211.h | 16 +++++++++++++---
 net/wireless/nl80211.c       | 31 ++++++++++++++++++-------------
 3 files changed, 33 insertions(+), 17 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 3b489f8fc4cd..5a861440c122 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -542,7 +542,8 @@ enum survey_info_flags {
 /**
  * struct survey_info - channel survey response
  *
- * @channel: the channel this survey record reports, mandatory
+ * @channel: the channel this survey record reports, may be %NULL for a single
+ *	record to report global statistics
  * @filled: bitflag of flags from &enum survey_info_flags
  * @noise: channel noise in dBm. This and all following fields are
  *	optional
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 1a5acc80ab88..5e8b65f239a5 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1727,6 +1727,14 @@ enum nl80211_commands {
  *	is located at bit 0 of byte 0. bit index 25 would be located at bit 1
  *	of byte 3 (u8 array).
  *
+ * @NL80211_ATTR_SURVEY_RADIO_STATS: Request overall radio statistics to be
+ *	returned along with other survey data. If set, @NL80211_CMD_GET_SURVEY
+ *	may return a survey entry without a channel indicating global radio
+ *	statistics (only some values are valid and make sense.)
+ *	For devices that don't return such an entry even then, the information
+ *	should be contained in the result as the sum of the respective counters
+ *	over all channels.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2088,6 +2096,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_EXT_FEATURES,
 
+	NL80211_ATTR_SURVEY_RADIO_STATS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -2816,15 +2826,15 @@ enum nl80211_user_reg_hint_type {
  * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm)
  * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used
  * @NL80211_SURVEY_INFO_TIME: amount of time (in ms) that the radio
- *	spent on this channel
+ *	was turned on (on channel or globally)
  * @NL80211_SURVEY_INFO_TIME_BUSY: amount of the time the primary
  *	channel was sensed busy (either due to activity or energy detect)
  * @NL80211_SURVEY_INFO_TIME_EXT_BUSY: amount of time the extension
  *	channel was sensed busy
  * @NL80211_SURVEY_INFO_TIME_RX: amount of time the radio spent
- *	receiving data
+ *	receiving data (on channel or globally)
  * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent
- *	transmitting data
+ *	transmitting data (on channel or globally)
  * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number
  *	currently defined
  * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 94ab2014fefe..9555ef9fd99e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6613,12 +6613,17 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
 }
 
 static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
-				int flags, struct net_device *dev,
-				struct survey_info *survey)
+			       int flags, struct net_device *dev,
+			       bool allow_radio_stats,
+			       struct survey_info *survey)
 {
 	void *hdr;
 	struct nlattr *infoattr;
 
+	/* skip radio stats if userspace didn't request them */
+	if (!survey->channel && !allow_radio_stats)
+		return 0;
+
 	hdr = nl80211hdr_put(msg, portid, seq, flags,
 			     NL80211_CMD_NEW_SURVEY_RESULTS);
 	if (!hdr)
@@ -6631,7 +6636,8 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
 	if (!infoattr)
 		goto nla_put_failure;
 
-	if (nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY,
+	if (survey->channel &&
+	    nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY,
 			survey->channel->center_freq))
 		goto nla_put_failure;
 
@@ -6671,19 +6677,22 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
 	return -EMSGSIZE;
 }
 
-static int nl80211_dump_survey(struct sk_buff *skb,
-			struct netlink_callback *cb)
+static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct survey_info survey;
 	struct cfg80211_registered_device *rdev;
 	struct wireless_dev *wdev;
 	int survey_idx = cb->args[2];
 	int res;
+	bool radio_stats;
 
 	res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
 	if (res)
 		return res;
 
+	/* prepare_wdev_dump parsed the attributes */
+	radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
+
 	if (!wdev->netdev) {
 		res = -EINVAL;
 		goto out_err;
@@ -6701,13 +6710,9 @@ static int nl80211_dump_survey(struct sk_buff *skb,
 		if (res)
 			goto out_err;
 
-		/* Survey without a channel doesn't make sense */
-		if (!survey.channel) {
-			res = -EINVAL;
-			goto out;
-		}
-
-		if (survey.channel->flags & IEEE80211_CHAN_DISABLED) {
+		/* don't send disabled channels, but do send non-channel data */
+		if (survey.channel &&
+		    survey.channel->flags & IEEE80211_CHAN_DISABLED) {
 			survey_idx++;
 			continue;
 		}
@@ -6715,7 +6720,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,
 		if (nl80211_send_survey(skb,
 				NETLINK_CB(cb->skb).portid,
 				cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				wdev->netdev, &survey) < 0)
+				wdev->netdev, radio_stats, &survey) < 0)
 			goto out;
 		survey_idx++;
 	}
-- 
cgit v1.2.3-71-gd317


From 052536abfa9144566599a7fbe8cc89e1086fa9a7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 14 Nov 2014 16:44:11 +0100
Subject: cfg80211: add scan time to survey data

Add the time spent scanning to the survey data so it can be
reported by drivers that collect such information.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 4 ++++
 include/uapi/linux/nl80211.h | 3 +++
 net/wireless/nl80211.c       | 4 ++++
 net/wireless/trace.h         | 7 +++++--
 4 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5a861440c122..f94f0d486d13 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -525,6 +525,7 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef)
  * @SURVEY_INFO_TIME_EXT_BUSY: extension channel busy time was filled in
  * @SURVEY_INFO_TIME_RX: receive time was filled in
  * @SURVEY_INFO_TIME_TX: transmit time was filled in
+ * @SURVEY_INFO_TIME_SCAN: scan time was filled in
  *
  * Used by the driver to indicate which info in &struct survey_info
  * it has filled in during the get_survey().
@@ -537,6 +538,7 @@ enum survey_info_flags {
 	SURVEY_INFO_TIME_EXT_BUSY	= BIT(4),
 	SURVEY_INFO_TIME_RX		= BIT(5),
 	SURVEY_INFO_TIME_TX		= BIT(6),
+	SURVEY_INFO_TIME_SCAN		= BIT(7),
 };
 
 /**
@@ -552,6 +554,7 @@ enum survey_info_flags {
  * @time_ext_busy: amount of time the extension channel was sensed busy
  * @time_rx: amount of time the radio spent receiving data
  * @time_tx: amount of time the radio spent transmitting data
+ * @time_scan: amount of time the radio spent for scanning
  *
  * Used by dump_survey() to report back per-channel survey information.
  *
@@ -565,6 +568,7 @@ struct survey_info {
 	u64 time_ext_busy;
 	u64 time_rx;
 	u64 time_tx;
+	u64 time_scan;
 	u32 filled;
 	s8 noise;
 };
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 5e8b65f239a5..2f549a253138 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2835,6 +2835,8 @@ enum nl80211_user_reg_hint_type {
  *	receiving data (on channel or globally)
  * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent
  *	transmitting data (on channel or globally)
+ * @NL80211_SURVEY_INFO_TIME_SCAN: time the radio spent for scan
+ *	(on this channel or globally)
  * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number
  *	currently defined
  * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use
@@ -2849,6 +2851,7 @@ enum nl80211_survey_info {
 	NL80211_SURVEY_INFO_TIME_EXT_BUSY,
 	NL80211_SURVEY_INFO_TIME_RX,
 	NL80211_SURVEY_INFO_TIME_TX,
+	NL80211_SURVEY_INFO_TIME_SCAN,
 
 	/* keep last */
 	__NL80211_SURVEY_INFO_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9555ef9fd99e..f56309bd21bd 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6667,6 +6667,10 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
 	    nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_TX,
 			survey->time_tx))
 		goto nla_put_failure;
+	if ((survey->filled & SURVEY_INFO_TIME_SCAN) &&
+	    nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_SCAN,
+			survey->time_scan))
+		goto nla_put_failure;
 
 	nla_nest_end(msg, infoattr);
 
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index bbb7afc264af..b17b3692f8c2 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1609,6 +1609,7 @@ TRACE_EVENT(rdev_return_int_survey_info,
 		__field(u64, time_ext_busy)
 		__field(u64, time_rx)
 		__field(u64, time_tx)
+		__field(u64, time_scan)
 		__field(u32, filled)
 		__field(s8, noise)
 	),
@@ -1621,17 +1622,19 @@ TRACE_EVENT(rdev_return_int_survey_info,
 		__entry->time_ext_busy = info->time_ext_busy;
 		__entry->time_rx = info->time_rx;
 		__entry->time_tx = info->time_tx;
+		__entry->time_scan = info->time_scan;
 		__entry->filled = info->filled;
 		__entry->noise = info->noise;
 	),
 	TP_printk(WIPHY_PR_FMT ", returned: %d, " CHAN_PR_FMT
 		  ", channel time: %llu, channel time busy: %llu, "
 		  "channel time extension busy: %llu, channel time rx: %llu, "
-		  "channel time tx: %llu, filled: %u, noise: %d",
+		  "channel time tx: %llu, scan time: %llu, filled: %u, noise: %d",
 		  WIPHY_PR_ARG, __entry->ret, CHAN_PR_ARG,
 		  __entry->time, __entry->time_busy,
 		  __entry->time_ext_busy, __entry->time_rx,
-		  __entry->time_tx, __entry->filled, __entry->noise)
+		  __entry->time_tx, __entry->time_scan,
+		  __entry->filled, __entry->noise)
 );
 
 TRACE_EVENT(rdev_tdls_oper,
-- 
cgit v1.2.3-71-gd317


From 319090bf6c75e3ad42a8c74973be5e78ae4f948f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 Nov 2014 14:08:11 +0100
Subject: cfg80211: remove enum station_info_flags

This is really just duplicating the list of information that's
already available in the nl80211 attribute, so remove the list.
Two small changes are needed:
 * remove STATION_INFO_ASSOC_REQ_IES complete, but the length
   (assoc_req_ies_len) can be used instead
 * add NL80211_STA_INFO_RX_DROP_MISC which exists internally
   but not in nl80211 yet

This gets rid of the duplicate maintenance of the two lists.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath6kl/cfg80211.c         |  14 +--
 drivers/net/wireless/ath/ath6kl/main.c             |   1 -
 drivers/net/wireless/ath/wil6210/cfg80211.c        |  18 +--
 drivers/net/wireless/ath/wil6210/wmi.c             |   1 -
 drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c |  11 +-
 drivers/net/wireless/libertas/cfg.c                |  12 +-
 drivers/net/wireless/mwifiex/cfg80211.c            |  10 +-
 drivers/net/wireless/mwifiex/uap_event.c           |   1 -
 drivers/net/wireless/rndis_wlan.c                  |   4 +-
 drivers/net/wireless/ti/wlcore/main.c              |   2 +-
 drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c  |   9 +-
 drivers/staging/wlan-ng/cfg80211.c                 |   4 +-
 include/net/cfg80211.h                             |  77 +-----------
 include/uapi/linux/nl80211.h                       |   3 +
 net/mac80211/ethtool.c                             |   6 +-
 net/mac80211/sta_info.c                            |  80 ++++++-------
 net/wireless/nl80211.c                             | 133 ++++++++-------------
 net/wireless/wext-compat.c                         |  10 +-
 18 files changed, 142 insertions(+), 254 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 7a5337877a0c..44dd6ef923cd 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -1799,20 +1799,20 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev,
 
 	if (vif->target_stats.rx_byte) {
 		sinfo->rx_bytes = vif->target_stats.rx_byte;
-		sinfo->filled |= STATION_INFO_RX_BYTES64;
+		sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64);
 		sinfo->rx_packets = vif->target_stats.rx_pkt;
-		sinfo->filled |= STATION_INFO_RX_PACKETS;
+		sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
 	}
 
 	if (vif->target_stats.tx_byte) {
 		sinfo->tx_bytes = vif->target_stats.tx_byte;
-		sinfo->filled |= STATION_INFO_TX_BYTES64;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64);
 		sinfo->tx_packets = vif->target_stats.tx_pkt;
-		sinfo->filled |= STATION_INFO_TX_PACKETS;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
 	}
 
 	sinfo->signal = vif->target_stats.cs_rssi;
-	sinfo->filled |= STATION_INFO_SIGNAL;
+	sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 
 	rate = vif->target_stats.tx_ucast_rate;
 
@@ -1844,12 +1844,12 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev,
 		return 0;
 	}
 
-	sinfo->filled |= STATION_INFO_TX_BITRATE;
+	sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
 
 	if (test_bit(CONNECTED, &vif->flags) &&
 	    test_bit(DTIM_PERIOD_AVAIL, &vif->flags) &&
 	    vif->nw_type == INFRA_NETWORK) {
-		sinfo->filled |= STATION_INFO_BSS_PARAM;
+		sinfo->filled |= BIT(NL80211_STA_INFO_BSS_PARAM);
 		sinfo->bss_param.flags = 0;
 		sinfo->bss_param.dtim_period = vif->assoc_bss_dtim_period;
 		sinfo->bss_param.beacon_interval = vif->assoc_bss_beacon_int;
diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c
index 933aef025698..b42ba46b5030 100644
--- a/drivers/net/wireless/ath/ath6kl/main.c
+++ b/drivers/net/wireless/ath/ath6kl/main.c
@@ -488,7 +488,6 @@ void ath6kl_connect_ap_mode_sta(struct ath6kl_vif *vif, u16 aid, u8 *mac_addr,
 
 	sinfo.assoc_req_ies = ies;
 	sinfo.assoc_req_ies_len = ies_len;
-	sinfo.filled |= STATION_INFO_ASSOC_REQ_IES;
 
 	cfg80211_new_sta(vif->ndev, mac_addr, &sinfo, GFP_KERNEL);
 
diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 38332a6dfb3a..e72a95d1ced6 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -142,14 +142,14 @@ int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid,
 
 	sinfo->generation = wil->sinfo_gen;
 
-	sinfo->filled = STATION_INFO_RX_BYTES |
-			STATION_INFO_TX_BYTES |
-			STATION_INFO_RX_PACKETS |
-			STATION_INFO_TX_PACKETS |
-			STATION_INFO_RX_BITRATE |
-			STATION_INFO_TX_BITRATE |
-			STATION_INFO_RX_DROP_MISC |
-			STATION_INFO_TX_FAILED;
+	sinfo->filled = BIT(NL80211_STA_INFO_RX_BYTES) |
+			BIT(NL80211_STA_INFO_TX_BYTES) |
+			BIT(NL80211_STA_INFO_RX_PACKETS) |
+			BIT(NL80211_STA_INFO_TX_PACKETS) |
+			BIT(NL80211_STA_INFO_RX_BITRATE) |
+			BIT(NL80211_STA_INFO_TX_BITRATE) |
+			BIT(NL80211_STA_INFO_RX_DROP_MISC) |
+			BIT(NL80211_STA_INFO_TX_FAILED);
 
 	sinfo->txrate.flags = RATE_INFO_FLAGS_MCS | RATE_INFO_FLAGS_60G;
 	sinfo->txrate.mcs = le16_to_cpu(reply.evt.bf_mcs);
@@ -163,7 +163,7 @@ int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid,
 	sinfo->tx_failed = stats->tx_errors;
 
 	if (test_bit(wil_status_fwconnected, &wil->status)) {
-		sinfo->filled |= STATION_INFO_SIGNAL;
+		sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 		sinfo->signal = reply.evt.sqi;
 	}
 
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index 63476c86cd0e..899754920955 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -457,7 +457,6 @@ static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len)
 		if (assoc_req_ie) {
 			sinfo.assoc_req_ies = assoc_req_ie;
 			sinfo.assoc_req_ies_len = assoc_req_ielen;
-			sinfo.filled |= STATION_INFO_ASSOC_REQ_IES;
 		}
 
 		cfg80211_new_sta(ndev, evt->bssid, &sinfo, GFP_KERNEL);
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c
index 3aecc5f48719..4a88b2381a68 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c
@@ -2333,10 +2333,10 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 			brcmf_err("GET STA INFO failed, %d\n", err);
 			goto done;
 		}
-		sinfo->filled = STATION_INFO_INACTIVE_TIME;
+		sinfo->filled = BIT(NL80211_STA_INFO_INACTIVE_TIME);
 		sinfo->inactive_time = le32_to_cpu(sta_info_le.idle) * 1000;
 		if (le32_to_cpu(sta_info_le.flags) & BRCMF_STA_ASSOC) {
-			sinfo->filled |= STATION_INFO_CONNECTED_TIME;
+			sinfo->filled |= BIT(NL80211_STA_INFO_CONNECTED_TIME);
 			sinfo->connected_time = le32_to_cpu(sta_info_le.in);
 		}
 		brcmf_dbg(TRACE, "STA idle time : %d ms, connected time :%d sec\n",
@@ -2354,7 +2354,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 			brcmf_err("Could not get rate (%d)\n", err);
 			goto done;
 		} else {
-			sinfo->filled |= STATION_INFO_TX_BITRATE;
+			sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
 			sinfo->txrate.legacy = rate * 5;
 			brcmf_dbg(CONN, "Rate %d Mbps\n", rate / 2);
 		}
@@ -2369,7 +2369,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 				goto done;
 			} else {
 				rssi = le32_to_cpu(scb_val.val);
-				sinfo->filled |= STATION_INFO_SIGNAL;
+				sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 				sinfo->signal = rssi;
 				brcmf_dbg(CONN, "RSSI %d dBm\n", rssi);
 			}
@@ -2396,7 +2396,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 				brcmf_dbg(CONN, "DTIM peroid %d\n",
 					  dtim_period);
 			}
-			sinfo->filled |= STATION_INFO_BSS_PARAM;
+			sinfo->filled |= BIT(NL80211_STA_INFO_BSS_PARAM);
 		}
 	} else
 		err = -EPERM;
@@ -4778,7 +4778,6 @@ brcmf_notify_connect_status_ap(struct brcmf_cfg80211_info *cfg,
 	if (((event == BRCMF_E_ASSOC_IND) || (event == BRCMF_E_REASSOC_IND)) &&
 	    (reason == BRCMF_E_STATUS_SUCCESS)) {
 		memset(&sinfo, 0, sizeof(sinfo));
-		sinfo.filled = STATION_INFO_ASSOC_REQ_IES;
 		if (!data) {
 			brcmf_err("No IEs present in ASSOC/REASSOC_IND");
 			return -EINVAL;
diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c
index 34f09ef90bb3..a92985a6ea21 100644
--- a/drivers/net/wireless/libertas/cfg.c
+++ b/drivers/net/wireless/libertas/cfg.c
@@ -1616,10 +1616,10 @@ static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev,
 
 	lbs_deb_enter(LBS_DEB_CFG80211);
 
-	sinfo->filled |= STATION_INFO_TX_BYTES |
-			 STATION_INFO_TX_PACKETS |
-			 STATION_INFO_RX_BYTES |
-			 STATION_INFO_RX_PACKETS;
+	sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES) |
+			 BIT(NL80211_STA_INFO_TX_PACKETS) |
+			 BIT(NL80211_STA_INFO_RX_BYTES) |
+			 BIT(NL80211_STA_INFO_RX_PACKETS);
 	sinfo->tx_bytes = priv->dev->stats.tx_bytes;
 	sinfo->tx_packets = priv->dev->stats.tx_packets;
 	sinfo->rx_bytes = priv->dev->stats.rx_bytes;
@@ -1629,14 +1629,14 @@ static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev,
 	ret = lbs_get_rssi(priv, &signal, &noise);
 	if (ret == 0) {
 		sinfo->signal = signal;
-		sinfo->filled |= STATION_INFO_SIGNAL;
+		sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 	}
 
 	/* Convert priv->cur_rate from hw_value to NL80211 value */
 	for (i = 0; i < ARRAY_SIZE(lbs_rates); i++) {
 		if (priv->cur_rate == lbs_rates[i].hw_value) {
 			sinfo->txrate.legacy = lbs_rates[i].bitrate;
-			sinfo->filled |= STATION_INFO_TX_BITRATE;
+			sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
 			break;
 		}
 	}
diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index 8bd446b69658..71312ff52703 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -910,10 +910,10 @@ mwifiex_dump_station_info(struct mwifiex_private *priv,
 {
 	u32 rate;
 
-	sinfo->filled = STATION_INFO_RX_BYTES | STATION_INFO_TX_BYTES |
-			STATION_INFO_RX_PACKETS | STATION_INFO_TX_PACKETS |
-			STATION_INFO_TX_BITRATE |
-			STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG;
+	sinfo->filled = BIT(NL80211_STA_INFO_RX_BYTES) | BIT(NL80211_STA_INFO_TX_BYTES) |
+			BIT(NL80211_STA_INFO_RX_PACKETS) | BIT(NL80211_STA_INFO_TX_PACKETS) |
+			BIT(NL80211_STA_INFO_TX_BITRATE) |
+			BIT(NL80211_STA_INFO_SIGNAL) | BIT(NL80211_STA_INFO_SIGNAL_AVG);
 
 	/* Get signal information from the firmware */
 	if (mwifiex_send_cmd(priv, HostCmd_CMD_RSSI_INFO,
@@ -944,7 +944,7 @@ mwifiex_dump_station_info(struct mwifiex_private *priv,
 	sinfo->txrate.legacy = rate * 5;
 
 	if (priv->bss_mode == NL80211_IFTYPE_STATION) {
-		sinfo->filled |= STATION_INFO_BSS_PARAM;
+		sinfo->filled |= BIT(NL80211_STA_INFO_BSS_PARAM);
 		sinfo->bss_param.flags = 0;
 		if (priv->curr_bss_params.bss_descriptor.cap_info_bitmap &
 						WLAN_CAPABILITY_SHORT_PREAMBLE)
diff --git a/drivers/net/wireless/mwifiex/uap_event.c b/drivers/net/wireless/mwifiex/uap_event.c
index c54a537e31fb..3b3a970e2086 100644
--- a/drivers/net/wireless/mwifiex/uap_event.c
+++ b/drivers/net/wireless/mwifiex/uap_event.c
@@ -68,7 +68,6 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv)
 				len = ETH_ALEN;
 
 			if (len != -1) {
-				sinfo.filled = STATION_INFO_ASSOC_REQ_IES;
 				sinfo.assoc_req_ies = &event->data[len];
 				len = (u8 *)sinfo.assoc_req_ies -
 				      (u8 *)&event->frame_control;
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 1a4facd1fbf3..60d44ce9c017 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -2478,7 +2478,7 @@ static void rndis_fill_station_info(struct usbnet *usbdev,
 	ret = rndis_query_oid(usbdev, RNDIS_OID_GEN_LINK_SPEED, &linkspeed, &len);
 	if (ret == 0) {
 		sinfo->txrate.legacy = le32_to_cpu(linkspeed) / 1000;
-		sinfo->filled |= STATION_INFO_TX_BITRATE;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
 	}
 
 	len = sizeof(rssi);
@@ -2486,7 +2486,7 @@ static void rndis_fill_station_info(struct usbnet *usbdev,
 			      &rssi, &len);
 	if (ret == 0) {
 		sinfo->signal = level_to_qual(le32_to_cpu(rssi));
-		sinfo->filled |= STATION_INFO_SIGNAL;
+		sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 	}
 }
 
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 8d11b0ca412c..a2133b1fd631 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -5401,7 +5401,7 @@ static void wlcore_op_sta_statistics(struct ieee80211_hw *hw,
 	if (ret < 0)
 		goto out_sleep;
 
-	sinfo->filled |= STATION_INFO_SIGNAL;
+	sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 	sinfo->signal = rssi_dbm;
 
 out_sleep:
diff --git a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c
index 3d26955da724..c76874d72a22 100644
--- a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c
@@ -1092,17 +1092,17 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy,
 			goto exit;
 		}
 
-		sinfo->filled |= STATION_INFO_SIGNAL;
+		sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 		sinfo->signal = translate_percentage_to_dbm(padapter->recvpriv.
 							    signal_strength);
 
-		sinfo->filled |= STATION_INFO_TX_BITRATE;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
 		sinfo->txrate.legacy = rtw_get_cur_max_rate(padapter);
 
-		sinfo->filled |= STATION_INFO_RX_PACKETS;
+		sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
 		sinfo->rx_packets = sta_rx_data_pkts(psta);
 
-		sinfo->filled |= STATION_INFO_TX_PACKETS;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
 		sinfo->tx_packets = psta->sta_stats.tx_pkts;
 	}
 
@@ -2365,7 +2365,6 @@ void rtw_cfg80211_indicate_sta_assoc(struct rtw_adapter *padapter,
 					     u.reassoc_req.variable);
 
 		sinfo.filled = 0;
-		sinfo.filled = STATION_INFO_ASSOC_REQ_IES;
 		sinfo.assoc_req_ies = pmgmt_frame + ie_offset;
 		sinfo.assoc_req_ies_len = frame_len - ie_offset;
 		cfg80211_new_sta(ndev, hdr->addr2, &sinfo, GFP_ATOMIC);
diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c
index 8942dcb44180..7c87aecf4744 100644
--- a/drivers/staging/wlan-ng/cfg80211.c
+++ b/drivers/staging/wlan-ng/cfg80211.c
@@ -325,9 +325,9 @@ static int prism2_get_station(struct wiphy *wiphy, struct net_device *dev,
 
 	if (result == 0) {
 		sinfo->txrate.legacy = quality.txrate.data;
-		sinfo->filled |= STATION_INFO_TX_BITRATE;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
 		sinfo->signal = quality.level.data;
-		sinfo->filled |= STATION_INFO_SIGNAL;
+		sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 	}
 
 	return result;
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 42e3d74f1906..91c133626c32 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -865,75 +865,6 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
 				  struct station_parameters *params,
 				  enum cfg80211_station_type statype);
 
-/**
- * enum station_info_flags - station information flags
- *
- * Used by the driver to indicate which info in &struct station_info
- * it has filled in during get_station() or dump_station().
- *
- * @STATION_INFO_INACTIVE_TIME: @inactive_time filled
- * @STATION_INFO_RX_BYTES: @rx_bytes filled
- * @STATION_INFO_TX_BYTES: @tx_bytes filled
- * @STATION_INFO_RX_BYTES64: @rx_bytes filled with 64-bit value
- * @STATION_INFO_TX_BYTES64: @tx_bytes filled with 64-bit value
- * @STATION_INFO_LLID: @llid filled
- * @STATION_INFO_PLID: @plid filled
- * @STATION_INFO_PLINK_STATE: @plink_state filled
- * @STATION_INFO_SIGNAL: @signal filled
- * @STATION_INFO_TX_BITRATE: @txrate fields are filled
- *	(tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs)
- * @STATION_INFO_RX_PACKETS: @rx_packets filled with 32-bit value
- * @STATION_INFO_TX_PACKETS: @tx_packets filled with 32-bit value
- * @STATION_INFO_TX_RETRIES: @tx_retries filled
- * @STATION_INFO_TX_FAILED: @tx_failed filled
- * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled
- * @STATION_INFO_SIGNAL_AVG: @signal_avg filled
- * @STATION_INFO_RX_BITRATE: @rxrate fields are filled
- * @STATION_INFO_BSS_PARAM: @bss_param filled
- * @STATION_INFO_CONNECTED_TIME: @connected_time filled
- * @STATION_INFO_ASSOC_REQ_IES: @assoc_req_ies filled
- * @STATION_INFO_STA_FLAGS: @sta_flags filled
- * @STATION_INFO_BEACON_LOSS_COUNT: @beacon_loss_count filled
- * @STATION_INFO_T_OFFSET: @t_offset filled
- * @STATION_INFO_LOCAL_PM: @local_pm filled
- * @STATION_INFO_PEER_PM: @peer_pm filled
- * @STATION_INFO_NONPEER_PM: @nonpeer_pm filled
- * @STATION_INFO_CHAIN_SIGNAL: @chain_signal filled
- * @STATION_INFO_CHAIN_SIGNAL_AVG: @chain_signal_avg filled
- * @STATION_INFO_EXPECTED_THROUGHPUT: @expected_throughput filled
- */
-enum station_info_flags {
-	STATION_INFO_INACTIVE_TIME		= BIT(0),
-	STATION_INFO_RX_BYTES			= BIT(1),
-	STATION_INFO_TX_BYTES			= BIT(2),
-	STATION_INFO_LLID			= BIT(3),
-	STATION_INFO_PLID			= BIT(4),
-	STATION_INFO_PLINK_STATE		= BIT(5),
-	STATION_INFO_SIGNAL			= BIT(6),
-	STATION_INFO_TX_BITRATE			= BIT(7),
-	STATION_INFO_RX_PACKETS			= BIT(8),
-	STATION_INFO_TX_PACKETS			= BIT(9),
-	STATION_INFO_TX_RETRIES			= BIT(10),
-	STATION_INFO_TX_FAILED			= BIT(11),
-	STATION_INFO_RX_DROP_MISC		= BIT(12),
-	STATION_INFO_SIGNAL_AVG			= BIT(13),
-	STATION_INFO_RX_BITRATE			= BIT(14),
-	STATION_INFO_BSS_PARAM			= BIT(15),
-	STATION_INFO_CONNECTED_TIME		= BIT(16),
-	STATION_INFO_ASSOC_REQ_IES		= BIT(17),
-	STATION_INFO_STA_FLAGS			= BIT(18),
-	STATION_INFO_BEACON_LOSS_COUNT		= BIT(19),
-	STATION_INFO_T_OFFSET			= BIT(20),
-	STATION_INFO_LOCAL_PM			= BIT(21),
-	STATION_INFO_PEER_PM			= BIT(22),
-	STATION_INFO_NONPEER_PM			= BIT(23),
-	STATION_INFO_RX_BYTES64			= BIT(24),
-	STATION_INFO_TX_BYTES64			= BIT(25),
-	STATION_INFO_CHAIN_SIGNAL		= BIT(26),
-	STATION_INFO_CHAIN_SIGNAL_AVG		= BIT(27),
-	STATION_INFO_EXPECTED_THROUGHPUT	= BIT(28),
-};
-
 /**
  * enum station_info_rate_flags - bitrate info flags
  *
@@ -1015,7 +946,8 @@ struct sta_bss_parameters {
  *
  * Station information filled by driver for get_station() and dump_station.
  *
- * @filled: bitflag of flags from &enum station_info_flags
+ * @filled: bitflag of flags using the bits of &enum nl80211_sta_info to
+ *	indicate the relevant values in this struct for them
  * @connected_time: time(in secs) since a station is last connected
  * @inactive_time: time since last station activity (tx/rx) in milliseconds
  * @rx_bytes: bytes received from this station
@@ -1094,11 +1026,6 @@ struct station_info {
 	enum nl80211_mesh_power_mode nonpeer_pm;
 
 	u32 expected_throughput;
-
-	/*
-	 * Note: Add a new enum station_info_flags value for each new field and
-	 * use it to check which fields are initialized.
-	 */
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 2f549a253138..e48ca0bbd07b 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2373,6 +2373,8 @@ enum nl80211_sta_bss_param {
  *	Same format as NL80211_STA_INFO_CHAIN_SIGNAL.
  * @NL80211_STA_EXPECTED_THROUGHPUT: expected throughput considering also the
  *	802.11 header (u32, kbps)
+ * @NL80211_STA_INFO_RX_DROP_MISC: RX packets dropped for unspecified reasons
+ *	(u64)
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -2405,6 +2407,7 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_CHAIN_SIGNAL,
 	NL80211_STA_INFO_CHAIN_SIGNAL_AVG,
 	NL80211_STA_INFO_EXPECTED_THROUGHPUT,
+	NL80211_STA_INFO_RX_DROP_MISC,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
index eea742710c0a..52bcea6ad9e8 100644
--- a/net/mac80211/ethtool.c
+++ b/net/mac80211/ethtool.c
@@ -117,16 +117,16 @@ static void ieee80211_get_stats(struct net_device *dev,
 		data[i++] = sta->sta_state;
 
 
-		if (sinfo.filled & STATION_INFO_TX_BITRATE)
+		if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))
 			data[i] = 100000 *
 				cfg80211_calculate_bitrate(&sinfo.txrate);
 		i++;
-		if (sinfo.filled & STATION_INFO_RX_BITRATE)
+		if (sinfo.filled & BIT(NL80211_STA_INFO_RX_BITRATE))
 			data[i] = 100000 *
 				cfg80211_calculate_bitrate(&sinfo.rxrate);
 		i++;
 
-		if (sinfo.filled & STATION_INFO_SIGNAL_AVG)
+		if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))
 			data[i] = (u8)sinfo.signal_avg;
 		i++;
 	} else {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 967b42eae5c2..64b53b943d98 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1756,51 +1756,51 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 
 	drv_sta_statistics(local, sdata, &sta->sta, sinfo);
 
-	sinfo->filled |= STATION_INFO_INACTIVE_TIME |
-			 STATION_INFO_STA_FLAGS |
-			 STATION_INFO_BSS_PARAM |
-			 STATION_INFO_CONNECTED_TIME |
-			 STATION_INFO_RX_DROP_MISC |
-			 STATION_INFO_BEACON_LOSS_COUNT;
+	sinfo->filled |= BIT(NL80211_STA_INFO_INACTIVE_TIME) |
+			 BIT(NL80211_STA_INFO_STA_FLAGS) |
+			 BIT(NL80211_STA_INFO_BSS_PARAM) |
+			 BIT(NL80211_STA_INFO_CONNECTED_TIME) |
+			 BIT(NL80211_STA_INFO_RX_DROP_MISC) |
+			 BIT(NL80211_STA_INFO_BEACON_LOSS);
 
 	ktime_get_ts(&uptime);
 	sinfo->connected_time = uptime.tv_sec - sta->last_connected;
 	sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
 
-	if (!(sinfo->filled & (STATION_INFO_TX_BYTES64 |
-			       STATION_INFO_TX_BYTES))) {
+	if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) |
+			       BIT(NL80211_STA_INFO_TX_BYTES)))) {
 		sinfo->tx_bytes = 0;
 		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
 			sinfo->tx_bytes += sta->tx_bytes[ac];
-		sinfo->filled |= STATION_INFO_TX_BYTES64;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64);
 	}
 
-	if (!(sinfo->filled & STATION_INFO_TX_PACKETS)) {
+	if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) {
 		sinfo->tx_packets = 0;
 		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
 			sinfo->tx_packets += sta->tx_packets[ac];
-		sinfo->filled |= STATION_INFO_TX_PACKETS;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
 	}
 
-	if (!(sinfo->filled & (STATION_INFO_RX_BYTES64 |
-			       STATION_INFO_RX_BYTES))) {
+	if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) |
+			       BIT(NL80211_STA_INFO_RX_BYTES)))) {
 		sinfo->rx_bytes = sta->rx_bytes;
-		sinfo->filled |= STATION_INFO_RX_BYTES64;
+		sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64);
 	}
 
-	if (!(sinfo->filled & STATION_INFO_RX_PACKETS)) {
+	if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) {
 		sinfo->rx_packets = sta->rx_packets;
-		sinfo->filled |= STATION_INFO_RX_PACKETS;
+		sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
 	}
 
-	if (!(sinfo->filled & STATION_INFO_TX_RETRIES)) {
+	if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) {
 		sinfo->tx_retries = sta->tx_retry_count;
-		sinfo->filled |= STATION_INFO_TX_RETRIES;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES);
 	}
 
-	if (!(sinfo->filled & STATION_INFO_TX_FAILED)) {
+	if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) {
 		sinfo->tx_failed = sta->tx_retry_failed;
-		sinfo->filled |= STATION_INFO_TX_FAILED;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED);
 	}
 
 	sinfo->rx_dropped_misc = sta->rx_dropped;
@@ -1808,22 +1808,22 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 
 	if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
 	    (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
-		if (!(sinfo->filled & STATION_INFO_SIGNAL)) {
+		if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) {
 			sinfo->signal = (s8)sta->last_signal;
-			sinfo->filled |= STATION_INFO_SIGNAL;
+			sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 		}
 
-		if (!(sinfo->filled & STATION_INFO_SIGNAL_AVG)) {
+		if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) {
 			sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal);
-			sinfo->filled |= STATION_INFO_SIGNAL_AVG;
+			sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG);
 		}
 	}
 
 	if (sta->chains &&
-	    !(sinfo->filled & (STATION_INFO_CHAIN_SIGNAL |
-			       STATION_INFO_CHAIN_SIGNAL_AVG))) {
-		sinfo->filled |= STATION_INFO_CHAIN_SIGNAL |
-				 STATION_INFO_CHAIN_SIGNAL_AVG;
+	    !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) |
+			       BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
+		sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) |
+				 BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
 
 		sinfo->chains = sta->chains;
 		for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) {
@@ -1833,30 +1833,30 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 		}
 	}
 
-	if (!(sinfo->filled & STATION_INFO_TX_BITRATE)) {
+	if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) {
 		sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate);
-		sinfo->filled |= STATION_INFO_TX_BITRATE;
+		sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
 	}
 
-	if (!(sinfo->filled & STATION_INFO_RX_BITRATE)) {
+	if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE))) {
 		sta_set_rate_info_rx(sta, &sinfo->rxrate);
-		sinfo->filled |= STATION_INFO_RX_BITRATE;
+		sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE);
 	}
 
 	if (ieee80211_vif_is_mesh(&sdata->vif)) {
 #ifdef CONFIG_MAC80211_MESH
-		sinfo->filled |= STATION_INFO_LLID |
-				 STATION_INFO_PLID |
-				 STATION_INFO_PLINK_STATE |
-				 STATION_INFO_LOCAL_PM |
-				 STATION_INFO_PEER_PM |
-				 STATION_INFO_NONPEER_PM;
+		sinfo->filled |= BIT(NL80211_STA_INFO_LLID) |
+				 BIT(NL80211_STA_INFO_PLID) |
+				 BIT(NL80211_STA_INFO_PLINK_STATE) |
+				 BIT(NL80211_STA_INFO_LOCAL_PM) |
+				 BIT(NL80211_STA_INFO_PEER_PM) |
+				 BIT(NL80211_STA_INFO_NONPEER_PM);
 
 		sinfo->llid = sta->llid;
 		sinfo->plid = sta->plid;
 		sinfo->plink_state = sta->plink_state;
 		if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) {
-			sinfo->filled |= STATION_INFO_T_OFFSET;
+			sinfo->filled |= BIT(NL80211_STA_INFO_T_OFFSET);
 			sinfo->t_offset = sta->t_offset;
 		}
 		sinfo->local_pm = sta->local_pm;
@@ -1905,7 +1905,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 		thr = drv_get_expected_throughput(local, &sta->sta);
 
 	if (thr != 0) {
-		sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT;
+		sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
 		sinfo->expected_throughput = thr;
 	}
 }
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a75dc91976d3..68faf8a2aa43 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3671,115 +3671,77 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 	sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO);
 	if (!sinfoattr)
 		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_CONNECTED_TIME) &&
-	    nla_put_u32(msg, NL80211_STA_INFO_CONNECTED_TIME,
-			sinfo->connected_time))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_INACTIVE_TIME) &&
-	    nla_put_u32(msg, NL80211_STA_INFO_INACTIVE_TIME,
-			sinfo->inactive_time))
-		goto nla_put_failure;
-	if ((sinfo->filled & (STATION_INFO_RX_BYTES |
-			      STATION_INFO_RX_BYTES64)) &&
+
+#define PUT_SINFO(attr, memb, type) do {				\
+	if (sinfo->filled & BIT(NL80211_STA_INFO_ ## attr) &&		\
+	    nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr,		\
+			     sinfo->memb))				\
+		goto nla_put_failure;					\
+	} while (0)
+
+	PUT_SINFO(CONNECTED_TIME, connected_time, u32);
+	PUT_SINFO(INACTIVE_TIME, inactive_time, u32);
+
+	if (sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES) |
+			     BIT(NL80211_STA_INFO_RX_BYTES64)) &&
 	    nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES,
 			(u32)sinfo->rx_bytes))
 		goto nla_put_failure;
-	if ((sinfo->filled & (STATION_INFO_TX_BYTES |
-			      STATION_INFO_TX_BYTES64)) &&
+
+	if (sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES) |
+			     BIT(NL80211_STA_INFO_TX_BYTES64)) &&
 	    nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES,
 			(u32)sinfo->tx_bytes))
 		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_RX_BYTES64) &&
-	    nla_put_u64(msg, NL80211_STA_INFO_RX_BYTES64,
-			sinfo->rx_bytes))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_TX_BYTES64) &&
-	    nla_put_u64(msg, NL80211_STA_INFO_TX_BYTES64,
-			sinfo->tx_bytes))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_LLID) &&
-	    nla_put_u16(msg, NL80211_STA_INFO_LLID, sinfo->llid))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_PLID) &&
-	    nla_put_u16(msg, NL80211_STA_INFO_PLID, sinfo->plid))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_PLINK_STATE) &&
-	    nla_put_u8(msg, NL80211_STA_INFO_PLINK_STATE,
-		       sinfo->plink_state))
-		goto nla_put_failure;
+
+	PUT_SINFO(RX_BYTES64, rx_bytes, u64);
+	PUT_SINFO(TX_BYTES64, tx_bytes, u64);
+	PUT_SINFO(LLID, llid, u16);
+	PUT_SINFO(PLID, plid, u16);
+	PUT_SINFO(PLINK_STATE, plink_state, u8);
+
 	switch (rdev->wiphy.signal_type) {
 	case CFG80211_SIGNAL_TYPE_MBM:
-		if ((sinfo->filled & STATION_INFO_SIGNAL) &&
-		    nla_put_u8(msg, NL80211_STA_INFO_SIGNAL,
-			       sinfo->signal))
-			goto nla_put_failure;
-		if ((sinfo->filled & STATION_INFO_SIGNAL_AVG) &&
-		    nla_put_u8(msg, NL80211_STA_INFO_SIGNAL_AVG,
-			       sinfo->signal_avg))
-			goto nla_put_failure;
+		PUT_SINFO(SIGNAL, signal, u8);
+		PUT_SINFO(SIGNAL_AVG, signal_avg, u8);
 		break;
 	default:
 		break;
 	}
-	if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL) {
+	if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL)) {
 		if (!nl80211_put_signal(msg, sinfo->chains,
 					sinfo->chain_signal,
 					NL80211_STA_INFO_CHAIN_SIGNAL))
 			goto nla_put_failure;
 	}
-	if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL_AVG) {
+	if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) {
 		if (!nl80211_put_signal(msg, sinfo->chains,
 					sinfo->chain_signal_avg,
 					NL80211_STA_INFO_CHAIN_SIGNAL_AVG))
 			goto nla_put_failure;
 	}
-	if (sinfo->filled & STATION_INFO_TX_BITRATE) {
+	if (sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE)) {
 		if (!nl80211_put_sta_rate(msg, &sinfo->txrate,
 					  NL80211_STA_INFO_TX_BITRATE))
 			goto nla_put_failure;
 	}
-	if (sinfo->filled & STATION_INFO_RX_BITRATE) {
+	if (sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE)) {
 		if (!nl80211_put_sta_rate(msg, &sinfo->rxrate,
 					  NL80211_STA_INFO_RX_BITRATE))
 			goto nla_put_failure;
 	}
-	if ((sinfo->filled & STATION_INFO_RX_PACKETS) &&
-	    nla_put_u32(msg, NL80211_STA_INFO_RX_PACKETS,
-			sinfo->rx_packets))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_TX_PACKETS) &&
-	    nla_put_u32(msg, NL80211_STA_INFO_TX_PACKETS,
-			sinfo->tx_packets))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_TX_RETRIES) &&
-	    nla_put_u32(msg, NL80211_STA_INFO_TX_RETRIES,
-			sinfo->tx_retries))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_TX_FAILED) &&
-	    nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED,
-			sinfo->tx_failed))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) &&
-	    nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT,
-			sinfo->expected_throughput))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) &&
-	    nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS,
-			sinfo->beacon_loss_count))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_LOCAL_PM) &&
-	    nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM,
-			sinfo->local_pm))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_PEER_PM) &&
-	    nla_put_u32(msg, NL80211_STA_INFO_PEER_PM,
-			sinfo->peer_pm))
-		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_NONPEER_PM) &&
-	    nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM,
-			sinfo->nonpeer_pm))
-		goto nla_put_failure;
-	if (sinfo->filled & STATION_INFO_BSS_PARAM) {
+
+	PUT_SINFO(RX_PACKETS, rx_packets, u32);
+	PUT_SINFO(TX_PACKETS, tx_packets, u32);
+	PUT_SINFO(TX_RETRIES, tx_retries, u32);
+	PUT_SINFO(TX_FAILED, tx_failed, u32);
+	PUT_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32);
+	PUT_SINFO(BEACON_LOSS, beacon_loss_count, u32);
+	PUT_SINFO(LOCAL_PM, local_pm, u32);
+	PUT_SINFO(PEER_PM, peer_pm, u32);
+	PUT_SINFO(NONPEER_PM, nonpeer_pm, u32);
+
+	if (sinfo->filled & BIT(NL80211_STA_INFO_BSS_PARAM)) {
 		bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM);
 		if (!bss_param)
 			goto nla_put_failure;
@@ -3798,18 +3760,19 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 
 		nla_nest_end(msg, bss_param);
 	}
-	if ((sinfo->filled & STATION_INFO_STA_FLAGS) &&
+	if ((sinfo->filled & BIT(NL80211_STA_INFO_STA_FLAGS)) &&
 	    nla_put(msg, NL80211_STA_INFO_STA_FLAGS,
 		    sizeof(struct nl80211_sta_flag_update),
 		    &sinfo->sta_flags))
 		goto nla_put_failure;
-	if ((sinfo->filled & STATION_INFO_T_OFFSET) &&
-		nla_put_u64(msg, NL80211_STA_INFO_T_OFFSET,
-			    sinfo->t_offset))
-		goto nla_put_failure;
+
+	PUT_SINFO(T_OFFSET, t_offset, u64);
+	PUT_SINFO(RX_DROP_MISC, rx_dropped_misc, u64);
+
+#undef PUT_SINFO
 	nla_nest_end(msg, sinfoattr);
 
-	if ((sinfo->filled & STATION_INFO_ASSOC_REQ_IES) &&
+	if (sinfo->assoc_req_ies_len &&
 	    nla_put(msg, NL80211_ATTR_IE, sinfo->assoc_req_ies_len,
 		    sinfo->assoc_req_ies))
 		goto nla_put_failure;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 0f47948c572f..5b24d39d7903 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1300,7 +1300,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
 	if (err)
 		return err;
 
-	if (!(sinfo.filled & STATION_INFO_TX_BITRATE))
+	if (!(sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)))
 		return -EOPNOTSUPP;
 
 	rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate);
@@ -1340,7 +1340,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
 
 	switch (rdev->wiphy.signal_type) {
 	case CFG80211_SIGNAL_TYPE_MBM:
-		if (sinfo.filled & STATION_INFO_SIGNAL) {
+		if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) {
 			int sig = sinfo.signal;
 			wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED;
 			wstats.qual.updated |= IW_QUAL_QUAL_UPDATED;
@@ -1354,7 +1354,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
 			break;
 		}
 	case CFG80211_SIGNAL_TYPE_UNSPEC:
-		if (sinfo.filled & STATION_INFO_SIGNAL) {
+		if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) {
 			wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED;
 			wstats.qual.updated |= IW_QUAL_QUAL_UPDATED;
 			wstats.qual.level = sinfo.signal;
@@ -1367,9 +1367,9 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
 	}
 
 	wstats.qual.updated |= IW_QUAL_NOISE_INVALID;
-	if (sinfo.filled & STATION_INFO_RX_DROP_MISC)
+	if (sinfo.filled & BIT(NL80211_STA_INFO_RX_DROP_MISC))
 		wstats.discard.misc = sinfo.rx_dropped_misc;
-	if (sinfo.filled & STATION_INFO_TX_FAILED)
+	if (sinfo.filled & BIT(NL80211_STA_INFO_TX_FAILED))
 		wstats.discard.retries = sinfo.tx_failed;
 
 	return &wstats;
-- 
cgit v1.2.3-71-gd317


From a76b1942a10293a94edf3c93c23a6231b63532f5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 Nov 2014 14:12:22 +0100
Subject: cfg80211: add nl80211 beacon-only statistics

Add these two values:
 * BEACON_RX: number of beacons received from this peer
 * BEACON_SIGNAL_AVG: signal strength average for beacons only

These can then be used for Android Lollipop's statistics request.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 6 ++++++
 include/uapi/linux/nl80211.h | 5 +++++
 net/wireless/nl80211.c       | 2 ++
 3 files changed, 13 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 91c133626c32..ef26ce16b058 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -987,6 +987,9 @@ struct sta_bss_parameters {
  * @nonpeer_pm: non-peer mesh STA power save mode
  * @expected_throughput: expected throughput in kbps (including 802.11 headers)
  *	towards this station.
+ * @rx_beacon: number of beacons received from this peer
+ * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received
+ *	from this peer
  */
 struct station_info {
 	u32 filled;
@@ -1026,6 +1029,9 @@ struct station_info {
 	enum nl80211_mesh_power_mode nonpeer_pm;
 
 	u32 expected_throughput;
+
+	u64 rx_beacon;
+	u8 rx_beacon_signal_avg;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index e48ca0bbd07b..0c3d341c0aeb 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2375,6 +2375,9 @@ enum nl80211_sta_bss_param {
  *	802.11 header (u32, kbps)
  * @NL80211_STA_INFO_RX_DROP_MISC: RX packets dropped for unspecified reasons
  *	(u64)
+ * @NL80211_STA_INFO_BEACON_RX: number of beacons received from this peer (u64)
+ * @NL80211_STA_INFO_BEACON_SIGNAL_AVG: signal strength average
+ *	for beacons only (u8, dBm)
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -2408,6 +2411,8 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_CHAIN_SIGNAL_AVG,
 	NL80211_STA_INFO_EXPECTED_THROUGHPUT,
 	NL80211_STA_INFO_RX_DROP_MISC,
+	NL80211_STA_INFO_BEACON_RX,
+	NL80211_STA_INFO_BEACON_SIGNAL_AVG,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 68faf8a2aa43..42b968a1f994 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3768,6 +3768,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 
 	PUT_SINFO(T_OFFSET, t_offset, u64);
 	PUT_SINFO(RX_DROP_MISC, rx_dropped_misc, u64);
+	PUT_SINFO(BEACON_RX, rx_beacon, u64);
+	PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
 
 #undef PUT_SINFO
 	nla_nest_end(msg, sinfoattr);
-- 
cgit v1.2.3-71-gd317


From 8d791361a4698ca6f01c361a47b39b30d26bf66c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 21 Nov 2014 12:40:05 +0100
Subject: nl80211: clarify packet statistics descriptions

The current statistics we keep aren't very clear, some are on
MPDUs and some on MSDUs/MMPDUs. Clarify the descriptions based
on the counters mac80211 keeps.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 12 ++++++------
 include/uapi/linux/nl80211.h | 24 +++++++++++++++---------
 2 files changed, 21 insertions(+), 15 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ef26ce16b058..95420fb61600 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -950,8 +950,8 @@ struct sta_bss_parameters {
  *	indicate the relevant values in this struct for them
  * @connected_time: time(in secs) since a station is last connected
  * @inactive_time: time since last station activity (tx/rx) in milliseconds
- * @rx_bytes: bytes received from this station
- * @tx_bytes: bytes transmitted to this station
+ * @rx_bytes: bytes (size of MPDUs) received from this station
+ * @tx_bytes: bytes (size of MPDUs) transmitted to this station
  * @llid: mesh local link id
  * @plid: mesh peer link id
  * @plink_state: mesh peer link state
@@ -964,10 +964,10 @@ struct sta_bss_parameters {
  * @chain_signal_avg: per-chain signal strength average in dBm
  * @txrate: current unicast bitrate from this station
  * @rxrate: current unicast bitrate to this station
- * @rx_packets: packets received from this station
- * @tx_packets: packets transmitted to this station
- * @tx_retries: cumulative retry counts
- * @tx_failed: number of failed transmissions (retries exceeded, no ACK)
+ * @rx_packets: packets (MSDUs & MMPDUs) received from this station
+ * @tx_packets: packets (MSDUs & MMPDUs) transmitted to this station
+ * @tx_retries: cumulative retry counts (MPDUs)
+ * @tx_failed: number of failed transmissions (MPDUs) (retries exceeded, no ACK)
  * @rx_dropped_misc:  Dropped for un-specified reason.
  * @bss_param: current BSS parameters
  * @generation: generation number for nl80211 dumps.
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 0c3d341c0aeb..b0fb5d598250 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2338,18 +2338,24 @@ enum nl80211_sta_bss_param {
  *
  * @__NL80211_STA_INFO_INVALID: attribute number 0 is reserved
  * @NL80211_STA_INFO_INACTIVE_TIME: time since last activity (u32, msecs)
- * @NL80211_STA_INFO_RX_BYTES: total received bytes (u32, from this station)
- * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station)
- * @NL80211_STA_INFO_RX_BYTES64: total received bytes (u64, from this station)
- * @NL80211_STA_INFO_TX_BYTES64: total transmitted bytes (u64, to this station)
+ * @NL80211_STA_INFO_RX_BYTES: total received bytes (MPDU length)
+ *	(u32, from this station)
+ * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (MPDU length)
+ *	(u32, to this station)
+ * @NL80211_STA_INFO_RX_BYTES64: total received bytes (MPDU length)
+ *	(u64, from this station)
+ * @NL80211_STA_INFO_TX_BYTES64: total transmitted bytes (MPDU length)
+ *	(u64, to this station)
  * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm)
  * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute
  * 	containing info as possible, see &enum nl80211_rate_info
- * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station)
- * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (u32, to this
- *	station)
- * @NL80211_STA_INFO_TX_RETRIES: total retries (u32, to this station)
- * @NL80211_STA_INFO_TX_FAILED: total failed packets (u32, to this station)
+ * @NL80211_STA_INFO_RX_PACKETS: total received packet (MSDUs and MMPDUs)
+ *	(u32, from this station)
+ * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (MSDUs and MMPDUs)
+ *	(u32, to this station)
+ * @NL80211_STA_INFO_TX_RETRIES: total retries (MPDUs) (u32, to this station)
+ * @NL80211_STA_INFO_TX_FAILED: total failed packets (MPDUs)
+ *	(u32, to this station)
  * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm)
  * @NL80211_STA_INFO_LLID: the station's mesh LLID
  * @NL80211_STA_INFO_PLID: the station's mesh PLID
-- 
cgit v1.2.3-71-gd317


From 6de39808cf1dd7b02bf42e7d8695d80f5eaf645d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 19 Dec 2014 12:34:00 +0100
Subject: nl80211: support per-TID station statistics

The base for the current statistics is pretty mixed up, support
exporting RX/TX statistics for MSDUs per TID. This (currently)
covers received MSDUs, transmitted MSDUs and retries/failures
thereof.

Doing it per TID for MSDUs makes more sense than say only per AC
because it's symmetric - we could export per-AC statistics for all
frames (which AC we used for transmission can be determined also
for management frames) but per TID is better and usually data
frames are really the ones we care about. Also, on RX we can't
determine the AC - but we do know the TID for any QoS MPDU we
received.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 21 +++++++++++++++++++++
 include/uapi/linux/nl80211.h | 31 +++++++++++++++++++++++++++++++
 net/wireless/nl80211.c       | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 93 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 95420fb61600..197735788f18 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -939,6 +939,24 @@ struct sta_bss_parameters {
 	u16 beacon_interval;
 };
 
+/**
+ * struct cfg80211_tid_stats - per-TID statistics
+ * @filled: bitmap of flags using the bits of &enum nl80211_tid_stats to
+ *	indicate the relevant values in this struct are filled
+ * @rx_msdu: number of received MSDUs
+ * @tx_msdu: number of (attempted) transmitted MSDUs
+ * @tx_msdu_retries: number of retries (not counting the first) for
+ *	transmitted MSDUs
+ * @tx_msdu_failed: number of failed transmitted MSDUs
+ */
+struct cfg80211_tid_stats {
+	u32 filled;
+	u64 rx_msdu;
+	u64 tx_msdu;
+	u64 tx_msdu_retries;
+	u64 tx_msdu_failed;
+};
+
 #define IEEE80211_MAX_CHAINS	4
 
 /**
@@ -990,6 +1008,8 @@ struct sta_bss_parameters {
  * @rx_beacon: number of beacons received from this peer
  * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received
  *	from this peer
+ * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last
+ *	(IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs.
  */
 struct station_info {
 	u32 filled;
@@ -1032,6 +1052,7 @@ struct station_info {
 
 	u64 rx_beacon;
 	u8 rx_beacon_signal_avg;
+	struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1];
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index b0fb5d598250..a963d4824c51 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2384,6 +2384,11 @@ enum nl80211_sta_bss_param {
  * @NL80211_STA_INFO_BEACON_RX: number of beacons received from this peer (u64)
  * @NL80211_STA_INFO_BEACON_SIGNAL_AVG: signal strength average
  *	for beacons only (u8, dBm)
+ * @NL80211_STA_INFO_TID_STATS: per-TID statistics (see &enum nl80211_tid_stats)
+ *	This is a nested attribute where each the inner attribute number is the
+ *	TID+1 and the special TID 16 (i.e. value 17) is used for non-QoS frames;
+ *	each one of those is again nested with &enum nl80211_tid_stats
+ *	attributes carrying the actual values.
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -2419,12 +2424,38 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_RX_DROP_MISC,
 	NL80211_STA_INFO_BEACON_RX,
 	NL80211_STA_INFO_BEACON_SIGNAL_AVG,
+	NL80211_STA_INFO_TID_STATS,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
 	NL80211_STA_INFO_MAX = __NL80211_STA_INFO_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_tid_stats - per TID statistics attributes
+ * @__NL80211_TID_STATS_INVALID: attribute number 0 is reserved
+ * @NL80211_TID_STATS_RX_MSDU: number of MSDUs received (u64)
+ * @NL80211_TID_STATS_TX_MSDU: number of MSDUs transmitted (or
+ *	attempted to transmit; u64)
+ * @NL80211_TID_STATS_TX_MSDU_RETRIES: number of retries for
+ *	transmitted MSDUs (not counting the first attempt; u64)
+ * @NL80211_TID_STATS_TX_MSDU_FAILED: number of failed transmitted
+ *	MSDUs (u64)
+ * @NUM_NL80211_TID_STATS: number of attributes here
+ * @NL80211_TID_STATS_MAX: highest numbered attribute here
+ */
+enum nl80211_tid_stats {
+	__NL80211_TID_STATS_INVALID,
+	NL80211_TID_STATS_RX_MSDU,
+	NL80211_TID_STATS_TX_MSDU,
+	NL80211_TID_STATS_TX_MSDU_RETRIES,
+	NL80211_TID_STATS_TX_MSDU_FAILED,
+
+	/* keep last */
+	NUM_NL80211_TID_STATS,
+	NL80211_TID_STATS_MAX = NUM_NL80211_TID_STATS - 1
+};
+
 /**
  * enum nl80211_mpath_flags - nl80211 mesh path flags
  *
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 42b968a1f994..7c2ce26e22de 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3772,6 +3772,47 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 	PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
 
 #undef PUT_SINFO
+
+	if (sinfo->filled & BIT(NL80211_STA_INFO_TID_STATS)) {
+		struct nlattr *tidsattr;
+		int tid;
+
+		tidsattr = nla_nest_start(msg, NL80211_STA_INFO_TID_STATS);
+		if (!tidsattr)
+			goto nla_put_failure;
+
+		for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
+			struct cfg80211_tid_stats *tidstats;
+			struct nlattr *tidattr;
+
+			tidstats = &sinfo->pertid[tid];
+
+			if (!tidstats->filled)
+				continue;
+
+			tidattr = nla_nest_start(msg, tid + 1);
+			if (!tidattr)
+				goto nla_put_failure;
+
+#define PUT_TIDVAL(attr, memb, type) do {				\
+	if (tidstats->filled & BIT(NL80211_TID_STATS_ ## attr) &&	\
+	    nla_put_ ## type(msg, NL80211_TID_STATS_ ## attr,		\
+			     tidstats->memb))				\
+		goto nla_put_failure;					\
+	} while (0)
+
+			PUT_TIDVAL(RX_MSDU, rx_msdu, u64);
+			PUT_TIDVAL(TX_MSDU, tx_msdu, u64);
+			PUT_TIDVAL(TX_MSDU_RETRIES, tx_msdu_retries, u64);
+			PUT_TIDVAL(TX_MSDU_FAILED, tx_msdu_failed, u64);
+
+#undef PUT_TIDVAL
+			nla_nest_end(msg, tidattr);
+		}
+
+		nla_nest_end(msg, tidsattr);
+	}
+
 	nla_nest_end(msg, sinfoattr);
 
 	if (sinfo->assoc_req_ies_len &&
-- 
cgit v1.2.3-71-gd317


From 3b50d9029809b60a5081d90c282aa04d438d3ea1 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 6 Jan 2015 15:45:31 -0800
Subject: ipv6: fix redefinition of in6_pktinfo and ip6_mtuinfo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both netinet/in.h and linux/ipv6.h define these two structs,
if we include both of them, we got:

	/usr/include/linux/ipv6.h:19:8: error: redefinition of ‘struct in6_pktinfo’
	 struct in6_pktinfo {
		^
	In file included from /usr/include/arpa/inet.h:22:0,
			 from txtimestamp.c:33:
	/usr/include/netinet/in.h:524:8: note: originally defined here
	 struct in6_pktinfo
		^
	In file included from txtimestamp.c:40:0:
	/usr/include/linux/ipv6.h:24:8: error: redefinition of ‘struct ip6_mtuinfo’
	 struct ip6_mtuinfo {
		^
	In file included from /usr/include/arpa/inet.h:22:0,
			 from txtimestamp.c:33:
	/usr/include/netinet/in.h:531:8: note: originally defined here
	 struct ip6_mtuinfo
		^
So similarly to what we did for in6_addr, we need to sync with
libc header on their definitions.

Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ipv6.h        | 5 ++++-
 include/uapi/linux/libc-compat.h | 6 ++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index e863d088b9a5..b9b1b7d1c839 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -15,16 +15,19 @@
  *	*under construction*
  */
 
-
+#if __UAPI_DEF_IN6_PKTINFO
 struct in6_pktinfo {
 	struct in6_addr	ipi6_addr;
 	int		ipi6_ifindex;
 };
+#endif
 
+#if __UAPI_DEF_IP6_MTUINFO
 struct ip6_mtuinfo {
 	struct sockaddr_in6	ip6m_addr;
 	__u32			ip6m_mtu;
 };
+#endif
 
 struct in6_ifreq {
 	struct in6_addr	ifr6_addr;
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index e28807ad17fa..fa673e9cc040 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -70,6 +70,8 @@
 #define __UAPI_DEF_IPV6_MREQ		0
 #define __UAPI_DEF_IPPROTO_V6		0
 #define __UAPI_DEF_IPV6_OPTIONS		0
+#define __UAPI_DEF_IN6_PKTINFO		0
+#define __UAPI_DEF_IP6_MTUINFO		0
 
 #else
 
@@ -84,6 +86,8 @@
 #define __UAPI_DEF_IPV6_MREQ		1
 #define __UAPI_DEF_IPPROTO_V6		1
 #define __UAPI_DEF_IPV6_OPTIONS		1
+#define __UAPI_DEF_IN6_PKTINFO		1
+#define __UAPI_DEF_IP6_MTUINFO		1
 
 #endif /* _NETINET_IN_H */
 
@@ -106,6 +110,8 @@
 #define __UAPI_DEF_IPV6_MREQ		1
 #define __UAPI_DEF_IPPROTO_V6		1
 #define __UAPI_DEF_IPV6_OPTIONS		1
+#define __UAPI_DEF_IN6_PKTINFO		1
+#define __UAPI_DEF_IP6_MTUINFO		1
 
 /* Definitions for xattr.h */
 #define __UAPI_DEF_XATTR		1
-- 
cgit v1.2.3-71-gd317


From 5930cb3511dfb4392de1180c892b353316ef29ec Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Thu, 18 Dec 2014 21:45:24 +0200
Subject: serial: driver for Conexant Digicolor USART

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig           |  15 +
 drivers/tty/serial/Makefile          |   1 +
 drivers/tty/serial/digicolor-usart.c | 563 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/serial_core.h     |   3 +
 4 files changed, 582 insertions(+)
 create mode 100644 drivers/tty/serial/digicolor-usart.c

(limited to 'include/uapi/linux')

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index c79b43cd6014..96ec6cfd74e8 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1549,6 +1549,21 @@ config SERIAL_FSL_LPUART_CONSOLE
 	  If you have enabled the lpuart serial port on the Freescale SoCs,
 	  you can make it the console by answering Y to this option.
 
+config SERIAL_CONEXANT_DIGICOLOR
+	tristate "Conexant Digicolor CX92xxx USART serial port support"
+	depends on OF
+	select SERIAL_CORE
+	help
+	  Support for the on-chip USART on Conexant Digicolor SoCs.
+
+config SERIAL_CONEXANT_DIGICOLOR_CONSOLE
+	bool "Console on Conexant Digicolor serial port"
+	depends on SERIAL_CONEXANT_DIGICOLOR=y
+	select SERIAL_CORE_CONSOLE
+	help
+	  If you have enabled the USART serial port on Conexant Digicolor
+	  SoCs, you can make it the console by answering Y to this option.
+
 config SERIAL_ST_ASC
 	tristate "ST ASC serial port support"
 	select SERIAL_CORE
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 9a548acf5fdc..770a19bb7fcb 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_SERIAL_EFM32_UART) += efm32-uart.o
 obj-$(CONFIG_SERIAL_ARC)	+= arc_uart.o
 obj-$(CONFIG_SERIAL_RP2)	+= rp2.o
 obj-$(CONFIG_SERIAL_FSL_LPUART)	+= fsl_lpuart.o
+obj-$(CONFIG_SERIAL_CONEXANT_DIGICOLOR)	+= digicolor-usart.o
 obj-$(CONFIG_SERIAL_MEN_Z135)	+= men_z135_uart.o
 
 # GPIOLIB helpers for modem control lines
diff --git a/drivers/tty/serial/digicolor-usart.c b/drivers/tty/serial/digicolor-usart.c
new file mode 100644
index 000000000000..09ce0b3764e2
--- /dev/null
+++ b/drivers/tty/serial/digicolor-usart.c
@@ -0,0 +1,563 @@
+/*
+ *  Driver for Conexant Digicolor serial ports (USART)
+ *
+ * Author: Baruch Siach <baruch@tkos.co.il>
+ *
+ * Copyright (C) 2014 Paradox Innovation Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+
+#define UA_ENABLE			0x00
+#define UA_ENABLE_ENABLE		BIT(0)
+
+#define UA_CONTROL			0x01
+#define UA_CONTROL_RX_ENABLE		BIT(0)
+#define UA_CONTROL_TX_ENABLE		BIT(1)
+#define UA_CONTROL_SOFT_RESET		BIT(2)
+
+#define UA_STATUS			0x02
+#define UA_STATUS_PARITY_ERR		BIT(0)
+#define UA_STATUS_FRAME_ERR		BIT(1)
+#define UA_STATUS_OVERRUN_ERR		BIT(2)
+#define UA_STATUS_TX_READY		BIT(6)
+
+#define UA_CONFIG			0x03
+#define UA_CONFIG_CHAR_LEN		BIT(0)
+#define UA_CONFIG_STOP_BITS		BIT(1)
+#define UA_CONFIG_PARITY		BIT(2)
+#define UA_CONFIG_ODD_PARITY		BIT(4)
+
+#define UA_EMI_REC			0x04
+
+#define UA_HBAUD_LO			0x08
+#define UA_HBAUD_HI			0x09
+
+#define UA_STATUS_FIFO			0x0a
+#define UA_STATUS_FIFO_RX_EMPTY		BIT(2)
+#define UA_STATUS_FIFO_RX_INT_ALMOST	BIT(3)
+#define UA_STATUS_FIFO_TX_FULL		BIT(4)
+#define UA_STATUS_FIFO_TX_INT_ALMOST	BIT(7)
+
+#define UA_CONFIG_FIFO			0x0b
+#define UA_CONFIG_FIFO_RX_THRESH	7
+#define UA_CONFIG_FIFO_RX_FIFO_MODE	BIT(3)
+#define UA_CONFIG_FIFO_TX_FIFO_MODE	BIT(7)
+
+#define UA_INTFLAG_CLEAR		0x1c
+#define UA_INTFLAG_SET			0x1d
+#define UA_INT_ENABLE			0x1e
+#define UA_INT_STATUS			0x1f
+
+#define UA_INT_TX			BIT(0)
+#define UA_INT_RX			BIT(1)
+
+#define DIGICOLOR_USART_NR		3
+
+/*
+ * We use the 16 bytes hardware FIFO to buffer Rx traffic. Rx interrupt is
+ * only produced when the FIFO is filled more than a certain configurable
+ * threshold. Unfortunately, there is no way to set this threshold below half
+ * FIFO. This means that we must periodically poll the FIFO status register to
+ * see whether there are waiting Rx bytes.
+ */
+
+struct digicolor_port {
+	struct uart_port port;
+	struct delayed_work rx_poll_work;
+};
+
+static struct uart_port *digicolor_ports[DIGICOLOR_USART_NR];
+
+static bool digicolor_uart_tx_full(struct uart_port *port)
+{
+	return !!(readb_relaxed(port->membase + UA_STATUS_FIFO) &
+		  UA_STATUS_FIFO_TX_FULL);
+}
+
+static bool digicolor_uart_rx_empty(struct uart_port *port)
+{
+	return !!(readb_relaxed(port->membase + UA_STATUS_FIFO) &
+		  UA_STATUS_FIFO_RX_EMPTY);
+}
+
+static void digicolor_uart_stop_tx(struct uart_port *port)
+{
+	u8 int_enable = readb_relaxed(port->membase + UA_INT_ENABLE);
+
+	int_enable &= ~UA_INT_TX;
+	writeb_relaxed(int_enable, port->membase + UA_INT_ENABLE);
+}
+
+static void digicolor_uart_start_tx(struct uart_port *port)
+{
+	u8 int_enable = readb_relaxed(port->membase + UA_INT_ENABLE);
+
+	int_enable |= UA_INT_TX;
+	writeb_relaxed(int_enable, port->membase + UA_INT_ENABLE);
+}
+
+static void digicolor_uart_stop_rx(struct uart_port *port)
+{
+	u8 int_enable = readb_relaxed(port->membase + UA_INT_ENABLE);
+
+	int_enable &= ~UA_INT_RX;
+	writeb_relaxed(int_enable, port->membase + UA_INT_ENABLE);
+}
+
+static void digicolor_rx_poll(struct work_struct *work)
+{
+	struct digicolor_port *dp =
+		container_of(to_delayed_work(work),
+			     struct digicolor_port, rx_poll_work);
+
+	if (!digicolor_uart_rx_empty(&dp->port))
+		/* force RX interrupt */
+		writeb_relaxed(UA_INT_RX, dp->port.membase + UA_INTFLAG_SET);
+
+	schedule_delayed_work(&dp->rx_poll_work, msecs_to_jiffies(100));
+}
+
+static void digicolor_uart_rx(struct uart_port *port)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	while (1) {
+		u8 status, ch;
+		unsigned int ch_flag;
+
+		if (digicolor_uart_rx_empty(port))
+			break;
+
+		ch = readb_relaxed(port->membase + UA_EMI_REC);
+		status = readb_relaxed(port->membase + UA_STATUS);
+
+		port->icount.rx++;
+		ch_flag = TTY_NORMAL;
+
+		if (status) {
+			if (status & UA_STATUS_PARITY_ERR)
+				port->icount.parity++;
+			else if (status & UA_STATUS_FRAME_ERR)
+				port->icount.frame++;
+			else if (status & UA_STATUS_OVERRUN_ERR)
+				port->icount.overrun++;
+
+			status &= port->read_status_mask;
+
+			if (status & UA_STATUS_PARITY_ERR)
+				ch_flag = TTY_PARITY;
+			else if (status & UA_STATUS_FRAME_ERR)
+				ch_flag = TTY_FRAME;
+			else if (status & UA_STATUS_OVERRUN_ERR)
+				ch_flag = TTY_OVERRUN;
+		}
+
+		if (uart_handle_sysrq_char(port, ch))
+			continue;
+
+		if (status & port->ignore_status_mask)
+			continue;
+
+		uart_insert_char(port, status, UA_STATUS_OVERRUN_ERR, ch,
+				 ch_flag);
+	}
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	tty_flip_buffer_push(&port->state->port);
+}
+
+static void digicolor_uart_tx(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->state->xmit;
+	unsigned long flags;
+
+	if (digicolor_uart_tx_full(port))
+		return;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	if (port->x_char) {
+		writeb_relaxed(port->x_char, port->membase + UA_EMI_REC);
+		port->icount.tx++;
+		port->x_char = 0;
+		goto out;
+	}
+
+	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
+		digicolor_uart_stop_tx(port);
+		goto out;
+	}
+
+	while (!uart_circ_empty(xmit)) {
+		writeb(xmit->buf[xmit->tail], port->membase + UA_EMI_REC);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+
+		if (digicolor_uart_tx_full(port))
+			break;
+	}
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+
+out:
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static irqreturn_t digicolor_uart_int(int irq, void *dev_id)
+{
+	struct uart_port *port = dev_id;
+	u8 int_status = readb_relaxed(port->membase + UA_INT_STATUS);
+
+	writeb_relaxed(UA_INT_RX | UA_INT_TX,
+		       port->membase + UA_INTFLAG_CLEAR);
+
+	if (int_status & UA_INT_RX)
+		digicolor_uart_rx(port);
+	if (int_status & UA_INT_TX)
+		digicolor_uart_tx(port);
+
+	return IRQ_HANDLED;
+}
+
+static unsigned int digicolor_uart_tx_empty(struct uart_port *port)
+{
+	u8 status = readb_relaxed(port->membase + UA_STATUS);
+
+	return (status & UA_STATUS_TX_READY) ? TIOCSER_TEMT : 0;
+}
+
+static unsigned int digicolor_uart_get_mctrl(struct uart_port *port)
+{
+	return TIOCM_CTS;
+}
+
+static void digicolor_uart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+}
+
+static void digicolor_uart_break_ctl(struct uart_port *port, int state)
+{
+}
+
+static int digicolor_uart_startup(struct uart_port *port)
+{
+	struct digicolor_port *dp =
+		container_of(port, struct digicolor_port, port);
+
+	writeb_relaxed(UA_ENABLE_ENABLE, port->membase + UA_ENABLE);
+	writeb_relaxed(UA_CONTROL_SOFT_RESET, port->membase + UA_CONTROL);
+	writeb_relaxed(0, port->membase + UA_CONTROL);
+
+	writeb_relaxed(UA_CONFIG_FIFO_RX_FIFO_MODE
+		       | UA_CONFIG_FIFO_TX_FIFO_MODE | UA_CONFIG_FIFO_RX_THRESH,
+		       port->membase + UA_CONFIG_FIFO);
+	writeb_relaxed(UA_STATUS_FIFO_RX_INT_ALMOST,
+		       port->membase + UA_STATUS_FIFO);
+	writeb_relaxed(UA_CONTROL_RX_ENABLE | UA_CONTROL_TX_ENABLE,
+		       port->membase + UA_CONTROL);
+	writeb_relaxed(UA_INT_TX | UA_INT_RX,
+		       port->membase + UA_INT_ENABLE);
+
+	schedule_delayed_work(&dp->rx_poll_work, msecs_to_jiffies(100));
+
+	return 0;
+}
+
+static void digicolor_uart_shutdown(struct uart_port *port)
+{
+	struct digicolor_port *dp =
+		container_of(port, struct digicolor_port, port);
+
+	writeb_relaxed(0, port->membase + UA_ENABLE);
+	cancel_delayed_work_sync(&dp->rx_poll_work);
+}
+
+static void digicolor_uart_set_termios(struct uart_port *port,
+				       struct ktermios *termios,
+				       struct ktermios *old)
+{
+	unsigned int baud, divisor;
+	u8 config = 0;
+	unsigned long flags;
+
+	/* Mask termios capabilities we don't support */
+	termios->c_cflag &= ~CMSPAR;
+	termios->c_iflag &= ~(BRKINT | IGNBRK);
+
+	/* Limit baud rates so that we don't need the fractional divider */
+	baud = uart_get_baud_rate(port, termios, old,
+				  port->uartclk / (0x10000*16),
+				  port->uartclk / 256);
+	divisor = uart_get_divisor(port, baud) - 1;
+
+	switch (termios->c_cflag & CSIZE) {
+	case CS7:
+		break;
+	case CS8:
+	default:
+		config |= UA_CONFIG_CHAR_LEN;
+		break;
+	}
+
+	if (termios->c_cflag & CSTOPB)
+		config |= UA_CONFIG_STOP_BITS;
+
+	if (termios->c_cflag & PARENB) {
+		config |= UA_CONFIG_PARITY;
+		if (termios->c_cflag & PARODD)
+			config |= UA_CONFIG_ODD_PARITY;
+	}
+
+	/* Set read status mask */
+	port->read_status_mask = UA_STATUS_OVERRUN_ERR;
+	if (termios->c_iflag & INPCK)
+		port->read_status_mask |= UA_STATUS_PARITY_ERR
+			| UA_STATUS_FRAME_ERR;
+
+	/* Set status ignore mask */
+	port->ignore_status_mask = 0;
+	if (!(termios->c_cflag & CREAD))
+		port->ignore_status_mask |= UA_STATUS_OVERRUN_ERR
+			| UA_STATUS_PARITY_ERR | UA_STATUS_FRAME_ERR;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	uart_update_timeout(port, termios->c_cflag, baud);
+
+	writeb_relaxed(config, port->membase + UA_CONFIG);
+	writeb_relaxed(divisor & 0xff, port->membase + UA_HBAUD_LO);
+	writeb_relaxed(divisor >> 8, port->membase + UA_HBAUD_HI);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static const char *digicolor_uart_type(struct uart_port *port)
+{
+	return (port->type == PORT_DIGICOLOR) ? "DIGICOLOR USART" : NULL;
+}
+
+static void digicolor_uart_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE)
+		port->type = PORT_DIGICOLOR;
+}
+
+static void digicolor_uart_release_port(struct uart_port *port)
+{
+}
+
+static int digicolor_uart_request_port(struct uart_port *port)
+{
+	return 0;
+}
+
+static const struct uart_ops digicolor_uart_ops = {
+	.tx_empty	= digicolor_uart_tx_empty,
+	.set_mctrl	= digicolor_uart_set_mctrl,
+	.get_mctrl	= digicolor_uart_get_mctrl,
+	.stop_tx	= digicolor_uart_stop_tx,
+	.start_tx	= digicolor_uart_start_tx,
+	.stop_rx	= digicolor_uart_stop_rx,
+	.break_ctl	= digicolor_uart_break_ctl,
+	.startup	= digicolor_uart_startup,
+	.shutdown	= digicolor_uart_shutdown,
+	.set_termios	= digicolor_uart_set_termios,
+	.type		= digicolor_uart_type,
+	.config_port	= digicolor_uart_config_port,
+	.release_port	= digicolor_uart_release_port,
+	.request_port	= digicolor_uart_request_port,
+};
+
+static void digicolor_uart_console_putchar(struct uart_port *port, int ch)
+{
+	while (digicolor_uart_tx_full(port))
+		cpu_relax();
+
+	writeb_relaxed(ch, port->membase + UA_EMI_REC);
+}
+
+static void digicolor_uart_console_write(struct console *co, const char *c,
+					 unsigned n)
+{
+	struct uart_port *port = digicolor_ports[co->index];
+	u8 status;
+	unsigned long flags;
+	int locked = 1;
+
+	if (port->sysrq || oops_in_progress)
+		locked = spin_trylock_irqsave(&port->lock, flags);
+	else
+		spin_lock_irqsave(&port->lock, flags);
+
+	uart_console_write(port, c, n, digicolor_uart_console_putchar);
+
+	if (locked)
+		spin_unlock_irqrestore(&port->lock, flags);
+
+	/* Wait for transmitter to become empty */
+	do {
+		status = readb_relaxed(port->membase + UA_STATUS);
+	} while ((status & UA_STATUS_TX_READY) == 0);
+}
+
+static int digicolor_uart_console_setup(struct console *co, char *options)
+{
+	int baud = 115200, bits = 8, parity = 'n', flow = 'n';
+	struct uart_port *port;
+
+	if (co->index < 0 || co->index >= DIGICOLOR_USART_NR)
+		return -EINVAL;
+
+	port = digicolor_ports[co->index];
+	if (!port)
+		return -ENODEV;
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	return uart_set_options(port, co, baud, parity, bits, flow);
+}
+
+static struct console digicolor_console = {
+	.name	= "ttyS",
+	.device	= uart_console_device,
+	.write	= digicolor_uart_console_write,
+	.setup	= digicolor_uart_console_setup,
+	.flags	= CON_PRINTBUFFER,
+	.index	= -1,
+};
+
+static struct uart_driver digicolor_uart = {
+	.driver_name	= "digicolor-usart",
+	.dev_name	= "ttyS",
+	.nr		= DIGICOLOR_USART_NR,
+};
+
+static int digicolor_uart_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	int ret, index;
+	struct digicolor_port *dp;
+	struct resource *res;
+	struct clk *uart_clk;
+
+	if (!np) {
+		dev_err(&pdev->dev, "Missing device tree node\n");
+		return -ENXIO;
+	}
+
+	index = of_alias_get_id(np, "serial");
+	if (index < 0 || index >= DIGICOLOR_USART_NR)
+		return -EINVAL;
+
+	dp = devm_kzalloc(&pdev->dev, sizeof(*dp), GFP_KERNEL);
+	if (!dp)
+		return -ENOMEM;
+
+	uart_clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(uart_clk))
+		return PTR_ERR(uart_clk);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dp->port.mapbase = res->start;
+	dp->port.membase = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(dp->port.membase))
+		return PTR_ERR(dp->port.membase);
+
+	dp->port.irq = platform_get_irq(pdev, 0);
+	if (IS_ERR_VALUE(dp->port.irq))
+		return dp->port.irq;
+
+	dp->port.iotype = UPIO_MEM;
+	dp->port.uartclk = clk_get_rate(uart_clk);
+	dp->port.fifosize = 16;
+	dp->port.dev = &pdev->dev;
+	dp->port.ops = &digicolor_uart_ops;
+	dp->port.line = index;
+	dp->port.type = PORT_DIGICOLOR;
+	spin_lock_init(&dp->port.lock);
+
+	digicolor_ports[index] = &dp->port;
+	platform_set_drvdata(pdev, &dp->port);
+
+	INIT_DELAYED_WORK(&dp->rx_poll_work, digicolor_rx_poll);
+
+	ret = devm_request_irq(&pdev->dev, dp->port.irq, digicolor_uart_int, 0,
+			       dev_name(&pdev->dev), &dp->port);
+	if (ret)
+		return ret;
+
+	return uart_add_one_port(&digicolor_uart, &dp->port);
+}
+
+static int digicolor_uart_remove(struct platform_device *pdev)
+{
+	struct uart_port *port = platform_get_drvdata(pdev);
+
+	uart_remove_one_port(&digicolor_uart, port);
+
+	return 0;
+}
+
+static const struct of_device_id digicolor_uart_dt_ids[] = {
+	{ .compatible = "cnxt,cx92755-usart", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, digicolor_uart_dt_ids);
+
+static struct platform_driver digicolor_uart_platform = {
+	.driver = {
+		.name		= "digicolor-usart",
+		.of_match_table	= of_match_ptr(digicolor_uart_dt_ids),
+	},
+	.probe	= digicolor_uart_probe,
+	.remove	= digicolor_uart_remove,
+};
+
+static int __init digicolor_uart_init(void)
+{
+	int ret;
+
+	if (IS_ENABLED(CONFIG_SERIAL_CONEXANT_DIGICOLOR_CONSOLE)) {
+		digicolor_uart.cons = &digicolor_console;
+		digicolor_console.data = &digicolor_uart;
+	}
+
+	ret = uart_register_driver(&digicolor_uart);
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&digicolor_uart_platform);
+}
+module_init(digicolor_uart_init);
+
+static void __exit digicolor_uart_exit(void)
+{
+	platform_driver_unregister(&digicolor_uart_platform);
+	uart_unregister_driver(&digicolor_uart);
+}
+module_exit(digicolor_uart_exit);
+
+MODULE_AUTHOR("Baruch Siach <baruch@tkos.co.il>");
+MODULE_DESCRIPTION("Conexant Digicolor USART serial driver");
+MODULE_LICENSE("GPL");
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index c17218094f18..9124e837c181 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -248,4 +248,7 @@
 /* MESON */
 #define PORT_MESON	109
 
+/* Conexant Digicolor */
+#define PORT_DIGICOLOR	110
+
 #endif /* _UAPILINUX_SERIAL_CORE_H */
-- 
cgit v1.2.3-71-gd317


From fddceb8b5399083bcc042266f188116cc779fc80 Mon Sep 17 00:00:00 2001
From: Vijay Rai <vijay.rai@freescale.com>
Date: Mon, 5 Jan 2015 21:14:42 +0530
Subject: tty: 8250: Add 64byte UART support for FSL platforms

Some of FSL SoCs like T1040 has new version of UART controller which
can support 64byte FiFo.
To enable 64 byte support, following needs to be done:
-FCR[EN64] needs to be programmed to 1 to enable it.
-Also, when FCR[EN64]==1, RTL bits to be used as below
to define various Receive Trigger Levels:
        -FCR[RTL] = 00  1 byte
        -FCR[RTL] = 01  16 bytes
        -FCR[RTL] = 10  32 bytes
        -FCR[RTL] = 11  56 bytes
-tx_loadsz is set to 63-bytes instead of 64-bytes to implement
 workaround of errata A-008006 which states that tx_loadsz should
 be configured less than Maximum supported fifo bytes

Signed-off-by: Vijay Rai <vijay.rai@freescale.com>
Signed-off-by: Priyanka Jain <Priyanka.Jain@freescale.com>
Signed-off-by: Poonam Aggrwal <poonam.aggrwal@freescale.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_core.c | 23 ++++++++++++++++++++++-
 include/uapi/linux/serial_core.h    |  3 ++-
 include/uapi/linux/serial_reg.h     |  3 ++-
 3 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 3bfcfdb57d05..689169c11f0f 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -329,6 +329,17 @@ static const struct serial8250_config uart_config[] = {
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
 	},
+/* tx_loadsz is set to 63-bytes instead of 64-bytes to implement
+workaround of errata A-008006 which states that tx_loadsz should  be
+configured less than Maximum supported fifo bytes */
+	[PORT_16550A_FSL64] = {
+		.name		= "16550A_FSL64",
+		.fifo_size	= 64,
+		.tx_loadsz	= 63,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 |
+				  UART_FCR7_64BYTE,
+		.flags		= UART_CAP_FIFO,
+	},
 };
 
 /* Uart divisor latch read */
@@ -956,7 +967,17 @@ static void autoconfig_16550a(struct uart_8250_port *up)
 			up->port.type = PORT_16650;
 			up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP;
 		} else {
-			DEBUG_AUTOCONF("Motorola 8xxx DUART ");
+			serial_out(up, UART_LCR, 0);
+			serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO |
+				   UART_FCR7_64BYTE);
+			status1 = serial_in(up, UART_IIR) >> 5;
+			serial_out(up, UART_FCR, 0);
+			serial_out(up, UART_LCR, 0);
+
+			if (status1 == 7)
+				up->port.type = PORT_16550A_FSL64;
+			else
+				DEBUG_AUTOCONF("Motorola 8xxx DUART ");
 		}
 		serial_out(up, UART_EFR, 0);
 		return;
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 9124e837c181..ee628c489e47 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -55,7 +55,8 @@
 #define PORT_ALTR_16550_F64 27	/* Altera 16550 UART with 64 FIFOs */
 #define PORT_ALTR_16550_F128 28 /* Altera 16550 UART with 128 FIFOs */
 #define PORT_RT2880	29	/* Ralink RT2880 internal UART */
-#define PORT_MAX_8250	29	/* max port ID */
+#define PORT_16550A_FSL64 30	/* Freescale 16550 UART with 64 FIFOs */
+#define PORT_MAX_8250	30	/* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h
index 53af3b790129..00adb01fa5f3 100644
--- a/include/uapi/linux/serial_reg.h
+++ b/include/uapi/linux/serial_reg.h
@@ -86,7 +86,8 @@
 #define UART_FCR6_T_TRIGGER_8	0x10 /* Mask for transmit trigger set at 8 */
 #define UART_FCR6_T_TRIGGER_24  0x20 /* Mask for transmit trigger set at 24 */
 #define UART_FCR6_T_TRIGGER_30	0x30 /* Mask for transmit trigger set at 30 */
-#define UART_FCR7_64BYTE	0x20 /* Go into 64 byte mode (TI16C750) */
+#define UART_FCR7_64BYTE	0x20 /* Go into 64 byte mode (TI16C750 and
+					some Freescale UARTs) */
 
 #define UART_FCR_R_TRIG_SHIFT		6
 #define UART_FCR_R_TRIG_BITS(x)		\
-- 
cgit v1.2.3-71-gd317


From bdced7ef7838c1c4aebe9f295e44b7f0dcae2109 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Sat, 10 Jan 2015 07:31:12 -0800
Subject: bridge: support for multiple vlans and vlan ranges in setlink and
 dellink requests

This patch changes bridge IFLA_AF_SPEC netlink attribute parser to
look for more than one IFLA_BRIDGE_VLAN_INFO attribute. This allows
userspace to pack more than one vlan in the setlink msg.

The dumps were already sending more than one vlan info in the getlink msg.

This patch also adds bridge_vlan_info flags BRIDGE_VLAN_INFO_RANGE_BEGIN and
BRIDGE_VLAN_INFO_RANGE_END to indicate start and end of vlan range

This patch also deletes unused ifla_br_policy.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h |   2 +
 net/bridge/br_netlink.c        | 104 +++++++++++++++++++++++++++--------------
 2 files changed, 70 insertions(+), 36 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index b03ee8f62d3c..eaaea6208b42 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -125,6 +125,8 @@ enum {
 #define BRIDGE_VLAN_INFO_MASTER	(1<<0)	/* Operate on Bridge device as well */
 #define BRIDGE_VLAN_INFO_PVID	(1<<1)	/* VLAN is PVID, ingress untagged */
 #define BRIDGE_VLAN_INFO_UNTAGGED	(1<<2)	/* VLAN egresses untagged */
+#define BRIDGE_VLAN_INFO_RANGE_BEGIN	(1<<3) /* VLAN is start of vlan range */
+#define BRIDGE_VLAN_INFO_RANGE_END	(1<<4) /* VLAN is end of vlan range */
 
 struct bridge_vlan_info {
 	__u16 flags;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 9f5eb55a4d3a..6f616a2df0b4 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -218,57 +218,89 @@ out:
 	return err;
 }
 
-static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = {
-	[IFLA_BRIDGE_FLAGS]	= { .type = NLA_U16 },
-	[IFLA_BRIDGE_MODE]	= { .type = NLA_U16 },
-	[IFLA_BRIDGE_VLAN_INFO]	= { .type = NLA_BINARY,
-				    .len = sizeof(struct bridge_vlan_info), },
-};
+static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
+			int cmd, struct bridge_vlan_info *vinfo)
+{
+	int err = 0;
+
+	switch (cmd) {
+	case RTM_SETLINK:
+		if (p) {
+			err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
+			if (err)
+				break;
+
+			if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
+				err = br_vlan_add(p->br, vinfo->vid,
+						  vinfo->flags);
+		} else {
+			err = br_vlan_add(br, vinfo->vid, vinfo->flags);
+		}
+		break;
+
+	case RTM_DELLINK:
+		if (p) {
+			nbp_vlan_delete(p, vinfo->vid);
+			if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
+				br_vlan_delete(p->br, vinfo->vid);
+		} else {
+			br_vlan_delete(br, vinfo->vid);
+		}
+		break;
+	}
+
+	return err;
+}
 
 static int br_afspec(struct net_bridge *br,
 		     struct net_bridge_port *p,
 		     struct nlattr *af_spec,
 		     int cmd)
 {
-	struct nlattr *tb[IFLA_BRIDGE_MAX+1];
+	struct bridge_vlan_info *vinfo_start = NULL;
+	struct bridge_vlan_info *vinfo = NULL;
+	struct nlattr *attr;
 	int err = 0;
+	int rem;
 
-	err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy);
-	if (err)
-		return err;
+	nla_for_each_nested(attr, af_spec, rem) {
+		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
+			continue;
+		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
+			return -EINVAL;
+		vinfo = nla_data(attr);
+		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
+			if (vinfo_start)
+				return -EINVAL;
+			vinfo_start = vinfo;
+			continue;
+		}
+
+		if (vinfo_start) {
+			struct bridge_vlan_info tmp_vinfo;
+			int v;
 
-	if (tb[IFLA_BRIDGE_VLAN_INFO]) {
-		struct bridge_vlan_info *vinfo;
+			if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END))
+				return -EINVAL;
 
-		vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]);
+			if (vinfo->vid <= vinfo_start->vid)
+				return -EINVAL;
 
-		if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
-			return -EINVAL;
+			memcpy(&tmp_vinfo, vinfo_start,
+			       sizeof(struct bridge_vlan_info));
 
-		switch (cmd) {
-		case RTM_SETLINK:
-			if (p) {
-				err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
+			for (v = vinfo_start->vid; v <= vinfo->vid; v++) {
+				tmp_vinfo.vid = v;
+				err = br_vlan_info(br, p, cmd, &tmp_vinfo);
 				if (err)
 					break;
-
-				if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
-					err = br_vlan_add(p->br, vinfo->vid,
-							  vinfo->flags);
-			} else
-				err = br_vlan_add(br, vinfo->vid, vinfo->flags);
-
-			break;
-
-		case RTM_DELLINK:
-			if (p) {
-				nbp_vlan_delete(p, vinfo->vid);
-				if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
-					br_vlan_delete(p->br, vinfo->vid);
-			} else
-				br_vlan_delete(br, vinfo->vid);
-			break;
+			}
+			vinfo_start = NULL;
+		} else {
+			err = br_vlan_info(br, p, cmd, vinfo);
 		}
+		if (err)
+			break;
 	}
 
 	return err;
-- 
cgit v1.2.3-71-gd317


From 35a27cee321e7c4e7cba3550b2f48c2ca44d8a72 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Sat, 10 Jan 2015 07:31:13 -0800
Subject: rtnetlink: new filter RTEXT_FILTER_BRVLAN_COMPRESSED

This filter is same as RTEXT_FILTER_BRVLAN except that it tries
to compress the consecutive vlans into ranges.

This helps on systems with large number of configured vlans.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index d81f22d5b390..a1d18593f41e 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -636,6 +636,7 @@ struct tcamsg {
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
 #define RTEXT_FILTER_BRVLAN	(1 << 1)
+#define RTEXT_FILTER_BRVLAN_COMPRESSED	(1 << 2)
 
 /* End of information exported to user level */
 
-- 
cgit v1.2.3-71-gd317


From c66ad9ca3f4f55886829a61bd24fc5612d0c05c1 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 12 Jan 2015 14:29:34 -0500
Subject: ipv6: directly include libc-compat.h in ipv6.h

Patch 3b50d9029809 ("ipv6: fix redefinition of in6_pktinfo ...")
fixed a libc compatibility issue in ipv6 structure definitions
as described in include/uapi/linux/libc-compat.h.

It relies on including linux/in6.h to include libc-compat.h itself.
Include that file directly to clearly communicate the dependency
(libc-compat.h: "This include must be as early as possible").

Signed-off-by: Willem de Bruijn <willemb@google.com>

----

As discussed in http://patchwork.ozlabs.org/patch/427384/
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index b9b1b7d1c839..73cb02dc3065 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -1,6 +1,7 @@
 #ifndef _UAPI_IPV6_H
 #define _UAPI_IPV6_H
 
+#include <linux/libc-compat.h>
 #include <linux/types.h>
 #include <linux/in6.h>
 #include <asm/byteorder.h>
-- 
cgit v1.2.3-71-gd317


From 75453ccb61120885d6715a49496c57930dbe6253 Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Fri, 9 Jan 2015 14:06:37 +0200
Subject: nl80211: send netdetect configuration info in NL80211_CMD_GET_WOWLAN

Send the netdetect configuration information in the response to
NL8021_CMD_GET_WOWLAN commands.  This includes the scan interval,
SSIDs to match and frequencies to scan.

Additionally, add the NL80211_WOWLAN_TRIG_NET_DETECT with
NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED.

Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  4 +++-
 net/wireless/nl80211.c       | 52 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index a963d4824c51..b6c1a00bd8d2 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3734,7 +3734,9 @@ struct nl80211_pattern_support {
  *	same attributes used with @NL80211_CMD_START_SCHED_SCAN.  It
  *	specifies how the scan is performed (e.g. the interval and the
  *	channels to scan) as well as the scan results that will
- *	trigger a wake (i.e. the matchsets).
+ *	trigger a wake (i.e. the matchsets).  This attribute is also
+ *	sent in a response to @NL80211_CMD_GET_WIPHY, indicating the
+ *	number of match sets supported by the driver (u32).
  * @NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS: nested attribute
  *	containing an array with information about what triggered the
  *	wake up.  If no elements are present in the array, it means
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7c2ce26e22de..380784378df8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1088,6 +1088,11 @@ static int nl80211_send_wowlan(struct sk_buff *msg,
 			return -ENOBUFS;
 	}
 
+	if ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_NET_DETECT) &&
+	    nla_put_u32(msg, NL80211_WOWLAN_TRIG_NET_DETECT,
+			rdev->wiphy.wowlan->max_nd_match_sets))
+		return -ENOBUFS;
+
 	if (large && nl80211_send_wowlan_tcp_caps(rdev, msg))
 		return -ENOBUFS;
 
@@ -8747,6 +8752,48 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg,
 	return 0;
 }
 
+static int nl80211_send_wowlan_nd(struct sk_buff *msg,
+				  struct cfg80211_sched_scan_request *req)
+{
+	struct nlattr *nd, *freqs, *matches, *match;
+	int i;
+
+	if (!req)
+		return 0;
+
+	nd = nla_nest_start(msg, NL80211_WOWLAN_TRIG_NET_DETECT);
+	if (!nd)
+		return -ENOBUFS;
+
+	if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, req->interval))
+		return -ENOBUFS;
+
+	freqs = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES);
+	if (!freqs)
+		return -ENOBUFS;
+
+	for (i = 0; i < req->n_channels; i++)
+		nla_put_u32(msg, i, req->channels[i]->center_freq);
+
+	nla_nest_end(msg, freqs);
+
+	if (req->n_match_sets) {
+		matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH);
+		for (i = 0; i < req->n_match_sets; i++) {
+			match = nla_nest_start(msg, i);
+			nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID,
+				req->match_sets[i].ssid.ssid_len,
+				req->match_sets[i].ssid.ssid);
+			nla_nest_end(msg, match);
+		}
+		nla_nest_end(msg, matches);
+	}
+
+	nla_nest_end(msg, nd);
+
+	return 0;
+}
+
 static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -8804,6 +8851,11 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info)
 					    rdev->wiphy.wowlan_config->tcp))
 			goto nla_put_failure;
 
+		if (nl80211_send_wowlan_nd(
+			    msg,
+			    rdev->wiphy.wowlan_config->nd_config))
+			goto nla_put_failure;
+
 		nla_nest_end(msg, nl_wowlan);
 	}
 
-- 
cgit v1.2.3-71-gd317


From dfd8645ea1bd91277f841e74c33e1f4dbbede808 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 12 Jan 2015 17:00:38 -0800
Subject: vxlan: Remote checksum offload

Add support for remote checksum offload in VXLAN. This uses a
reserved bit to indicate that RCO is being done, and uses the low order
reserved eight bits of the VNI to hold the start and offset values in a
compressed manner.

Start is encoded in the low order seven bits of VNI. This is start >> 1
so that the checksum start offset is 0-254 using even values only.
Checksum offset (transport checksum field) is indicated in the high
order bit in the low order byte of the VNI. If the bit is set, the
checksum field is for UDP (so offset = start + 6), else checksum
field is for TCP (so offset = start + 16). Only TCP and UDP are
supported in this implementation.

Remote checksum offload for VXLAN is described in:

https://tools.ietf.org/html/draft-herbert-vxlan-rco-00

Tested by running 200 TCP_STREAM connections with VXLAN (over IPv4).

With UDP checksums and Remote Checksum Offload
  IPv4
      Client
        11.84% CPU utilization
      Server
        12.96% CPU utilization
      9197 Mbps
  IPv6
      Client
        12.46% CPU utilization
      Server
        14.48% CPU utilization
      8963 Mbps

With UDP checksums, no remote checksum offload
  IPv4
      Client
        15.67% CPU utilization
      Server
        14.83% CPU utilization
      9094 Mbps
  IPv6
      Client
        16.21% CPU utilization
      Server
        14.32% CPU utilization
      9058 Mbps

No UDP checksums
  IPv4
      Client
        15.03% CPU utilization
      Server
        23.09% CPU utilization
      9089 Mbps
  IPv6
      Client
        16.18% CPU utilization
      Server
        26.57% CPU utilization
       8954 Mbps

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c          | 191 +++++++++++++++++++++++++++++++++++++++++--
 include/net/vxlan.h          |  11 +++
 include/uapi/linux/if_link.h |   2 +
 3 files changed, 198 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 5c56a3ff25aa..99df0d76157c 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -539,6 +539,46 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
 	return 1;
 }
 
+static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
+					  unsigned int off,
+					  struct vxlanhdr *vh, size_t hdrlen,
+					  u32 data)
+{
+	size_t start, offset, plen;
+	__wsum delta;
+
+	if (skb->remcsum_offload)
+		return vh;
+
+	if (!NAPI_GRO_CB(skb)->csum_valid)
+		return NULL;
+
+	start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
+	offset = start + ((data & VXLAN_RCO_UDP) ?
+			  offsetof(struct udphdr, check) :
+			  offsetof(struct tcphdr, check));
+
+	plen = hdrlen + offset + sizeof(u16);
+
+	/* Pull checksum that will be written */
+	if (skb_gro_header_hard(skb, off + plen)) {
+		vh = skb_gro_header_slow(skb, off + plen, off);
+		if (!vh)
+			return NULL;
+	}
+
+	delta = remcsum_adjust((void *)vh + hdrlen,
+			       NAPI_GRO_CB(skb)->csum, start, offset);
+
+	/* Adjust skb->csum since we changed the packet */
+	skb->csum = csum_add(skb->csum, delta);
+	NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
+
+	skb->remcsum_offload = 1;
+
+	return vh;
+}
+
 static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
 					  struct sk_buff *skb,
 					  struct udp_offload *uoff)
@@ -547,6 +587,9 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
 	struct vxlanhdr *vh, *vh2;
 	unsigned int hlen, off_vx;
 	int flush = 1;
+	struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock,
+					     udp_offloads);
+	u32 flags;
 
 	off_vx = skb_gro_offset(skb);
 	hlen = off_vx + sizeof(*vh);
@@ -557,6 +600,19 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
 			goto out;
 	}
 
+	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
+	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
+
+	flags = ntohl(vh->vx_flags);
+
+	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
+		vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
+				       ntohl(vh->vx_vni));
+
+		if (!vh)
+			goto out;
+	}
+
 	flush = 0;
 
 	for (p = *head; p; p = p->next) {
@@ -570,8 +626,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
 		}
 	}
 
-	skb_gro_pull(skb, sizeof(struct vxlanhdr));
-	skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
 	pp = eth_gro_receive(head, skb);
 
 out:
@@ -1087,6 +1141,42 @@ static void vxlan_igmp_leave(struct work_struct *work)
 	dev_put(vxlan->dev);
 }
 
+static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
+				      size_t hdrlen, u32 data)
+{
+	size_t start, offset, plen;
+	__wsum delta;
+
+	if (skb->remcsum_offload) {
+		/* Already processed in GRO path */
+		skb->remcsum_offload = 0;
+		return vh;
+	}
+
+	start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
+	offset = start + ((data & VXLAN_RCO_UDP) ?
+			  offsetof(struct udphdr, check) :
+			  offsetof(struct tcphdr, check));
+
+	plen = hdrlen + offset + sizeof(u16);
+
+	if (!pskb_may_pull(skb, plen))
+		return NULL;
+
+	vh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
+
+	if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE))
+		__skb_checksum_complete(skb);
+
+	delta = remcsum_adjust((void *)vh + hdrlen,
+			       skb->csum, start, offset);
+
+	/* Adjust skb->csum since we changed the packet */
+	skb->csum = csum_add(skb->csum, delta);
+
+	return vh;
+}
+
 /* Callback from net/ipv4/udp.c to receive packets */
 static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
@@ -1111,12 +1201,22 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
 	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
 		goto drop;
+	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
 
 	vs = rcu_dereference_sk_user_data(sk);
 	if (!vs)
 		goto drop;
 
-	if (flags || (vni & 0xff)) {
+	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
+		vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni);
+		if (!vxh)
+			goto drop;
+
+		flags &= ~VXLAN_HF_RCO;
+		vni &= VXLAN_VID_MASK;
+	}
+
+	if (flags || (vni & ~VXLAN_VID_MASK)) {
 		/* If there are any unprocessed flags remaining treat
 		 * this as a malformed packet. This behavior diverges from
 		 * VXLAN RFC (RFC7348) which stipulates that bits in reserved
@@ -1553,8 +1653,23 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 	int min_headroom;
 	int err;
 	bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk);
+	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+	u16 hdrlen = sizeof(struct vxlanhdr);
+
+	if ((vs->flags & VXLAN_F_REMCSUM_TX) &&
+	    skb->ip_summed == CHECKSUM_PARTIAL) {
+		int csum_start = skb_checksum_start_offset(skb);
+
+		if (csum_start <= VXLAN_MAX_REMCSUM_START &&
+		    !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
+		    (skb->csum_offset == offsetof(struct udphdr, check) ||
+		     skb->csum_offset == offsetof(struct tcphdr, check))) {
+			udp_sum = false;
+			type |= SKB_GSO_TUNNEL_REMCSUM;
+		}
+	}
 
-	skb = udp_tunnel_handle_offloads(skb, udp_sum);
+	skb = iptunnel_handle_offloads(skb, udp_sum, type);
 	if (IS_ERR(skb)) {
 		err = -EINVAL;
 		goto err;
@@ -1583,6 +1698,22 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 	vxh->vx_flags = htonl(VXLAN_HF_VNI);
 	vxh->vx_vni = vni;
 
+	if (type & SKB_GSO_TUNNEL_REMCSUM) {
+		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
+			   VXLAN_RCO_SHIFT;
+
+		if (skb->csum_offset == offsetof(struct udphdr, check))
+			data |= VXLAN_RCO_UDP;
+
+		vxh->vx_vni |= htonl(data);
+		vxh->vx_flags |= htonl(VXLAN_HF_RCO);
+
+		if (!skb_is_gso(skb)) {
+			skb->ip_summed = CHECKSUM_NONE;
+			skb->encapsulation = 0;
+		}
+	}
+
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 
 	udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
@@ -1603,8 +1734,23 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 	int min_headroom;
 	int err;
 	bool udp_sum = !vs->sock->sk->sk_no_check_tx;
+	int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+	u16 hdrlen = sizeof(struct vxlanhdr);
+
+	if ((vs->flags & VXLAN_F_REMCSUM_TX) &&
+	    skb->ip_summed == CHECKSUM_PARTIAL) {
+		int csum_start = skb_checksum_start_offset(skb);
+
+		if (csum_start <= VXLAN_MAX_REMCSUM_START &&
+		    !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
+		    (skb->csum_offset == offsetof(struct udphdr, check) ||
+		     skb->csum_offset == offsetof(struct tcphdr, check))) {
+			udp_sum = false;
+			type |= SKB_GSO_TUNNEL_REMCSUM;
+		}
+	}
 
-	skb = udp_tunnel_handle_offloads(skb, udp_sum);
+	skb = iptunnel_handle_offloads(skb, udp_sum, type);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -1627,6 +1773,22 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 	vxh->vx_flags = htonl(VXLAN_HF_VNI);
 	vxh->vx_vni = vni;
 
+	if (type & SKB_GSO_TUNNEL_REMCSUM) {
+		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
+			   VXLAN_RCO_SHIFT;
+
+		if (skb->csum_offset == offsetof(struct udphdr, check))
+			data |= VXLAN_RCO_UDP;
+
+		vxh->vx_vni |= htonl(data);
+		vxh->vx_flags |= htonl(VXLAN_HF_RCO);
+
+		if (!skb_is_gso(skb)) {
+			skb->ip_summed = CHECKSUM_NONE;
+			skb->encapsulation = 0;
+		}
+	}
+
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 
 	return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
@@ -2218,6 +2380,8 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_UDP_CSUM]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
+	[IFLA_VXLAN_REMCSUM_TX]	= { .type = NLA_U8 },
+	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -2339,6 +2503,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
 	atomic_set(&vs->refcnt, 1);
 	vs->rcv = rcv;
 	vs->data = data;
+	vs->flags = flags;
 
 	/* Initialize the vxlan udp offloads structure */
 	vs->udp_offloads.port = port;
@@ -2533,6 +2698,14 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 	    nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
 		vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
 
+	if (data[IFLA_VXLAN_REMCSUM_TX] &&
+	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
+		vxlan->flags |= VXLAN_F_REMCSUM_TX;
+
+	if (data[IFLA_VXLAN_REMCSUM_RX] &&
+	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
+		vxlan->flags |= VXLAN_F_REMCSUM_RX;
+
 	if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
 			   vxlan->dst_port)) {
 		pr_info("duplicate VNI %u\n", vni);
@@ -2601,6 +2774,8 @@ static size_t vxlan_get_size(const struct net_device *dev)
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
 		0;
 }
 
@@ -2666,7 +2841,11 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
 			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
 	    nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
-			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)))
+			!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
+	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
+			!!(vxlan->flags & VXLAN_F_REMCSUM_TX)) ||
+	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
+			!!(vxlan->flags & VXLAN_F_REMCSUM_RX)))
 		goto nla_put_failure;
 
 	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index a0d80736224f..0a7443b49133 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -19,6 +19,14 @@ struct vxlanhdr {
 
 /* VXLAN header flags. */
 #define VXLAN_HF_VNI 0x08000000
+#define VXLAN_HF_RCO 0x00200000
+
+/* Remote checksum offload header option */
+#define VXLAN_RCO_MASK  0x7f    /* Last byte of vni field */
+#define VXLAN_RCO_UDP   0x80    /* Indicate UDP RCO (TCP when not set *) */
+#define VXLAN_RCO_SHIFT 1       /* Left shift of start */
+#define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1)
+#define VXLAN_MAX_REMCSUM_START (VXLAN_RCO_MASK << VXLAN_RCO_SHIFT)
 
 #define VXLAN_N_VID     (1u << 24)
 #define VXLAN_VID_MASK  (VXLAN_N_VID - 1)
@@ -38,6 +46,7 @@ struct vxlan_sock {
 	struct hlist_head vni_list[VNI_HASH_SIZE];
 	atomic_t	  refcnt;
 	struct udp_offload udp_offloads;
+	u32		  flags;
 };
 
 #define VXLAN_F_LEARN			0x01
@@ -49,6 +58,8 @@ struct vxlan_sock {
 #define VXLAN_F_UDP_CSUM		0x40
 #define VXLAN_F_UDP_ZERO_CSUM6_TX	0x80
 #define VXLAN_F_UDP_ZERO_CSUM6_RX	0x100
+#define VXLAN_F_REMCSUM_TX		0x200
+#define VXLAN_F_REMCSUM_RX		0x400
 
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 				  vxlan_rcv_t *rcv, void *data,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index f7d0d2d7173a..b2723f65846f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -370,6 +370,8 @@ enum {
 	IFLA_VXLAN_UDP_CSUM,
 	IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
 	IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+	IFLA_VXLAN_REMCSUM_TX,
+	IFLA_VXLAN_REMCSUM_RX,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
-- 
cgit v1.2.3-71-gd317


From 3511494ce2f3d3b77544c79b87511a4ddb61dc89 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 15 Jan 2015 03:53:55 +0100
Subject: vxlan: Group Policy extension

Implements supports for the Group Policy VXLAN extension [0] to provide
a lightweight and simple security label mechanism across network peers
based on VXLAN. The security context and associated metadata is mapped
to/from skb->mark. This allows further mapping to a SELinux context
using SECMARK, to implement ACLs directly with nftables, iptables, OVS,
tc, etc.

The group membership is defined by the lower 16 bits of skb->mark, the
upper 16 bits are used for flags.

SELinux allows to manage label to secure local resources. However,
distributed applications require ACLs to implemented across hosts. This
is typically achieved by matching on L2-L4 fields to identify the
original sending host and process on the receiver. On top of that,
netlabel and specifically CIPSO [1] allow to map security contexts to
universal labels.  However, netlabel and CIPSO are relatively complex.
This patch provides a lightweight alternative for overlay network
environments with a trusted underlay. No additional control protocol
is required.

           Host 1:                       Host 2:

      Group A        Group B        Group B     Group A
      +-----+   +-------------+    +-------+   +-----+
      | lxc |   | SELinux CTX |    | httpd |   | VM  |
      +--+--+   +--+----------+    +---+---+   +--+--+
	  \---+---/                     \----+---/
	      |                              |
	  +---+---+                      +---+---+
	  | vxlan |                      | vxlan |
	  +---+---+                      +---+---+
	      +------------------------------+

Backwards compatibility:
A VXLAN-GBP socket can receive standard VXLAN frames and will assign
the default group 0x0000 to such frames. A Linux VXLAN socket will
drop VXLAN-GBP  frames. The extension is therefore disabled by default
and needs to be specifically enabled:

   ip link add [...] type vxlan [...] gbp

In a mixed environment with VXLAN and VXLAN-GBP sockets, the GBP socket
must run on a separate port number.

Examples:
 iptables:
  host1# iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200
  host2# iptables -I INPUT -m mark --mark 0x200 -j DROP

 OVS:
  # ovs-ofctl add-flow br0 'in_port=1,actions=load:0x200->NXM_NX_TUN_GBP_ID[],NORMAL'
  # ovs-ofctl add-flow br0 'in_port=2,tun_gbp_id=0x200,actions=drop'

[0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
[1] http://lwn.net/Articles/204905/

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c           | 84 ++++++++++++++++++++++++++++++++++++-------
 include/net/vxlan.h           | 79 +++++++++++++++++++++++++++++++++++++---
 include/uapi/linux/if_link.h  |  1 +
 net/openvswitch/vport-vxlan.c |  9 +++--
 4 files changed, 152 insertions(+), 21 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 99df0d76157c..6dbf8e041922 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -620,7 +620,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
 			continue;
 
 		vh2 = (struct vxlanhdr *)(p->data + off_vx);
-		if (vh->vx_vni != vh2->vx_vni) {
+		if (vh->vx_flags != vh2->vx_flags ||
+		    vh->vx_vni != vh2->vx_vni) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
 		}
@@ -1183,6 +1184,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	struct vxlan_sock *vs;
 	struct vxlanhdr *vxh;
 	u32 flags, vni;
+	struct vxlan_metadata md = {0};
 
 	/* Need Vxlan and inner Ethernet header to be present */
 	if (!pskb_may_pull(skb, VXLAN_HLEN))
@@ -1216,6 +1218,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		vni &= VXLAN_VID_MASK;
 	}
 
+	/* For backwards compatibility, only allow reserved fields to be
+	 * used by VXLAN extensions if explicitly requested.
+	 */
+	if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
+		struct vxlanhdr_gbp *gbp;
+
+		gbp = (struct vxlanhdr_gbp *)vxh;
+		md.gbp = ntohs(gbp->policy_id);
+
+		if (gbp->dont_learn)
+			md.gbp |= VXLAN_GBP_DONT_LEARN;
+
+		if (gbp->policy_applied)
+			md.gbp |= VXLAN_GBP_POLICY_APPLIED;
+
+		flags &= ~VXLAN_GBP_USED_BITS;
+	}
+
 	if (flags || (vni & ~VXLAN_VID_MASK)) {
 		/* If there are any unprocessed flags remaining treat
 		 * this as a malformed packet. This behavior diverges from
@@ -1229,7 +1249,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		goto bad_flags;
 	}
 
-	vs->rcv(vs, skb, vxh->vx_vni);
+	md.vni = vxh->vx_vni;
+	vs->rcv(vs, skb, &md);
 	return 0;
 
 drop:
@@ -1246,8 +1267,8 @@ error:
 	return 1;
 }
 
-static void vxlan_rcv(struct vxlan_sock *vs,
-		      struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+		      struct vxlan_metadata *md)
 {
 	struct iphdr *oip = NULL;
 	struct ipv6hdr *oip6 = NULL;
@@ -1258,7 +1279,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
 	int err = 0;
 	union vxlan_addr *remote_ip;
 
-	vni = ntohl(vx_vni) >> 8;
+	vni = ntohl(md->vni) >> 8;
 	/* Is this VNI defined? */
 	vxlan = vxlan_vs_find_vni(vs, vni);
 	if (!vxlan)
@@ -1292,6 +1313,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
 		goto drop;
 
 	skb_reset_network_header(skb);
+	skb->mark = md->gbp;
 
 	if (oip6)
 		err = IP6_ECN_decapsulate(oip6, skb);
@@ -1641,13 +1663,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
 	return false;
 }
 
+static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs,
+				struct vxlan_metadata *md)
+{
+	struct vxlanhdr_gbp *gbp;
+
+	gbp = (struct vxlanhdr_gbp *)vxh;
+	vxh->vx_flags |= htonl(VXLAN_HF_GBP);
+
+	if (md->gbp & VXLAN_GBP_DONT_LEARN)
+		gbp->dont_learn = 1;
+
+	if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
+		gbp->policy_applied = 1;
+
+	gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 			   struct dst_entry *dst, struct sk_buff *skb,
 			   struct net_device *dev, struct in6_addr *saddr,
 			   struct in6_addr *daddr, __u8 prio, __u8 ttl,
-			   __be16 src_port, __be16 dst_port, __be32 vni,
-			   bool xnet)
+			   __be16 src_port, __be16 dst_port,
+			   struct vxlan_metadata *md, bool xnet)
 {
 	struct vxlanhdr *vxh;
 	int min_headroom;
@@ -1696,7 +1735,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = htonl(VXLAN_HF_VNI);
-	vxh->vx_vni = vni;
+	vxh->vx_vni = md->vni;
 
 	if (type & SKB_GSO_TUNNEL_REMCSUM) {
 		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1714,6 +1753,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
 		}
 	}
 
+	if (vs->flags & VXLAN_F_GBP)
+		vxlan_build_gbp_hdr(vxh, vs, md);
+
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 
 	udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
@@ -1728,7 +1770,8 @@ err:
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
+		   __be16 src_port, __be16 dst_port,
+		   struct vxlan_metadata *md, bool xnet)
 {
 	struct vxlanhdr *vxh;
 	int min_headroom;
@@ -1771,7 +1814,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = htonl(VXLAN_HF_VNI);
-	vxh->vx_vni = vni;
+	vxh->vx_vni = md->vni;
 
 	if (type & SKB_GSO_TUNNEL_REMCSUM) {
 		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1789,6 +1832,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
 		}
 	}
 
+	if (vs->flags & VXLAN_F_GBP)
+		vxlan_build_gbp_hdr(vxh, vs, md);
+
 	skb_set_inner_protocol(skb, htons(ETH_P_TEB));
 
 	return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
@@ -1849,6 +1895,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	const struct iphdr *old_iph;
 	struct flowi4 fl4;
 	union vxlan_addr *dst;
+	struct vxlan_metadata md;
 	__be16 src_port = 0, dst_port;
 	u32 vni;
 	__be16 df = 0;
@@ -1919,11 +1966,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
+		md.vni = htonl(vni << 8);
+		md.gbp = skb->mark;
 
 		err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
 				     fl4.saddr, dst->sin.sin_addr.s_addr,
-				     tos, ttl, df, src_port, dst_port,
-				     htonl(vni << 8),
+				     tos, ttl, df, src_port, dst_port, &md,
 				     !net_eq(vxlan->net, dev_net(vxlan->dev)));
 		if (err < 0) {
 			/* skb is already freed. */
@@ -1976,10 +2024,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
+		md.vni = htonl(vni << 8);
+		md.gbp = skb->mark;
 
 		err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
 				      dev, &fl6.saddr, &fl6.daddr, 0, ttl,
-				      src_port, dst_port, htonl(vni << 8),
+				      src_port, dst_port, &md,
 				      !net_eq(vxlan->net, dev_net(vxlan->dev)));
 #endif
 	}
@@ -2382,6 +2432,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_REMCSUM_TX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
+	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -2706,6 +2757,9 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
 		vxlan->flags |= VXLAN_F_REMCSUM_RX;
 
+	if (data[IFLA_VXLAN_GBP])
+		vxlan->flags |= VXLAN_F_GBP;
+
 	if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
 			   vxlan->dst_port)) {
 		pr_info("duplicate VNI %u\n", vni);
@@ -2851,6 +2905,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
 		goto nla_put_failure;
 
+	if (vxlan->flags & VXLAN_F_GBP &&
+	    nla_put_flag(skb, IFLA_VXLAN_GBP))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 0a7443b49133..f4a3583171bd 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -11,15 +11,76 @@
 #define VNI_HASH_BITS	10
 #define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)
 
-/* VXLAN protocol header */
+/*
+ * VXLAN Group Based Policy Extension:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R|        Group Policy ID        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                VXLAN Network Identifier (VNI) |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * D = Don't Learn bit. When set, this bit indicates that the egress
+ *     VTEP MUST NOT learn the source address of the encapsulated frame.
+ *
+ * A = Indicates that the group policy has already been applied to
+ *     this packet. Policies MUST NOT be applied by devices when the
+ *     A bit is set.
+ *
+ * [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
+ */
+struct vxlanhdr_gbp {
+	__u8	vx_flags;
+#ifdef __LITTLE_ENDIAN_BITFIELD
+	__u8	reserved_flags1:3,
+		policy_applied:1,
+		reserved_flags2:2,
+		dont_learn:1,
+		reserved_flags3:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u8	reserved_flags1:1,
+		dont_learn:1,
+		reserved_flags2:2,
+		policy_applied:1,
+		reserved_flags3:3;
+#else
+#error	"Please fix <asm/byteorder.h>"
+#endif
+	__be16	policy_id;
+	__be32	vx_vni;
+};
+
+#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF)
+
+/* skb->mark mapping
+ *
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R|        Group Policy ID        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+#define VXLAN_GBP_DONT_LEARN		(BIT(6) << 16)
+#define VXLAN_GBP_POLICY_APPLIED	(BIT(3) << 16)
+#define VXLAN_GBP_ID_MASK		(0xFFFF)
+
+/* VXLAN protocol header:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |G|R|R|R|I|R|R|C|               Reserved                        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                VXLAN Network Identifier (VNI) |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * G = 1	Group Policy (VXLAN-GBP)
+ * I = 1	VXLAN Network Identifier (VNI) present
+ * C = 1	Remote checksum offload (RCO)
+ */
 struct vxlanhdr {
 	__be32 vx_flags;
 	__be32 vx_vni;
 };
 
 /* VXLAN header flags. */
-#define VXLAN_HF_VNI 0x08000000
-#define VXLAN_HF_RCO 0x00200000
+#define VXLAN_HF_RCO BIT(24)
+#define VXLAN_HF_VNI BIT(27)
+#define VXLAN_HF_GBP BIT(31)
 
 /* Remote checksum offload header option */
 #define VXLAN_RCO_MASK  0x7f    /* Last byte of vni field */
@@ -32,8 +93,14 @@ struct vxlanhdr {
 #define VXLAN_VID_MASK  (VXLAN_N_VID - 1)
 #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
 
+struct vxlan_metadata {
+	__be32		vni;
+	u32		gbp;
+};
+
 struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);
+typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
+			   struct vxlan_metadata *md);
 
 /* per UDP socket information */
 struct vxlan_sock {
@@ -60,6 +127,7 @@ struct vxlan_sock {
 #define VXLAN_F_UDP_ZERO_CSUM6_RX	0x100
 #define VXLAN_F_REMCSUM_TX		0x200
 #define VXLAN_F_REMCSUM_RX		0x400
+#define VXLAN_F_GBP			0x800
 
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 				  vxlan_rcv_t *rcv, void *data,
@@ -70,7 +138,8 @@ void vxlan_sock_release(struct vxlan_sock *vs);
 int vxlan_xmit_skb(struct vxlan_sock *vs,
 		   struct rtable *rt, struct sk_buff *skb,
 		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
-		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
+		   __be16 src_port, __be16 dst_port, struct vxlan_metadata *md,
+		   bool xnet);
 
 static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
 						     netdev_features_t features)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index b2723f65846f..2a8380edbb7e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -372,6 +372,7 @@ enum {
 	IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
 	IFLA_VXLAN_REMCSUM_TX,
 	IFLA_VXLAN_REMCSUM_RX,
+	IFLA_VXLAN_GBP,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 1435a053a870..9919d71c52c3 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -59,7 +59,8 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
 }
 
 /* Called with rcu_read_lock and BH disabled. */
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+		      struct vxlan_metadata *md)
 {
 	struct ovs_tunnel_info tun_info;
 	struct vport *vport = vs->data;
@@ -68,7 +69,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
 
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
-	key = cpu_to_be64(ntohl(vx_vni) >> 8);
+	key = cpu_to_be64(ntohl(md->vni) >> 8);
 	ovs_flow_tun_info_init(&tun_info, iph,
 			       udp_hdr(skb)->source, udp_hdr(skb)->dest,
 			       key, TUNNEL_KEY, NULL, 0);
@@ -146,6 +147,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 	struct vxlan_port *vxlan_port = vxlan_vport(vport);
 	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
 	const struct ovs_key_ipv4_tunnel *tun_key;
+	struct vxlan_metadata md = {0};
 	struct rtable *rt;
 	struct flowi4 fl;
 	__be16 src_port;
@@ -170,12 +172,13 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 	skb->ignore_df = 1;
 
 	src_port = udp_flow_src_port(net, skb, 0, 0, true);
+	md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
 
 	err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
 			     fl.saddr, tun_key->ipv4_dst,
 			     tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
 			     src_port, dst_port,
-			     htonl(be64_to_cpu(tun_key->tun_id) << 8),
+			     &md,
 			     false);
 	if (err < 0)
 		ip_rt_put(rt);
-- 
cgit v1.2.3-71-gd317


From 1dd144cf5b4b47e12438c2c6883925ce1a9b499f Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 15 Jan 2015 03:53:59 +0100
Subject: openvswitch: Support VXLAN Group Policy extension

Introduces support for the group policy extension to the VXLAN virtual
port. The extension is disabled by default and only enabled if the user
has provided the respective configuration.

  ovs-vsctl add-port br0 vxlan0 -- \
     set Interface vxlan0 type=vxlan options:exts=gbp

The configuration interface to enable the extension is based on a new
attribute OVS_VXLAN_EXT_GBP nested inside OVS_TUNNEL_ATTR_EXTENSION
which can carry additional extensions as needed in the future.

The group policy metadata is stored as binary blob (struct ovs_vxlan_opts)
internally just like Geneve options but transported as nested Netlink
attributes to user space.

Renames the existing TUNNEL_OPTIONS_PRESENT to TUNNEL_GENEVE_OPT with the
binary value kept intact, a new flag TUNNEL_VXLAN_OPT is introduced.

The attributes OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and existing
OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS are implemented mutually exclusive.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h         |   5 +-
 include/uapi/linux/openvswitch.h |  11 ++++
 net/openvswitch/flow_netlink.c   | 114 ++++++++++++++++++++++++++++++++++-----
 net/openvswitch/vport-geneve.c   |  15 ++++--
 net/openvswitch/vport-vxlan.c    |  82 +++++++++++++++++++++++++++-
 net/openvswitch/vport-vxlan.h    |  11 ++++
 6 files changed, 218 insertions(+), 20 deletions(-)
 create mode 100644 net/openvswitch/vport-vxlan.h

(limited to 'include/uapi/linux')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 25a59eb388a6..ce4db3cc5647 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -97,7 +97,10 @@ struct ip_tunnel {
 #define TUNNEL_DONT_FRAGMENT    __cpu_to_be16(0x0100)
 #define TUNNEL_OAM		__cpu_to_be16(0x0200)
 #define TUNNEL_CRIT_OPT		__cpu_to_be16(0x0400)
-#define TUNNEL_OPTIONS_PRESENT	__cpu_to_be16(0x0800)
+#define TUNNEL_GENEVE_OPT	__cpu_to_be16(0x0800)
+#define TUNNEL_VXLAN_OPT	__cpu_to_be16(0x1000)
+
+#define TUNNEL_OPTIONS_PRESENT	(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
 
 struct tnl_ptk_info {
 	__be16 flags;
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index f714e8633352..cd8d933963c2 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -252,11 +252,21 @@ enum ovs_vport_attr {
 
 #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
 
+enum {
+	OVS_VXLAN_EXT_UNSPEC,
+	OVS_VXLAN_EXT_GBP,	/* Flag or __u32 */
+	__OVS_VXLAN_EXT_MAX,
+};
+
+#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1)
+
+
 /* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
  */
 enum {
 	OVS_TUNNEL_ATTR_UNSPEC,
 	OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
+	OVS_TUNNEL_ATTR_EXTENSION,
 	__OVS_TUNNEL_ATTR_MAX
 };
 
@@ -328,6 +338,7 @@ enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,        /* Array of Geneve options. */
 	OVS_TUNNEL_KEY_ATTR_TP_SRC,		/* be16 src Transport Port. */
 	OVS_TUNNEL_KEY_ATTR_TP_DST,		/* be16 dst Transport Port. */
+	OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS,		/* Nested OVS_VXLAN_EXT_* */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 518941c5bdf1..d210d1be3470 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -49,6 +49,7 @@
 #include <net/mpls.h>
 
 #include "flow_netlink.h"
+#include "vport-vxlan.h"
 
 struct ovs_len_tbl {
 	int len;
@@ -268,6 +269,9 @@ size_t ovs_tun_key_attr_size(void)
 		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
 		+ nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
 		+ nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+		/* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
+		 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
+		 */
 		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
 		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
 }
@@ -308,6 +312,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
 	[OVS_TUNNEL_KEY_ATTR_TP_DST]	    = { .len = sizeof(u16) },
 	[OVS_TUNNEL_KEY_ATTR_OAM]	    = { .len = 0 },
 	[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_NESTED },
+	[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED },
 };
 
 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
@@ -460,6 +465,41 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a,
 	return 0;
 }
 
+static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = {
+	[OVS_VXLAN_EXT_GBP]	= { .type = NLA_U32 },
+};
+
+static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
+				     struct sw_flow_match *match, bool is_mask,
+				     bool log)
+{
+	struct nlattr *tb[OVS_VXLAN_EXT_MAX+1];
+	unsigned long opt_key_offset;
+	struct ovs_vxlan_opts opts;
+	int err;
+
+	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
+
+	err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy);
+	if (err < 0)
+		return err;
+
+	memset(&opts, 0, sizeof(opts));
+
+	if (tb[OVS_VXLAN_EXT_GBP])
+		opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]);
+
+	if (!is_mask)
+		SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
+	else
+		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+
+	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
+	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
+				  is_mask);
+	return 0;
+}
+
 static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 				struct sw_flow_match *match, bool is_mask,
 				bool log)
@@ -468,6 +508,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 	int rem;
 	bool ttl = false;
 	__be16 tun_flags = 0;
+	int opts_type = 0;
 
 	nla_for_each_nested(a, attr, rem) {
 		int type = nla_type(a);
@@ -527,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 			tun_flags |= TUNNEL_OAM;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+			if (opts_type) {
+				OVS_NLERR(log, "Multiple metadata blocks provided");
+				return -EINVAL;
+			}
+
 			err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
 			if (err)
 				return err;
 
-			tun_flags |= TUNNEL_OPTIONS_PRESENT;
+			tun_flags |= TUNNEL_GENEVE_OPT;
+			opts_type = type;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+			if (opts_type) {
+				OVS_NLERR(log, "Multiple metadata blocks provided");
+				return -EINVAL;
+			}
+
+			err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
+			if (err)
+				return err;
+
+			tun_flags |= TUNNEL_VXLAN_OPT;
+			opts_type = type;
 			break;
 		default:
 			OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
@@ -560,6 +620,23 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
 		}
 	}
 
+	return opts_type;
+}
+
+static int vxlan_opt_to_nlattr(struct sk_buff *skb,
+			       const void *tun_opts, int swkey_tun_opts_len)
+{
+	const struct ovs_vxlan_opts *opts = tun_opts;
+	struct nlattr *nla;
+
+	nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
+	if (!nla)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, nla);
 	return 0;
 }
 
@@ -596,10 +673,15 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
 	if ((output->tun_flags & TUNNEL_OAM) &&
 	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
 		return -EMSGSIZE;
-	if (tun_opts &&
-	    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
-		    swkey_tun_opts_len, tun_opts))
-		return -EMSGSIZE;
+	if (tun_opts) {
+		if (output->tun_flags & TUNNEL_GENEVE_OPT &&
+		    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+			    swkey_tun_opts_len, tun_opts))
+			return -EMSGSIZE;
+		else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
+			 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
+			return -EMSGSIZE;
+	}
 
 	return 0;
 }
@@ -680,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match,  u64 *attrs,
 	}
 	if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
 		if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
-					 is_mask, log))
+					 is_mask, log) < 0)
 			return -EINVAL;
 		*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
 	}
@@ -1578,17 +1660,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	struct sw_flow_key key;
 	struct ovs_tunnel_info *tun_info;
 	struct nlattr *a;
-	int err, start;
+	int err, start, opts_type;
 
 	ovs_match_init(&match, &key, NULL);
-	err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
-	if (err)
-		return err;
+	opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
+	if (opts_type < 0)
+		return opts_type;
 
 	if (key.tun_opts_len) {
-		err = validate_geneve_opts(&key);
-		if (err < 0)
-			return err;
+		switch (opts_type) {
+		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
+			err = validate_geneve_opts(&key);
+			if (err < 0)
+				return err;
+			break;
+		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+			break;
+		}
 	};
 
 	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 88a010c98c05..7ca3d454ff3b 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -88,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
 
 	opts_len = geneveh->opt_len * 4;
 
-	flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT |
+	flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
 		(udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
 		(geneveh->oam ? TUNNEL_OAM : 0) |
 		(geneveh->critical ? TUNNEL_CRIT_OPT : 0);
@@ -178,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
 	__be16 sport;
 	struct rtable *rt;
 	struct flowi4 fl;
-	u8 vni[3];
+	u8 vni[3], opts_len, *opts;
 	__be16 df;
 	int err;
 
@@ -200,11 +200,18 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
 	tunnel_id_to_vni(tun_key->tun_id, vni);
 	skb->ignore_df = 1;
 
+	if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) {
+		opts = (u8 *)tun_info->options;
+		opts_len = tun_info->options_len;
+	} else {
+		opts = NULL;
+		opts_len = 0;
+	}
+
 	err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
 			      tun_key->ipv4_dst, tun_key->ipv4_tos,
 			      tun_key->ipv4_ttl, df, sport, dport,
-			      tun_key->tun_flags, vni,
-			      tun_info->options_len, (u8 *)tun_info->options,
+			      tun_key->tun_flags, vni, opts_len, opts,
 			      false);
 	if (err < 0)
 		ip_rt_put(rt);
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 9919d71c52c3..8a2d54cba9ba 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -40,6 +40,7 @@
 
 #include "datapath.h"
 #include "vport.h"
+#include "vport-vxlan.h"
 
 /**
  * struct vxlan_port - Keeps track of open UDP ports
@@ -49,6 +50,7 @@
 struct vxlan_port {
 	struct vxlan_sock *vs;
 	char name[IFNAMSIZ];
+	u32 exts; /* VXLAN_F_* in <net/vxlan.h> */
 };
 
 static struct vport_ops ovs_vxlan_vport_ops;
@@ -63,16 +65,26 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
 		      struct vxlan_metadata *md)
 {
 	struct ovs_tunnel_info tun_info;
+	struct vxlan_port *vxlan_port;
 	struct vport *vport = vs->data;
 	struct iphdr *iph;
+	struct ovs_vxlan_opts opts = {
+		.gbp = md->gbp,
+	};
 	__be64 key;
+	__be16 flags;
+
+	flags = TUNNEL_KEY;
+	vxlan_port = vxlan_vport(vport);
+	if (vxlan_port->exts & VXLAN_F_GBP)
+		flags |= TUNNEL_VXLAN_OPT;
 
 	/* Save outer tunnel values */
 	iph = ip_hdr(skb);
 	key = cpu_to_be64(ntohl(md->vni) >> 8);
 	ovs_flow_tun_info_init(&tun_info, iph,
 			       udp_hdr(skb)->source, udp_hdr(skb)->dest,
-			       key, TUNNEL_KEY, NULL, 0);
+			       key, flags, &opts, sizeof(opts));
 
 	ovs_vport_receive(vport, skb, &tun_info);
 }
@@ -84,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
 
 	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
 		return -EMSGSIZE;
+
+	if (vxlan_port->exts) {
+		struct nlattr *exts;
+
+		exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
+		if (!exts)
+			return -EMSGSIZE;
+
+		if (vxlan_port->exts & VXLAN_F_GBP &&
+		    nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
+			return -EMSGSIZE;
+
+		nla_nest_end(skb, exts);
+	}
+
 	return 0;
 }
 
@@ -96,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport)
 	ovs_vport_deferred_free(vport);
 }
 
+static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = {
+	[OVS_VXLAN_EXT_GBP]	= { .type = NLA_FLAG, },
+};
+
+static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
+{
+	struct nlattr *exts[OVS_VXLAN_EXT_MAX+1];
+	struct vxlan_port *vxlan_port;
+	int err;
+
+	if (nla_len(attr) < sizeof(struct nlattr))
+		return -EINVAL;
+
+	err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy);
+	if (err < 0)
+		return err;
+
+	vxlan_port = vxlan_vport(vport);
+
+	if (exts[OVS_VXLAN_EXT_GBP])
+		vxlan_port->exts |= VXLAN_F_GBP;
+
+	return 0;
+}
+
 static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
 {
 	struct net *net = ovs_dp_get_net(parms->dp);
@@ -128,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
 	vxlan_port = vxlan_vport(vport);
 	strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
 
-	vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
+	a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
+	if (a) {
+		err = vxlan_configure_exts(vport, a);
+		if (err) {
+			ovs_vport_free(vport);
+			goto error;
+		}
+	}
+
+	vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true,
+			    vxlan_port->exts);
 	if (IS_ERR(vs)) {
 		ovs_vport_free(vport);
 		return (void *)vs;
@@ -141,6 +203,21 @@ error:
 	return ERR_PTR(err);
 }
 
+static int vxlan_ext_gbp(struct sk_buff *skb)
+{
+	const struct ovs_tunnel_info *tun_info;
+	const struct ovs_vxlan_opts *opts;
+
+	tun_info = OVS_CB(skb)->egress_tun_info;
+	opts = tun_info->options;
+
+	if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT &&
+	    tun_info->options_len >= sizeof(*opts))
+		return opts->gbp;
+	else
+		return 0;
+}
+
 static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 {
 	struct net *net = ovs_dp_get_net(vport->dp);
@@ -173,6 +250,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 
 	src_port = udp_flow_src_port(net, skb, 0, 0, true);
 	md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
+	md.gbp = vxlan_ext_gbp(skb);
 
 	err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
 			     fl.saddr, tun_key->ipv4_dst,
diff --git a/net/openvswitch/vport-vxlan.h b/net/openvswitch/vport-vxlan.h
new file mode 100644
index 000000000000..4b08233e73d5
--- /dev/null
+++ b/net/openvswitch/vport-vxlan.h
@@ -0,0 +1,11 @@
+#ifndef VPORT_VXLAN_H
+#define VPORT_VXLAN_H 1
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+struct ovs_vxlan_opts {
+	__u32 gbp;
+};
+
+#endif
-- 
cgit v1.2.3-71-gd317


From 97d910d0aaa619ca530d08e2b1125b8014ccb030 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 15 Jan 2015 16:05:21 +0100
Subject: cfg80211: remove 80+80 MHz rate reporting

These rates are treated the same as 160 MHz in the spec, so
it makes no sense to distinguish them. As no driver uses them
yet, this is also not a problem, just remove them.

In the userspace API the field remains reserved to preserve
API and ABI.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 8 +++-----
 include/uapi/linux/nl80211.h | 3 ++-
 net/wireless/nl80211.c       | 3 ---
 net/wireless/util.c          | 3 +--
 4 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 38abc07503fd..0322048fddab 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -875,7 +875,6 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
  * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS
  * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 MHz width transmission
  * @RATE_INFO_FLAGS_80_MHZ_WIDTH: 80 MHz width transmission
- * @RATE_INFO_FLAGS_80P80_MHZ_WIDTH: 80+80 MHz width transmission
  * @RATE_INFO_FLAGS_160_MHZ_WIDTH: 160 MHz width transmission
  * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval
  * @RATE_INFO_FLAGS_60G: 60GHz MCS
@@ -885,10 +884,9 @@ enum rate_info_flags {
 	RATE_INFO_FLAGS_VHT_MCS			= BIT(1),
 	RATE_INFO_FLAGS_40_MHZ_WIDTH		= BIT(2),
 	RATE_INFO_FLAGS_80_MHZ_WIDTH		= BIT(3),
-	RATE_INFO_FLAGS_80P80_MHZ_WIDTH		= BIT(4),
-	RATE_INFO_FLAGS_160_MHZ_WIDTH		= BIT(5),
-	RATE_INFO_FLAGS_SHORT_GI		= BIT(6),
-	RATE_INFO_FLAGS_60G			= BIT(7),
+	RATE_INFO_FLAGS_160_MHZ_WIDTH		= BIT(4),
+	RATE_INFO_FLAGS_SHORT_GI		= BIT(5),
+	RATE_INFO_FLAGS_60G			= BIT(6),
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index b6c1a00bd8d2..11cdb85ac646 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2278,7 +2278,8 @@ struct nl80211_sta_flag_update {
  * @NL80211_RATE_INFO_VHT_MCS: MCS index for VHT (u8)
  * @NL80211_RATE_INFO_VHT_NSS: number of streams in VHT (u8)
  * @NL80211_RATE_INFO_80_MHZ_WIDTH: 80 MHz VHT rate
- * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: 80+80 MHz VHT rate
+ * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: unused - 80+80 is treated the
+ *	same as 160 for purposes of the bitrates
  * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate
  * @__NL80211_RATE_INFO_AFTER_LAST: internal use
  */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 380784378df8..8998484ea970 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3614,9 +3614,6 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
 		if (info->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH &&
 		    nla_put_flag(msg, NL80211_RATE_INFO_80_MHZ_WIDTH))
 			return false;
-		if (info->flags & RATE_INFO_FLAGS_80P80_MHZ_WIDTH &&
-		    nla_put_flag(msg, NL80211_RATE_INFO_80P80_MHZ_WIDTH))
-			return false;
 		if (info->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH &&
 		    nla_put_flag(msg, NL80211_RATE_INFO_160_MHZ_WIDTH))
 			return false;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index d0ac795445b7..6942d48f1ac5 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1073,8 +1073,7 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
 	if (WARN_ON_ONCE(rate->mcs > 9))
 		return 0;
 
-	idx = rate->flags & (RATE_INFO_FLAGS_160_MHZ_WIDTH |
-			     RATE_INFO_FLAGS_80P80_MHZ_WIDTH) ? 3 :
+	idx = rate->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH ? 3 :
 		  rate->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH ? 2 :
 		  rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH ? 1 : 0;
 
-- 
cgit v1.2.3-71-gd317


From b51f3beecfbbfc946749a91fb444cb8917cf444f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 15 Jan 2015 16:14:02 +0100
Subject: cfg80211: change bandwidth reporting to explicit field

For some reason, we made the bandwidth separate flags, which
is rather confusing - a single rate cannot have different
bandwidths at the same time.

Change this to no longer be flags but use a separate field
for the bandwidth ('bw') instead.

While at it, add support for 5 and 10 MHz rates - these are
reported as regular legacy rates with their real bitrate,
but tagged as 5/10 now to make it easier to distinguish them.

In the nl80211 API, the flags are preserved, but the code
now can also clearly only set a single one of the flags.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath6kl/cfg80211.c |  3 ++-
 drivers/net/wireless/mwifiex/cfg80211.c    | 11 ++++----
 include/net/cfg80211.h                     | 33 ++++++++++++++++++------
 include/uapi/linux/nl80211.h               |  8 ++++++
 net/mac80211/cfg.c                         | 31 +++++++++++++++--------
 net/mac80211/util.c                        | 25 +++++++++++++------
 net/wireless/nl80211.c                     | 40 +++++++++++++++++++++---------
 net/wireless/util.c                        | 24 ++++++++++++++----
 8 files changed, 125 insertions(+), 50 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 44dd6ef923cd..85da63a67faf 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -1827,6 +1827,7 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev,
 		}
 
 		sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS;
+		sinfo->txrate.bw = RATE_INFO_BW_20;
 	} else if (is_rate_ht40(rate, &mcs, &sgi)) {
 		if (sgi) {
 			sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI;
@@ -1835,7 +1836,7 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev,
 			sinfo->txrate.mcs = mcs;
 		}
 
-		sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
+		sinfo->txrate.bw = RATE_INFO_BW_40;
 		sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS;
 	} else {
 		ath6kl_dbg(ATH6KL_DBG_WLAN_CFG,
diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index 71312ff52703..1996a8b612d2 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -856,16 +856,16 @@ mwifiex_parse_htinfo(struct mwifiex_private *priv, u8 tx_htinfo,
 			/* HT or VHT */
 			switch (tx_htinfo & (BIT(3) | BIT(2))) {
 			case 0:
-				/* This will be 20MHz */
+				rate->bw = RATE_INFO_BW_20;
 				break;
 			case (BIT(2)):
-				rate->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
+				rate->bw = RATE_INFO_BW_40;
 				break;
 			case (BIT(3)):
-				rate->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH;
+				rate->bw = RATE_INFO_BW_80;
 				break;
 			case (BIT(3) | BIT(2)):
-				rate->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH;
+				rate->bw = RATE_INFO_BW_160;
 				break;
 			}
 
@@ -885,8 +885,9 @@ mwifiex_parse_htinfo(struct mwifiex_private *priv, u8 tx_htinfo,
 		if ((tx_htinfo & BIT(0)) && (priv->tx_rate < 16)) {
 			rate->mcs = priv->tx_rate;
 			rate->flags |= RATE_INFO_FLAGS_MCS;
+			rate->bw = RATE_INFO_BW_20;
 			if (tx_htinfo & BIT(1))
-				rate->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
+				rate->bw = RATE_INFO_BW_40;
 			if (tx_htinfo & BIT(2))
 				rate->flags |= RATE_INFO_FLAGS_SHORT_GI;
 		}
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0322048fddab..7b44ba0a7632 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -873,20 +873,35 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
  *
  * @RATE_INFO_FLAGS_MCS: mcs field filled with HT MCS
  * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS
- * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 MHz width transmission
- * @RATE_INFO_FLAGS_80_MHZ_WIDTH: 80 MHz width transmission
- * @RATE_INFO_FLAGS_160_MHZ_WIDTH: 160 MHz width transmission
  * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval
  * @RATE_INFO_FLAGS_60G: 60GHz MCS
  */
 enum rate_info_flags {
 	RATE_INFO_FLAGS_MCS			= BIT(0),
 	RATE_INFO_FLAGS_VHT_MCS			= BIT(1),
-	RATE_INFO_FLAGS_40_MHZ_WIDTH		= BIT(2),
-	RATE_INFO_FLAGS_80_MHZ_WIDTH		= BIT(3),
-	RATE_INFO_FLAGS_160_MHZ_WIDTH		= BIT(4),
-	RATE_INFO_FLAGS_SHORT_GI		= BIT(5),
-	RATE_INFO_FLAGS_60G			= BIT(6),
+	RATE_INFO_FLAGS_SHORT_GI		= BIT(2),
+	RATE_INFO_FLAGS_60G			= BIT(3),
+};
+
+/**
+ * enum rate_info_bw - rate bandwidth information
+ *
+ * Used by the driver to indicate the rate bandwidth.
+ *
+ * @RATE_INFO_BW_5: 5 MHz bandwidth
+ * @RATE_INFO_BW_10: 10 MHz bandwidth
+ * @RATE_INFO_BW_20: 20 MHz bandwidth
+ * @RATE_INFO_BW_40: 40 MHz bandwidth
+ * @RATE_INFO_BW_80: 80 MHz bandwidth
+ * @RATE_INFO_BW_160: 160 MHz bandwidth
+ */
+enum rate_info_bw {
+	RATE_INFO_BW_5,
+	RATE_INFO_BW_10,
+	RATE_INFO_BW_20,
+	RATE_INFO_BW_40,
+	RATE_INFO_BW_80,
+	RATE_INFO_BW_160,
 };
 
 /**
@@ -898,12 +913,14 @@ enum rate_info_flags {
  * @mcs: mcs index if struct describes a 802.11n bitrate
  * @legacy: bitrate in 100kbit/s for 802.11abg
  * @nss: number of streams (VHT only)
+ * @bw: bandwidth (from &enum rate_info_bw)
  */
 struct rate_info {
 	u8 flags;
 	u8 mcs;
 	u16 legacy;
 	u8 nss;
+	u8 bw;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 11cdb85ac646..f52797a90816 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2281,6 +2281,12 @@ struct nl80211_sta_flag_update {
  * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: unused - 80+80 is treated the
  *	same as 160 for purposes of the bitrates
  * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate
+ * @NL80211_RATE_INFO_10_MHZ_WIDTH: 10 MHz width - note that this is
+ *	a legacy rate and will be reported as the actual bitrate, i.e.
+ *	half the base (20 MHz) rate
+ * @NL80211_RATE_INFO_5_MHZ_WIDTH: 5 MHz width - note that this is
+ *	a legacy rate and will be reported as the actual bitrate, i.e.
+ *	a quarter of the base (20 MHz) rate
  * @__NL80211_RATE_INFO_AFTER_LAST: internal use
  */
 enum nl80211_rate_info {
@@ -2295,6 +2301,8 @@ enum nl80211_rate_info {
 	NL80211_RATE_INFO_80_MHZ_WIDTH,
 	NL80211_RATE_INFO_80P80_MHZ_WIDTH,
 	NL80211_RATE_INFO_160_MHZ_WIDTH,
+	NL80211_RATE_INFO_10_MHZ_WIDTH,
+	NL80211_RATE_INFO_5_MHZ_WIDTH,
 
 	/* keep last */
 	__NL80211_RATE_INFO_AFTER_LAST,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 6d5076fbf87a..ff090ef1ea2c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -428,11 +428,13 @@ void sta_set_rate_info_tx(struct sta_info *sta,
 		rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
 	}
 	if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
-		rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
-	if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH)
-		rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH;
-	if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH)
-		rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH;
+		rinfo->bw = RATE_INFO_BW_40;
+	else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH)
+		rinfo->bw = RATE_INFO_BW_80;
+	else if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH)
+		rinfo->bw = RATE_INFO_BW_160;
+	else
+		rinfo->bw = RATE_INFO_BW_20;
 	if (rate->flags & IEEE80211_TX_RC_SHORT_GI)
 		rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI;
 }
@@ -459,14 +461,21 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
 		rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
 	}
 
-	if (sta->last_rx_rate_flag & RX_FLAG_40MHZ)
-		rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
 	if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI)
 		rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI;
-	if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ)
-		rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH;
-	if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ)
-		rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH;
+
+	if (sta->last_rx_rate_flag & RX_FLAG_5MHZ)
+		rinfo->bw = RATE_INFO_BW_5;
+	else if (sta->last_rx_rate_flag & RX_FLAG_10MHZ)
+		rinfo->bw = RATE_INFO_BW_10;
+	else if (sta->last_rx_rate_flag & RX_FLAG_40MHZ)
+		rinfo->bw = RATE_INFO_BW_40;
+	else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ)
+		rinfo->bw = RATE_INFO_BW_80;
+	else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ)
+		rinfo->bw = RATE_INFO_BW_160;
+	else
+		rinfo->bw = RATE_INFO_BW_20;
 }
 
 static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index db7216124736..fbd37d43dfce 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2541,7 +2541,9 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
 		ri.mcs = status->rate_idx;
 		ri.flags |= RATE_INFO_FLAGS_MCS;
 		if (status->flag & RX_FLAG_40MHZ)
-			ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
+			ri.bw = RATE_INFO_BW_40;
+		else
+			ri.bw = RATE_INFO_BW_20;
 		if (status->flag & RX_FLAG_SHORT_GI)
 			ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
 	} else if (status->flag & RX_FLAG_VHT) {
@@ -2549,11 +2551,13 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
 		ri.mcs = status->rate_idx;
 		ri.nss = status->vht_nss;
 		if (status->flag & RX_FLAG_40MHZ)
-			ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
-		if (status->vht_flag & RX_VHT_FLAG_80MHZ)
-			ri.flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH;
-		if (status->vht_flag & RX_VHT_FLAG_160MHZ)
-			ri.flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH;
+			ri.bw = RATE_INFO_BW_40;
+		else if (status->vht_flag & RX_VHT_FLAG_80MHZ)
+			ri.bw = RATE_INFO_BW_80;
+		else if (status->vht_flag & RX_VHT_FLAG_160MHZ)
+			ri.bw = RATE_INFO_BW_160;
+		else
+			ri.bw = RATE_INFO_BW_20;
 		if (status->flag & RX_FLAG_SHORT_GI)
 			ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
 	} else {
@@ -2561,10 +2565,15 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
 		int shift = 0;
 		int bitrate;
 
-		if (status->flag & RX_FLAG_10MHZ)
+		if (status->flag & RX_FLAG_10MHZ) {
 			shift = 1;
-		if (status->flag & RX_FLAG_5MHZ)
+			ri.bw = RATE_INFO_BW_10;
+		} else if (status->flag & RX_FLAG_5MHZ) {
 			shift = 2;
+			ri.bw = RATE_INFO_BW_5;
+		} else {
+			ri.bw = RATE_INFO_BW_20;
+		}
 
 		sband = local->hw.wiphy->bands[status->band];
 		bitrate = sband->bitrates[status->rate_idx].bitrate;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8998484ea970..8e56eeb583aa 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3578,6 +3578,7 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
 	struct nlattr *rate;
 	u32 bitrate;
 	u16 bitrate_compat;
+	enum nl80211_attrs rate_flg;
 
 	rate = nla_nest_start(msg, attr);
 	if (!rate)
@@ -3594,12 +3595,36 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
 	    nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat))
 		return false;
 
+	switch (info->bw) {
+	case RATE_INFO_BW_5:
+		rate_flg = NL80211_RATE_INFO_5_MHZ_WIDTH;
+		break;
+	case RATE_INFO_BW_10:
+		rate_flg = NL80211_RATE_INFO_10_MHZ_WIDTH;
+		break;
+	default:
+		WARN_ON(1);
+		/* fall through */
+	case RATE_INFO_BW_20:
+		rate_flg = 0;
+		break;
+	case RATE_INFO_BW_40:
+		rate_flg = NL80211_RATE_INFO_40_MHZ_WIDTH;
+		break;
+	case RATE_INFO_BW_80:
+		rate_flg = NL80211_RATE_INFO_80_MHZ_WIDTH;
+		break;
+	case RATE_INFO_BW_160:
+		rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH;
+		break;
+	}
+
+	if (rate_flg && nla_put_flag(msg, rate_flg))
+		return false;
+
 	if (info->flags & RATE_INFO_FLAGS_MCS) {
 		if (nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs))
 			return false;
-		if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH &&
-		    nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH))
-			return false;
 		if (info->flags & RATE_INFO_FLAGS_SHORT_GI &&
 		    nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI))
 			return false;
@@ -3608,15 +3633,6 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
 			return false;
 		if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_NSS, info->nss))
 			return false;
-		if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH &&
-		    nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH))
-			return false;
-		if (info->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH &&
-		    nla_put_flag(msg, NL80211_RATE_INFO_80_MHZ_WIDTH))
-			return false;
-		if (info->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH &&
-		    nla_put_flag(msg, NL80211_RATE_INFO_160_MHZ_WIDTH))
-			return false;
 		if (info->flags & RATE_INFO_FLAGS_SHORT_GI &&
 		    nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI))
 			return false;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 6942d48f1ac5..3535e8ade48f 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1073,9 +1073,24 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
 	if (WARN_ON_ONCE(rate->mcs > 9))
 		return 0;
 
-	idx = rate->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH ? 3 :
-		  rate->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH ? 2 :
-		  rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH ? 1 : 0;
+	switch (rate->bw) {
+	case RATE_INFO_BW_160:
+		idx = 3;
+		break;
+	case RATE_INFO_BW_80:
+		idx = 2;
+		break;
+	case RATE_INFO_BW_40:
+		idx = 1;
+		break;
+	case RATE_INFO_BW_5:
+	case RATE_INFO_BW_10:
+	default:
+		WARN_ON(1);
+		/* fall through */
+	case RATE_INFO_BW_20:
+		idx = 0;
+	}
 
 	bitrate = base[idx][rate->mcs];
 	bitrate *= rate->nss;
@@ -1106,8 +1121,7 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate)
 	modulation = rate->mcs & 7;
 	streams = (rate->mcs >> 3) + 1;
 
-	bitrate = (rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) ?
-			13500000 : 6500000;
+	bitrate = (rate->bw == RATE_INFO_BW_40) ? 13500000 : 6500000;
 
 	if (modulation < 4)
 		bitrate *= (modulation + 1);
-- 
cgit v1.2.3-71-gd317


From d23b8ad8ab23f5a18b91e2396fb63d10f66b08d6 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 15 Jan 2015 09:52:39 +0100
Subject: tc: add BPF based action

This action provides a possibility to exec custom BPF code.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_bpf.h        |  25 +++++
 include/uapi/linux/tc_act/Kbuild   |   1 +
 include/uapi/linux/tc_act/tc_bpf.h |  31 ++++++
 net/sched/Kconfig                  |  12 +++
 net/sched/Makefile                 |   1 +
 net/sched/act_bpf.c                | 205 +++++++++++++++++++++++++++++++++++++
 6 files changed, 275 insertions(+)
 create mode 100644 include/net/tc_act/tc_bpf.h
 create mode 100644 include/uapi/linux/tc_act/tc_bpf.h
 create mode 100644 net/sched/act_bpf.c

(limited to 'include/uapi/linux')

diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h
new file mode 100644
index 000000000000..86a070ffc930
--- /dev/null
+++ b/include/net/tc_act/tc_bpf.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __NET_TC_BPF_H
+#define __NET_TC_BPF_H
+
+#include <linux/filter.h>
+#include <net/act_api.h>
+
+struct tcf_bpf {
+	struct tcf_common	common;
+	struct bpf_prog		*filter;
+	struct sock_filter	*bpf_ops;
+	u16			bpf_num_ops;
+};
+#define to_bpf(a) \
+	container_of(a->priv, struct tcf_bpf, common)
+
+#endif /* __NET_TC_BPF_H */
diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild
index b057da2b87a4..19d5219b0b99 100644
--- a/include/uapi/linux/tc_act/Kbuild
+++ b/include/uapi/linux/tc_act/Kbuild
@@ -8,3 +8,4 @@ header-y += tc_nat.h
 header-y += tc_pedit.h
 header-y += tc_skbedit.h
 header-y += tc_vlan.h
+header-y += tc_bpf.h
diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h
new file mode 100644
index 000000000000..5288bd77e63b
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_bpf.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_TC_BPF_H
+#define __LINUX_TC_BPF_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_BPF 13
+
+struct tc_act_bpf {
+	tc_gen;
+};
+
+enum {
+	TCA_ACT_BPF_UNSPEC,
+	TCA_ACT_BPF_TM,
+	TCA_ACT_BPF_PARMS,
+	TCA_ACT_BPF_OPS_LEN,
+	TCA_ACT_BPF_OPS,
+	__TCA_ACT_BPF_MAX,
+};
+#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index c54c9d9d1ffb..466943551581 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -698,6 +698,18 @@ config NET_ACT_VLAN
 	  To compile this code as a module, choose M here: the
 	  module will be called act_vlan.
 
+config NET_ACT_BPF
+        tristate "BPF based action"
+        depends on NET_CLS_ACT
+        ---help---
+	  Say Y here to execute BPF code on packets. The BPF code will decide
+	  if the packet should be dropped or not.
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_bpf.
+
 config NET_CLS_IND
 	bool "Incoming device classification"
 	depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 679f24ae7f93..7ca2b4e76312 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_NET_ACT_SIMP)	+= act_simple.o
 obj-$(CONFIG_NET_ACT_SKBEDIT)	+= act_skbedit.o
 obj-$(CONFIG_NET_ACT_CSUM)	+= act_csum.o
 obj-$(CONFIG_NET_ACT_VLAN)	+= act_vlan.o
+obj-$(CONFIG_NET_ACT_BPF)	+= act_bpf.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
new file mode 100644
index 000000000000..1bd257e473a9
--- /dev/null
+++ b/net/sched/act_bpf.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/filter.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_bpf.h>
+#include <net/tc_act/tc_bpf.h>
+
+#define BPF_TAB_MASK     15
+
+static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
+		   struct tcf_result *res)
+{
+	struct tcf_bpf *b = a->priv;
+	int action;
+	int filter_res;
+
+	spin_lock(&b->tcf_lock);
+	b->tcf_tm.lastuse = jiffies;
+	bstats_update(&b->tcf_bstats, skb);
+	action = b->tcf_action;
+
+	filter_res = BPF_PROG_RUN(b->filter, skb);
+	if (filter_res == 0) {
+		/* Return code 0 from the BPF program
+		 * is being interpreted as a drop here.
+		 */
+		action = TC_ACT_SHOT;
+		b->tcf_qstats.drops++;
+	}
+
+	spin_unlock(&b->tcf_lock);
+	return action;
+}
+
+static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a,
+			int bind, int ref)
+{
+	unsigned char *tp = skb_tail_pointer(skb);
+	struct tcf_bpf *b = a->priv;
+	struct tc_act_bpf opt = {
+		.index    = b->tcf_index,
+		.refcnt   = b->tcf_refcnt - ref,
+		.bindcnt  = b->tcf_bindcnt - bind,
+		.action   = b->tcf_action,
+	};
+	struct tcf_t t;
+	struct nlattr *nla;
+
+	if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops))
+		goto nla_put_failure;
+
+	nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops *
+			  sizeof(struct sock_filter));
+	if (!nla)
+		goto nla_put_failure;
+
+	memcpy(nla_data(nla), b->bpf_ops, nla_len(nla));
+
+	t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(b->tcf_tm.expires);
+	if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t))
+		goto nla_put_failure;
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, tp);
+	return -1;
+}
+
+static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
+	[TCA_ACT_BPF_PARMS]	= { .len = sizeof(struct tc_act_bpf) },
+	[TCA_ACT_BPF_OPS_LEN]	= { .type = NLA_U16 },
+	[TCA_ACT_BPF_OPS]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
+};
+
+static int tcf_bpf_init(struct net *net, struct nlattr *nla,
+			struct nlattr *est, struct tc_action *a,
+			int ovr, int bind)
+{
+	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+	struct tc_act_bpf *parm;
+	struct tcf_bpf *b;
+	u16 bpf_size, bpf_num_ops;
+	struct sock_filter *bpf_ops;
+	struct sock_fprog_kern tmp;
+	struct bpf_prog *fp;
+	int ret;
+
+	if (!nla)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
+	if (ret < 0)
+		return ret;
+
+	if (!tb[TCA_ACT_BPF_PARMS] ||
+	    !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS])
+		return -EINVAL;
+	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
+
+	bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
+	if (bpf_num_ops	> BPF_MAXINSNS || bpf_num_ops == 0)
+		return -EINVAL;
+
+	bpf_size = bpf_num_ops * sizeof(*bpf_ops);
+	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
+	if (!bpf_ops)
+		return -ENOMEM;
+
+	memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);
+
+	tmp.len = bpf_num_ops;
+	tmp.filter = bpf_ops;
+
+	ret = bpf_prog_create(&fp, &tmp);
+	if (ret)
+		goto free_bpf_ops;
+
+	if (!tcf_hash_check(parm->index, a, bind)) {
+		ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind);
+		if (ret)
+			goto destroy_fp;
+
+		ret = ACT_P_CREATED;
+	} else {
+		if (bind)
+			goto destroy_fp;
+		tcf_hash_release(a, bind);
+		if (!ovr) {
+			ret = -EEXIST;
+			goto destroy_fp;
+		}
+	}
+
+	b = to_bpf(a);
+	spin_lock_bh(&b->tcf_lock);
+	b->tcf_action = parm->action;
+	b->bpf_num_ops = bpf_num_ops;
+	b->bpf_ops = bpf_ops;
+	b->filter = fp;
+	spin_unlock_bh(&b->tcf_lock);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(a);
+	return ret;
+
+destroy_fp:
+	bpf_prog_destroy(fp);
+free_bpf_ops:
+	kfree(bpf_ops);
+	return ret;
+}
+
+static void tcf_bpf_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_bpf *b = a->priv;
+
+	bpf_prog_destroy(b->filter);
+}
+
+static struct tc_action_ops act_bpf_ops = {
+	.kind =		"bpf",
+	.type =		TCA_ACT_BPF,
+	.owner =	THIS_MODULE,
+	.act =		tcf_bpf,
+	.dump =		tcf_bpf_dump,
+	.cleanup =	tcf_bpf_cleanup,
+	.init =		tcf_bpf_init,
+};
+
+static int __init bpf_init_module(void)
+{
+	return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK);
+}
+
+static void __exit bpf_cleanup_module(void)
+{
+	tcf_unregister_action(&act_bpf_ops);
+}
+
+module_init(bpf_init_module);
+module_exit(bpf_cleanup_module);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_DESCRIPTION("TC BPF based action");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3-71-gd317


From 0c7aecd4bde4b7302cd41986d3a29e4f0b0ed218 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 15 Jan 2015 15:11:15 +0100
Subject: netns: add rtnl cmd to add and get peer netns ids

With this patch, a user can define an id for a peer netns by providing a FD or a
PID. These ids are local to the netns where it is added (ie valid only into this
netns).

The main function (ie the one exported to other module), peernet2id(), allows to
get the id of a peer netns. If no id has been assigned by the user, this
function allocates one.

These ids will be used in netlink messages to point to a peer netns, for example
in case of a x-netns interface.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                        |   1 +
 include/net/net_namespace.h        |   4 +
 include/uapi/linux/Kbuild          |   1 +
 include/uapi/linux/net_namespace.h |  23 ++++
 include/uapi/linux/rtnetlink.h     |   5 +
 net/core/net_namespace.c           | 211 +++++++++++++++++++++++++++++++++++++
 6 files changed, 245 insertions(+)
 create mode 100644 include/uapi/linux/net_namespace.h

(limited to 'include/uapi/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 9de900572633..9b91d9f0257e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6578,6 +6578,7 @@ F:	include/linux/netdevice.h
 F:	include/uapi/linux/in.h
 F:	include/uapi/linux/net.h
 F:	include/uapi/linux/netdevice.h
+F:	include/uapi/linux/net_namespace.h
 F:	tools/net/
 F:	tools/testing/selftests/net/
 F:	lib/random32.c
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 2e8756b8c775..36faf4990c4b 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -60,6 +60,7 @@ struct net {
 	struct list_head	exit_list;	/* Use only net_mutex */
 
 	struct user_namespace   *user_ns;	/* Owning user namespace */
+	struct idr		netns_ids;
 
 	struct ns_common	ns;
 
@@ -290,6 +291,9 @@ static inline struct net *read_pnet(struct net * const *pnet)
 #define __net_initconst	__initconst
 #endif
 
+int peernet2id(struct net *net, struct net *peer);
+struct net *get_net_ns_by_id(struct net *net, int id);
+
 struct pernet_operations {
 	struct list_head list;
 	int (*init)(struct net *net);
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 00b100023c47..14b7b6e44c77 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -283,6 +283,7 @@ header-y += net.h
 header-y += netlink_diag.h
 header-y += netlink.h
 header-y += netrom.h
+header-y += net_namespace.h
 header-y += net_tstamp.h
 header-y += nfc.h
 header-y += nfs2.h
diff --git a/include/uapi/linux/net_namespace.h b/include/uapi/linux/net_namespace.h
new file mode 100644
index 000000000000..778cd2c3ebf4
--- /dev/null
+++ b/include/uapi/linux/net_namespace.h
@@ -0,0 +1,23 @@
+/* Copyright (c) 2015 6WIND S.A.
+ * Author: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+#ifndef _UAPI_LINUX_NET_NAMESPACE_H_
+#define _UAPI_LINUX_NET_NAMESPACE_H_
+
+/* Attributes of RTM_NEWNSID/RTM_GETNSID messages */
+enum {
+	NETNSA_NONE,
+#define NETNSA_NSID_NOT_ASSIGNED -1
+	NETNSA_NSID,
+	NETNSA_PID,
+	NETNSA_FD,
+	__NETNSA_MAX,
+};
+
+#define NETNSA_MAX		(__NETNSA_MAX - 1)
+
+#endif /* _UAPI_LINUX_NET_NAMESPACE_H_ */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index a1d18593f41e..5cc5d66bf519 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -132,6 +132,11 @@ enum {
 	RTM_GETMDB = 86,
 #define RTM_GETMDB RTM_GETMDB
 
+	RTM_NEWNSID = 88,
+#define RTM_NEWNSID RTM_NEWNSID
+	RTM_GETNSID = 90,
+#define RTM_GETNSID RTM_GETNSID
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ce780c722e48..9d1a4cac83b6 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -15,6 +15,10 @@
 #include <linux/file.h>
 #include <linux/export.h>
 #include <linux/user_namespace.h>
+#include <linux/net_namespace.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+#include <net/netlink.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
@@ -144,6 +148,77 @@ static void ops_free_list(const struct pernet_operations *ops,
 	}
 }
 
+static int alloc_netid(struct net *net, struct net *peer, int reqid)
+{
+	int min = 0, max = 0;
+
+	ASSERT_RTNL();
+
+	if (reqid >= 0) {
+		min = reqid;
+		max = reqid + 1;
+	}
+
+	return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+}
+
+/* This function is used by idr_for_each(). If net is equal to peer, the
+ * function returns the id so that idr_for_each() stops. Because we cannot
+ * returns the id 0 (idr_for_each() will not stop), we return the magic value
+ * NET_ID_ZERO (-1) for it.
+ */
+#define NET_ID_ZERO -1
+static int net_eq_idr(int id, void *net, void *peer)
+{
+	if (net_eq(net, peer))
+		return id ? : NET_ID_ZERO;
+	return 0;
+}
+
+static int __peernet2id(struct net *net, struct net *peer, bool alloc)
+{
+	int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
+
+	ASSERT_RTNL();
+
+	/* Magic value for id 0. */
+	if (id == NET_ID_ZERO)
+		return 0;
+	if (id > 0)
+		return id;
+
+	if (alloc)
+		return alloc_netid(net, peer, -1);
+
+	return -ENOENT;
+}
+
+/* This function returns the id of a peer netns. If no id is assigned, one will
+ * be allocated and returned.
+ */
+int peernet2id(struct net *net, struct net *peer)
+{
+	int id = __peernet2id(net, peer, true);
+
+	return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
+}
+
+struct net *get_net_ns_by_id(struct net *net, int id)
+{
+	struct net *peer;
+
+	if (id < 0)
+		return NULL;
+
+	rcu_read_lock();
+	peer = idr_find(&net->netns_ids, id);
+	if (peer)
+		get_net(peer);
+	rcu_read_unlock();
+
+	return peer;
+}
+
 /*
  * setup_net runs the initializers for the network namespace object.
  */
@@ -158,6 +233,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 	atomic_set(&net->passive, 1);
 	net->dev_base_seq = 1;
 	net->user_ns = user_ns;
+	idr_init(&net->netns_ids);
 
 #ifdef NETNS_REFCNT_DEBUG
 	atomic_set(&net->use_count, 0);
@@ -288,6 +364,14 @@ static void cleanup_net(struct work_struct *work)
 	list_for_each_entry(net, &net_kill_list, cleanup_list) {
 		list_del_rcu(&net->list);
 		list_add_tail(&net->exit_list, &net_exit_list);
+		for_each_net(tmp) {
+			int id = __peernet2id(tmp, net, false);
+
+			if (id >= 0)
+				idr_remove(&tmp->netns_ids, id);
+		}
+		idr_destroy(&net->netns_ids);
+
 	}
 	rtnl_unlock();
 
@@ -402,6 +486,130 @@ static struct pernet_operations __net_initdata net_ns_ops = {
 	.exit = net_ns_net_exit,
 };
 
+static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
+	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
+	[NETNSA_NSID]		= { .type = NLA_S32 },
+	[NETNSA_PID]		= { .type = NLA_U32 },
+	[NETNSA_FD]		= { .type = NLA_U32 },
+};
+
+static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nlattr *tb[NETNSA_MAX + 1];
+	struct net *peer;
+	int nsid, err;
+
+	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
+			  rtnl_net_policy);
+	if (err < 0)
+		return err;
+	if (!tb[NETNSA_NSID])
+		return -EINVAL;
+	nsid = nla_get_s32(tb[NETNSA_NSID]);
+
+	if (tb[NETNSA_PID])
+		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
+	else if (tb[NETNSA_FD])
+		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
+	else
+		return -EINVAL;
+	if (IS_ERR(peer))
+		return PTR_ERR(peer);
+
+	if (__peernet2id(net, peer, false) >= 0) {
+		err = -EEXIST;
+		goto out;
+	}
+
+	err = alloc_netid(net, peer, nsid);
+	if (err > 0)
+		err = 0;
+out:
+	put_net(peer);
+	return err;
+}
+
+static int rtnl_net_get_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct rtgenmsg))
+	       + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
+	       ;
+}
+
+static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
+			 int cmd, struct net *net, struct net *peer)
+{
+	struct nlmsghdr *nlh;
+	struct rtgenmsg *rth;
+	int id;
+
+	ASSERT_RTNL();
+
+	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	rth = nlmsg_data(nlh);
+	rth->rtgen_family = AF_UNSPEC;
+
+	id = __peernet2id(net, peer, false);
+	if  (id < 0)
+		id = NETNSA_NSID_NOT_ASSIGNED;
+	if (nla_put_s32(skb, NETNSA_NSID, id))
+		goto nla_put_failure;
+
+	nlmsg_end(skb, nlh);
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nlattr *tb[NETNSA_MAX + 1];
+	struct sk_buff *msg;
+	int err = -ENOBUFS;
+	struct net *peer;
+
+	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
+			  rtnl_net_policy);
+	if (err < 0)
+		return err;
+	if (tb[NETNSA_PID])
+		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
+	else if (tb[NETNSA_FD])
+		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
+	else
+		return -EINVAL;
+
+	if (IS_ERR(peer))
+		return PTR_ERR(peer);
+
+	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
+	if (!msg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+			    RTM_GETNSID, net, peer);
+	if (err < 0)
+		goto err_out;
+
+	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
+	goto out;
+
+err_out:
+	nlmsg_free(msg);
+out:
+	put_net(peer);
+	return err;
+}
+
 static int __init net_ns_init(void)
 {
 	struct net_generic *ng;
@@ -435,6 +643,9 @@ static int __init net_ns_init(void)
 
 	register_pernet_subsys(&net_ns_ops);
 
+	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-71-gd317


From d37512a277dfb2cef8a578e25a3246f61399a55a Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 15 Jan 2015 15:11:16 +0100
Subject: rtnl: add link netns id to interface messages

This patch adds a new attribute (IFLA_LINK_NETNSID) which contains the 'link'
netns id when this netns is different from the netns where the interface
stands (for example for x-net interfaces like ip tunnels).
With this attribute, it's possible to interpret correctly all advertised
information (like IFLA_LINK, etc.).

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/rtnetlink.h      |  2 ++
 include/uapi/linux/if_link.h |  1 +
 net/core/rtnetlink.c         | 13 +++++++++++++
 3 files changed, 16 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index e21b9f9653c0..6c6d5393fc34 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -46,6 +46,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
  *			    to create when creating a new device.
  *	@get_num_rx_queues: Function to determine number of receive queues
  *			    to create when creating a new device.
+ *	@get_link_net: Function to get the i/o netns of the device
  */
 struct rtnl_link_ops {
 	struct list_head	list;
@@ -93,6 +94,7 @@ struct rtnl_link_ops {
 	int			(*fill_slave_info)(struct sk_buff *skb,
 						   const struct net_device *dev,
 						   const struct net_device *slave_dev);
+	struct net		*(*get_link_net)(const struct net_device *dev);
 };
 
 int __rtnl_link_register(struct rtnl_link_ops *ops);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 2a8380edbb7e..0deee3eeddbf 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -146,6 +146,7 @@ enum {
 	IFLA_PHYS_PORT_ID,
 	IFLA_CARRIER_CHANGES,
 	IFLA_PHYS_SWITCH_ID,
+	IFLA_LINK_NETNSID,
 	__IFLA_MAX
 };
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 47b39f3e867c..bd6370f0cb31 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -875,6 +875,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
+	       + nla_total_size(4) /* IFLA_LINK_NETNSID */
 	       + nla_total_size(ext_filter_mask
 			        & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
 	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
@@ -1169,6 +1170,18 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			goto nla_put_failure;
 	}
 
+	if (dev->rtnl_link_ops &&
+	    dev->rtnl_link_ops->get_link_net) {
+		struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+
+		if (!net_eq(dev_net(dev), link_net)) {
+			int id = peernet2id(dev_net(dev), link_net);
+
+			if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+				goto nla_put_failure;
+		}
+	}
+
 	if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
 		goto nla_put_failure;
 
-- 
cgit v1.2.3-71-gd317


From 22a5dc0e5e3e8fef804230cd73ed7b0afd4c7bae Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sun, 18 Jan 2015 16:35:14 -0500
Subject: net: sched: Introduce connmark action

This tc action allows you to retrieve the connection tracking mark
This action has been used heavily by openwrt for a few years now.

There are known limitations currently:

doesn't work for initial packets, since we only query the ct table.
  Fine given use case is for returning packets

no implicit defrag.
  frags should be rare so fix later..

won't work for more complex tasks, e.g. lookup of other extensions
  since we have no means to store results

we still have a 2nd lookup later on via normal conntrack path.
This shouldn't break anything though since skb->nfct isn't altered.

V2:
remove unnecessary braces (Jiri)
change the action identifier to 14 (Jiri)
Fix some stylistic issues caught by checkpatch
V3:
Move module params to bottom (Cong)
Get rid of tcf_hashinfo_init and friends and conform to newer API (Cong)

Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_connmark.h        |  14 +++
 include/uapi/linux/tc_act/tc_connmark.h |  22 ++++
 net/sched/Kconfig                       |  11 ++
 net/sched/Makefile                      |   1 +
 net/sched/act_connmark.c                | 192 ++++++++++++++++++++++++++++++++
 5 files changed, 240 insertions(+)
 create mode 100644 include/net/tc_act/tc_connmark.h
 create mode 100644 include/uapi/linux/tc_act/tc_connmark.h
 create mode 100644 net/sched/act_connmark.c

(limited to 'include/uapi/linux')

diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h
new file mode 100644
index 000000000000..5c1104c2e24f
--- /dev/null
+++ b/include/net/tc_act/tc_connmark.h
@@ -0,0 +1,14 @@
+#ifndef __NET_TC_CONNMARK_H
+#define __NET_TC_CONNMARK_H
+
+#include <net/act_api.h>
+
+struct tcf_connmark_info {
+	struct tcf_common common;
+	u16 zone;
+};
+
+#define to_connmark(a) \
+	container_of(a->priv, struct tcf_connmark_info, common)
+
+#endif /* __NET_TC_CONNMARK_H */
diff --git a/include/uapi/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h
new file mode 100644
index 000000000000..994b0971bce2
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_connmark.h
@@ -0,0 +1,22 @@
+#ifndef __UAPI_TC_CONNMARK_H
+#define __UAPI_TC_CONNMARK_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_CONNMARK 14
+
+struct tc_connmark {
+	tc_gen;
+	__u16 zone;
+};
+
+enum {
+	TCA_CONNMARK_UNSPEC,
+	TCA_CONNMARK_PARMS,
+	TCA_CONNMARK_TM,
+	__TCA_CONNMARK_MAX
+};
+#define TCA_CONNMARK_MAX (__TCA_CONNMARK_MAX - 1)
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 466943551581..475e35e261ec 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -710,6 +710,17 @@ config NET_ACT_BPF
 	  To compile this code as a module, choose M here: the
 	  module will be called act_bpf.
 
+config NET_ACT_CONNMARK
+        tristate "Netfilter Connection Mark Retriever"
+        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+        ---help---
+	  Say Y here to allow retrieving of conn mark
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_connmark.
+
 config NET_CLS_IND
 	bool "Incoming device classification"
 	depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7ca2b4e76312..7ca7f4c1b8c2 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_NET_ACT_SKBEDIT)	+= act_skbedit.o
 obj-$(CONFIG_NET_ACT_CSUM)	+= act_csum.o
 obj-$(CONFIG_NET_ACT_VLAN)	+= act_vlan.o
 obj-$(CONFIG_NET_ACT_BPF)	+= act_bpf.o
+obj-$(CONFIG_NET_ACT_CONNMARK)	+= act_connmark.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
new file mode 100644
index 000000000000..8e472518f9f6
--- /dev/null
+++ b/net/sched/act_connmark.c
@@ -0,0 +1,192 @@
+/*
+ * net/sched/act_connmark.c  netfilter connmark retriever action
+ * skb mark is over-written
+ *
+ * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/act_api.h>
+#include <uapi/linux/tc_act/tc_connmark.h>
+#include <net/tc_act/tc_connmark.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+#define CONNMARK_TAB_MASK     3
+
+static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
+			struct tcf_result *res)
+{
+	const struct nf_conntrack_tuple_hash *thash;
+	struct nf_conntrack_tuple tuple;
+	enum ip_conntrack_info ctinfo;
+	struct tcf_connmark_info *ca = a->priv;
+	struct nf_conn *c;
+	int proto;
+
+	spin_lock(&ca->tcf_lock);
+	ca->tcf_tm.lastuse = jiffies;
+	bstats_update(&ca->tcf_bstats, skb);
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (skb->len < sizeof(struct iphdr))
+			goto out;
+
+		proto = NFPROTO_IPV4;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (skb->len < sizeof(struct ipv6hdr))
+			goto out;
+
+		proto = NFPROTO_IPV6;
+	} else {
+		goto out;
+	}
+
+	c = nf_ct_get(skb, &ctinfo);
+	if (c) {
+		skb->mark = c->mark;
+		/* using overlimits stats to count how many packets marked */
+		ca->tcf_qstats.overlimits++;
+		nf_ct_put(c);
+		goto out;
+	}
+
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+			       proto, &tuple))
+		goto out;
+
+	thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple);
+	if (!thash)
+		goto out;
+
+	c = nf_ct_tuplehash_to_ctrack(thash);
+	/* using overlimits stats to count how many packets marked */
+	ca->tcf_qstats.overlimits++;
+	skb->mark = c->mark;
+	nf_ct_put(c);
+
+out:
+	skb->nfct = NULL;
+	spin_unlock(&ca->tcf_lock);
+	return ca->tcf_action;
+}
+
+static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
+	[TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) },
+};
+
+static int tcf_connmark_init(struct net *net, struct nlattr *nla,
+			     struct nlattr *est, struct tc_action *a,
+			     int ovr, int bind)
+{
+	struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+	struct tcf_connmark_info *ci;
+	struct tc_connmark *parm;
+	int ret = 0;
+
+	if (!nla)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy);
+	if (ret < 0)
+		return ret;
+
+	parm = nla_data(tb[TCA_CONNMARK_PARMS]);
+
+	if (!tcf_hash_check(parm->index, a, bind)) {
+		ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind);
+		if (ret)
+			return ret;
+
+		ci = to_connmark(a);
+		ci->tcf_action = parm->action;
+		ci->zone = parm->zone;
+
+		tcf_hash_insert(a);
+		ret = ACT_P_CREATED;
+	} else {
+		ci = to_connmark(a);
+		if (bind)
+			return 0;
+		tcf_hash_release(a, bind);
+		if (!ovr)
+			return -EEXIST;
+		/* replacing action and zone */
+		ci->tcf_action = parm->action;
+		ci->zone = parm->zone;
+	}
+
+	return ret;
+}
+
+static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
+				    int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_connmark_info *ci = a->priv;
+
+	struct tc_connmark opt = {
+		.index   = ci->tcf_index,
+		.refcnt  = ci->tcf_refcnt - ref,
+		.bindcnt = ci->tcf_bindcnt - bind,
+		.action  = ci->tcf_action,
+		.zone   = ci->zone,
+	};
+	struct tcf_t t;
+
+	if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install);
+	t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse);
+	t.expires = jiffies_to_clock_t(ci->tcf_tm.expires);
+	if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t))
+		goto nla_put_failure;
+
+	return skb->len;
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static struct tc_action_ops act_connmark_ops = {
+	.kind		=	"connmark",
+	.type		=	TCA_ACT_CONNMARK,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_connmark,
+	.dump		=	tcf_connmark_dump,
+	.init		=	tcf_connmark_init,
+};
+
+static int __init connmark_init_module(void)
+{
+	return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK);
+}
+
+static void __exit connmark_cleanup_module(void)
+{
+	tcf_unregister_action(&act_connmark_ops);
+}
+
+module_init(connmark_init_module);
+module_exit(connmark_cleanup_module);
+MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>");
+MODULE_DESCRIPTION("Connection tracking mark restoring");
+MODULE_LICENSE("GPL");
+
-- 
cgit v1.2.3-71-gd317


From 7d7d731d1f0f27d2e470d4bc6112aca5ff30476f Mon Sep 17 00:00:00 2001
From: Jeremiah Mahler <jmmahler@gmail.com>
Date: Fri, 5 Dec 2014 06:56:50 -0800
Subject: msdos_fs.h: fix 'fields' in comment

Signed-off-by: Jeremiah Mahler <jmmahler@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/uapi/linux/msdos_fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/msdos_fs.h b/include/uapi/linux/msdos_fs.h
index e284ff919d6e..e956704f5fb1 100644
--- a/include/uapi/linux/msdos_fs.h
+++ b/include/uapi/linux/msdos_fs.h
@@ -134,7 +134,7 @@ struct fat_boot_sector {
 			__u8	vol_id[4];	/* volume ID */
 			__u8	vol_label[11];	/* volume label */
 			__u8	fs_type[8];		/* file system type */
-			/* other fiealds are not added here */
+			/* other fields are not added here */
 		} fat16;
 
 		struct {
@@ -157,7 +157,7 @@ struct fat_boot_sector {
 			__u8	vol_id[4];	/* volume ID */
 			__u8	vol_label[11];	/* volume label */
 			__u8	fs_type[8];		/* file system type */
-			/* other fiealds are not added here */
+			/* other fields are not added here */
 		} fat32;
 	};
 };
-- 
cgit v1.2.3-71-gd317


From a0675c25d6392c2197b796a60c4a2a0138c86355 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Sat, 7 Jun 2014 00:54:51 +0200
Subject: arm/arm64: KVM: add virtual GICv3 distributor emulation

With everything separated and prepared, we implement a model of a
GICv3 distributor and redistributors by using the existing framework
to provide handler functions for each register group.

Currently we limit the emulation to a model enforcing a single
security state, with SRE==1 (forcing system register access) and
ARE==1 (allowing more than 8 VCPUs).

We share some of the functions provided for GICv2 emulation, but take
the different ways of addressing (v)CPUs into account.
Save and restore is currently not implemented.

Similar to the split-off of the GICv2 specific code, the new emulation
code goes into a new file (vgic-v3-emul.c).

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/Makefile            |   1 +
 include/kvm/arm_vgic.h             |   9 +-
 include/linux/irqchip/arm-gic-v3.h |  32 ++
 include/linux/kvm_host.h           |   1 +
 include/uapi/linux/kvm.h           |   2 +
 virt/kvm/arm/vgic-v3-emul.c        | 922 +++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/vgic.c                |  11 +-
 virt/kvm/arm/vgic.h                |   3 +
 8 files changed, 978 insertions(+), 3 deletions(-)
 create mode 100644 virt/kvm/arm/vgic-v3-emul.c

(limited to 'include/uapi/linux')

diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index d9573533a873..4e6e09ee4033 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -24,5 +24,6 @@ kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ff04afd0d901..98c30168bce4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -162,7 +162,11 @@ struct vgic_dist {
 
 	/* Distributor and vcpu interface mapping in the guest */
 	phys_addr_t		vgic_dist_base;
-	phys_addr_t		vgic_cpu_base;
+	/* GICv2 and GICv3 use different mapped register blocks */
+	union {
+		phys_addr_t		vgic_cpu_base;
+		phys_addr_t		vgic_redist_base;
+	};
 
 	/* Distributor enabled */
 	u32			enabled;
@@ -224,6 +228,9 @@ struct vgic_dist {
 	 */
 	struct vgic_bitmap	*irq_spi_target;
 
+	/* Target MPIDR for each IRQ (needed for GICv3 IROUTERn) only */
+	u32			*irq_spi_mpidr;
+
 	/* Bitmap indicating which CPU has something pending */
 	unsigned long		*irq_pending_on_cpu;
 
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 1e8b0cf30792..3fb4d8588a26 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -33,6 +33,7 @@
 #define GICD_SETSPI_SR			0x0050
 #define GICD_CLRSPI_SR			0x0058
 #define GICD_SEIR			0x0068
+#define GICD_IGROUPR			0x0080
 #define GICD_ISENABLER			0x0100
 #define GICD_ICENABLER			0x0180
 #define GICD_ISPENDR			0x0200
@@ -41,14 +42,37 @@
 #define GICD_ICACTIVER			0x0380
 #define GICD_IPRIORITYR			0x0400
 #define GICD_ICFGR			0x0C00
+#define GICD_IGRPMODR			0x0D00
+#define GICD_NSACR			0x0E00
 #define GICD_IROUTER			0x6000
+#define GICD_IDREGS			0xFFD0
 #define GICD_PIDR2			0xFFE8
 
+/*
+ * Those registers are actually from GICv2, but the spec demands that they
+ * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
+ */
+#define GICD_ITARGETSR			0x0800
+#define GICD_SGIR			0x0F00
+#define GICD_CPENDSGIR			0x0F10
+#define GICD_SPENDSGIR			0x0F20
+
 #define GICD_CTLR_RWP			(1U << 31)
+#define GICD_CTLR_DS			(1U << 6)
 #define GICD_CTLR_ARE_NS		(1U << 4)
 #define GICD_CTLR_ENABLE_G1A		(1U << 1)
 #define GICD_CTLR_ENABLE_G1		(1U << 0)
 
+/*
+ * In systems with a single security state (what we emulate in KVM)
+ * the meaning of the interrupt group enable bits is slightly different
+ */
+#define GICD_CTLR_ENABLE_SS_G1		(1U << 1)
+#define GICD_CTLR_ENABLE_SS_G0		(1U << 0)
+
+#define GICD_TYPER_LPIS			(1U << 17)
+#define GICD_TYPER_MBIS			(1U << 16)
+
 #define GICD_TYPER_ID_BITS(typer)	((((typer) >> 19) & 0x1f) + 1)
 #define GICD_TYPER_IRQS(typer)		((((typer) & 0x1f) + 1) * 32)
 #define GICD_TYPER_LPIS			(1U << 17)
@@ -60,6 +84,8 @@
 #define GIC_PIDR2_ARCH_GICv3		0x30
 #define GIC_PIDR2_ARCH_GICv4		0x40
 
+#define GIC_V3_DIST_SIZE		0x10000
+
 /*
  * Re-Distributor registers, offsets from RD_base
  */
@@ -78,6 +104,7 @@
 #define GICR_SYNCR			0x00C0
 #define GICR_MOVLPIR			0x0100
 #define GICR_MOVALLR			0x0110
+#define GICR_IDREGS			GICD_IDREGS
 #define GICR_PIDR2			GICD_PIDR2
 
 #define GICR_CTLR_ENABLE_LPIS		(1UL << 0)
@@ -104,6 +131,7 @@
 /*
  * Re-Distributor registers, offsets from SGI_base
  */
+#define GICR_IGROUPR0			GICD_IGROUPR
 #define GICR_ISENABLER0			GICD_ISENABLER
 #define GICR_ICENABLER0			GICD_ICENABLER
 #define GICR_ISPENDR0			GICD_ISPENDR
@@ -112,11 +140,15 @@
 #define GICR_ICACTIVER0			GICD_ICACTIVER
 #define GICR_IPRIORITYR0		GICD_IPRIORITYR
 #define GICR_ICFGR0			GICD_ICFGR
+#define GICR_IGRPMODR0			GICD_IGRPMODR
+#define GICR_NSACR			GICD_NSACR
 
 #define GICR_TYPER_PLPIS		(1U << 0)
 #define GICR_TYPER_VLPIS		(1U << 1)
 #define GICR_TYPER_LAST			(1U << 4)
 
+#define GIC_V3_REDIST_SIZE		0x20000
+
 #define LPI_PROP_GROUP1			(1 << 1)
 #define LPI_PROP_ENABLED		(1 << 0)
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 25d7ce31a5d4..0ef2daa199d8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1052,6 +1052,7 @@ void kvm_unregister_device_ops(u32 type);
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
+extern struct kvm_device_ops kvm_arm_vgic_v3_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a37fd1224f36..b4e6f1e70f03 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -952,6 +952,8 @@ enum kvm_device_type {
 #define KVM_DEV_TYPE_ARM_VGIC_V2	KVM_DEV_TYPE_ARM_VGIC_V2
 	KVM_DEV_TYPE_FLIC,
 #define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
+	KVM_DEV_TYPE_ARM_VGIC_V3,
+#define KVM_DEV_TYPE_ARM_VGIC_V3	KVM_DEV_TYPE_ARM_VGIC_V3
 	KVM_DEV_TYPE_MAX,
 };
 
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
new file mode 100644
index 000000000000..8db1db597223
--- /dev/null
+++ b/virt/kvm/arm/vgic-v3-emul.c
@@ -0,0 +1,922 @@
+/*
+ * GICv3 distributor and redistributor emulation
+ *
+ * GICv3 emulation is currently only supported on a GICv3 host (because
+ * we rely on the hardware's CPU interface virtualization support), but
+ * supports both hardware with or without the optional GICv2 backwards
+ * compatibility features.
+ *
+ * Limitations of the emulation:
+ * (RAZ/WI: read as zero, write ignore, RAO/WI: read as one, write ignore)
+ * - We do not support LPIs (yet). TYPER.LPIS is reported as 0 and is RAZ/WI.
+ * - We do not support the message based interrupts (MBIs) triggered by
+ *   writes to the GICD_{SET,CLR}SPI_* registers. TYPER.MBIS is reported as 0.
+ * - We do not support the (optional) backwards compatibility feature.
+ *   GICD_CTLR.ARE resets to 1 and is RAO/WI. If the _host_ GIC supports
+ *   the compatiblity feature, you can use a GICv2 in the guest, though.
+ * - We only support a single security state. GICD_CTLR.DS is 1 and is RAO/WI.
+ * - Priorities are not emulated (same as the GICv2 emulation). Linux
+ *   as a guest is fine with this, because it does not use priorities.
+ * - We only support Group1 interrupts. Again Linux uses only those.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Andre Przywara <andre.przywara@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+#include <kvm/arm_vgic.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+#include "vgic.h"
+
+static bool handle_mmio_rao_wi(struct kvm_vcpu *vcpu,
+			       struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	u32 reg = 0xffffffff;
+
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+
+	return false;
+}
+
+static bool handle_mmio_ctlr(struct kvm_vcpu *vcpu,
+			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	u32 reg = 0;
+
+	/*
+	 * Force ARE and DS to 1, the guest cannot change this.
+	 * For the time being we only support Group1 interrupts.
+	 */
+	if (vcpu->kvm->arch.vgic.enabled)
+		reg = GICD_CTLR_ENABLE_SS_G1;
+	reg |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
+
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+	if (mmio->is_write) {
+		if (reg & GICD_CTLR_ENABLE_SS_G0)
+			kvm_info("guest tried to enable unsupported Group0 interrupts\n");
+		vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1);
+		vgic_update_state(vcpu->kvm);
+		return true;
+	}
+	return false;
+}
+
+/*
+ * As this implementation does not provide compatibility
+ * with GICv2 (ARE==1), we report zero CPUs in bits [5..7].
+ * Also LPIs and MBIs are not supported, so we set the respective bits to 0.
+ * Also we report at most 2**10=1024 interrupt IDs (to match 1024 SPIs).
+ */
+#define INTERRUPT_ID_BITS 10
+static bool handle_mmio_typer(struct kvm_vcpu *vcpu,
+			      struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	u32 reg;
+
+	reg = (min(vcpu->kvm->arch.vgic.nr_irqs, 1024) >> 5) - 1;
+
+	reg |= (INTERRUPT_ID_BITS - 1) << 19;
+
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+
+	return false;
+}
+
+static bool handle_mmio_iidr(struct kvm_vcpu *vcpu,
+			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	u32 reg;
+
+	reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+
+	return false;
+}
+
+static bool handle_mmio_set_enable_reg_dist(struct kvm_vcpu *vcpu,
+					    struct kvm_exit_mmio *mmio,
+					    phys_addr_t offset)
+{
+	if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
+		return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+					      vcpu->vcpu_id,
+					      ACCESS_WRITE_SETBIT);
+
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_clear_enable_reg_dist(struct kvm_vcpu *vcpu,
+					      struct kvm_exit_mmio *mmio,
+					      phys_addr_t offset)
+{
+	if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
+		return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+					      vcpu->vcpu_id,
+					      ACCESS_WRITE_CLEARBIT);
+
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_set_pending_reg_dist(struct kvm_vcpu *vcpu,
+					     struct kvm_exit_mmio *mmio,
+					     phys_addr_t offset)
+{
+	if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
+		return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
+						   vcpu->vcpu_id);
+
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_clear_pending_reg_dist(struct kvm_vcpu *vcpu,
+					       struct kvm_exit_mmio *mmio,
+					       phys_addr_t offset)
+{
+	if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8))
+		return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
+						     vcpu->vcpu_id);
+
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu,
+					  struct kvm_exit_mmio *mmio,
+					  phys_addr_t offset)
+{
+	u32 *reg;
+
+	if (unlikely(offset < VGIC_NR_PRIVATE_IRQS)) {
+		vgic_reg_access(mmio, NULL, offset,
+				ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+		return false;
+	}
+
+	reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
+				   vcpu->vcpu_id, offset);
+	vgic_reg_access(mmio, reg, offset,
+		ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+	return false;
+}
+
+static bool handle_mmio_cfg_reg_dist(struct kvm_vcpu *vcpu,
+				     struct kvm_exit_mmio *mmio,
+				     phys_addr_t offset)
+{
+	u32 *reg;
+
+	if (unlikely(offset < VGIC_NR_PRIVATE_IRQS / 4)) {
+		vgic_reg_access(mmio, NULL, offset,
+				ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+		return false;
+	}
+
+	reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
+				  vcpu->vcpu_id, offset >> 1);
+
+	return vgic_handle_cfg_reg(reg, mmio, offset);
+}
+
+/*
+ * We use a compressed version of the MPIDR (all 32 bits in one 32-bit word)
+ * when we store the target MPIDR written by the guest.
+ */
+static u32 compress_mpidr(unsigned long mpidr)
+{
+	u32 ret;
+
+	ret = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	ret |= MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8;
+	ret |= MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16;
+	ret |= MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24;
+
+	return ret;
+}
+
+static unsigned long uncompress_mpidr(u32 value)
+{
+	unsigned long mpidr;
+
+	mpidr  = ((value >>  0) & 0xFF) << MPIDR_LEVEL_SHIFT(0);
+	mpidr |= ((value >>  8) & 0xFF) << MPIDR_LEVEL_SHIFT(1);
+	mpidr |= ((value >> 16) & 0xFF) << MPIDR_LEVEL_SHIFT(2);
+	mpidr |= (u64)((value >> 24) & 0xFF) << MPIDR_LEVEL_SHIFT(3);
+
+	return mpidr;
+}
+
+/*
+ * Lookup the given MPIDR value to get the vcpu_id (if there is one)
+ * and store that in the irq_spi_cpu[] array.
+ * This limits the number of VCPUs to 255 for now, extending the data
+ * type (or storing kvm_vcpu pointers) should lift the limit.
+ * Store the original MPIDR value in an extra array to support read-as-written.
+ * Unallocated MPIDRs are translated to a special value and caught
+ * before any array accesses.
+ */
+static bool handle_mmio_route_reg(struct kvm_vcpu *vcpu,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	int spi;
+	u32 reg;
+	int vcpu_id;
+	unsigned long *bmap, mpidr;
+
+	/*
+	 * The upper 32 bits of each 64 bit register are zero,
+	 * as we don't support Aff3.
+	 */
+	if ((offset & 4)) {
+		vgic_reg_access(mmio, NULL, offset,
+				ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+		return false;
+	}
+
+	/* This region only covers SPIs, so no handling of private IRQs here. */
+	spi = offset / 8;
+
+	/* get the stored MPIDR for this IRQ */
+	mpidr = uncompress_mpidr(dist->irq_spi_mpidr[spi]);
+	reg = mpidr;
+
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+
+	if (!mmio->is_write)
+		return false;
+
+	/*
+	 * Now clear the currently assigned vCPU from the map, making room
+	 * for the new one to be written below
+	 */
+	vcpu = kvm_mpidr_to_vcpu(kvm, mpidr);
+	if (likely(vcpu)) {
+		vcpu_id = vcpu->vcpu_id;
+		bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]);
+		__clear_bit(spi, bmap);
+	}
+
+	dist->irq_spi_mpidr[spi] = compress_mpidr(reg);
+	vcpu = kvm_mpidr_to_vcpu(kvm, reg & MPIDR_HWID_BITMASK);
+
+	/*
+	 * The spec says that non-existent MPIDR values should not be
+	 * forwarded to any existent (v)CPU, but should be able to become
+	 * pending anyway. We simply keep the irq_spi_target[] array empty, so
+	 * the interrupt will never be injected.
+	 * irq_spi_cpu[irq] gets a magic value in this case.
+	 */
+	if (likely(vcpu)) {
+		vcpu_id = vcpu->vcpu_id;
+		dist->irq_spi_cpu[spi] = vcpu_id;
+		bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]);
+		__set_bit(spi, bmap);
+	} else {
+		dist->irq_spi_cpu[spi] = VCPU_NOT_ALLOCATED;
+	}
+
+	vgic_update_state(kvm);
+
+	return true;
+}
+
+/*
+ * We should be careful about promising too much when a guest reads
+ * this register. Don't claim to be like any hardware implementation,
+ * but just report the GIC as version 3 - which is what a Linux guest
+ * would check.
+ */
+static bool handle_mmio_idregs(struct kvm_vcpu *vcpu,
+			       struct kvm_exit_mmio *mmio,
+			       phys_addr_t offset)
+{
+	u32 reg = 0;
+
+	switch (offset + GICD_IDREGS) {
+	case GICD_PIDR2:
+		reg = 0x3b;
+		break;
+	}
+
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+
+	return false;
+}
+
+static const struct kvm_mmio_range vgic_v3_dist_ranges[] = {
+	{
+		.base           = GICD_CTLR,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_ctlr,
+	},
+	{
+		.base           = GICD_TYPER,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_typer,
+	},
+	{
+		.base           = GICD_IIDR,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_iidr,
+	},
+	{
+		/* this register is optional, it is RAZ/WI if not implemented */
+		.base           = GICD_STATUSR,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_raz_wi,
+	},
+	{
+		/* this write only register is WI when TYPER.MBIS=0 */
+		.base		= GICD_SETSPI_NSR,
+		.len		= 0x04,
+		.bits_per_irq	= 0,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this write only register is WI when TYPER.MBIS=0 */
+		.base		= GICD_CLRSPI_NSR,
+		.len		= 0x04,
+		.bits_per_irq	= 0,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when DS=1 */
+		.base		= GICD_SETSPI_SR,
+		.len		= 0x04,
+		.bits_per_irq	= 0,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when DS=1 */
+		.base		= GICD_CLRSPI_SR,
+		.len		= 0x04,
+		.bits_per_irq	= 0,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICD_IGROUPR,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_rao_wi,
+	},
+	{
+		.base		= GICD_ISENABLER,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_enable_reg_dist,
+	},
+	{
+		.base		= GICD_ICENABLER,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_enable_reg_dist,
+	},
+	{
+		.base		= GICD_ISPENDR,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_pending_reg_dist,
+	},
+	{
+		.base		= GICD_ICPENDR,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_pending_reg_dist,
+	},
+	{
+		.base		= GICD_ISACTIVER,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICD_ICACTIVER,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICD_IPRIORITYR,
+		.len		= 0x400,
+		.bits_per_irq	= 8,
+		.handle_mmio	= handle_mmio_priority_reg_dist,
+	},
+	{
+		/* TARGETSRn is RES0 when ARE=1 */
+		.base		= GICD_ITARGETSR,
+		.len		= 0x400,
+		.bits_per_irq	= 8,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICD_ICFGR,
+		.len		= 0x100,
+		.bits_per_irq	= 2,
+		.handle_mmio	= handle_mmio_cfg_reg_dist,
+	},
+	{
+		/* this is RAZ/WI when DS=1 */
+		.base		= GICD_IGRPMODR,
+		.len		= 0x80,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when DS=1 */
+		.base		= GICD_NSACR,
+		.len		= 0x100,
+		.bits_per_irq	= 2,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when ARE=1 */
+		.base		= GICD_SGIR,
+		.len		= 0x04,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when ARE=1 */
+		.base		= GICD_CPENDSGIR,
+		.len		= 0x10,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		/* this is RAZ/WI when ARE=1 */
+		.base           = GICD_SPENDSGIR,
+		.len            = 0x10,
+		.handle_mmio    = handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICD_IROUTER + 0x100,
+		.len		= 0x1ee0,
+		.bits_per_irq	= 64,
+		.handle_mmio	= handle_mmio_route_reg,
+	},
+	{
+		.base           = GICD_IDREGS,
+		.len            = 0x30,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_idregs,
+	},
+	{},
+};
+
+static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu,
+					      struct kvm_exit_mmio *mmio,
+					      phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+				      redist_vcpu->vcpu_id,
+				      ACCESS_WRITE_SETBIT);
+}
+
+static bool handle_mmio_clear_enable_reg_redist(struct kvm_vcpu *vcpu,
+						struct kvm_exit_mmio *mmio,
+						phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	return vgic_handle_enable_reg(vcpu->kvm, mmio, offset,
+				      redist_vcpu->vcpu_id,
+				      ACCESS_WRITE_CLEARBIT);
+}
+
+static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu,
+					       struct kvm_exit_mmio *mmio,
+					       phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset,
+					   redist_vcpu->vcpu_id);
+}
+
+static bool handle_mmio_clear_pending_reg_redist(struct kvm_vcpu *vcpu,
+						 struct kvm_exit_mmio *mmio,
+						 phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset,
+					     redist_vcpu->vcpu_id);
+}
+
+static bool handle_mmio_priority_reg_redist(struct kvm_vcpu *vcpu,
+					    struct kvm_exit_mmio *mmio,
+					    phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+	u32 *reg;
+
+	reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
+				   redist_vcpu->vcpu_id, offset);
+	vgic_reg_access(mmio, reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+	return false;
+}
+
+static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu,
+				       struct kvm_exit_mmio *mmio,
+				       phys_addr_t offset)
+{
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+
+	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
+				       redist_vcpu->vcpu_id, offset >> 1);
+
+	return vgic_handle_cfg_reg(reg, mmio, offset);
+}
+
+static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = {
+	{
+		.base		= GICR_IGROUPR0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_rao_wi,
+	},
+	{
+		.base		= GICR_ISENABLER0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_enable_reg_redist,
+	},
+	{
+		.base		= GICR_ICENABLER0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_enable_reg_redist,
+	},
+	{
+		.base		= GICR_ISPENDR0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_set_pending_reg_redist,
+	},
+	{
+		.base		= GICR_ICPENDR0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_clear_pending_reg_redist,
+	},
+	{
+		.base		= GICR_ISACTIVER0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICR_ICACTIVER0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICR_IPRIORITYR0,
+		.len		= 0x20,
+		.bits_per_irq	= 8,
+		.handle_mmio	= handle_mmio_priority_reg_redist,
+	},
+	{
+		.base		= GICR_ICFGR0,
+		.len		= 0x08,
+		.bits_per_irq	= 2,
+		.handle_mmio	= handle_mmio_cfg_reg_redist,
+	},
+	{
+		.base		= GICR_IGRPMODR0,
+		.len		= 0x04,
+		.bits_per_irq	= 1,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{
+		.base		= GICR_NSACR,
+		.len		= 0x04,
+		.handle_mmio	= handle_mmio_raz_wi,
+	},
+	{},
+};
+
+static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
+				    struct kvm_exit_mmio *mmio,
+				    phys_addr_t offset)
+{
+	/* since we don't support LPIs, this register is zero for now */
+	vgic_reg_access(mmio, NULL, offset,
+			ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
+				     struct kvm_exit_mmio *mmio,
+				     phys_addr_t offset)
+{
+	u32 reg;
+	u64 mpidr;
+	struct kvm_vcpu *redist_vcpu = mmio->private;
+	int target_vcpu_id = redist_vcpu->vcpu_id;
+
+	/* the upper 32 bits contain the affinity value */
+	if ((offset & ~3) == 4) {
+		mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu);
+		reg = compress_mpidr(mpidr);
+
+		vgic_reg_access(mmio, &reg, offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+		return false;
+	}
+
+	reg = redist_vcpu->vcpu_id << 8;
+	if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
+		reg |= GICR_TYPER_LAST;
+	vgic_reg_access(mmio, &reg, offset,
+			ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+	return false;
+}
+
+static const struct kvm_mmio_range vgic_redist_ranges[] = {
+	{
+		.base           = GICR_CTLR,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_ctlr_redist,
+	},
+	{
+		.base           = GICR_TYPER,
+		.len            = 0x08,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_typer_redist,
+	},
+	{
+		.base           = GICR_IIDR,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_iidr,
+	},
+	{
+		.base           = GICR_WAKER,
+		.len            = 0x04,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_raz_wi,
+	},
+	{
+		.base           = GICR_IDREGS,
+		.len            = 0x30,
+		.bits_per_irq   = 0,
+		.handle_mmio    = handle_mmio_idregs,
+	},
+	{},
+};
+
+/*
+ * This function splits accesses between the distributor and the two
+ * redistributor parts (private/SPI). As each redistributor is accessible
+ * from any CPU, we have to determine the affected VCPU by taking the faulting
+ * address into account. We then pass this VCPU to the handler function via
+ * the private parameter.
+ */
+#define SGI_BASE_OFFSET SZ_64K
+static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				struct kvm_exit_mmio *mmio)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	unsigned long dbase = dist->vgic_dist_base;
+	unsigned long rdbase = dist->vgic_redist_base;
+	int nrcpus = atomic_read(&vcpu->kvm->online_vcpus);
+	int vcpu_id;
+	const struct kvm_mmio_range *mmio_range;
+
+	if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) {
+		return vgic_handle_mmio_range(vcpu, run, mmio,
+					      vgic_v3_dist_ranges, dbase);
+	}
+
+	if (!is_in_range(mmio->phys_addr, mmio->len, rdbase,
+	    GIC_V3_REDIST_SIZE * nrcpus))
+		return false;
+
+	vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE;
+	rdbase += (vcpu_id * GIC_V3_REDIST_SIZE);
+	mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id);
+
+	if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) {
+		rdbase += SGI_BASE_OFFSET;
+		mmio_range = vgic_redist_sgi_ranges;
+	} else {
+		mmio_range = vgic_redist_ranges;
+	}
+	return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase);
+}
+
+static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq)
+{
+	if (vgic_queue_irq(vcpu, 0, irq)) {
+		vgic_dist_irq_clear_pending(vcpu, irq);
+		vgic_cpu_irq_clear(vcpu, irq);
+		return true;
+	}
+
+	return false;
+}
+
+static int vgic_v3_map_resources(struct kvm *kvm,
+				 const struct vgic_params *params)
+{
+	int ret = 0;
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
+	if (!irqchip_in_kernel(kvm))
+		return 0;
+
+	mutex_lock(&kvm->lock);
+
+	if (vgic_ready(kvm))
+		goto out;
+
+	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
+	    IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) {
+		kvm_err("Need to set vgic distributor addresses first\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	/*
+	 * For a VGICv3 we require the userland to explicitly initialize
+	 * the VGIC before we need to use it.
+	 */
+	if (!vgic_initialized(kvm)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	kvm->arch.vgic.ready = true;
+out:
+	if (ret)
+		kvm_vgic_destroy(kvm);
+	mutex_unlock(&kvm->lock);
+	return ret;
+}
+
+static int vgic_v3_init_model(struct kvm *kvm)
+{
+	int i;
+	u32 mpidr;
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	int nr_spis = dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
+
+	dist->irq_spi_mpidr = kcalloc(nr_spis, sizeof(dist->irq_spi_mpidr[0]),
+				      GFP_KERNEL);
+
+	if (!dist->irq_spi_mpidr)
+		return -ENOMEM;
+
+	/* Initialize the target VCPUs for each IRQ to VCPU 0 */
+	mpidr = compress_mpidr(kvm_vcpu_get_mpidr_aff(kvm_get_vcpu(kvm, 0)));
+	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i++) {
+		dist->irq_spi_cpu[i - VGIC_NR_PRIVATE_IRQS] = 0;
+		dist->irq_spi_mpidr[i - VGIC_NR_PRIVATE_IRQS] = mpidr;
+		vgic_bitmap_set_irq_val(dist->irq_spi_target, 0, i, 1);
+	}
+
+	return 0;
+}
+
+/* GICv3 does not keep track of SGI sources anymore. */
+static void vgic_v3_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
+{
+}
+
+void vgic_v3_init_emulation(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
+	dist->vm_ops.handle_mmio = vgic_v3_handle_mmio;
+	dist->vm_ops.queue_sgi = vgic_v3_queue_sgi;
+	dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source;
+	dist->vm_ops.init_model = vgic_v3_init_model;
+	dist->vm_ops.map_resources = vgic_v3_map_resources;
+
+	kvm->arch.max_vcpus = KVM_MAX_VCPUS;
+}
+
+static int vgic_v3_create(struct kvm_device *dev, u32 type)
+{
+	return kvm_vgic_create(dev->kvm, type);
+}
+
+static void vgic_v3_destroy(struct kvm_device *dev)
+{
+	kfree(dev);
+}
+
+static int vgic_v3_set_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	int ret;
+
+	ret = vgic_set_common_attr(dev, attr);
+	if (ret != -ENXIO)
+		return ret;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		return -ENXIO;
+	}
+
+	return -ENXIO;
+}
+
+static int vgic_v3_get_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	int ret;
+
+	ret = vgic_get_common_attr(dev, attr);
+	if (ret != -ENXIO)
+		return ret;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		return -ENXIO;
+	}
+
+	return -ENXIO;
+}
+
+static int vgic_v3_has_attr(struct kvm_device *dev,
+			    struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+		switch (attr->attr) {
+		case KVM_VGIC_V2_ADDR_TYPE_DIST:
+		case KVM_VGIC_V2_ADDR_TYPE_CPU:
+			return -ENXIO;
+		}
+		break;
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		return -ENXIO;
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return 0;
+	case KVM_DEV_ARM_VGIC_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_ARM_VGIC_CTRL_INIT:
+			return 0;
+		}
+	}
+	return -ENXIO;
+}
+
+struct kvm_device_ops kvm_arm_vgic_v3_ops = {
+	.name = "kvm-arm-vgic-v3",
+	.create = vgic_v3_create,
+	.destroy = vgic_v3_destroy,
+	.set_attr = vgic_v3_set_attr,
+	.get_attr = vgic_v3_get_attr,
+	.has_attr = vgic_v3_has_attr,
+};
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index b6e17c886ce2..6d23e57c3561 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1249,7 +1249,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 	struct kvm_vcpu *vcpu;
 	int edge_triggered, level_triggered;
 	int enabled;
-	bool ret = true;
+	bool ret = true, can_inject = true;
 
 	spin_lock(&dist->lock);
 
@@ -1264,6 +1264,11 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 
 	if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
 		cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
+		if (cpuid == VCPU_NOT_ALLOCATED) {
+			/* Pretend we use CPU0, and prevent injection */
+			cpuid = 0;
+			can_inject = false;
+		}
 		vcpu = kvm_get_vcpu(kvm, cpuid);
 	}
 
@@ -1286,7 +1291,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
 
-	if (!enabled) {
+	if (!enabled || !can_inject) {
 		ret = false;
 		goto out;
 	}
@@ -1439,6 +1444,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
 	}
 	kfree(dist->irq_sgi_sources);
 	kfree(dist->irq_spi_cpu);
+	kfree(dist->irq_spi_mpidr);
 	kfree(dist->irq_spi_target);
 	kfree(dist->irq_pending_on_cpu);
 	dist->irq_sgi_sources = NULL;
@@ -1594,6 +1600,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
 	kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+	kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
 
 out_unlock:
 	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h
index e363b9341873..1e83bdf5f499 100644
--- a/virt/kvm/arm/vgic.h
+++ b/virt/kvm/arm/vgic.h
@@ -35,6 +35,8 @@
 #define ACCESS_WRITE_VALUE	(3 << 1)
 #define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
 
+#define VCPU_NOT_ALLOCATED	((u8)-1)
+
 unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x);
 
 void vgic_update_state(struct kvm *kvm);
@@ -116,5 +118,6 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
 
 int vgic_init(struct kvm *kvm);
 void vgic_v2_init_emulation(struct kvm *kvm);
+void vgic_v3_init_emulation(struct kvm *kvm);
 
 #endif
-- 
cgit v1.2.3-71-gd317


From 7f870c81a068fd1e69fe556f7fe85a5ece144b0c Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 28 Dec 2014 12:35:05 +0200
Subject: virtio_pci: drop virtio_config dependency

virtio_pci does not depend on virtio_config:
let's not include it, users can pull it in as necessary.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/uapi/linux/virtio_pci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index 35b552c7f330..509d630f04f4 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -39,7 +39,7 @@
 #ifndef _LINUX_VIRTIO_PCI_H
 #define _LINUX_VIRTIO_PCI_H
 
-#include <linux/virtio_config.h>
+#include <linux/types.h>
 
 #ifndef VIRTIO_PCI_NO_LEGACY
 
-- 
cgit v1.2.3-71-gd317


From 71d70c266c84c4e708bb36b20d0c0a29af42821c Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 29 May 2013 11:52:22 +0930
Subject: virtio-pci: define layout for virtio 1.0

Based on patches by Michael S. Tsirkin <mst@redhat.com>, but I found it
hard to follow so changed to use structures which are more
self-documenting.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_pci.h | 62 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index 509d630f04f4..4e054235358f 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -99,4 +99,66 @@
 /* Vector value used to disable MSI for queue */
 #define VIRTIO_MSI_NO_VECTOR            0xffff
 
+#ifndef VIRTIO_PCI_NO_MODERN
+
+/* IDs for different capabilities.  Must all exist. */
+
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG	1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG	2
+/* ISR access */
+#define VIRTIO_PCI_CAP_ISR_CFG		3
+/* Device specific confiuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG	4
+
+/* This is the PCI capability header: */
+struct virtio_pci_cap {
+	__u8 cap_vndr;		/* Generic PCI field: PCI_CAP_ID_VNDR */
+	__u8 cap_next;		/* Generic PCI field: next ptr. */
+	__u8 cap_len;		/* Generic PCI field: capability length */
+	__u8 type_and_bar;	/* Upper 3 bits: bar.
+				 * Lower 3 is VIRTIO_PCI_CAP_*_CFG. */
+	__le32 offset;		/* Offset within bar. */
+	__le32 length;		/* Length. */
+};
+
+#define VIRTIO_PCI_CAP_BAR_SHIFT	5
+#define VIRTIO_PCI_CAP_BAR_MASK		0x7
+#define VIRTIO_PCI_CAP_TYPE_SHIFT	0
+#define VIRTIO_PCI_CAP_TYPE_MASK	0x7
+
+struct virtio_pci_notify_cap {
+	struct virtio_pci_cap cap;
+	__le32 notify_off_multiplier;	/* Multiplier for queue_notify_off. */
+};
+
+/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
+struct virtio_pci_common_cfg {
+	/* About the whole device. */
+	__le32 device_feature_select;	/* read-write */
+	__le32 device_feature;		/* read-only */
+	__le32 guest_feature_select;	/* read-write */
+	__le32 guest_feature;		/* read-write */
+	__le16 msix_config;		/* read-write */
+	__le16 num_queues;		/* read-only */
+	__u8 device_status;		/* read-write */
+	__u8 config_generation;		/* read-only */
+
+	/* About a specific virtqueue. */
+	__le16 queue_select;		/* read-write */
+	__le16 queue_size;		/* read-write, power of 2. */
+	__le16 queue_msix_vector;	/* read-write */
+	__le16 queue_enable;		/* read-write */
+	__le16 queue_notify_off;	/* read-only */
+	__le32 queue_desc_lo;		/* read-write */
+	__le32 queue_desc_hi;		/* read-write */
+	__le32 queue_avail_lo;		/* read-write */
+	__le32 queue_avail_hi;		/* read-write */
+	__le32 queue_used_lo;		/* read-write */
+	__le32 queue_used_hi;		/* read-write */
+};
+
+#endif /* VIRTIO_PCI_NO_MODERN */
+
 #endif
-- 
cgit v1.2.3-71-gd317


From 1fcf0512c9c870e78e1c9898ecb9458593403466 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 11 Dec 2014 13:59:51 +0200
Subject: virtio_pci: modern driver

Lightly tested against qemu.

One thing *not* implemented here is separate mappings
for descriptor/avail/used rings. That's nice to have,
will be done later after we have core support.

This also exposes the PCI layout to userspace, and
adds macros for PCI layout offsets:

QEMU wants it, so why not?  Trust, but verify.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/Makefile            |   2 +-
 drivers/virtio/virtio_pci_common.c |  14 +-
 drivers/virtio/virtio_pci_common.h |  25 +-
 drivers/virtio/virtio_pci_modern.c | 587 +++++++++++++++++++++++++++++++++++++
 include/uapi/linux/virtio_pci.h    |   7 +-
 5 files changed, 625 insertions(+), 10 deletions(-)
 create mode 100644 drivers/virtio/virtio_pci_modern.c

(limited to 'include/uapi/linux')

diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index bf5104b56894..bd230d1c0533 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
-virtio_pci-y := virtio_pci_legacy.o virtio_pci_common.o
+virtio_pci-y := virtio_pci_modern.o virtio_pci_legacy.o virtio_pci_common.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 457cbe29c8c4..8ae34a34f3af 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -505,7 +505,9 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
 	if (rc)
 		goto err_request_regions;
 
-	rc = virtio_pci_legacy_probe(vp_dev);
+	rc = virtio_pci_modern_probe(vp_dev);
+	if (rc == -ENODEV)
+		rc = virtio_pci_legacy_probe(vp_dev);
 	if (rc)
 		goto err_probe;
 
@@ -518,7 +520,10 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
 	return 0;
 
 err_register:
-	virtio_pci_legacy_remove(vp_dev);
+	if (vp_dev->ioaddr)
+	     virtio_pci_legacy_remove(vp_dev);
+	else
+	     virtio_pci_modern_remove(vp_dev);
 err_probe:
 	pci_release_regions(pci_dev);
 err_request_regions:
@@ -534,7 +539,10 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
 
 	unregister_virtio_device(&vp_dev->vdev);
 
-	virtio_pci_legacy_remove(pci_dev);
+	if (vp_dev->ioaddr)
+		virtio_pci_legacy_remove(vp_dev);
+	else
+		virtio_pci_modern_remove(vp_dev);
 
 	pci_release_regions(pci_dev);
 	pci_disable_device(pci_dev);
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index 2b1e70db44a0..610c43f19230 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -53,12 +53,29 @@ struct virtio_pci_device {
 	struct virtio_device vdev;
 	struct pci_dev *pci_dev;
 
+	/* In legacy mode, these two point to within ->legacy. */
+	/* Where to read and clear interrupt */
+	u8 __iomem *isr;
+
+	/* Modern only fields */
+	/* The IO mapping for the PCI config space (non-legacy mode) */
+	struct virtio_pci_common_cfg __iomem *common;
+	/* Device-specific data (non-legacy mode)  */
+	void __iomem *device;
+
+	/* So we can sanity-check accesses. */
+	size_t device_len;
+
+	/* Capability for when we need to map notifications per-vq. */
+	int notify_map_cap;
+
+	/* Multiply queue_notify_off by this value. (non-legacy mode). */
+	u32 notify_offset_multiplier;
+
+	/* Legacy only field */
 	/* the IO mapping for the PCI config space */
 	void __iomem *ioaddr;
 
-	/* the IO mapping for ISR operation */
-	void __iomem *isr;
-
 	/* a list of queues so we can dispatch IRQs */
 	spinlock_t lock;
 	struct list_head virtqueues;
@@ -129,5 +146,7 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu);
 
 int virtio_pci_legacy_probe(struct virtio_pci_device *);
 void virtio_pci_legacy_remove(struct virtio_pci_device *);
+int virtio_pci_modern_probe(struct virtio_pci_device *);
+void virtio_pci_modern_remove(struct virtio_pci_device *);
 
 #endif
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
new file mode 100644
index 000000000000..a3d81013e0c2
--- /dev/null
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -0,0 +1,587 @@
+/*
+ * Virtio PCI driver - modern (virtio 1.0) device support
+ *
+ * This module allows virtio devices to be used over a virtual PCI device.
+ * This can be used with QEMU based VMMs like KVM or Xen.
+ *
+ * Copyright IBM Corp. 2007
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ *  Anthony Liguori  <aliguori@us.ibm.com>
+ *  Rusty Russell <rusty@rustcorp.com.au>
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#define VIRTIO_PCI_NO_LEGACY
+#include "virtio_pci_common.h"
+
+static void __iomem *map_capability(struct pci_dev *dev, int off,
+				    size_t minlen,
+				    u32 align,
+				    u32 start, u32 size,
+				    size_t *len)
+{
+	u8 bar;
+	u32 offset, length;
+	void __iomem *p;
+
+	pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
+						 bar),
+			     &bar);
+	pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
+			     &offset);
+	pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
+			      &length);
+
+	if (length <= start) {
+		dev_err(&dev->dev,
+			"virtio_pci: bad capability len %u (>%u expected)\n",
+			length, start);
+		return NULL;
+	}
+
+	if (length - start < minlen) {
+		dev_err(&dev->dev,
+			"virtio_pci: bad capability len %u (>=%zu expected)\n",
+			length, minlen);
+		return NULL;
+	}
+
+	length -= start;
+
+	if (start + offset < offset) {
+		dev_err(&dev->dev,
+			"virtio_pci: map wrap-around %u+%u\n",
+			start, offset);
+		return NULL;
+	}
+
+	offset += start;
+
+	if (offset & (align - 1)) {
+		dev_err(&dev->dev,
+			"virtio_pci: offset %u not aligned to %u\n",
+			offset, align);
+		return NULL;
+	}
+
+	if (length > size)
+		length = size;
+
+	if (len)
+		*len = length;
+
+	if (minlen + offset < minlen ||
+	    minlen + offset > pci_resource_len(dev, bar)) {
+		dev_err(&dev->dev,
+			"virtio_pci: map virtio %zu@%u "
+			"out of range on bar %i length %lu\n",
+			minlen, offset,
+			bar, (unsigned long)pci_resource_len(dev, bar));
+		return NULL;
+	}
+
+	p = pci_iomap_range(dev, bar, offset, length);
+	if (!p)
+		dev_err(&dev->dev,
+			"virtio_pci: unable to map virtio %u@%u on bar %i\n",
+			length, offset, bar);
+	return p;
+}
+
+static void iowrite64_twopart(u64 val, __le32 __iomem *lo, __le32 __iomem *hi)
+{
+	iowrite32((u32)val, lo);
+	iowrite32(val >> 32, hi);
+}
+
+/* virtio config->get_features() implementation */
+static u64 vp_get_features(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	u64 features;
+
+	iowrite32(0, &vp_dev->common->device_feature_select);
+	features = ioread32(&vp_dev->common->device_feature);
+	iowrite32(1, &vp_dev->common->device_feature_select);
+	features |= ((u64)ioread32(&vp_dev->common->device_feature) << 32);
+
+	return features;
+}
+
+/* virtio config->finalize_features() implementation */
+static int vp_finalize_features(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	/* Give virtio_ring a chance to accept features. */
+	vring_transport_features(vdev);
+
+	if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
+		dev_err(&vdev->dev, "virtio: device uses modern interface "
+			"but does not have VIRTIO_F_VERSION_1\n");
+		return -EINVAL;
+	}
+
+	iowrite32(0, &vp_dev->common->guest_feature_select);
+	iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
+	iowrite32(1, &vp_dev->common->guest_feature_select);
+	iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
+
+	return 0;
+}
+
+/* virtio config->get() implementation */
+static void vp_get(struct virtio_device *vdev, unsigned offset,
+		   void *buf, unsigned len)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	u8 b;
+	__le16 w;
+	__le32 l;
+
+	BUG_ON(offset + len > vp_dev->device_len);
+
+	switch (len) {
+	case 1:
+		b = ioread8(vp_dev->device + offset);
+		memcpy(buf, &b, sizeof b);
+		break;
+	case 2:
+		w = cpu_to_le16(ioread16(vp_dev->device + offset));
+		memcpy(buf, &w, sizeof w);
+		break;
+	case 4:
+		l = cpu_to_le32(ioread32(vp_dev->device + offset));
+		memcpy(buf, &l, sizeof l);
+		break;
+	case 8:
+		l = cpu_to_le32(ioread32(vp_dev->device + offset));
+		memcpy(buf, &l, sizeof l);
+		l = cpu_to_le32(ioread32(vp_dev->device + offset + sizeof l));
+		memcpy(buf + sizeof l, &l, sizeof l);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/* the config->set() implementation.  it's symmetric to the config->get()
+ * implementation */
+static void vp_set(struct virtio_device *vdev, unsigned offset,
+		   const void *buf, unsigned len)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	u8 b;
+	__le16 w;
+	__le32 l;
+
+	BUG_ON(offset + len > vp_dev->device_len);
+
+	switch (len) {
+	case 1:
+		memcpy(&b, buf, sizeof b);
+		iowrite8(b, vp_dev->device + offset);
+		break;
+	case 2:
+		memcpy(&w, buf, sizeof w);
+		iowrite16(le16_to_cpu(w), vp_dev->device + offset);
+		break;
+	case 4:
+		memcpy(&l, buf, sizeof l);
+		iowrite32(le32_to_cpu(l), vp_dev->device + offset);
+		break;
+	case 8:
+		memcpy(&l, buf, sizeof l);
+		iowrite32(le32_to_cpu(l), vp_dev->device + offset);
+		memcpy(&l, buf + sizeof l, sizeof l);
+		iowrite32(le32_to_cpu(l), vp_dev->device + offset + sizeof l);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static u32 vp_generation(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	return ioread8(&vp_dev->common->config_generation);
+}
+
+/* config->{get,set}_status() implementations */
+static u8 vp_get_status(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	return ioread8(&vp_dev->common->device_status);
+}
+
+static void vp_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	/* We should never be setting status to 0. */
+	BUG_ON(status == 0);
+	iowrite8(status, &vp_dev->common->device_status);
+}
+
+static void vp_reset(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	/* 0 status means a reset. */
+	iowrite8(0, &vp_dev->common->device_status);
+	/* Flush out the status write, and flush in device writes,
+	 * including MSI-X interrupts, if any. */
+	ioread8(&vp_dev->common->device_status);
+	/* Flush pending VQ/configuration callbacks. */
+	vp_synchronize_vectors(vdev);
+}
+
+static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
+{
+	/* Setup the vector used for configuration events */
+	iowrite16(vector, &vp_dev->common->msix_config);
+	/* Verify we had enough resources to assign the vector */
+	/* Will also flush the write out to device */
+	return ioread16(&vp_dev->common->msix_config);
+}
+
+static size_t vring_pci_size(u16 num)
+{
+	/* We only need a cacheline separation. */
+	return PAGE_ALIGN(vring_size(num, SMP_CACHE_BYTES));
+}
+
+static void *alloc_virtqueue_pages(int *num)
+{
+	void *pages;
+
+	/* TODO: allocate each queue chunk individually */
+	for (; *num && vring_pci_size(*num) > PAGE_SIZE; *num /= 2) {
+		pages = alloc_pages_exact(vring_pci_size(*num),
+					  GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
+		if (pages)
+			return pages;
+	}
+
+	if (!*num)
+		return NULL;
+
+	/* Try to get a single page. You are my only hope! */
+	return alloc_pages_exact(vring_pci_size(*num), GFP_KERNEL|__GFP_ZERO);
+}
+
+static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
+				  struct virtio_pci_vq_info *info,
+				  unsigned index,
+				  void (*callback)(struct virtqueue *vq),
+				  const char *name,
+				  u16 msix_vec)
+{
+	struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
+	struct virtqueue *vq;
+	u16 num, off;
+	int err;
+
+	if (index >= ioread16(&cfg->num_queues))
+		return ERR_PTR(-ENOENT);
+
+	/* Select the queue we're interested in */
+	iowrite16(index, &cfg->queue_select);
+
+	/* Check if queue is either not available or already active. */
+	num = ioread16(&cfg->queue_size);
+	if (!num || ioread8(&cfg->queue_enable))
+		return ERR_PTR(-ENOENT);
+
+	if (num & (num - 1)) {
+		dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* get offset of notification word for this vq */
+	off = ioread16(&cfg->queue_notify_off);
+
+	info->num = num;
+	info->msix_vector = msix_vec;
+
+	info->queue = alloc_virtqueue_pages(&info->num);
+	if (info->queue == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* create the vring */
+	vq = vring_new_virtqueue(index, info->num,
+				 SMP_CACHE_BYTES, &vp_dev->vdev,
+				 true, info->queue, vp_notify, callback, name);
+	if (!vq) {
+		err = -ENOMEM;
+		goto err_new_queue;
+	}
+
+	/* activate the queue */
+	iowrite16(num, &cfg->queue_size);
+	iowrite64_twopart(virt_to_phys(info->queue),
+			  &cfg->queue_desc_lo, &cfg->queue_desc_hi);
+	iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)),
+			  &cfg->queue_avail_lo, &cfg->queue_avail_hi);
+	iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)),
+			  &cfg->queue_used_lo, &cfg->queue_used_hi);
+
+	vq->priv = (void __force *)map_capability(vp_dev->pci_dev,
+				  vp_dev->notify_map_cap, 2, 2,
+				  off * vp_dev->notify_offset_multiplier, 2,
+				  NULL);
+
+	if (!vq->priv) {
+		err = -ENOMEM;
+		goto err_map_notify;
+	}
+
+	if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
+		iowrite16(msix_vec, &cfg->queue_msix_vector);
+		msix_vec = ioread16(&cfg->queue_msix_vector);
+		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
+			err = -EBUSY;
+			goto err_assign_vector;
+		}
+	}
+
+	return vq;
+
+err_assign_vector:
+	pci_iounmap(vp_dev->pci_dev, (void __iomem __force *)vq->priv);
+err_map_notify:
+	vring_del_virtqueue(vq);
+err_new_queue:
+	free_pages_exact(info->queue, vring_pci_size(info->num));
+	return ERR_PTR(err);
+}
+
+static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+			      struct virtqueue *vqs[],
+			      vq_callback_t *callbacks[],
+			      const char *names[])
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtqueue *vq;
+	int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names);
+
+	if (rc)
+		return rc;
+
+	/* Select and activate all queues. Has to be done last: once we do
+	 * this, there's no way to go back except reset.
+	 */
+	list_for_each_entry(vq, &vdev->vqs, list) {
+		iowrite16(vq->index, &vp_dev->common->queue_select);
+		iowrite8(1, &vp_dev->common->queue_enable);
+	}
+
+	return 0;
+}
+
+static void del_vq(struct virtio_pci_vq_info *info)
+{
+	struct virtqueue *vq = info->vq;
+	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+
+	iowrite16(vq->index, &vp_dev->common->queue_select);
+
+	if (vp_dev->msix_enabled) {
+		iowrite16(VIRTIO_MSI_NO_VECTOR,
+			  &vp_dev->common->queue_msix_vector);
+		/* Flush the write out to device */
+		ioread16(&vp_dev->common->queue_msix_vector);
+	}
+
+	pci_iounmap(vp_dev->pci_dev, (void __force __iomem *)vq->priv);
+
+	vring_del_virtqueue(vq);
+
+	free_pages_exact(info->queue, vring_pci_size(info->num));
+}
+
+static const struct virtio_config_ops virtio_pci_config_ops = {
+	.get		= vp_get,
+	.set		= vp_set,
+	.generation	= vp_generation,
+	.get_status	= vp_get_status,
+	.set_status	= vp_set_status,
+	.reset		= vp_reset,
+	.find_vqs	= vp_modern_find_vqs,
+	.del_vqs	= vp_del_vqs,
+	.get_features	= vp_get_features,
+	.finalize_features = vp_finalize_features,
+	.bus_name	= vp_bus_name,
+	.set_vq_affinity = vp_set_vq_affinity,
+};
+
+/**
+ * virtio_pci_find_capability - walk capabilities to find device info.
+ * @dev: the pci device
+ * @cfg_type: the VIRTIO_PCI_CAP_* value we seek
+ * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
+ *
+ * Returns offset of the capability, or 0.
+ */
+static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
+					     u32 ioresource_types)
+{
+	int pos;
+
+	for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
+	     pos > 0;
+	     pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
+		u8 type, bar;
+		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+							 cfg_type),
+				     &type);
+		pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+							 bar),
+				     &bar);
+
+		/* Ignore structures with reserved BAR values */
+		if (bar > 0x5)
+			continue;
+
+		if (type == cfg_type) {
+			if (pci_resource_len(dev, bar) &&
+			    pci_resource_flags(dev, bar) & ioresource_types)
+				return pos;
+		}
+	}
+	return 0;
+}
+
+static void virtio_pci_release_dev(struct device *_d)
+{
+	struct virtio_device *vdev = dev_to_virtio(_d);
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	kfree(vp_dev);
+}
+
+/* TODO: validate the ABI statically. */
+static inline void check_offsets(void)
+{
+}
+
+/* the PCI probing function */
+int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
+{
+	struct pci_dev *pci_dev = vp_dev->pci_dev;
+	int err, common, isr, notify, device;
+	u32 notify_length;
+
+	check_offsets();
+
+	/* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
+	if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
+		return -ENODEV;
+
+	if (pci_dev->device < 0x1040) {
+		/* Transitional devices: use the PCI subsystem device id as
+		 * virtio device id, same as legacy driver always did.
+		 */
+		vp_dev->vdev.id.device = pci_dev->subsystem_device;
+	} else {
+		/* Modern devices: simply use PCI device id, but start from 0x1040. */
+		vp_dev->vdev.id.device = pci_dev->device - 0x1040;
+	}
+	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
+
+	if (virtio_device_is_legacy_only(vp_dev->vdev.id))
+		return -ENODEV;
+
+	/* check for a common config: if not, use legacy mode (bar 0). */
+	common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
+					    IORESOURCE_IO | IORESOURCE_MEM);
+	if (!common) {
+		dev_info(&pci_dev->dev,
+			 "virtio_pci: leaving for legacy driver\n");
+		return -ENODEV;
+	}
+
+	/* If common is there, these should be too... */
+	isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
+					 IORESOURCE_IO | IORESOURCE_MEM);
+	notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
+					    IORESOURCE_IO | IORESOURCE_MEM);
+	if (!isr || !notify) {
+		dev_err(&pci_dev->dev,
+			"virtio_pci: missing capabilities %i/%i/%i\n",
+			common, isr, notify);
+		return -EINVAL;
+	}
+
+	/* Device capability is only mandatory for devices that have
+	 * device-specific configuration.
+	 */
+	device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
+					    IORESOURCE_IO | IORESOURCE_MEM);
+
+	err = -EINVAL;
+	vp_dev->common = map_capability(pci_dev, common,
+					sizeof(struct virtio_pci_common_cfg), 4,
+					0, sizeof(struct virtio_pci_common_cfg),
+					NULL);
+	if (!vp_dev->common)
+		goto err_map_common;
+	vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8), 1,
+				     0, 1,
+				     NULL);
+	if (!vp_dev->isr)
+		goto err_map_isr;
+
+	/* Read notify_off_multiplier from config space. */
+	pci_read_config_dword(pci_dev,
+			      notify + offsetof(struct virtio_pci_notify_cap,
+						notify_off_multiplier),
+			      &vp_dev->notify_offset_multiplier);
+	/* Read notify length from config space. */
+	pci_read_config_dword(pci_dev,
+			      notify + offsetof(struct virtio_pci_notify_cap,
+						cap.length),
+			      &notify_length);
+
+	vp_dev->notify_map_cap = notify;
+
+	/* Again, we don't know how much we should map, but PAGE_SIZE
+	 * is more than enough for all existing devices.
+	 */
+	if (device) {
+		vp_dev->device = map_capability(pci_dev, device, 0, 4,
+						0, PAGE_SIZE,
+						&vp_dev->device_len);
+		if (!vp_dev->device)
+			goto err_map_device;
+	}
+
+	vp_dev->vdev.config = &virtio_pci_config_ops;
+
+	vp_dev->config_vector = vp_config_vector;
+	vp_dev->setup_vq = setup_vq;
+	vp_dev->del_vq = del_vq;
+
+	return 0;
+
+err_map_device:
+	pci_iounmap(pci_dev, vp_dev->isr);
+err_map_isr:
+	pci_iounmap(pci_dev, vp_dev->common);
+err_map_common:
+	return err;
+}
+
+void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
+{
+	struct pci_dev *pci_dev = vp_dev->pci_dev;
+
+	if (vp_dev->device)
+		pci_iounmap(pci_dev, vp_dev->device);
+	pci_iounmap(pci_dev, vp_dev->isr);
+	pci_iounmap(pci_dev, vp_dev->common);
+}
diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index 4e054235358f..a2b2e1353e30 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -117,10 +117,11 @@ struct virtio_pci_cap {
 	__u8 cap_vndr;		/* Generic PCI field: PCI_CAP_ID_VNDR */
 	__u8 cap_next;		/* Generic PCI field: next ptr. */
 	__u8 cap_len;		/* Generic PCI field: capability length */
-	__u8 type_and_bar;	/* Upper 3 bits: bar.
-				 * Lower 3 is VIRTIO_PCI_CAP_*_CFG. */
+	__u8 cfg_type;		/* Identifies the structure. */
+	__u8 bar;		/* Where to find it. */
+	__u8 padding[3];	/* Pad to full dword. */
 	__le32 offset;		/* Offset within bar. */
-	__le32 length;		/* Length. */
+	__le32 length;		/* Length of the structure, in bytes. */
 };
 
 #define VIRTIO_PCI_CAP_BAR_SHIFT	5
-- 
cgit v1.2.3-71-gd317


From 89461c4a12faa643fd7564037440d626b777b033 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 30 May 2013 16:29:32 +0930
Subject: virtio_pci: macros for PCI layout offsets

QEMU wants it, so why not?  Trust, but verify.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_pci_modern.c | 60 +++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/virtio_pci.h    | 36 +++++++++++++++++++----
 2 files changed, 90 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index a3d81013e0c2..b2e707ad81cf 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -464,9 +464,67 @@ static void virtio_pci_release_dev(struct device *_d)
 	kfree(vp_dev);
 }
 
-/* TODO: validate the ABI statically. */
+/* This is part of the ABI.  Don't screw with it. */
 static inline void check_offsets(void)
 {
+	/* Note: disk space was harmed in compilation of this function. */
+	BUILD_BUG_ON(VIRTIO_PCI_CAP_VNDR !=
+		     offsetof(struct virtio_pci_cap, cap_vndr));
+	BUILD_BUG_ON(VIRTIO_PCI_CAP_NEXT !=
+		     offsetof(struct virtio_pci_cap, cap_next));
+	BUILD_BUG_ON(VIRTIO_PCI_CAP_LEN !=
+		     offsetof(struct virtio_pci_cap, cap_len));
+	BUILD_BUG_ON(VIRTIO_PCI_CAP_CFG_TYPE !=
+		     offsetof(struct virtio_pci_cap, cfg_type));
+	BUILD_BUG_ON(VIRTIO_PCI_CAP_BAR !=
+		     offsetof(struct virtio_pci_cap, bar));
+	BUILD_BUG_ON(VIRTIO_PCI_CAP_OFFSET !=
+		     offsetof(struct virtio_pci_cap, offset));
+	BUILD_BUG_ON(VIRTIO_PCI_CAP_LENGTH !=
+		     offsetof(struct virtio_pci_cap, length));
+	BUILD_BUG_ON(VIRTIO_PCI_NOTIFY_CAP_MULT !=
+		     offsetof(struct virtio_pci_notify_cap,
+			      notify_off_multiplier));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_DFSELECT !=
+		     offsetof(struct virtio_pci_common_cfg,
+			      device_feature_select));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_DF !=
+		     offsetof(struct virtio_pci_common_cfg, device_feature));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_GFSELECT !=
+		     offsetof(struct virtio_pci_common_cfg,
+			      guest_feature_select));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_GF !=
+		     offsetof(struct virtio_pci_common_cfg, guest_feature));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_MSIX !=
+		     offsetof(struct virtio_pci_common_cfg, msix_config));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_NUMQ !=
+		     offsetof(struct virtio_pci_common_cfg, num_queues));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_STATUS !=
+		     offsetof(struct virtio_pci_common_cfg, device_status));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_CFGGENERATION !=
+		     offsetof(struct virtio_pci_common_cfg, config_generation));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SELECT !=
+		     offsetof(struct virtio_pci_common_cfg, queue_select));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SIZE !=
+		     offsetof(struct virtio_pci_common_cfg, queue_size));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_MSIX !=
+		     offsetof(struct virtio_pci_common_cfg, queue_msix_vector));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_ENABLE !=
+		     offsetof(struct virtio_pci_common_cfg, queue_enable));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NOFF !=
+		     offsetof(struct virtio_pci_common_cfg, queue_notify_off));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCLO !=
+		     offsetof(struct virtio_pci_common_cfg, queue_desc_lo));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCHI !=
+		     offsetof(struct virtio_pci_common_cfg, queue_desc_hi));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILLO !=
+		     offsetof(struct virtio_pci_common_cfg, queue_avail_lo));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILHI !=
+		     offsetof(struct virtio_pci_common_cfg, queue_avail_hi));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDLO !=
+		     offsetof(struct virtio_pci_common_cfg, queue_used_lo));
+	BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
+		     offsetof(struct virtio_pci_common_cfg, queue_used_hi));
 }
 
 /* the PCI probing function */
diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index a2b2e1353e30..3b7e4d2765fb 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -124,11 +124,6 @@ struct virtio_pci_cap {
 	__le32 length;		/* Length of the structure, in bytes. */
 };
 
-#define VIRTIO_PCI_CAP_BAR_SHIFT	5
-#define VIRTIO_PCI_CAP_BAR_MASK		0x7
-#define VIRTIO_PCI_CAP_TYPE_SHIFT	0
-#define VIRTIO_PCI_CAP_TYPE_MASK	0x7
-
 struct virtio_pci_notify_cap {
 	struct virtio_pci_cap cap;
 	__le32 notify_off_multiplier;	/* Multiplier for queue_notify_off. */
@@ -160,6 +155,37 @@ struct virtio_pci_common_cfg {
 	__le32 queue_used_hi;		/* read-write */
 };
 
+/* Macro versions of offsets for the Old Timers! */
+#define VIRTIO_PCI_CAP_VNDR		0
+#define VIRTIO_PCI_CAP_NEXT		1
+#define VIRTIO_PCI_CAP_LEN		2
+#define VIRTIO_PCI_CAP_CFG_TYPE		3
+#define VIRTIO_PCI_CAP_BAR		4
+#define VIRTIO_PCI_CAP_OFFSET		8
+#define VIRTIO_PCI_CAP_LENGTH		12
+
+#define VIRTIO_PCI_NOTIFY_CAP_MULT	16
+
+#define VIRTIO_PCI_COMMON_DFSELECT	0
+#define VIRTIO_PCI_COMMON_DF		4
+#define VIRTIO_PCI_COMMON_GFSELECT	8
+#define VIRTIO_PCI_COMMON_GF		12
+#define VIRTIO_PCI_COMMON_MSIX		16
+#define VIRTIO_PCI_COMMON_NUMQ		18
+#define VIRTIO_PCI_COMMON_STATUS	20
+#define VIRTIO_PCI_COMMON_CFGGENERATION	21
+#define VIRTIO_PCI_COMMON_Q_SELECT	22
+#define VIRTIO_PCI_COMMON_Q_SIZE	24
+#define VIRTIO_PCI_COMMON_Q_MSIX	26
+#define VIRTIO_PCI_COMMON_Q_ENABLE	28
+#define VIRTIO_PCI_COMMON_Q_NOFF	30
+#define VIRTIO_PCI_COMMON_Q_DESCLO	32
+#define VIRTIO_PCI_COMMON_Q_DESCHI	36
+#define VIRTIO_PCI_COMMON_Q_AVAILLO	40
+#define VIRTIO_PCI_COMMON_Q_AVAILHI	44
+#define VIRTIO_PCI_COMMON_Q_USEDLO	48
+#define VIRTIO_PCI_COMMON_Q_USEDHI	52
+
 #endif /* VIRTIO_PCI_NO_MODERN */
 
 #endif
-- 
cgit v1.2.3-71-gd317


From 25e65e4efca4116a9fc7a892ede2cf98f138de45 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 15 Jan 2015 13:33:31 +0200
Subject: virtio_balloon: coding style fixes

Most of our code has
struct foo {
}

Fix two instances where balloon is inconsistent.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_balloon.c     | 3 +--
 include/uapi/linux/virtio_balloon.h | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 3176ea4028a8..0413157f3b49 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -44,8 +44,7 @@ static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
 module_param(oom_pages, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
 
-struct virtio_balloon
-{
+struct virtio_balloon {
 	struct virtio_device *vdev;
 	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
 
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index be40f7059e93..4b0488f20b2e 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -36,8 +36,7 @@
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
 
-struct virtio_balloon_config
-{
+struct virtio_balloon_config {
 	/* Number of pages host wants Guest to give up. */
 	__le32 num_pages;
 	/* Number of pages we've actually got in balloon. */
-- 
cgit v1.2.3-71-gd317


From 9c45101e88b2bf2ce36b8833fcfa784a9149aa74 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 19 Nov 2014 09:21:58 +0100
Subject: quota: Cleanup flags definitions

Currently all quota flags were defined just in kernel-private headers.
Export flags readable / writeable from userspace to userspace via
include/uapi/linux/quota.h.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c           |  2 +-
 include/linux/dqblk_v1.h   |  3 ---
 include/linux/quota.h      | 14 ++++++++------
 include/uapi/linux/quota.h | 14 +++++++++++++-
 4 files changed, 22 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8f0acef3d184..f8be368b9086 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1248,7 +1248,7 @@ static int ignore_hardlimit(struct dquot *dquot)
 
 	return capable(CAP_SYS_RESOURCE) &&
 	       (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD ||
-		!(info->dqi_flags & V1_DQF_RSQUASH));
+		!(info->dqi_flags & DQF_ROOT_SQUASH));
 }
 
 /* needs dq_data_lock */
diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h
index 3713a7232dd8..c0d4d1e2a45c 100644
--- a/include/linux/dqblk_v1.h
+++ b/include/linux/dqblk_v1.h
@@ -5,9 +5,6 @@
 #ifndef _LINUX_DQBLK_V1_H
 #define _LINUX_DQBLK_V1_H
 
-/* Root squash turned on */
-#define V1_DQF_RSQUASH 1
-
 /* Numbers of blocks needed for updates */
 #define V1_INIT_ALLOC 1
 #define V1_INIT_REWRITE 1
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 50978b781a19..0c42113607ce 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -223,12 +223,14 @@ struct mem_dqinfo {
 
 struct super_block;
 
-#define DQF_MASK 0xffff		/* Mask for format specific flags */
-#define DQF_GETINFO_MASK 0x1ffff	/* Mask for flags passed to userspace */
-#define DQF_SETINFO_MASK 0xffff		/* Mask for flags modifiable from userspace */
-#define DQF_SYS_FILE_B		16
-#define DQF_SYS_FILE (1 << DQF_SYS_FILE_B)	/* Quota file stored as system file */
-#define DQF_INFO_DIRTY_B	31
+/* Mask for flags passed to userspace */
+#define DQF_GETINFO_MASK (DQF_ROOT_SQUASH | DQF_SYS_FILE)
+/* Mask for flags modifiable from userspace */
+#define DQF_SETINFO_MASK DQF_ROOT_SQUASH
+
+enum {
+	DQF_INFO_DIRTY_B = DQF_PRIVATE,
+};
 #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B)	/* Is info dirty? */
 
 extern void mark_info_dirty(struct super_block *sb, int type);
diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h
index 3b6cfbeb086d..1f49b8341c99 100644
--- a/include/uapi/linux/quota.h
+++ b/include/uapi/linux/quota.h
@@ -126,10 +126,22 @@ struct if_dqblk {
 #define IIF_FLAGS	4
 #define IIF_ALL		(IIF_BGRACE | IIF_IGRACE | IIF_FLAGS)
 
+enum {
+	DQF_ROOT_SQUASH_B = 0,
+	DQF_SYS_FILE_B = 16,
+	/* Kernel internal flags invisible to userspace */
+	DQF_PRIVATE
+};
+
+/* Root squash enabled (for v1 quota format) */
+#define DQF_ROOT_SQUASH	(1 << DQF_ROOT_SQUASH_B)
+/* Quota stored in a system file */
+#define DQF_SYS_FILE	(1 << DQF_SYS_FILE_B)
+
 struct if_dqinfo {
 	__u64 dqi_bgrace;
 	__u64 dqi_igrace;
-	__u32 dqi_flags;
+	__u32 dqi_flags;	/* DFQ_* */
 	__u32 dqi_valid;
 };
 
-- 
cgit v1.2.3-71-gd317


From 4b681c82d2f9bef121c912ffcaac89a004af3f2c Mon Sep 17 00:00:00 2001
From: Vadim Kochan <vadim4j@gmail.com>
Date: Mon, 12 Jan 2015 16:34:05 +0200
Subject: nl80211: Allow set network namespace by fd

Added new NL80211_ATTR_NETNS_FD which allows to
set namespace via nl80211 by fd.

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  2 ++
 net/core/net_namespace.c     |  1 +
 net/wireless/nl80211.c       | 16 +++++++++++-----
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f52797a90816..f68532b015db 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2098,6 +2098,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_SURVEY_RADIO_STATS,
 
+	NL80211_ATTR_NETNS_FD,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7f155175bba8..5d5ee8f3e4ff 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -361,6 +361,7 @@ struct net *get_net_ns_by_fd(int fd)
 	return ERR_PTR(-EINVAL);
 }
 #endif
+EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
 
 struct net *get_net_ns_by_pid(pid_t pid)
 {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c5661c5ad8f3..c64100ec79e3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -397,6 +397,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 },
 	[NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN },
 	[NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG },
+	[NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 },
 };
 
 /* policy for the key attributes */
@@ -7762,14 +7763,19 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net *net;
 	int err;
-	u32 pid;
 
-	if (!info->attrs[NL80211_ATTR_PID])
-		return -EINVAL;
+	if (info->attrs[NL80211_ATTR_PID]) {
+		u32 pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]);
+
+		net = get_net_ns_by_pid(pid);
+	} else if (info->attrs[NL80211_ATTR_NETNS_FD]) {
+		u32 fd = nla_get_u32(info->attrs[NL80211_ATTR_NETNS_FD]);
 
-	pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]);
+		net = get_net_ns_by_fd(fd);
+	} else {
+		return -EINVAL;
+	}
 
-	net = get_net_ns_by_pid(pid);
 	if (IS_ERR(net))
 		return PTR_ERR(net);
 
-- 
cgit v1.2.3-71-gd317


From 9c74893441d3cf4b258a82b19cbf6bfd2ed6e549 Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Fri, 16 Jan 2015 16:04:09 +0200
Subject: nl80211: add an attribute to allow delaying the first scheduled scan
 cycle

The userspace may want to delay the the first scheduled scan or
net-detect cycle.  Add an optional attribute to the scheduled scan
configuration to pass the delay to be (optionally) used by the driver.

Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
[add the attribute to the policy to validate it]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  5 +++++
 include/uapi/linux/nl80211.h | 20 ++++++++++++++------
 net/wireless/nl80211.c       |  5 +++++
 3 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7b44ba0a7632..64e09e1e8099 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1493,6 +1493,10 @@ struct cfg80211_match_set {
  * @rcu_head: RCU callback used to free the struct
  * @owner_nlportid: netlink portid of owner (if this should is a request
  *	owned by a particular socket)
+ * @delay: delay in seconds to use before starting the first scan
+ *	cycle.  The driver may ignore this parameter and start
+ *	immediately (or at any other time), if this feature is not
+ *	supported.
  */
 struct cfg80211_sched_scan_request {
 	struct cfg80211_ssid *ssids;
@@ -1506,6 +1510,7 @@ struct cfg80211_sched_scan_request {
 	struct cfg80211_match_set *match_sets;
 	int n_match_sets;
 	s32 min_rssi_thold;
+	u32 delay;
 
 	u8 mac_addr[ETH_ALEN] __aligned(2);
 	u8 mac_addr_mask[ETH_ALEN] __aligned(2);
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index f68532b015db..1cbc3aae425c 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -324,7 +324,9 @@
  *	if passed, define which channels should be scanned; if not
  *	passed, all channels allowed for the current regulatory domain
  *	are used.  Extra IEs can also be passed from the userspace by
- *	using the %NL80211_ATTR_IE attribute.
+ *	using the %NL80211_ATTR_IE attribute.  The first cycle of the
+ *	scheduled scan can be delayed by %NL80211_ATTR_SCHED_SCAN_DELAY
+ *	is supplied.
  * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan. Returns -ENOENT if
  *	scheduled scan is not running. The caller may assume that as soon
  *	as the call returns, it is safe to start a new scheduled scan again.
@@ -1735,6 +1737,9 @@ enum nl80211_commands {
  *	should be contained in the result as the sum of the respective counters
  *	over all channels.
  *
+ * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before a scheduled scan (or a
+ *	WoWLAN net-detect scan) is started, u32 in seconds.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2100,6 +2105,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_NETNS_FD,
 
+	NL80211_ATTR_SCHED_SCAN_DELAY,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -3743,11 +3750,12 @@ struct nl80211_pattern_support {
  * @NL80211_WOWLAN_TRIG_NET_DETECT: wake up when a configured network
  *	is detected.  This is a nested attribute that contains the
  *	same attributes used with @NL80211_CMD_START_SCHED_SCAN.  It
- *	specifies how the scan is performed (e.g. the interval and the
- *	channels to scan) as well as the scan results that will
- *	trigger a wake (i.e. the matchsets).  This attribute is also
- *	sent in a response to @NL80211_CMD_GET_WIPHY, indicating the
- *	number of match sets supported by the driver (u32).
+ *	specifies how the scan is performed (e.g. the interval, the
+ *	channels to scan and the initial delay) as well as the scan
+ *	results that will trigger a wake (i.e. the matchsets).  This
+ *	attribute is also sent in a response to
+ *	@NL80211_CMD_GET_WIPHY, indicating the number of match sets
+ *	supported by the driver (u32).
  * @NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS: nested attribute
  *	containing an array with information about what triggered the
  *	wake up.  If no elements are present in the array, it means
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c64100ec79e3..4542e8683beb 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -398,6 +398,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN },
 	[NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG },
 	[NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 },
+	[NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 },
 };
 
 /* policy for the key attributes */
@@ -6205,6 +6206,10 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
 		}
 	}
 
+	if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
+		request->delay =
+			nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]);
+
 	request->interval = interval;
 	request->scan_start = jiffies;
 
-- 
cgit v1.2.3-71-gd317


From 2822545f9fe264ec62d4abc69c17ae759eafe4ce Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Wed, 15 Oct 2014 16:48:16 +0200
Subject: KVM: s390: new parameter for SIGP STOP irqs

In order to get rid of the action_flags and to properly migrate pending SIGP
STOP irqs triggered e.g. by SIGP STOP AND STORE STATUS, we need to remember
whether to store the status when stopping.

For this reason, a new parameter (flags) for the SIGP STOP irq is introduced.
These flags further define details of the requested STOP and can be easily
migrated.

Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt |  2 +-
 arch/s390/include/asm/kvm_host.h  |  2 ++
 arch/s390/kvm/interrupt.c         | 18 +++++++++++++++++-
 include/uapi/linux/kvm.h          |  6 ++++++
 4 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 0007fef4ed81..3ca6e0e9a769 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2312,7 +2312,7 @@ struct kvm_s390_interrupt {
 
 type can be one of the following:
 
-KVM_S390_SIGP_STOP (vcpu) - sigp restart
+KVM_S390_SIGP_STOP (vcpu) - sigp stop; optional flags in parm
 KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm
 KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm
 KVM_S390_RESTART (vcpu) - restart
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 9cba74d5d853..5eafe84a7b3d 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -378,6 +378,7 @@ struct kvm_s390_interrupt_info {
 		struct kvm_s390_emerg_info emerg;
 		struct kvm_s390_extcall_info extcall;
 		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_stop_info stop;
 		struct kvm_s390_mchk_info mchk;
 	};
 };
@@ -393,6 +394,7 @@ struct kvm_s390_irq_payload {
 	struct kvm_s390_emerg_info emerg;
 	struct kvm_s390_extcall_info extcall;
 	struct kvm_s390_prefix_info prefix;
+	struct kvm_s390_stop_info stop;
 	struct kvm_s390_mchk_info mchk;
 };
 
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 7fbbcbcea6ac..73bafc3d0f41 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -394,13 +394,20 @@ static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
 
 static int __must_check __deliver_stop(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_stop_info *stop = &li->irq.stop;
+
+	spin_lock(&li->lock);
+	stop->flags = 0;
+	clear_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
 	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
 	vcpu->stat.deliver_stop_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP,
 					 0, 0);
 
 	__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
-	clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs);
 	return 0;
 }
 
@@ -1031,13 +1038,19 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	return 0;
 }
 
+#define KVM_S390_STOP_SUPP_FLAGS 0
 static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_stop_info *stop = &li->irq.stop;
 
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
 
+	if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
+		return -EINVAL;
+
 	li->action_bits |= ACTION_STOP_ON_STOP;
+	stop->flags = irq->u.stop.flags;
 	set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
 	return 0;
 }
@@ -1306,6 +1319,9 @@ int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
 	case KVM_S390_SIGP_SET_PREFIX:
 		irq->u.prefix.address = s390int->parm;
 		break;
+	case KVM_S390_SIGP_STOP:
+		irq->u.stop.flags = s390int->parm;
+		break;
 	case KVM_S390_INT_EXTERNAL_CALL:
 		if (irq->u.extcall.code & 0xffff0000)
 			return -EINVAL;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a37fd1224f36..adc24a3fd23e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -491,6 +491,11 @@ struct kvm_s390_emerg_info {
 	__u16 code;
 };
 
+#define KVM_S390_STOP_FLAG_STORE_STATUS	0x01
+struct kvm_s390_stop_info {
+	__u32 flags;
+};
+
 struct kvm_s390_mchk_info {
 	__u64 cr14;
 	__u64 mcic;
@@ -509,6 +514,7 @@ struct kvm_s390_irq {
 		struct kvm_s390_emerg_info emerg;
 		struct kvm_s390_extcall_info extcall;
 		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_stop_info stop;
 		struct kvm_s390_mchk_info mchk;
 		char reserved[64];
 	} u;
-- 
cgit v1.2.3-71-gd317


From 2444b352c3acf54897b0e2803a7c4e66699f9f43 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 9 Oct 2014 14:10:13 +0200
Subject: KVM: s390: forward most SIGP orders to user space

Most SIGP orders are handled partially in kernel and partially in
user space. In order to:
- Get a correct SIGP SET PREFIX handler that informs user space
- Avoid race conditions between concurrently executed SIGP orders
- Serialize SIGP orders per VCPU

We need to handle all "slow" SIGP orders in user space. The remaining
ones to be handled completely in kernel are:
- SENSE
- SENSE RUNNING
- EXTERNAL CALL
- EMERGENCY SIGNAL
- CONDITIONAL EMERGENCY SIGNAL
According to the PoP, they have to be fast. They can be executed
without conflicting to the actions of other pending/concurrently
executing orders (e.g. STOP vs. START).

This patch introduces a new capability that will - when enabled -
forward all but the mentioned SIGP orders to user space. The
instruction counters in the kernel are still updated.

Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt | 20 ++++++++++++++++
 arch/s390/include/asm/kvm_host.h  |  1 +
 arch/s390/kvm/kvm-s390.c          |  5 ++++
 arch/s390/kvm/sigp.c              | 49 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h          |  1 +
 5 files changed, 76 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 3ca6e0e9a769..df19837e94d4 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3225,3 +3225,23 @@ userspace from doing that.
 If the hcall number specified is not one that has an in-kernel
 implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL
 error.
+
+7.2 KVM_CAP_S390_USER_SIGP
+
+Architectures: s390
+Parameters: none
+
+This capability controls which SIGP orders will be handled completely in user
+space. With this capability enabled, all fast orders will be handled completely
+in the kernel:
+- SENSE
+- SENSE RUNNING
+- EXTERNAL CALL
+- EMERGENCY SIGNAL
+- CONDITIONAL EMERGENCY SIGNAL
+
+All other orders will be handled completely in user space.
+
+Only privileged operation exceptions will be checked for in the kernel (or even
+in the hardware prior to interception). If this capability is not enabled, the
+old way of handling SIGP orders is used (partially in kernel and user space).
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index b6170520380b..a2dcd0e099f7 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -521,6 +521,7 @@ struct kvm_arch{
 	int use_irqchip;
 	int use_cmma;
 	int user_cpu_state_ctrl;
+	int user_sigp;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
 	int ipte_lock_count;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index bfb2b990da9b..3677b8ca647f 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -166,6 +166,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_IRQCHIP:
 	case KVM_CAP_VM_ATTRIBUTES:
 	case KVM_CAP_MP_STATE:
+	case KVM_CAP_S390_USER_SIGP:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
@@ -254,6 +255,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		kvm->arch.use_irqchip = 1;
 		r = 0;
 		break;
+	case KVM_CAP_S390_USER_SIGP:
+		kvm->arch.user_sigp = 1;
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 1524be9120ad..23b1e86b2122 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -371,6 +371,53 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
 	return rc;
 }
 
+static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
+{
+	if (!vcpu->kvm->arch.user_sigp)
+		return 0;
+
+	switch (order_code) {
+	case SIGP_SENSE:
+	case SIGP_EXTERNAL_CALL:
+	case SIGP_EMERGENCY_SIGNAL:
+	case SIGP_COND_EMERGENCY_SIGNAL:
+	case SIGP_SENSE_RUNNING:
+		return 0;
+	/* update counters as we're directly dropping to user space */
+	case SIGP_STOP:
+		vcpu->stat.instruction_sigp_stop++;
+		break;
+	case SIGP_STOP_AND_STORE_STATUS:
+		vcpu->stat.instruction_sigp_stop_store_status++;
+		break;
+	case SIGP_STORE_STATUS_AT_ADDRESS:
+		vcpu->stat.instruction_sigp_store_status++;
+		break;
+	case SIGP_SET_PREFIX:
+		vcpu->stat.instruction_sigp_prefix++;
+		break;
+	case SIGP_START:
+		vcpu->stat.instruction_sigp_start++;
+		break;
+	case SIGP_RESTART:
+		vcpu->stat.instruction_sigp_restart++;
+		break;
+	case SIGP_INITIAL_CPU_RESET:
+		vcpu->stat.instruction_sigp_init_cpu_reset++;
+		break;
+	case SIGP_CPU_RESET:
+		vcpu->stat.instruction_sigp_cpu_reset++;
+		break;
+	default:
+		vcpu->stat.instruction_sigp_unknown++;
+	}
+
+	VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space",
+		   order_code);
+
+	return 1;
+}
+
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 {
 	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
@@ -385,6 +432,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
 	order_code = kvm_s390_get_base_disp_rs(vcpu);
+	if (handle_sigp_order_in_user_space(vcpu, order_code))
+		return -EOPNOTSUPP;
 
 	if (r1 % 2)
 		parameter = vcpu->run->s.regs.gprs[r1];
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index adc24a3fd23e..37f71c3040c3 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -759,6 +759,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_FIXUP_HCALL 103
 #define KVM_CAP_PPC_ENABLE_HCALL 104
 #define KVM_CAP_CHECK_EXTENSION_VM 105
+#define KVM_CAP_S390_USER_SIGP 106
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3-71-gd317


From 193523bf937309d57c6dd7839bcf34d7a029dbee Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 20 Jan 2015 15:15:47 +0100
Subject: vxlan: advertise netns of vxlan dev in fdb msg

Netlink FDB messages are sent in the link netns. The header of these messages
contains the ifindex (ndm_ifindex) of the netdevice, but this ifindex is
unusable in case of x-netns vxlan.
I named the new attribute NDA_NDM_IFINDEX_NETNSID, to avoid confusion with
NDA_IFINDEX.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c            | 5 +++++
 include/uapi/linux/neighbour.h | 1 +
 net/core/net_namespace.c       | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 0346eaa6d236..19d3664ab9dd 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -339,6 +339,11 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 	ndm->ndm_flags = fdb->flags;
 	ndm->ndm_type = RTN_UNICAST;
 
+	if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
+	    nla_put_s32(skb, NDA_NDM_IFINDEX_NETNSID,
+			peernet2id(vxlan->net, dev_net(vxlan->dev))))
+		goto nla_put_failure;
+
 	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
 		goto nla_put_failure;
 
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index f3d77f9f1e0b..38f236853cc0 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -25,6 +25,7 @@ enum {
 	NDA_VNI,
 	NDA_IFINDEX,
 	NDA_MASTER,
+	NDA_NDM_IFINDEX_NETNSID,
 	__NDA_MAX
 };
 
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 9d1a4cac83b6..b7bde551ef76 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -202,6 +202,7 @@ int peernet2id(struct net *net, struct net *peer)
 
 	return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
 }
+EXPORT_SYMBOL(peernet2id);
 
 struct net *get_net_ns_by_id(struct net *net, int id)
 {
-- 
cgit v1.2.3-71-gd317


From c2943f14534bdc4230f4da6dcd4ea03c5d8c8162 Mon Sep 17 00:00:00 2001
From: Harout Hedeshian <harouth@codeaurora.org>
Date: Tue, 20 Jan 2015 10:06:05 -0700
Subject: net: ipv6: Add sysctl entry to disable MTU updates from RA

The kernel forcefully applies MTU values received in router
advertisements provided the new MTU is less than the current. This
behavior is undesirable when the user space is managing the MTU. Instead
a sysctl flag 'accept_ra_mtu' is introduced such that the user space
can control whether or not RA provided MTU updates should be applied. The
default behavior is unchanged; user space must explicitly set this flag
to 0 for RA MTUs to be ignored.

Signed-off-by: Harout Hedeshian <harouth@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  7 +++++++
 include/linux/ipv6.h                   |  1 +
 include/uapi/linux/ipv6.h              |  1 +
 net/ipv6/addrconf.c                    | 10 ++++++++++
 net/ipv6/ndisc.c                       |  2 +-
 5 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 85b022179104..a5e4c813f17f 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1287,6 +1287,13 @@ accept_ra_rtr_pref - BOOLEAN
 	Functional default: enabled if accept_ra is enabled.
 			    disabled if accept_ra is disabled.
 
+accept_ra_mtu - BOOLEAN
+	Apply the MTU value specified in RA option 5 (RFC4861). If
+	disabled, the MTU specified in the RA will be ignored.
+
+	Functional default: enabled if accept_ra is enabled.
+			    disabled if accept_ra is disabled.
+
 accept_redirects - BOOLEAN
 	Accept Redirects.
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index c694e7baa621..2805062c013f 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -52,6 +52,7 @@ struct ipv6_devconf {
 	__s32		force_tllao;
 	__s32           ndisc_notify;
 	__s32		suppress_frag_ndisc;
+	__s32		accept_ra_mtu;
 	void		*sysctl;
 };
 
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 73cb02dc3065..437a6a4b125a 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -169,6 +169,7 @@ enum {
 	DEVCONF_SUPPRESS_FRAG_NDISC,
 	DEVCONF_ACCEPT_RA_FROM_LOCAL,
 	DEVCONF_USE_OPTIMISTIC,
+	DEVCONF_ACCEPT_RA_MTU,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d6b4f5d08014..7dcc065e2160 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -201,6 +201,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.disable_ipv6		= 0,
 	.accept_dad		= 1,
 	.suppress_frag_ndisc	= 1,
+	.accept_ra_mtu		= 1,
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -238,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.disable_ipv6		= 0,
 	.accept_dad		= 1,
 	.suppress_frag_ndisc	= 1,
+	.accept_ra_mtu		= 1,
 };
 
 /* Check if a valid qdisc is available */
@@ -4380,6 +4382,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
 	array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
 	array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
+	array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -5258,6 +5261,13 @@ static struct addrconf_sysctl_table
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
+		{
+			.procname	= "accept_ra_mtu",
+			.data		= &ipv6_devconf.accept_ra_mtu,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
 		{
 			/* sentinel */
 		}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 682866777d53..8a9d7c19e247 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1348,7 +1348,7 @@ skip_routeinfo:
 		}
 	}
 
-	if (ndopts.nd_opts_mtu) {
+	if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) {
 		__be32 n;
 		u32 mtu;
 
-- 
cgit v1.2.3-71-gd317


From 74ed7ab9264c54471c7f057409d352052820d750 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joestringer@nicira.com>
Date: Wed, 21 Jan 2015 16:42:52 -0800
Subject: openvswitch: Add support for unique flow IDs.

Previously, flows were manipulated by userspace specifying a full,
unmasked flow key. This adds significant burden onto flow
serialization/deserialization, particularly when dumping flows.

This patch adds an alternative way to refer to flows using a
variable-length "unique flow identifier" (UFID). At flow setup time,
userspace may specify a UFID for a flow, which is stored with the flow
and inserted into a separate table for lookup, in addition to the
standard flow table. Flows created using a UFID must be fetched or
deleted using the UFID.

All flow dump operations may now be made more terse with OVS_UFID_F_*
flags. For example, the OVS_UFID_F_OMIT_KEY flag allows responses to
omit the flow key from a datapath operation if the flow has a
corresponding UFID. This significantly reduces the time spent assembling
and transacting netlink messages. With all OVS_UFID_F_OMIT_* flags
enabled, the datapath only returns the UFID and statistics for each flow
during flow dump, increasing ovs-vswitchd revalidator performance by 40%
or more.

Signed-off-by: Joe Stringer <joestringer@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/openvswitch.txt |  13 ++
 include/uapi/linux/openvswitch.h         |  20 +++
 net/openvswitch/datapath.c               | 207 +++++++++++++++++++++++--------
 net/openvswitch/flow.h                   |  28 ++++-
 net/openvswitch/flow_netlink.c           |  68 +++++++++-
 net/openvswitch/flow_netlink.h           |   8 +-
 net/openvswitch/flow_table.c             | 187 +++++++++++++++++++++++-----
 net/openvswitch/flow_table.h             |   8 +-
 8 files changed, 448 insertions(+), 91 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt
index 37c20ee2455e..b3b9ac61d29d 100644
--- a/Documentation/networking/openvswitch.txt
+++ b/Documentation/networking/openvswitch.txt
@@ -131,6 +131,19 @@ performs best-effort detection of overlapping wildcarded flows and may reject
 some but not all of them. However, this behavior may change in future versions.
 
 
+Unique flow identifiers
+-----------------------
+
+An alternative to using the original match portion of a key as the handle for
+flow identification is a unique flow identifier, or "UFID". UFIDs are optional
+for both the kernel and user space program.
+
+User space programs that support UFID are expected to provide it during flow
+setup in addition to the flow, then refer to the flow using the UFID for all
+future operations. The kernel is not required to index flows by the original
+flow key if a UFID is specified.
+
+
 Basic rule for evolving flow keys
 ---------------------------------
 
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index cd8d933963c2..7a8785a99243 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -459,6 +459,14 @@ struct ovs_key_nd {
  * a wildcarded match. Omitting attribute is treated as wildcarding all
  * corresponding fields. Optional for all requests. If not present,
  * all flow key bits are exact match bits.
+ * @OVS_FLOW_ATTR_UFID: A value between 1-16 octets specifying a unique
+ * identifier for the flow. Causes the flow to be indexed by this value rather
+ * than the value of the %OVS_FLOW_ATTR_KEY attribute. Optional for all
+ * requests. Present in notifications if the flow was created with this
+ * attribute.
+ * @OVS_FLOW_ATTR_UFID_FLAGS: A 32-bit value of OR'd %OVS_UFID_F_*
+ * flags that provide alternative semantics for flow installation and
+ * retrieval. Optional for all requests.
  *
  * These attributes follow the &struct ovs_header within the Generic Netlink
  * payload for %OVS_FLOW_* commands.
@@ -474,11 +482,23 @@ enum ovs_flow_attr {
 	OVS_FLOW_ATTR_MASK,      /* Sequence of OVS_KEY_ATTR_* attributes. */
 	OVS_FLOW_ATTR_PROBE,     /* Flow operation is a feature probe, error
 				  * logging should be suppressed. */
+	OVS_FLOW_ATTR_UFID,      /* Variable length unique flow identifier. */
+	OVS_FLOW_ATTR_UFID_FLAGS,/* u32 of OVS_UFID_F_*. */
 	__OVS_FLOW_ATTR_MAX
 };
 
 #define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
 
+/**
+ * Omit attributes for notifications.
+ *
+ * If a datapath request contains an %OVS_UFID_F_OMIT_* flag, then the datapath
+ * may omit the corresponding %OVS_FLOW_ATTR_* from the response.
+ */
+#define OVS_UFID_F_OMIT_KEY      (1 << 0)
+#define OVS_UFID_F_OMIT_MASK     (1 << 1)
+#define OVS_UFID_F_OMIT_ACTIONS  (1 << 2)
+
 /**
  * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
  * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 257b97546b33..ae5e77cdc0ca 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -65,6 +65,8 @@ static struct genl_family dp_packet_genl_family;
 static struct genl_family dp_flow_genl_family;
 static struct genl_family dp_datapath_genl_family;
 
+static const struct nla_policy flow_policy[];
+
 static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
 	.name = OVS_FLOW_MCGROUP,
 };
@@ -662,15 +664,48 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
 	}
 }
 
-static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
+static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
+{
+	return ovs_identifier_is_ufid(sfid) &&
+	       !(ufid_flags & OVS_UFID_F_OMIT_KEY);
+}
+
+static bool should_fill_mask(uint32_t ufid_flags)
+{
+	return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
+}
+
+static bool should_fill_actions(uint32_t ufid_flags)
 {
-	return NLMSG_ALIGN(sizeof(struct ovs_header))
-		+ nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
-		+ nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+	return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
+}
+
+static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
+				    const struct sw_flow_id *sfid,
+				    uint32_t ufid_flags)
+{
+	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
+
+	/* OVS_FLOW_ATTR_UFID */
+	if (sfid && ovs_identifier_is_ufid(sfid))
+		len += nla_total_size(sfid->ufid_len);
+
+	/* OVS_FLOW_ATTR_KEY */
+	if (!sfid || should_fill_key(sfid, ufid_flags))
+		len += nla_total_size(ovs_key_attr_size());
+
+	/* OVS_FLOW_ATTR_MASK */
+	if (should_fill_mask(ufid_flags))
+		len += nla_total_size(ovs_key_attr_size());
+
+	/* OVS_FLOW_ATTR_ACTIONS */
+	if (should_fill_actions(ufid_flags))
+		len += nla_total_size(acts->actions_len);
+
+	return len
 		+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
 		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
-		+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
-		+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
+		+ nla_total_size(8); /* OVS_FLOW_ATTR_USED */
 }
 
 /* Called with ovs_mutex or RCU read lock. */
@@ -741,7 +776,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
 /* Called with ovs_mutex or RCU read lock. */
 static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 				  struct sk_buff *skb, u32 portid,
-				  u32 seq, u32 flags, u8 cmd)
+				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
 {
 	const int skb_orig_len = skb->len;
 	struct ovs_header *ovs_header;
@@ -754,21 +789,31 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
 
 	ovs_header->dp_ifindex = dp_ifindex;
 
-	err = ovs_nla_put_unmasked_key(flow, skb);
+	err = ovs_nla_put_identifier(flow, skb);
 	if (err)
 		goto error;
 
-	err = ovs_nla_put_mask(flow, skb);
-	if (err)
-		goto error;
+	if (should_fill_key(&flow->id, ufid_flags)) {
+		err = ovs_nla_put_masked_key(flow, skb);
+		if (err)
+			goto error;
+	}
+
+	if (should_fill_mask(ufid_flags)) {
+		err = ovs_nla_put_mask(flow, skb);
+		if (err)
+			goto error;
+	}
 
 	err = ovs_flow_cmd_fill_stats(flow, skb);
 	if (err)
 		goto error;
 
-	err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
-	if (err)
-		goto error;
+	if (should_fill_actions(ufid_flags)) {
+		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+		if (err)
+			goto error;
+	}
 
 	genlmsg_end(skb, ovs_header);
 	return 0;
@@ -780,15 +825,19 @@ error:
 
 /* May not be called with RCU read lock. */
 static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
+					       const struct sw_flow_id *sfid,
 					       struct genl_info *info,
-					       bool always)
+					       bool always,
+					       uint32_t ufid_flags)
 {
 	struct sk_buff *skb;
+	size_t len;
 
 	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
 		return NULL;
 
-	skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL);
+	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
+	skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
@@ -799,19 +848,19 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act
 static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
 					       int dp_ifindex,
 					       struct genl_info *info, u8 cmd,
-					       bool always)
+					       bool always, u32 ufid_flags)
 {
 	struct sk_buff *skb;
 	int retval;
 
-	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info,
-				      always);
+	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
+				      &flow->id, info, always, ufid_flags);
 	if (IS_ERR_OR_NULL(skb))
 		return skb;
 
 	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
 					info->snd_portid, info->snd_seq, 0,
-					cmd);
+					cmd, ufid_flags);
 	BUG_ON(retval < 0);
 	return skb;
 }
@@ -820,12 +869,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr **a = info->attrs;
 	struct ovs_header *ovs_header = info->userhdr;
-	struct sw_flow *flow, *new_flow;
+	struct sw_flow *flow = NULL, *new_flow;
 	struct sw_flow_mask mask;
 	struct sk_buff *reply;
 	struct datapath *dp;
+	struct sw_flow_key key;
 	struct sw_flow_actions *acts;
 	struct sw_flow_match match;
+	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
 	int error;
 	bool log = !a[OVS_FLOW_ATTR_PROBE];
 
@@ -850,13 +901,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Extract key. */
-	ovs_match_init(&match, &new_flow->unmasked_key, &mask);
+	ovs_match_init(&match, &key, &mask);
 	error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
 				  a[OVS_FLOW_ATTR_MASK], log);
 	if (error)
 		goto err_kfree_flow;
 
-	ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
+	ovs_flow_mask_key(&new_flow->key, &key, &mask);
+
+	/* Extract flow identifier. */
+	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
+				       &key, log);
+	if (error)
+		goto err_kfree_flow;
 
 	/* Validate actions. */
 	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
@@ -866,7 +923,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		goto err_kfree_flow;
 	}
 
-	reply = ovs_flow_cmd_alloc_info(acts, info, false);
+	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
+					ufid_flags);
 	if (IS_ERR(reply)) {
 		error = PTR_ERR(reply);
 		goto err_kfree_acts;
@@ -878,8 +936,12 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		error = -ENODEV;
 		goto err_unlock_ovs;
 	}
+
 	/* Check if this is a duplicate flow */
-	flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
+	if (ovs_identifier_is_ufid(&new_flow->id))
+		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
+	if (!flow)
+		flow = ovs_flow_tbl_lookup(&dp->table, &key);
 	if (likely(!flow)) {
 		rcu_assign_pointer(new_flow->sf_acts, acts);
 
@@ -895,7 +957,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 						       ovs_header->dp_ifindex,
 						       reply, info->snd_portid,
 						       info->snd_seq, 0,
-						       OVS_FLOW_CMD_NEW);
+						       OVS_FLOW_CMD_NEW,
+						       ufid_flags);
 			BUG_ON(error < 0);
 		}
 		ovs_unlock();
@@ -913,10 +976,15 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 			error = -EEXIST;
 			goto err_unlock_ovs;
 		}
-		/* The unmasked key has to be the same for flow updates. */
-		if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) {
-			/* Look for any overlapping flow. */
-			flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+		/* The flow identifier has to be the same for flow updates.
+		 * Look for any overlapping flow.
+		 */
+		if (unlikely(!ovs_flow_cmp(flow, &match))) {
+			if (ovs_identifier_is_key(&flow->id))
+				flow = ovs_flow_tbl_lookup_exact(&dp->table,
+								 &match);
+			else /* UFID matches but key is different */
+				flow = NULL;
 			if (!flow) {
 				error = -ENOENT;
 				goto err_unlock_ovs;
@@ -931,7 +999,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 						       ovs_header->dp_ifindex,
 						       reply, info->snd_portid,
 						       info->snd_seq, 0,
-						       OVS_FLOW_CMD_NEW);
+						       OVS_FLOW_CMD_NEW,
+						       ufid_flags);
 			BUG_ON(error < 0);
 		}
 		ovs_unlock();
@@ -987,8 +1056,11 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct datapath *dp;
 	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
 	struct sw_flow_match match;
+	struct sw_flow_id sfid;
+	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
 	int error;
 	bool log = !a[OVS_FLOW_ATTR_PROBE];
+	bool ufid_present;
 
 	/* Extract key. */
 	error = -EINVAL;
@@ -997,6 +1069,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		goto error;
 	}
 
+	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
 	ovs_match_init(&match, &key, &mask);
 	error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
 				  a[OVS_FLOW_ATTR_MASK], log);
@@ -1013,7 +1086,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		}
 
 		/* Can allocate before locking if have acts. */
-		reply = ovs_flow_cmd_alloc_info(acts, info, false);
+		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
+						ufid_flags);
 		if (IS_ERR(reply)) {
 			error = PTR_ERR(reply);
 			goto err_kfree_acts;
@@ -1027,7 +1101,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		goto err_unlock_ovs;
 	}
 	/* Check that the flow exists. */
-	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+	if (ufid_present)
+		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
+	else
+		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
 	if (unlikely(!flow)) {
 		error = -ENOENT;
 		goto err_unlock_ovs;
@@ -1043,13 +1120,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
 						       ovs_header->dp_ifindex,
 						       reply, info->snd_portid,
 						       info->snd_seq, 0,
-						       OVS_FLOW_CMD_NEW);
+						       OVS_FLOW_CMD_NEW,
+						       ufid_flags);
 			BUG_ON(error < 0);
 		}
 	} else {
 		/* Could not alloc without acts before locking. */
 		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
-						info, OVS_FLOW_CMD_NEW, false);
+						info, OVS_FLOW_CMD_NEW, false,
+						ufid_flags);
+
 		if (unlikely(IS_ERR(reply))) {
 			error = PTR_ERR(reply);
 			goto err_unlock_ovs;
@@ -1086,17 +1166,22 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow *flow;
 	struct datapath *dp;
 	struct sw_flow_match match;
-	int err;
+	struct sw_flow_id ufid;
+	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
+	int err = 0;
 	bool log = !a[OVS_FLOW_ATTR_PROBE];
+	bool ufid_present;
 
-	if (!a[OVS_FLOW_ATTR_KEY]) {
+	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
+	if (a[OVS_FLOW_ATTR_KEY]) {
+		ovs_match_init(&match, &key, NULL);
+		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
+					log);
+	} else if (!ufid_present) {
 		OVS_NLERR(log,
 			  "Flow get message rejected, Key attribute missing.");
-		return -EINVAL;
+		err = -EINVAL;
 	}
-
-	ovs_match_init(&match, &key, NULL);
-	err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log);
 	if (err)
 		return err;
 
@@ -1107,14 +1192,17 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 		goto unlock;
 	}
 
-	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+	if (ufid_present)
+		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
+	else
+		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
 	if (!flow) {
 		err = -ENOENT;
 		goto unlock;
 	}
 
 	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
-					OVS_FLOW_CMD_NEW, true);
+					OVS_FLOW_CMD_NEW, true, ufid_flags);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
 		goto unlock;
@@ -1133,13 +1221,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	struct ovs_header *ovs_header = info->userhdr;
 	struct sw_flow_key key;
 	struct sk_buff *reply;
-	struct sw_flow *flow;
+	struct sw_flow *flow = NULL;
 	struct datapath *dp;
 	struct sw_flow_match match;
+	struct sw_flow_id ufid;
+	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
 	int err;
 	bool log = !a[OVS_FLOW_ATTR_PROBE];
+	bool ufid_present;
 
-	if (likely(a[OVS_FLOW_ATTR_KEY])) {
+	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
+	if (a[OVS_FLOW_ATTR_KEY]) {
 		ovs_match_init(&match, &key, NULL);
 		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
 					log);
@@ -1154,12 +1246,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		goto unlock;
 	}
 
-	if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
+	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
 		err = ovs_flow_tbl_flush(&dp->table);
 		goto unlock;
 	}
 
-	flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
+	if (ufid_present)
+		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
+	else
+		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
 	if (unlikely(!flow)) {
 		err = -ENOENT;
 		goto unlock;
@@ -1169,14 +1264,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	ovs_unlock();
 
 	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
-					info, false);
+					&flow->id, info, false, ufid_flags);
 	if (likely(reply)) {
 		if (likely(!IS_ERR(reply))) {
 			rcu_read_lock();	/*To keep RCU checker happy. */
 			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
 						     reply, info->snd_portid,
 						     info->snd_seq, 0,
-						     OVS_FLOW_CMD_DEL);
+						     OVS_FLOW_CMD_DEL,
+						     ufid_flags);
 			rcu_read_unlock();
 			BUG_ON(err < 0);
 
@@ -1195,9 +1291,18 @@ unlock:
 
 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
 	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
 	struct table_instance *ti;
 	struct datapath *dp;
+	u32 ufid_flags;
+	int err;
+
+	err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
+			    OVS_FLOW_ATTR_MAX, flow_policy);
+	if (err)
+		return err;
+	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
 
 	rcu_read_lock();
 	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1220,7 +1325,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					   OVS_FLOW_CMD_NEW) < 0)
+					   OVS_FLOW_CMD_NEW, ufid_flags) < 0)
 			break;
 
 		cb->args[0] = bucket;
@@ -1236,6 +1341,8 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
 	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
 	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
 	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
+	[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
+	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops dp_flow_genl_ops[] = {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index d3d0a406562d..a076e445ccc2 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -197,6 +197,16 @@ struct sw_flow_match {
 	struct sw_flow_mask *mask;
 };
 
+#define MAX_UFID_LENGTH 16 /* 128 bits */
+
+struct sw_flow_id {
+	u32 ufid_len;
+	union {
+		u32 ufid[MAX_UFID_LENGTH / 4];
+		struct sw_flow_key *unmasked_key;
+	};
+};
+
 struct sw_flow_actions {
 	struct rcu_head rcu;
 	u32 actions_len;
@@ -213,13 +223,15 @@ struct flow_stats {
 
 struct sw_flow {
 	struct rcu_head rcu;
-	struct hlist_node hash_node[2];
-	u32 hash;
+	struct {
+		struct hlist_node node[2];
+		u32 hash;
+	} flow_table, ufid_table;
 	int stats_last_writer;		/* NUMA-node id of the last writer on
 					 * 'stats[0]'.
 					 */
 	struct sw_flow_key key;
-	struct sw_flow_key unmasked_key;
+	struct sw_flow_id id;
 	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
 	struct flow_stats __rcu *stats[]; /* One for each NUMA node.  First one
@@ -243,6 +255,16 @@ struct arp_eth_header {
 	unsigned char       ar_tip[4];		/* target IP address        */
 } __packed;
 
+static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
+{
+	return sfid->ufid_len;
+}
+
+static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid)
+{
+	return !ovs_identifier_is_ufid(sfid);
+}
+
 void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,
 			   const struct sk_buff *);
 void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 33751f81bfcb..8b9a612b39d1 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1180,6 +1180,59 @@ free_newmask:
 	return err;
 }
 
+static size_t get_ufid_len(const struct nlattr *attr, bool log)
+{
+	size_t len;
+
+	if (!attr)
+		return 0;
+
+	len = nla_len(attr);
+	if (len < 1 || len > MAX_UFID_LENGTH) {
+		OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)",
+			  nla_len(attr), MAX_UFID_LENGTH);
+		return 0;
+	}
+
+	return len;
+}
+
+/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID,
+ * or false otherwise.
+ */
+bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
+		      bool log)
+{
+	sfid->ufid_len = get_ufid_len(attr, log);
+	if (sfid->ufid_len)
+		memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
+
+	return sfid->ufid_len;
+}
+
+int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
+			   const struct sw_flow_key *key, bool log)
+{
+	struct sw_flow_key *new_key;
+
+	if (ovs_nla_get_ufid(sfid, ufid, log))
+		return 0;
+
+	/* If UFID was not provided, use unmasked key. */
+	new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
+	if (!new_key)
+		return -ENOMEM;
+	memcpy(new_key, key, sizeof(*key));
+	sfid->unmasked_key = new_key;
+
+	return 0;
+}
+
+u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
+{
+	return attr ? nla_get_u32(attr) : 0;
+}
+
 /**
  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
  * @key: Receives extracted in_port, priority, tun_key and skb_mark.
@@ -1450,9 +1503,20 @@ int ovs_nla_put_key(const struct sw_flow_key *swkey,
 }
 
 /* Called with ovs_mutex or RCU read lock. */
-int ovs_nla_put_unmasked_key(const struct sw_flow *flow, struct sk_buff *skb)
+int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
+{
+	if (ovs_identifier_is_ufid(&flow->id))
+		return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
+			       flow->id.ufid);
+
+	return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
+			       OVS_FLOW_ATTR_KEY, false, skb);
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
 {
-	return ovs_nla_put_key(&flow->unmasked_key, &flow->unmasked_key,
+	return ovs_nla_put_key(&flow->mask->key, &flow->key,
 				OVS_FLOW_ATTR_KEY, false, skb);
 }
 
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 9ed09e66876a..5c3d75bff310 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -48,7 +48,8 @@ int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
 int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *,
 			      bool log);
 
-int ovs_nla_put_unmasked_key(const struct sw_flow *flow, struct sk_buff *skb);
+int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
+int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
 int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
 
 int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
@@ -56,6 +57,11 @@ int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
 int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
 				  const struct ovs_tunnel_info *);
 
+bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
+int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
+			   const struct sw_flow_key *key, bool log);
+u32 ovs_nla_get_ufid_flags(const struct nlattr *attr);
+
 int ovs_nla_copy_actions(const struct nlattr *attr,
 			 const struct sw_flow_key *key,
 			 struct sw_flow_actions **sfa, bool log);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 9a3f41f26da8..5e57628e6584 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -139,6 +139,8 @@ static void flow_free(struct sw_flow *flow)
 {
 	int node;
 
+	if (ovs_identifier_is_key(&flow->id))
+		kfree(flow->id.unmasked_key);
 	kfree((struct sw_flow_actions __force *)flow->sf_acts);
 	for_each_node(node)
 		if (flow->stats[node])
@@ -200,18 +202,28 @@ static struct table_instance *table_instance_alloc(int new_size)
 
 int ovs_flow_tbl_init(struct flow_table *table)
 {
-	struct table_instance *ti;
+	struct table_instance *ti, *ufid_ti;
 
 	ti = table_instance_alloc(TBL_MIN_BUCKETS);
 
 	if (!ti)
 		return -ENOMEM;
 
+	ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+	if (!ufid_ti)
+		goto free_ti;
+
 	rcu_assign_pointer(table->ti, ti);
+	rcu_assign_pointer(table->ufid_ti, ufid_ti);
 	INIT_LIST_HEAD(&table->mask_list);
 	table->last_rehash = jiffies;
 	table->count = 0;
+	table->ufid_count = 0;
 	return 0;
+
+free_ti:
+	__table_instance_destroy(ti);
+	return -ENOMEM;
 }
 
 static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
@@ -221,13 +233,16 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
 	__table_instance_destroy(ti);
 }
 
-static void table_instance_destroy(struct table_instance *ti, bool deferred)
+static void table_instance_destroy(struct table_instance *ti,
+				   struct table_instance *ufid_ti,
+				   bool deferred)
 {
 	int i;
 
 	if (!ti)
 		return;
 
+	BUG_ON(!ufid_ti);
 	if (ti->keep_flows)
 		goto skip_flows;
 
@@ -236,18 +251,24 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred)
 		struct hlist_head *head = flex_array_get(ti->buckets, i);
 		struct hlist_node *n;
 		int ver = ti->node_ver;
+		int ufid_ver = ufid_ti->node_ver;
 
-		hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
-			hlist_del_rcu(&flow->hash_node[ver]);
+		hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
+			hlist_del_rcu(&flow->flow_table.node[ver]);
+			if (ovs_identifier_is_ufid(&flow->id))
+				hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
 			ovs_flow_free(flow, deferred);
 		}
 	}
 
 skip_flows:
-	if (deferred)
+	if (deferred) {
 		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
-	else
+		call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb);
+	} else {
 		__table_instance_destroy(ti);
+		__table_instance_destroy(ufid_ti);
+	}
 }
 
 /* No need for locking this function is called from RCU callback or
@@ -256,8 +277,9 @@ skip_flows:
 void ovs_flow_tbl_destroy(struct flow_table *table)
 {
 	struct table_instance *ti = rcu_dereference_raw(table->ti);
+	struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
 
-	table_instance_destroy(ti, false);
+	table_instance_destroy(ti, ufid_ti, false);
 }
 
 struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -272,7 +294,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
 	while (*bucket < ti->n_buckets) {
 		i = 0;
 		head = flex_array_get(ti->buckets, *bucket);
-		hlist_for_each_entry_rcu(flow, head, hash_node[ver]) {
+		hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) {
 			if (i < *last) {
 				i++;
 				continue;
@@ -294,16 +316,26 @@ static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash)
 				(hash & (ti->n_buckets - 1)));
 }
 
-static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow)
+static void table_instance_insert(struct table_instance *ti,
+				  struct sw_flow *flow)
 {
 	struct hlist_head *head;
 
-	head = find_bucket(ti, flow->hash);
-	hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head);
+	head = find_bucket(ti, flow->flow_table.hash);
+	hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head);
+}
+
+static void ufid_table_instance_insert(struct table_instance *ti,
+				       struct sw_flow *flow)
+{
+	struct hlist_head *head;
+
+	head = find_bucket(ti, flow->ufid_table.hash);
+	hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);
 }
 
 static void flow_table_copy_flows(struct table_instance *old,
-				  struct table_instance *new)
+				  struct table_instance *new, bool ufid)
 {
 	int old_ver;
 	int i;
@@ -318,15 +350,21 @@ static void flow_table_copy_flows(struct table_instance *old,
 
 		head = flex_array_get(old->buckets, i);
 
-		hlist_for_each_entry(flow, head, hash_node[old_ver])
-			table_instance_insert(new, flow);
+		if (ufid)
+			hlist_for_each_entry(flow, head,
+					     ufid_table.node[old_ver])
+				ufid_table_instance_insert(new, flow);
+		else
+			hlist_for_each_entry(flow, head,
+					     flow_table.node[old_ver])
+				table_instance_insert(new, flow);
 	}
 
 	old->keep_flows = true;
 }
 
 static struct table_instance *table_instance_rehash(struct table_instance *ti,
-					    int n_buckets)
+						    int n_buckets, bool ufid)
 {
 	struct table_instance *new_ti;
 
@@ -334,27 +372,38 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti,
 	if (!new_ti)
 		return NULL;
 
-	flow_table_copy_flows(ti, new_ti);
+	flow_table_copy_flows(ti, new_ti, ufid);
 
 	return new_ti;
 }
 
 int ovs_flow_tbl_flush(struct flow_table *flow_table)
 {
-	struct table_instance *old_ti;
-	struct table_instance *new_ti;
+	struct table_instance *old_ti, *new_ti;
+	struct table_instance *old_ufid_ti, *new_ufid_ti;
 
-	old_ti = ovsl_dereference(flow_table->ti);
 	new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
 	if (!new_ti)
 		return -ENOMEM;
+	new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
+	if (!new_ufid_ti)
+		goto err_free_ti;
+
+	old_ti = ovsl_dereference(flow_table->ti);
+	old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
 
 	rcu_assign_pointer(flow_table->ti, new_ti);
+	rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);
 	flow_table->last_rehash = jiffies;
 	flow_table->count = 0;
+	flow_table->ufid_count = 0;
 
-	table_instance_destroy(old_ti, true);
+	table_instance_destroy(old_ti, old_ufid_ti, true);
 	return 0;
+
+err_free_ti:
+	__table_instance_destroy(new_ti);
+	return -ENOMEM;
 }
 
 static u32 flow_hash(const struct sw_flow_key *key,
@@ -402,14 +451,15 @@ static bool flow_cmp_masked_key(const struct sw_flow *flow,
 	return cmp_key(&flow->key, key, range->start, range->end);
 }
 
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
-			       const struct sw_flow_match *match)
+static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+				      const struct sw_flow_match *match)
 {
 	struct sw_flow_key *key = match->key;
 	int key_start = flow_key_start(key);
 	int key_end = match->range.end;
 
-	return cmp_key(&flow->unmasked_key, key, key_start, key_end);
+	BUG_ON(ovs_identifier_is_ufid(&flow->id));
+	return cmp_key(flow->id.unmasked_key, key, key_start, key_end);
 }
 
 static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
@@ -424,8 +474,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
 	ovs_flow_mask_key(&masked_key, unmasked, mask);
 	hash = flow_hash(&masked_key, &mask->range);
 	head = find_bucket(ti, hash);
-	hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
-		if (flow->mask == mask && flow->hash == hash &&
+	hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
+		if (flow->mask == mask && flow->flow_table.hash == hash &&
 		    flow_cmp_masked_key(flow, &masked_key, &mask->range))
 			return flow;
 	}
@@ -468,7 +518,48 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
 	/* Always called under ovs-mutex. */
 	list_for_each_entry(mask, &tbl->mask_list, list) {
 		flow = masked_flow_lookup(ti, match->key, mask);
-		if (flow && ovs_flow_cmp_unmasked_key(flow, match))  /* Found */
+		if (flow && ovs_identifier_is_key(&flow->id) &&
+		    ovs_flow_cmp_unmasked_key(flow, match))
+			return flow;
+	}
+	return NULL;
+}
+
+static u32 ufid_hash(const struct sw_flow_id *sfid)
+{
+	return jhash(sfid->ufid, sfid->ufid_len, 0);
+}
+
+static bool ovs_flow_cmp_ufid(const struct sw_flow *flow,
+			      const struct sw_flow_id *sfid)
+{
+	if (flow->id.ufid_len != sfid->ufid_len)
+		return false;
+
+	return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len);
+}
+
+bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match)
+{
+	if (ovs_identifier_is_ufid(&flow->id))
+		return flow_cmp_masked_key(flow, match->key, &match->range);
+
+	return ovs_flow_cmp_unmasked_key(flow, match);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
+					 const struct sw_flow_id *ufid)
+{
+	struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);
+	struct sw_flow *flow;
+	struct hlist_head *head;
+	u32 hash;
+
+	hash = ufid_hash(ufid);
+	head = find_bucket(ti, hash);
+	hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) {
+		if (flow->ufid_table.hash == hash &&
+		    ovs_flow_cmp_ufid(flow, ufid))
 			return flow;
 	}
 	return NULL;
@@ -485,9 +576,10 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
 	return num;
 }
 
-static struct table_instance *table_instance_expand(struct table_instance *ti)
+static struct table_instance *table_instance_expand(struct table_instance *ti,
+						    bool ufid)
 {
-	return table_instance_rehash(ti, ti->n_buckets * 2);
+	return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
 }
 
 /* Remove 'mask' from the mask list, if it is not needed any more. */
@@ -512,10 +604,15 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
 void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
 {
 	struct table_instance *ti = ovsl_dereference(table->ti);
+	struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
 
 	BUG_ON(table->count == 0);
-	hlist_del_rcu(&flow->hash_node[ti->node_ver]);
+	hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
 	table->count--;
+	if (ovs_identifier_is_ufid(&flow->id)) {
+		hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+		table->ufid_count--;
+	}
 
 	/* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
 	 * accessible as long as the RCU read lock is held.
@@ -589,24 +686,46 @@ static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)
 	struct table_instance *new_ti = NULL;
 	struct table_instance *ti;
 
-	flow->hash = flow_hash(&flow->key, &flow->mask->range);
+	flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);
 	ti = ovsl_dereference(table->ti);
 	table_instance_insert(ti, flow);
 	table->count++;
 
 	/* Expand table, if necessary, to make room. */
 	if (table->count > ti->n_buckets)
-		new_ti = table_instance_expand(ti);
+		new_ti = table_instance_expand(ti, false);
 	else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))
-		new_ti = table_instance_rehash(ti, ti->n_buckets);
+		new_ti = table_instance_rehash(ti, ti->n_buckets, false);
 
 	if (new_ti) {
 		rcu_assign_pointer(table->ti, new_ti);
-		table_instance_destroy(ti, true);
+		call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
 		table->last_rehash = jiffies;
 	}
 }
 
+/* Must be called with OVS mutex held. */
+static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)
+{
+	struct table_instance *ti;
+
+	flow->ufid_table.hash = ufid_hash(&flow->id);
+	ti = ovsl_dereference(table->ufid_ti);
+	ufid_table_instance_insert(ti, flow);
+	table->ufid_count++;
+
+	/* Expand table, if necessary, to make room. */
+	if (table->ufid_count > ti->n_buckets) {
+		struct table_instance *new_ti;
+
+		new_ti = table_instance_expand(ti, true);
+		if (new_ti) {
+			rcu_assign_pointer(table->ufid_ti, new_ti);
+			call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
+		}
+	}
+}
+
 /* Must be called with OVS mutex held. */
 int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
 			const struct sw_flow_mask *mask)
@@ -617,6 +736,8 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
 	if (err)
 		return err;
 	flow_key_insert(table, flow);
+	if (ovs_identifier_is_ufid(&flow->id))
+		flow_ufid_insert(table, flow);
 
 	return 0;
 }
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 309fa6415689..616eda10d955 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -47,9 +47,11 @@ struct table_instance {
 
 struct flow_table {
 	struct table_instance __rcu *ti;
+	struct table_instance __rcu *ufid_ti;
 	struct list_head mask_list;
 	unsigned long last_rehash;
 	unsigned int count;
+	unsigned int ufid_count;
 };
 
 extern struct kmem_cache *flow_stats_cache;
@@ -78,8 +80,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
 				    const struct sw_flow_key *);
 struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
 					  const struct sw_flow_match *match);
-bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
-			       const struct sw_flow_match *match);
+struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *,
+					 const struct sw_flow_id *);
+
+bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *);
 
 void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
 		       const struct sw_flow_mask *mask);
-- 
cgit v1.2.3-71-gd317


From 5929b8a38ce02b7769e8b79605a8690f442a2933 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Mon, 26 Jan 2015 18:05:22 +0100
Subject: PCI: Add defines for PCIe Max_Read_Request_Size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are a few drivers using magic numbers when operating with PCIe
capabilities and PCI_EXP_DEVCTL_READRQ.  Define known values to allow
cleaning their code a bit.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/uapi/linux/pci_regs.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 4a1d0cc38ff2..efe3443572ba 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -451,6 +451,10 @@
 #define  PCI_EXP_DEVCTL_AUX_PME	0x0400	/* Auxiliary Power PM Enable */
 #define  PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800  /* Enable No Snoop */
 #define  PCI_EXP_DEVCTL_READRQ	0x7000	/* Max_Read_Request_Size */
+#define  PCI_EXP_DEVCTL_READRQ_128B  0x0000 /* 128 Bytes */
+#define  PCI_EXP_DEVCTL_READRQ_256B  0x1000 /* 256 Bytes */
+#define  PCI_EXP_DEVCTL_READRQ_512B  0x2000 /* 512 Bytes */
+#define  PCI_EXP_DEVCTL_READRQ_1024B 0x3000 /* 1024 Bytes */
 #define  PCI_EXP_DEVCTL_BCR_FLR 0x8000  /* Bridge Configuration Retry / FLR */
 #define PCI_EXP_DEVSTA		10	/* Device Status */
 #define  PCI_EXP_DEVSTA_CED	0x0001	/* Correctable Error Detected */
-- 
cgit v1.2.3-71-gd317


From 5e33f6fdf735cda1d4580fe6f1878da05718fe73 Mon Sep 17 00:00:00 2001
From: Robert Baldyga <r.baldyga@samsung.com>
Date: Fri, 23 Jan 2015 13:41:01 +0100
Subject: usb: gadget: ffs: add eventfd notification about ffs events

Add eventfd which notifies userspace about ep0 events and AIO completion
events. It simplifies using of FunctionFS with event loop, because now
we need to poll on single file (instead of polling on ep0 and eventfd's
supplied to AIO layer).

FunctionFS eventfd is not triggered if another eventfd is supplied to
AIO layer (in AIO request). It can be useful, for example, when we want
to handle AIO transations for chosen endpoint in separate thread.

Signed-off-by: Robert Baldyga <r.baldyga@samsung.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/function/f_fs.c  | 29 ++++++++++++++++++++++++++++-
 drivers/usb/gadget/function/u_fs.h  |  1 +
 include/uapi/linux/usb/functionfs.h |  1 +
 3 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 14e44d7083a1..af98b096af2f 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -31,6 +31,7 @@
 #include <linux/aio.h>
 #include <linux/mmu_context.h>
 #include <linux/poll.h>
+#include <linux/eventfd.h>
 
 #include "u_fs.h"
 #include "u_f.h"
@@ -153,6 +154,8 @@ struct ffs_io_data {
 
 	struct usb_ep *ep;
 	struct usb_request *req;
+
+	struct ffs_data *ffs;
 };
 
 struct ffs_desc_helper {
@@ -674,6 +677,9 @@ static void ffs_user_copy_worker(struct work_struct *work)
 
 	aio_complete(io_data->kiocb, ret, ret);
 
+	if (io_data->ffs->ffs_eventfd && !io_data->kiocb->ki_eventfd)
+		eventfd_signal(io_data->ffs->ffs_eventfd, 1);
+
 	usb_ep_free_request(io_data->ep, io_data->req);
 
 	io_data->kiocb->private = NULL;
@@ -827,6 +833,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
 			io_data->buf = data;
 			io_data->ep = ep->ep;
 			io_data->req = req;
+			io_data->ffs = epfile->ffs;
 
 			req->context  = io_data;
 			req->complete = ffs_epfile_async_io_complete;
@@ -1510,6 +1517,9 @@ static void ffs_data_clear(struct ffs_data *ffs)
 	if (ffs->epfiles)
 		ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count);
 
+	if (ffs->ffs_eventfd)
+		eventfd_ctx_put(ffs->ffs_eventfd);
+
 	kfree(ffs->raw_descs_data);
 	kfree(ffs->raw_strings);
 	kfree(ffs->stringtabs);
@@ -2169,7 +2179,8 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 			      FUNCTIONFS_HAS_HS_DESC |
 			      FUNCTIONFS_HAS_SS_DESC |
 			      FUNCTIONFS_HAS_MS_OS_DESC |
-			      FUNCTIONFS_VIRTUAL_ADDR)) {
+			      FUNCTIONFS_VIRTUAL_ADDR |
+			      FUNCTIONFS_EVENTFD)) {
 			ret = -ENOSYS;
 			goto error;
 		}
@@ -2180,6 +2191,20 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 		goto error;
 	}
 
+	if (flags & FUNCTIONFS_EVENTFD) {
+		if (len < 4)
+			goto error;
+		ffs->ffs_eventfd =
+			eventfd_ctx_fdget((int)get_unaligned_le32(data));
+		if (IS_ERR(ffs->ffs_eventfd)) {
+			ret = PTR_ERR(ffs->ffs_eventfd);
+			ffs->ffs_eventfd = NULL;
+			goto error;
+		}
+		data += 4;
+		len  -= 4;
+	}
+
 	/* Read fs_count, hs_count and ss_count (if present) */
 	for (i = 0; i < 3; ++i) {
 		if (!(flags & (1 << i))) {
@@ -2454,6 +2479,8 @@ static void __ffs_event_add(struct ffs_data *ffs,
 	pr_vdebug("adding event %d\n", type);
 	ffs->ev.types[ffs->ev.count++] = type;
 	wake_up_locked(&ffs->ev.waitq);
+	if (ffs->ffs_eventfd)
+		eventfd_signal(ffs->ffs_eventfd, 1);
 }
 
 static void ffs_event_add(struct ffs_data *ffs,
diff --git a/drivers/usb/gadget/function/u_fs.h b/drivers/usb/gadget/function/u_fs.h
index 284a1f00a980..60139854e0b1 100644
--- a/drivers/usb/gadget/function/u_fs.h
+++ b/drivers/usb/gadget/function/u_fs.h
@@ -272,6 +272,7 @@ struct ffs_data {
 		kgid_t				gid;
 	}				file_perms;
 
+	struct eventfd_ctx *ffs_eventfd;
 	bool no_disconnect;
 	struct work_struct reset_work;
 
diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 295ba299e7bd..108dd7997014 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -20,6 +20,7 @@ enum functionfs_flags {
 	FUNCTIONFS_HAS_SS_DESC = 4,
 	FUNCTIONFS_HAS_MS_OS_DESC = 8,
 	FUNCTIONFS_VIRTUAL_ADDR = 16,
+	FUNCTIONFS_EVENTFD = 32,
 };
 
 /* Descriptor of an non-audio endpoint */
-- 
cgit v1.2.3-71-gd317


From 4967082b469320eeba54ffbca632af1962858fb7 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 26 Jan 2015 14:10:53 +0100
Subject: vxlan: advertise link netns in fdb messages

Previous commit is based on a wrong assumption, fdb messages are always sent
into the netns where the interface stands (see vxlan_fdb_notify()).

These fdb messages doesn't embed the rtnl attribute IFLA_LINK_NETNSID, thus we
need to add it (useful to interpret NDA_IFINDEX or NDA_DST for example).

Note also that vxlan_nlmsg_size() was not updated.

Fixes: 193523bf9373 ("vxlan: advertise netns of vxlan dev in fdb msg")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c            | 5 +++--
 include/uapi/linux/neighbour.h | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 87736e65cd15..31bac2a21ce3 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -341,8 +341,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 	ndm->ndm_type = RTN_UNICAST;
 
 	if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
-	    nla_put_s32(skb, NDA_NDM_IFINDEX_NETNSID,
-			peernet2id(vxlan->net, dev_net(vxlan->dev))))
+	    nla_put_s32(skb, NDA_LINK_NETNSID,
+			peernet2id(dev_net(vxlan->dev), vxlan->net)))
 		goto nla_put_failure;
 
 	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
@@ -385,6 +385,7 @@ static inline size_t vxlan_nlmsg_size(void)
 		+ nla_total_size(sizeof(__be16)) /* NDA_PORT */
 		+ nla_total_size(sizeof(__be32)) /* NDA_VNI */
 		+ nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
+		+ nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */
 		+ nla_total_size(sizeof(struct nda_cacheinfo));
 }
 
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 38f236853cc0..3873a35509aa 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -25,7 +25,7 @@ enum {
 	NDA_VNI,
 	NDA_IFINDEX,
 	NDA_MASTER,
-	NDA_NDM_IFINDEX_NETNSID,
+	NDA_LINK_NETNSID,
 	__NDA_MAX
 };
 
-- 
cgit v1.2.3-71-gd317


From 500d4160abe9a2e88b12e319c13ae3ebd1e18108 Mon Sep 17 00:00:00 2001
From: Ping Cheng <pinglinux@gmail.com>
Date: Tue, 27 Jan 2015 13:30:03 -0800
Subject: HID: wacom: add support for Cintiq 27QHD and 27QHD touch

These devices have accelerometers. To report accelerometer coordinates, a new
property, INPUT_PROP_ACCELEROMETER, is added.

Signed-off-by: Ping Cheng <pingc@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_sys.c    |  3 ++
 drivers/hid/wacom_wac.c    | 87 ++++++++++++++++++++++++++++++++++++++--------
 drivers/hid/wacom_wac.h    |  7 +++-
 include/uapi/linux/input.h |  1 +
 4 files changed, 83 insertions(+), 15 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index f01ab3a0c5f5..f0568a7e6de9 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -403,6 +403,9 @@ static int wacom_query_tablet_data(struct hid_device *hdev,
 		else if (features->type == WACOM_24HDT || features->type == CINTIQ_HYBRID) {
 			return wacom_set_device_mode(hdev, 18, 3, 2);
 		}
+		else if (features->type == WACOM_27QHDT) {
+			return wacom_set_device_mode(hdev, 131, 3, 2);
+		}
 	} else if (features->device_type == BTN_TOOL_PEN) {
 		if (features->type <= BAMBOO_PT && features->type != WIRELESS) {
 			return wacom_set_device_mode(hdev, 2, 2, 2);
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index d239d82a1f90..1a6507999a65 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -656,6 +656,8 @@ static int wacom_intuos_irq(struct wacom_wac *wacom)
 	    data[0] != WACOM_REPORT_INTUOSREAD &&
 	    data[0] != WACOM_REPORT_INTUOSWRITE &&
 	    data[0] != WACOM_REPORT_INTUOSPAD &&
+	    data[0] != WACOM_REPORT_CINTIQ &&
+	    data[0] != WACOM_REPORT_CINTIQPAD &&
 	    data[0] != WACOM_REPORT_INTUOS5PAD) {
 		dev_dbg(input->dev.parent,
 			"%s: received unknown report #%d\n", __func__, data[0]);
@@ -667,7 +669,8 @@ static int wacom_intuos_irq(struct wacom_wac *wacom)
 		idx = data[1] & 0x01;
 
 	/* pad packets. Works as a second tool and is always in prox */
-	if (data[0] == WACOM_REPORT_INTUOSPAD || data[0] == WACOM_REPORT_INTUOS5PAD) {
+	if (data[0] == WACOM_REPORT_INTUOSPAD || data[0] == WACOM_REPORT_INTUOS5PAD ||
+	    data[0] == WACOM_REPORT_CINTIQPAD) {
 		input = wacom->pad_input;
 		if (features->type >= INTUOS4S && features->type <= INTUOS4L) {
 			input_report_key(input, BTN_0, (data[2] & 0x01));
@@ -767,6 +770,14 @@ static int wacom_intuos_irq(struct wacom_wac *wacom)
 			} else {
 				input_report_abs(input, ABS_MISC, 0);
 			}
+		} else if (features->type == WACOM_27QHD) {
+			input_report_key(input, KEY_PROG1, data[2] & 0x01);
+			input_report_key(input, KEY_PROG2, data[2] & 0x02);
+			input_report_key(input, KEY_PROG3, data[2] & 0x04);
+
+			input_report_abs(input, ABS_X, be16_to_cpup((__be16 *)&data[4]));
+			input_report_abs(input, ABS_Y, be16_to_cpup((__be16 *)&data[6]));
+			input_report_abs(input, ABS_Z, be16_to_cpup((__be16 *)&data[8]));
 		} else if (features->type == CINTIQ_HYBRID) {
 			/*
 			 * Do not send hardware buttons under Android. They
@@ -1027,8 +1038,20 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom)
 	struct input_dev *input = wacom->input;
 	unsigned char *data = wacom->data;
 	int i;
-	int current_num_contacts = data[61];
+	int current_num_contacts = 0;
 	int contacts_to_send = 0;
+	int num_contacts_left = 4; /* maximum contacts per packet */
+	int byte_per_packet = WACOM_BYTES_PER_24HDT_PACKET;
+	int y_offset = 2;
+
+	if (wacom->features.type == WACOM_27QHDT) {
+		current_num_contacts = data[63];
+		num_contacts_left = 10;
+		byte_per_packet = WACOM_BYTES_PER_QHDTHID_PACKET;
+		y_offset = 0;
+	} else {
+		current_num_contacts = data[61];
+	}
 
 	/*
 	 * First packet resets the counter since only the first
@@ -1037,11 +1060,10 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom)
 	if (current_num_contacts)
 		wacom->num_contacts_left = current_num_contacts;
 
-	/* There are at most 4 contacts per packet */
-	contacts_to_send = min(4, wacom->num_contacts_left);
+	contacts_to_send = min(num_contacts_left, wacom->num_contacts_left);
 
 	for (i = 0; i < contacts_to_send; i++) {
-		int offset = (WACOM_BYTES_PER_24HDT_PACKET * i) + 1;
+		int offset = (byte_per_packet * i) + 1;
 		bool touch = (data[offset] & 0x1) && !wacom->shared->stylus_in_proximity;
 		int slot = input_mt_get_slot_by_key(input, data[offset + 1]);
 
@@ -1052,18 +1074,23 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom)
 
 		if (touch) {
 			int t_x = get_unaligned_le16(&data[offset + 2]);
-			int c_x = get_unaligned_le16(&data[offset + 4]);
-			int t_y = get_unaligned_le16(&data[offset + 6]);
-			int c_y = get_unaligned_le16(&data[offset + 8]);
-			int w = get_unaligned_le16(&data[offset + 10]);
-			int h = get_unaligned_le16(&data[offset + 12]);
+			int t_y = get_unaligned_le16(&data[offset + 4 + y_offset]);
 
 			input_report_abs(input, ABS_MT_POSITION_X, t_x);
 			input_report_abs(input, ABS_MT_POSITION_Y, t_y);
-			input_report_abs(input, ABS_MT_TOUCH_MAJOR, min(w,h));
-			input_report_abs(input, ABS_MT_WIDTH_MAJOR, min(w, h) + int_dist(t_x, t_y, c_x, c_y));
-			input_report_abs(input, ABS_MT_WIDTH_MINOR, min(w, h));
-			input_report_abs(input, ABS_MT_ORIENTATION, w > h);
+
+			if (wacom->features.type != WACOM_27QHDT) {
+				int c_x = get_unaligned_le16(&data[offset + 4]);
+				int c_y = get_unaligned_le16(&data[offset + 8]);
+				int w = get_unaligned_le16(&data[offset + 10]);
+				int h = get_unaligned_le16(&data[offset + 12]);
+
+				input_report_abs(input, ABS_MT_TOUCH_MAJOR, min(w,h));
+				input_report_abs(input, ABS_MT_WIDTH_MAJOR,
+						 min(w, h) + int_dist(t_x, t_y, c_x, c_y));
+				input_report_abs(input, ABS_MT_WIDTH_MINOR, min(w, h));
+				input_report_abs(input, ABS_MT_ORIENTATION, w > h);
+			}
 		}
 	}
 	input_mt_report_pointer_emulation(input, true);
@@ -1894,6 +1921,7 @@ void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len)
 	case WACOM_21UX2:
 	case WACOM_22HD:
 	case WACOM_24HD:
+	case WACOM_27QHD:
 	case DTK:
 	case CINTIQ_HYBRID:
 		sync = wacom_intuos_irq(wacom_wac);
@@ -1904,6 +1932,7 @@ void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len)
 		break;
 
 	case WACOM_24HDT:
+	case WACOM_27QHDT:
 		sync = wacom_24hdt_irq(wacom_wac);
 		break;
 
@@ -2115,6 +2144,7 @@ int wacom_setup_pentouch_input_capabilities(struct input_dev *input_dev,
 		__set_bit(INPUT_PROP_POINTER, input_dev->propbit);
 		break;
 
+	case WACOM_27QHD:
 	case WACOM_24HD:
 	case DTK:
 	case WACOM_22HD:
@@ -2183,6 +2213,7 @@ int wacom_setup_pentouch_input_capabilities(struct input_dev *input_dev,
 		}
 		/* fall through */
 
+	case WACOM_27QHDT:
 	case MTSCREEN:
 	case MTTPC:
 	case MTTPC_B:
@@ -2330,6 +2361,19 @@ int wacom_setup_pad_input_capabilities(struct input_dev *input_dev,
 		input_set_abs_params(input_dev, ABS_THROTTLE, 0, 71, 0, 0);
 		break;
 
+	case WACOM_27QHD:
+		__set_bit(KEY_PROG1, input_dev->keybit);
+		__set_bit(KEY_PROG2, input_dev->keybit);
+		__set_bit(KEY_PROG3, input_dev->keybit);
+		input_set_abs_params(input_dev, ABS_X, -2048, 2048, 0, 0);
+		input_abs_set_res(input_dev, ABS_X, 1024); /* points/g */
+		input_set_abs_params(input_dev, ABS_Y, -2048, 2048, 0, 0);
+		input_abs_set_res(input_dev, ABS_Y, 1024);
+		input_set_abs_params(input_dev, ABS_Z, -2048, 2048, 0, 0);
+		input_abs_set_res(input_dev, ABS_Z, 1024);
+		__set_bit(INPUT_PROP_ACCELEROMETER, input_dev->propbit);
+		break;
+
 	case DTK:
 		for (i = 0; i < 6; i++)
 			__set_bit(BTN_0 + i, input_dev->keybit);
@@ -2680,6 +2724,18 @@ static const struct wacom_features wacom_features_0xF6 =
 	{ "Wacom Cintiq 24HD touch", .type = WACOM_24HDT, /* Touch */
 	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0xf8, .touch_max = 10,
 	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
+static const struct wacom_features wacom_features_0x32A =
+	{ "Wacom Cintiq 27QHD", 119740, 67520, 2047,
+	  63, WACOM_27QHD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
+	  WACOM_27QHD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES };
+static const struct wacom_features wacom_features_0x32B =
+	{ "Wacom Cintiq 27QHD touch", 119740, 67520, 2047, 63,
+	  WACOM_27QHD, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES,
+	  WACOM_CINTIQ_OFFSET, WACOM_CINTIQ_OFFSET,
+	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x32C };
+static const struct wacom_features wacom_features_0x32C =
+	{ "Wacom Cintiq 27QHD touch", .type = WACOM_27QHDT,
+	  .oVid = USB_VENDOR_ID_WACOM, .oPid = 0x32B, .touch_max = 10 };
 static const struct wacom_features wacom_features_0x3F =
 	{ "Wacom Cintiq 21UX", 87200, 65600, 1023, 63,
 	  CINTIQ, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES };
@@ -3046,6 +3102,9 @@ const struct hid_device_id wacom_ids[] = {
 	{ USB_DEVICE_WACOM(0x315) },
 	{ USB_DEVICE_WACOM(0x317) },
 	{ USB_DEVICE_WACOM(0x323) },
+	{ USB_DEVICE_WACOM(0x32A) },
+	{ USB_DEVICE_WACOM(0x32B) },
+	{ USB_DEVICE_WACOM(0x32C) },
 	{ USB_DEVICE_WACOM(0x32F) },
 	{ USB_DEVICE_WACOM(0x4001) },
 	{ USB_DEVICE_WACOM(0x4004) },
diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h
index 72e78cc18933..021ee1c1980a 100644
--- a/drivers/hid/wacom_wac.h
+++ b/drivers/hid/wacom_wac.h
@@ -13,7 +13,7 @@
 #include <linux/hid.h>
 
 /* maximum packet length for USB devices */
-#define WACOM_PKGLEN_MAX	68
+#define WACOM_PKGLEN_MAX	192
 
 #define WACOM_NAME_MAX		64
 
@@ -37,6 +37,7 @@
 /* wacom data size per MT contact */
 #define WACOM_BYTES_PER_MT_PACKET	11
 #define WACOM_BYTES_PER_24HDT_PACKET	14
+#define WACOM_BYTES_PER_QHDTHID_PACKET	 6
 
 /* device IDs */
 #define STYLUS_DEVICE_ID	0x02
@@ -58,6 +59,8 @@
 #define WACOM_REPORT_TPCMT		13
 #define WACOM_REPORT_TPCMT2		3
 #define WACOM_REPORT_TPCHID		15
+#define WACOM_REPORT_CINTIQ		16
+#define WACOM_REPORT_CINTIQPAD		17
 #define WACOM_REPORT_TPCST		16
 #define WACOM_REPORT_DTUS		17
 #define WACOM_REPORT_TPC1FGE		18
@@ -109,6 +112,7 @@ enum {
 	WACOM_22HD,
 	DTK,
 	WACOM_24HD,
+	WACOM_27QHD,
 	CINTIQ_HYBRID,
 	CINTIQ,
 	WACOM_BEE,
@@ -117,6 +121,7 @@ enum {
 	WIRELESS,
 	BAMBOO_PT,
 	WACOM_24HDT,
+	WACOM_27QHDT,
 	TABLETPC,   /* add new TPC below */
 	TABLETPCE,
 	TABLETPC2FG,
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index a1d7e931ab72..b0a813079852 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -166,6 +166,7 @@ struct input_keymap_entry {
 #define INPUT_PROP_SEMI_MT		0x03	/* touch rectangle only */
 #define INPUT_PROP_TOPBUTTONPAD		0x04	/* softbuttons at top of pad */
 #define INPUT_PROP_POINTING_STICK	0x05	/* is a pointing stick */
+#define INPUT_PROP_ACCELEROMETER	0x06	/* has accelerometer */
 
 #define INPUT_PROP_MAX			0x1f
 #define INPUT_PROP_CNT			(INPUT_PROP_MAX + 1)
-- 
cgit v1.2.3-71-gd317


From a12c6b861fab9229f002dc2eddc0aee988170e2b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 29 Jan 2015 16:52:16 +0100
Subject: nl80211: don't document per-wiphy interface dump

Such a feature doesn't exist and isn't really needed since you
probably won't have enough interfaces to make it worthwhile, so
just remove that from the documentation.

Reported-by: booto [on IRC]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 1cbc3aae425c..68b294e83944 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -180,8 +180,8 @@
  *	%NL80211_ATTR_WIPHY and %NL80211_ATTR_WIPHY_NAME.
  *
  * @NL80211_CMD_GET_INTERFACE: Request an interface's configuration;
- *	either a dump request on a %NL80211_ATTR_WIPHY or a specific get
- *	on an %NL80211_ATTR_IFINDEX is supported.
+ *	either a dump request for all interfaces or a specific get with a
+ *	single %NL80211_ATTR_IFINDEX is supported.
  * @NL80211_CMD_SET_INTERFACE: Set type of a virtual interface, requires
  *	%NL80211_ATTR_IFINDEX and %NL80211_ATTR_IFTYPE.
  * @NL80211_CMD_NEW_INTERFACE: Newly created virtual interface or response
-- 
cgit v1.2.3-71-gd317


From 3f2cee73b650921b2e214bf487b2061a1c266504 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 29 Jan 2015 11:29:13 -0500
Subject: USB: usbfs: allow URBs to be reaped after disconnection

The usbfs API has a peculiar hole: Users are not allowed to reap their
URBs after the device has been disconnected.  There doesn't seem to be
any good reason for this; it is an ad-hoc inconsistency.

The patch allows users to issue the USBDEVFS_REAPURB and
USBDEVFS_REAPURBNDELAY ioctls (together with their 32-bit counterparts
on 64-bit systems) even after the device is gone.  If no URBs are
pending for a disconnected device then the ioctls will return -ENODEV
rather than -EAGAIN, because obviously no new URBs will ever be able
to complete.

The patch also adds a new capability flag for
USBDEVFS_GET_CAPABILITIES to indicate that the reap-after-disconnect
feature is supported.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Tested-by: Chris Dickens <christopher.a.dickens@gmail.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <greg@kroah.com>
---
 drivers/usb/core/devio.c          | 63 +++++++++++++++++++++++----------------
 include/uapi/linux/usbdevice_fs.h |  3 +-
 2 files changed, 39 insertions(+), 27 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 0b59731c3021..66abdbcfbfa5 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1689,7 +1689,7 @@ static struct async *reap_as(struct usb_dev_state *ps)
 	for (;;) {
 		__set_current_state(TASK_INTERRUPTIBLE);
 		as = async_getcompleted(ps);
-		if (as)
+		if (as || !connected(ps))
 			break;
 		if (signal_pending(current))
 			break;
@@ -1712,7 +1712,7 @@ static int proc_reapurb(struct usb_dev_state *ps, void __user *arg)
 	}
 	if (signal_pending(current))
 		return -EINTR;
-	return -EIO;
+	return -ENODEV;
 }
 
 static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg)
@@ -1721,10 +1721,11 @@ static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg)
 	struct async *as;
 
 	as = async_getcompleted(ps);
-	retval = -EAGAIN;
 	if (as) {
 		retval = processcompl(as, (void __user * __user *)arg);
 		free_async(as);
+	} else {
+		retval = (connected(ps) ? -EAGAIN : -ENODEV);
 	}
 	return retval;
 }
@@ -1854,7 +1855,7 @@ static int proc_reapurb_compat(struct usb_dev_state *ps, void __user *arg)
 	}
 	if (signal_pending(current))
 		return -EINTR;
-	return -EIO;
+	return -ENODEV;
 }
 
 static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *arg)
@@ -1862,11 +1863,12 @@ static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *ar
 	int retval;
 	struct async *as;
 
-	retval = -EAGAIN;
 	as = async_getcompleted(ps);
 	if (as) {
 		retval = processcompl_compat(as, (void __user * __user *)arg);
 		free_async(as);
+	} else {
+		retval = (connected(ps) ? -EAGAIN : -ENODEV);
 	}
 	return retval;
 }
@@ -2038,7 +2040,8 @@ static int proc_get_capabilities(struct usb_dev_state *ps, void __user *arg)
 {
 	__u32 caps;
 
-	caps = USBDEVFS_CAP_ZERO_PACKET | USBDEVFS_CAP_NO_PACKET_SIZE_LIM;
+	caps = USBDEVFS_CAP_ZERO_PACKET | USBDEVFS_CAP_NO_PACKET_SIZE_LIM |
+			USBDEVFS_CAP_REAP_AFTER_DISCONNECT;
 	if (!ps->dev->bus->no_stop_on_short)
 		caps |= USBDEVFS_CAP_BULK_CONTINUATION;
 	if (ps->dev->bus->sg_tablesize)
@@ -2138,6 +2141,32 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 		return -EPERM;
 
 	usb_lock_device(dev);
+
+	/* Reap operations are allowed even after disconnection */
+	switch (cmd) {
+	case USBDEVFS_REAPURB:
+		snoop(&dev->dev, "%s: REAPURB\n", __func__);
+		ret = proc_reapurb(ps, p);
+		goto done;
+
+	case USBDEVFS_REAPURBNDELAY:
+		snoop(&dev->dev, "%s: REAPURBNDELAY\n", __func__);
+		ret = proc_reapurbnonblock(ps, p);
+		goto done;
+
+#ifdef CONFIG_COMPAT
+	case USBDEVFS_REAPURB32:
+		snoop(&dev->dev, "%s: REAPURB32\n", __func__);
+		ret = proc_reapurb_compat(ps, p);
+		goto done;
+
+	case USBDEVFS_REAPURBNDELAY32:
+		snoop(&dev->dev, "%s: REAPURBNDELAY32\n", __func__);
+		ret = proc_reapurbnonblock_compat(ps, p);
+		goto done;
+#endif
+	}
+
 	if (!connected(ps)) {
 		usb_unlock_device(dev);
 		return -ENODEV;
@@ -2231,16 +2260,6 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 			inode->i_mtime = CURRENT_TIME;
 		break;
 
-	case USBDEVFS_REAPURB32:
-		snoop(&dev->dev, "%s: REAPURB32\n", __func__);
-		ret = proc_reapurb_compat(ps, p);
-		break;
-
-	case USBDEVFS_REAPURBNDELAY32:
-		snoop(&dev->dev, "%s: REAPURBNDELAY32\n", __func__);
-		ret = proc_reapurbnonblock_compat(ps, p);
-		break;
-
 	case USBDEVFS_IOCTL32:
 		snoop(&dev->dev, "%s: IOCTL32\n", __func__);
 		ret = proc_ioctl_compat(ps, ptr_to_compat(p));
@@ -2252,16 +2271,6 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 		ret = proc_unlinkurb(ps, p);
 		break;
 
-	case USBDEVFS_REAPURB:
-		snoop(&dev->dev, "%s: REAPURB\n", __func__);
-		ret = proc_reapurb(ps, p);
-		break;
-
-	case USBDEVFS_REAPURBNDELAY:
-		snoop(&dev->dev, "%s: REAPURBNDELAY\n", __func__);
-		ret = proc_reapurbnonblock(ps, p);
-		break;
-
 	case USBDEVFS_DISCSIGNAL:
 		snoop(&dev->dev, "%s: DISCSIGNAL\n", __func__);
 		ret = proc_disconnectsignal(ps, p);
@@ -2304,6 +2313,8 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 		ret = proc_free_streams(ps, p);
 		break;
 	}
+
+ done:
 	usb_unlock_device(dev);
 	if (ret >= 0)
 		inode->i_atime = CURRENT_TIME;
diff --git a/include/uapi/linux/usbdevice_fs.h b/include/uapi/linux/usbdevice_fs.h
index abe5f4bd4d82..019ba1e0799a 100644
--- a/include/uapi/linux/usbdevice_fs.h
+++ b/include/uapi/linux/usbdevice_fs.h
@@ -128,11 +128,12 @@ struct usbdevfs_hub_portinfo {
 	char port [127];	/* e.g. port 3 connects to device 27 */
 };
 
-/* Device capability flags */
+/* System and bus capability flags */
 #define USBDEVFS_CAP_ZERO_PACKET		0x01
 #define USBDEVFS_CAP_BULK_CONTINUATION		0x02
 #define USBDEVFS_CAP_NO_PACKET_SIZE_LIM		0x04
 #define USBDEVFS_CAP_BULK_SCATTER_GATHER	0x08
+#define USBDEVFS_CAP_REAP_AFTER_DISCONNECT	0x10
 
 /* USBDEVFS_DISCONNECT_CLAIM flags & struct */
 
-- 
cgit v1.2.3-71-gd317


From 08b717c2ae8b7e23e1d018dad601fdf12bde3a96 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Fri, 23 Jan 2015 12:52:33 -0300
Subject: [media] adv7180: Add fast switch support

In fast switch mode the adv7180 (and similar) can lock onto a new signal
faster when switching between different inputs. As a downside though it is
no longer able to auto-detect the incoming format.

The fast switch mode is exposed as a boolean v4l control that allows
userspace applications to either enable or disable fast switch mode.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Federico Vaga <federico.vaga@gmail.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 drivers/media/i2c/adv7180.c        | 29 +++++++++++++++++++++++++++++
 include/uapi/linux/v4l2-controls.h |  4 ++++
 2 files changed, 33 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/i2c/adv7180.c b/drivers/media/i2c/adv7180.c
index 3c1c866d1e3c..b75878c27c2a 100644
--- a/drivers/media/i2c/adv7180.c
+++ b/drivers/media/i2c/adv7180.c
@@ -127,6 +127,9 @@
 #define ADV7180_REG_VPP_SLAVE_ADDR	0xFD
 #define ADV7180_REG_CSI_SLAVE_ADDR	0xFE
 
+#define ADV7180_REG_FLCONTROL 0x40e0
+#define ADV7180_FLCONTROL_FL_ENABLE 0x1
+
 #define ADV7180_CSI_REG_PWRDN	0x00
 #define ADV7180_CSI_PWRDN	0x80
 
@@ -164,6 +167,8 @@
 #define ADV7180_DEFAULT_CSI_I2C_ADDR 0x44
 #define ADV7180_DEFAULT_VPP_I2C_ADDR 0x42
 
+#define V4L2_CID_ADV_FAST_SWITCH	(V4L2_CID_USER_ADV7180_BASE + 0x00)
+
 struct adv7180_state;
 
 #define ADV7180_FLAG_RESET_POWERED	BIT(0)
@@ -508,6 +513,18 @@ static int adv7180_s_ctrl(struct v4l2_ctrl *ctrl)
 			break;
 		ret = adv7180_write(state, ADV7180_REG_SD_SAT_CR, val);
 		break;
+	case V4L2_CID_ADV_FAST_SWITCH:
+		if (ctrl->val) {
+			/* ADI required write */
+			adv7180_write(state, 0x80d9, 0x44);
+			adv7180_write(state, ADV7180_REG_FLCONTROL,
+				ADV7180_FLCONTROL_FL_ENABLE);
+		} else {
+			/* ADI required write */
+			adv7180_write(state, 0x80d9, 0xc4);
+			adv7180_write(state, ADV7180_REG_FLCONTROL, 0x00);
+		}
+		break;
 	default:
 		ret = -EINVAL;
 	}
@@ -520,6 +537,16 @@ static const struct v4l2_ctrl_ops adv7180_ctrl_ops = {
 	.s_ctrl = adv7180_s_ctrl,
 };
 
+static const struct v4l2_ctrl_config adv7180_ctrl_fast_switch = {
+	.ops = &adv7180_ctrl_ops,
+	.id = V4L2_CID_ADV_FAST_SWITCH,
+	.name = "Fast Switching",
+	.type = V4L2_CTRL_TYPE_BOOLEAN,
+	.min = 0,
+	.max = 1,
+	.step = 1,
+};
+
 static int adv7180_init_controls(struct adv7180_state *state)
 {
 	v4l2_ctrl_handler_init(&state->ctrl_hdl, 4);
@@ -536,6 +563,8 @@ static int adv7180_init_controls(struct adv7180_state *state)
 	v4l2_ctrl_new_std(&state->ctrl_hdl, &adv7180_ctrl_ops,
 			  V4L2_CID_HUE, ADV7180_HUE_MIN,
 			  ADV7180_HUE_MAX, 1, ADV7180_HUE_DEF);
+	v4l2_ctrl_new_custom(&state->ctrl_hdl, &adv7180_ctrl_fast_switch, NULL);
+
 	state->sd.ctrl_handler = &state->ctrl_hdl;
 	if (state->ctrl_hdl.error) {
 		int err = state->ctrl_hdl.error;
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 661f119a51b8..9f6e108ff4a0 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -170,6 +170,10 @@ enum v4l2_colorfx {
  * We reserve 16 controls for this driver. */
 #define V4L2_CID_USER_SAA7134_BASE		(V4L2_CID_USER_BASE + 0x1060)
 
+/* The base for the adv7180 driver controls.
+ * We reserve 16 controls for this driver. */
+#define V4L2_CID_USER_ADV7180_BASE		(V4L2_CID_USER_BASE + 0x1070)
+
 /* MPEG-class control IDs */
 /* The MPEG controls are applicable to all codec controls
  * and the 'MPEG' part of the define is historical */
-- 
cgit v1.2.3-71-gd317


From 9cf514ccfacb301f3b1b4509a8ce25dffad55880 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 5 May 2014 13:11:59 +0200
Subject: nfsd: implement pNFS operations

Add support for the GETDEVICEINFO, LAYOUTGET, LAYOUTCOMMIT and
LAYOUTRETURN NFSv4.1 operations, as well as backing code to manage
outstanding layouts and devices.

Layout management is very straight forward, with a nfs4_layout_stateid
structure that extends nfs4_stid to manage layout stateids as the
top-level structure.  It is linked into the nfs4_file and nfs4_client
structures like the other stateids, and contains a linked list of
layouts that hang of the stateid.  The actual layout operations are
implemented in layout drivers that are not part of this commit, but
will be added later.

The worst part of this commit is the management of the pNFS device IDs,
which suffers from a specification that is not sanely implementable due
to the fact that the device-IDs are global and not bound to an export,
and have a small enough size so that we can't store the fsid portion of
a file handle, and must never be reused.  As we still do need perform all
export authentication and validation checks on a device ID passed to
GETDEVICEINFO we are caught between a rock and a hard place.  To work
around this issue we add a new hash that maps from a 64-bit integer to a
fsid so that we can look up the export to authenticate against it,
a 32-bit integer as a generation that we can bump when changing the device,
and a currently unused 32-bit integer that could be used in the future
to handle more than a single device per export.  Entries in this hash
table are never deleted as we can't reuse the ids anyway, and would have
a severe lifetime problem anyway as Linux export structures are temporary
structures that can go away under load.

Parts of the XDR data, structures and marshaling/unmarshaling code, as
well as many concepts are derived from the old pNFS server implementation
from Andy Adamson, Benny Halevy, Dean Hildebrand, Marc Eshel, Fred Isaman,
Mike Sager, Ricardo Labiaga and many others.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/nfsd/Kconfig                  |  10 +
 fs/nfsd/Makefile                 |   1 +
 fs/nfsd/export.c                 |   8 +
 fs/nfsd/export.h                 |   2 +
 fs/nfsd/nfs4layouts.c            | 487 +++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfs4proc.c               | 302 ++++++++++++++++++++++++
 fs/nfsd/nfs4state.c              |  16 +-
 fs/nfsd/nfs4xdr.c                | 312 +++++++++++++++++++++++++
 fs/nfsd/nfsctl.c                 |   9 +-
 fs/nfsd/nfsd.h                   |  16 +-
 fs/nfsd/pnfs.h                   |  80 +++++++
 fs/nfsd/state.h                  |  21 ++
 fs/nfsd/xdr4.h                   |  59 +++++
 include/linux/nfs4.h             |   1 +
 include/uapi/linux/nfsd/debug.h  |   1 +
 include/uapi/linux/nfsd/export.h |   4 +-
 16 files changed, 1324 insertions(+), 5 deletions(-)
 create mode 100644 fs/nfsd/nfs4layouts.c
 create mode 100644 fs/nfsd/pnfs.h

(limited to 'include/uapi/linux')

diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 73395156bdb4..683bf718aead 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -82,6 +82,16 @@ config NFSD_V4
 
 	  If unsure, say N.
 
+config NFSD_PNFS
+	bool "NFSv4.1 server support for Parallel NFS (pNFS)"
+	depends on NFSD_V4
+	help
+	  This option enables support for the parallel NFS features of the
+	  minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS
+	  server.
+
+	  If unsure, say N.
+
 config NFSD_V4_SECURITY_LABEL
 	bool "Provide Security Label support for NFSv4 server"
 	depends on NFSD_V4 && SECURITY
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index af32ef06b4fe..5806270a8567 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -12,3 +12,4 @@ nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
 			   nfs4acl.o nfs4callback.o nfs4recover.o
+nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 30a739d896ff..c3e3b6e55ae2 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -20,6 +20,7 @@
 #include "nfsd.h"
 #include "nfsfh.h"
 #include "netns.h"
+#include "pnfs.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
 
@@ -545,6 +546,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 
 	exp.ex_client = dom;
 	exp.cd = cd;
+	exp.ex_devid_map = NULL;
 
 	/* expiry */
 	err = -EINVAL;
@@ -621,6 +623,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 		if (!gid_valid(exp.ex_anon_gid))
 			goto out4;
 		err = 0;
+
+		nfsd4_setup_layout_type(&exp);
 	}
 
 	expp = svc_export_lookup(&exp);
@@ -703,6 +707,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
 	new->ex_fslocs.locations = NULL;
 	new->ex_fslocs.locations_count = 0;
 	new->ex_fslocs.migrated = 0;
+	new->ex_layout_type = 0;
 	new->ex_uuid = NULL;
 	new->cd = item->cd;
 }
@@ -717,6 +722,8 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
 	new->ex_anon_uid = item->ex_anon_uid;
 	new->ex_anon_gid = item->ex_anon_gid;
 	new->ex_fsid = item->ex_fsid;
+	new->ex_devid_map = item->ex_devid_map;
+	item->ex_devid_map = NULL;
 	new->ex_uuid = item->ex_uuid;
 	item->ex_uuid = NULL;
 	new->ex_fslocs.locations = item->ex_fslocs.locations;
@@ -725,6 +732,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
 	item->ex_fslocs.locations_count = 0;
 	new->ex_fslocs.migrated = item->ex_fslocs.migrated;
 	item->ex_fslocs.migrated = 0;
+	new->ex_layout_type = item->ex_layout_type;
 	new->ex_nflavors = item->ex_nflavors;
 	for (i = 0; i < MAX_SECINFO_LIST; i++) {
 		new->ex_flavors[i] = item->ex_flavors[i];
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index 04dc8c167b0c..1f52bfcc436f 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -56,6 +56,8 @@ struct svc_export {
 	struct nfsd4_fs_locations ex_fslocs;
 	uint32_t		ex_nflavors;
 	struct exp_flavor_info	ex_flavors[MAX_SECINFO_LIST];
+	enum pnfs_layouttype	ex_layout_type;
+	struct nfsd4_deviceid_map *ex_devid_map;
 	struct cache_detail	*cd;
 };
 
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
new file mode 100644
index 000000000000..8273270418b1
--- /dev/null
+++ b/fs/nfsd/nfs4layouts.c
@@ -0,0 +1,487 @@
+/*
+ * Copyright (c) 2014 Christoph Hellwig.
+ */
+#include <linux/jhash.h>
+#include <linux/sched.h>
+
+#include "pnfs.h"
+#include "netns.h"
+
+#define NFSDDBG_FACILITY                NFSDDBG_PNFS
+
+struct nfs4_layout {
+	struct list_head		lo_perstate;
+	struct nfs4_layout_stateid	*lo_state;
+	struct nfsd4_layout_seg		lo_seg;
+};
+
+static struct kmem_cache *nfs4_layout_cache;
+static struct kmem_cache *nfs4_layout_stateid_cache;
+
+const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] =  {
+};
+
+/* pNFS device ID to export fsid mapping */
+#define DEVID_HASH_BITS	8
+#define DEVID_HASH_SIZE	(1 << DEVID_HASH_BITS)
+#define DEVID_HASH_MASK	(DEVID_HASH_SIZE - 1)
+static u64 nfsd_devid_seq = 1;
+static struct list_head nfsd_devid_hash[DEVID_HASH_SIZE];
+static DEFINE_SPINLOCK(nfsd_devid_lock);
+
+static inline u32 devid_hashfn(u64 idx)
+{
+	return jhash_2words(idx, idx >> 32, 0) & DEVID_HASH_MASK;
+}
+
+static void
+nfsd4_alloc_devid_map(const struct svc_fh *fhp)
+{
+	const struct knfsd_fh *fh = &fhp->fh_handle;
+	size_t fsid_len = key_len(fh->fh_fsid_type);
+	struct nfsd4_deviceid_map *map, *old;
+	int i;
+
+	map = kzalloc(sizeof(*map) + fsid_len, GFP_KERNEL);
+	if (!map)
+		return;
+
+	map->fsid_type = fh->fh_fsid_type;
+	memcpy(&map->fsid, fh->fh_fsid, fsid_len);
+
+	spin_lock(&nfsd_devid_lock);
+	if (fhp->fh_export->ex_devid_map)
+		goto out_unlock;
+
+	for (i = 0; i < DEVID_HASH_SIZE; i++) {
+		list_for_each_entry(old, &nfsd_devid_hash[i], hash) {
+			if (old->fsid_type != fh->fh_fsid_type)
+				continue;
+			if (memcmp(old->fsid, fh->fh_fsid,
+					key_len(old->fsid_type)))
+				continue;
+
+			fhp->fh_export->ex_devid_map = old;
+			goto out_unlock;
+		}
+	}
+
+	map->idx = nfsd_devid_seq++;
+	list_add_tail_rcu(&map->hash, &nfsd_devid_hash[devid_hashfn(map->idx)]);
+	fhp->fh_export->ex_devid_map = map;
+	map = NULL;
+
+out_unlock:
+	spin_unlock(&nfsd_devid_lock);
+	kfree(map);
+}
+
+struct nfsd4_deviceid_map *
+nfsd4_find_devid_map(int idx)
+{
+	struct nfsd4_deviceid_map *map, *ret = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(map, &nfsd_devid_hash[devid_hashfn(idx)], hash)
+		if (map->idx == idx)
+			ret = map;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+int
+nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
+		u32 device_generation)
+{
+	if (!fhp->fh_export->ex_devid_map) {
+		nfsd4_alloc_devid_map(fhp);
+		if (!fhp->fh_export->ex_devid_map)
+			return -ENOMEM;
+	}
+
+	id->fsid_idx = fhp->fh_export->ex_devid_map->idx;
+	id->generation = device_generation;
+	id->pad = 0;
+	return 0;
+}
+
+void nfsd4_setup_layout_type(struct svc_export *exp)
+{
+	if (exp->ex_flags & NFSEXP_NOPNFS)
+		return;
+}
+
+static void
+nfsd4_free_layout_stateid(struct nfs4_stid *stid)
+{
+	struct nfs4_layout_stateid *ls = layoutstateid(stid);
+	struct nfs4_client *clp = ls->ls_stid.sc_client;
+	struct nfs4_file *fp = ls->ls_stid.sc_file;
+
+	spin_lock(&clp->cl_lock);
+	list_del_init(&ls->ls_perclnt);
+	spin_unlock(&clp->cl_lock);
+
+	spin_lock(&fp->fi_lock);
+	list_del_init(&ls->ls_perfile);
+	spin_unlock(&fp->fi_lock);
+
+	kmem_cache_free(nfs4_layout_stateid_cache, ls);
+}
+
+static struct nfs4_layout_stateid *
+nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
+		struct nfs4_stid *parent, u32 layout_type)
+{
+	struct nfs4_client *clp = cstate->clp;
+	struct nfs4_file *fp = parent->sc_file;
+	struct nfs4_layout_stateid *ls;
+	struct nfs4_stid *stp;
+
+	stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache);
+	if (!stp)
+		return NULL;
+	stp->sc_free = nfsd4_free_layout_stateid;
+	get_nfs4_file(fp);
+	stp->sc_file = fp;
+
+	ls = layoutstateid(stp);
+	INIT_LIST_HEAD(&ls->ls_perclnt);
+	INIT_LIST_HEAD(&ls->ls_perfile);
+	spin_lock_init(&ls->ls_lock);
+	INIT_LIST_HEAD(&ls->ls_layouts);
+	ls->ls_layout_type = layout_type;
+
+	spin_lock(&clp->cl_lock);
+	stp->sc_type = NFS4_LAYOUT_STID;
+	list_add(&ls->ls_perclnt, &clp->cl_lo_states);
+	spin_unlock(&clp->cl_lock);
+
+	spin_lock(&fp->fi_lock);
+	list_add(&ls->ls_perfile, &fp->fi_lo_states);
+	spin_unlock(&fp->fi_lock);
+
+	return ls;
+}
+
+__be32
+nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate, stateid_t *stateid,
+		bool create, u32 layout_type, struct nfs4_layout_stateid **lsp)
+{
+	struct nfs4_layout_stateid *ls;
+	struct nfs4_stid *stid;
+	unsigned char typemask = NFS4_LAYOUT_STID;
+	__be32 status;
+
+	if (create)
+		typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
+
+	status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
+			net_generic(SVC_NET(rqstp), nfsd_net_id));
+	if (status)
+		goto out;
+
+	if (!fh_match(&cstate->current_fh.fh_handle,
+		      &stid->sc_file->fi_fhandle)) {
+		status = nfserr_bad_stateid;
+		goto out_put_stid;
+	}
+
+	if (stid->sc_type != NFS4_LAYOUT_STID) {
+		ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type);
+		nfs4_put_stid(stid);
+
+		status = nfserr_jukebox;
+		if (!ls)
+			goto out;
+	} else {
+		ls = container_of(stid, struct nfs4_layout_stateid, ls_stid);
+
+		status = nfserr_bad_stateid;
+		if (stateid->si_generation > stid->sc_stateid.si_generation)
+			goto out_put_stid;
+		if (layout_type != ls->ls_layout_type)
+			goto out_put_stid;
+	}
+
+	*lsp = ls;
+	return 0;
+
+out_put_stid:
+	nfs4_put_stid(stid);
+out:
+	return status;
+}
+
+static inline u64
+layout_end(struct nfsd4_layout_seg *seg)
+{
+	u64 end = seg->offset + seg->length;
+	return end >= seg->offset ? end : NFS4_MAX_UINT64;
+}
+
+static void
+layout_update_len(struct nfsd4_layout_seg *lo, u64 end)
+{
+	if (end == NFS4_MAX_UINT64)
+		lo->length = NFS4_MAX_UINT64;
+	else
+		lo->length = end - lo->offset;
+}
+
+static bool
+layouts_overlapping(struct nfs4_layout *lo, struct nfsd4_layout_seg *s)
+{
+	if (s->iomode != IOMODE_ANY && s->iomode != lo->lo_seg.iomode)
+		return false;
+	if (layout_end(&lo->lo_seg) <= s->offset)
+		return false;
+	if (layout_end(s) <= lo->lo_seg.offset)
+		return false;
+	return true;
+}
+
+static bool
+layouts_try_merge(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *new)
+{
+	if (lo->iomode != new->iomode)
+		return false;
+	if (layout_end(new) < lo->offset)
+		return false;
+	if (layout_end(lo) < new->offset)
+		return false;
+
+	lo->offset = min(lo->offset, new->offset);
+	layout_update_len(lo, max(layout_end(lo), layout_end(new)));
+	return true;
+}
+
+__be32
+nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
+{
+	struct nfsd4_layout_seg *seg = &lgp->lg_seg;
+	struct nfs4_layout *lp, *new = NULL;
+
+	spin_lock(&ls->ls_lock);
+	list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {
+		if (layouts_try_merge(&lp->lo_seg, seg))
+			goto done;
+	}
+	spin_unlock(&ls->ls_lock);
+
+	new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL);
+	if (!new)
+		return nfserr_jukebox;
+	memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg));
+	new->lo_state = ls;
+
+	spin_lock(&ls->ls_lock);
+	list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {
+		if (layouts_try_merge(&lp->lo_seg, seg))
+			goto done;
+	}
+
+	atomic_inc(&ls->ls_stid.sc_count);
+	list_add_tail(&new->lo_perstate, &ls->ls_layouts);
+	new = NULL;
+done:
+	update_stateid(&ls->ls_stid.sc_stateid);
+	memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
+	spin_unlock(&ls->ls_lock);
+	if (new)
+		kmem_cache_free(nfs4_layout_cache, new);
+	return nfs_ok;
+}
+
+static void
+nfsd4_free_layouts(struct list_head *reaplist)
+{
+	while (!list_empty(reaplist)) {
+		struct nfs4_layout *lp = list_first_entry(reaplist,
+				struct nfs4_layout, lo_perstate);
+
+		list_del(&lp->lo_perstate);
+		nfs4_put_stid(&lp->lo_state->ls_stid);
+		kmem_cache_free(nfs4_layout_cache, lp);
+	}
+}
+
+static void
+nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg,
+		struct list_head *reaplist)
+{
+	struct nfsd4_layout_seg *lo = &lp->lo_seg;
+	u64 end = layout_end(lo);
+
+	if (seg->offset <= lo->offset) {
+		if (layout_end(seg) >= end) {
+			list_move_tail(&lp->lo_perstate, reaplist);
+			return;
+		}
+		end = seg->offset;
+	} else {
+		/* retain the whole layout segment on a split. */
+		if (layout_end(seg) < end) {
+			dprintk("%s: split not supported\n", __func__);
+			return;
+		}
+
+		lo->offset = layout_end(seg);
+	}
+
+	layout_update_len(lo, end);
+}
+
+__be32
+nfsd4_return_file_layouts(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		struct nfsd4_layoutreturn *lrp)
+{
+	struct nfs4_layout_stateid *ls;
+	struct nfs4_layout *lp, *n;
+	LIST_HEAD(reaplist);
+	__be32 nfserr;
+	int found = 0;
+
+	nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lrp->lr_sid,
+						false, lrp->lr_layout_type,
+						&ls);
+	if (nfserr)
+		return nfserr;
+
+	spin_lock(&ls->ls_lock);
+	list_for_each_entry_safe(lp, n, &ls->ls_layouts, lo_perstate) {
+		if (layouts_overlapping(lp, &lrp->lr_seg)) {
+			nfsd4_return_file_layout(lp, &lrp->lr_seg, &reaplist);
+			found++;
+		}
+	}
+	if (!list_empty(&ls->ls_layouts)) {
+		if (found) {
+			update_stateid(&ls->ls_stid.sc_stateid);
+			memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid,
+				sizeof(stateid_t));
+		}
+		lrp->lrs_present = 1;
+	} else {
+		nfs4_unhash_stid(&ls->ls_stid);
+		lrp->lrs_present = 0;
+	}
+	spin_unlock(&ls->ls_lock);
+
+	nfs4_put_stid(&ls->ls_stid);
+	nfsd4_free_layouts(&reaplist);
+	return nfs_ok;
+}
+
+__be32
+nfsd4_return_client_layouts(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		struct nfsd4_layoutreturn *lrp)
+{
+	struct nfs4_layout_stateid *ls, *n;
+	struct nfs4_client *clp = cstate->clp;
+	struct nfs4_layout *lp, *t;
+	LIST_HEAD(reaplist);
+
+	lrp->lrs_present = 0;
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
+		if (lrp->lr_return_type == RETURN_FSID &&
+		    !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
+				   &cstate->current_fh.fh_handle))
+			continue;
+
+		spin_lock(&ls->ls_lock);
+		list_for_each_entry_safe(lp, t, &ls->ls_layouts, lo_perstate) {
+			if (lrp->lr_seg.iomode == IOMODE_ANY ||
+			    lrp->lr_seg.iomode == lp->lo_seg.iomode)
+				list_move_tail(&lp->lo_perstate, &reaplist);
+		}
+		spin_unlock(&ls->ls_lock);
+	}
+	spin_unlock(&clp->cl_lock);
+
+	nfsd4_free_layouts(&reaplist);
+	return 0;
+}
+
+static void
+nfsd4_return_all_layouts(struct nfs4_layout_stateid *ls,
+		struct list_head *reaplist)
+{
+	spin_lock(&ls->ls_lock);
+	list_splice_init(&ls->ls_layouts, reaplist);
+	spin_unlock(&ls->ls_lock);
+}
+
+void
+nfsd4_return_all_client_layouts(struct nfs4_client *clp)
+{
+	struct nfs4_layout_stateid *ls, *n;
+	LIST_HEAD(reaplist);
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt)
+		nfsd4_return_all_layouts(ls, &reaplist);
+	spin_unlock(&clp->cl_lock);
+
+	nfsd4_free_layouts(&reaplist);
+}
+
+void
+nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
+{
+	struct nfs4_layout_stateid *ls, *n;
+	LIST_HEAD(reaplist);
+
+	spin_lock(&fp->fi_lock);
+	list_for_each_entry_safe(ls, n, &fp->fi_lo_states, ls_perfile) {
+		if (ls->ls_stid.sc_client == clp)
+			nfsd4_return_all_layouts(ls, &reaplist);
+	}
+	spin_unlock(&fp->fi_lock);
+
+	nfsd4_free_layouts(&reaplist);
+}
+
+int
+nfsd4_init_pnfs(void)
+{
+	int i;
+
+	for (i = 0; i < DEVID_HASH_SIZE; i++)
+		INIT_LIST_HEAD(&nfsd_devid_hash[i]);
+
+	nfs4_layout_cache = kmem_cache_create("nfs4_layout",
+			sizeof(struct nfs4_layout), 0, 0, NULL);
+	if (!nfs4_layout_cache)
+		return -ENOMEM;
+
+	nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid",
+			sizeof(struct nfs4_layout_stateid), 0, 0, NULL);
+	if (!nfs4_layout_stateid_cache) {
+		kmem_cache_destroy(nfs4_layout_cache);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void
+nfsd4_exit_pnfs(void)
+{
+	int i;
+
+	kmem_cache_destroy(nfs4_layout_cache);
+	kmem_cache_destroy(nfs4_layout_stateid_cache);
+
+	for (i = 0; i < DEVID_HASH_SIZE; i++) {
+		struct nfsd4_deviceid_map *map, *n;
+
+		list_for_each_entry_safe(map, n, &nfsd_devid_hash[i], hash)
+			kfree(map);
+	}
+}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ac71d13c69ef..2b91443497cc 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -43,6 +43,7 @@
 #include "current_stateid.h"
 #include "netns.h"
 #include "acl.h"
+#include "pnfs.h"
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #include <linux/security.h>
@@ -1178,6 +1179,252 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return status == nfserr_same ? nfs_ok : status;
 }
 
+#ifdef CONFIG_NFSD_PNFS
+static const struct nfsd4_layout_ops *
+nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
+{
+	if (!exp->ex_layout_type) {
+		dprintk("%s: export does not support pNFS\n", __func__);
+		return NULL;
+	}
+
+	if (exp->ex_layout_type != layout_type) {
+		dprintk("%s: layout type %d not supported\n",
+			__func__, layout_type);
+		return NULL;
+	}
+
+	return nfsd4_layout_ops[layout_type];
+}
+
+static __be32
+nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		struct nfsd4_getdeviceinfo *gdp)
+{
+	const struct nfsd4_layout_ops *ops;
+	struct nfsd4_deviceid_map *map;
+	struct svc_export *exp;
+	__be32 nfserr;
+
+	dprintk("%s: layout_type %u dev_id [0x%llx:0x%x] maxcnt %u\n",
+	       __func__,
+	       gdp->gd_layout_type,
+	       gdp->gd_devid.fsid_idx, gdp->gd_devid.generation,
+	       gdp->gd_maxcount);
+
+	map = nfsd4_find_devid_map(gdp->gd_devid.fsid_idx);
+	if (!map) {
+		dprintk("%s: couldn't find device ID to export mapping!\n",
+			__func__);
+		return nfserr_noent;
+	}
+
+	exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid);
+	if (IS_ERR(exp)) {
+		dprintk("%s: could not find device id\n", __func__);
+		return nfserr_noent;
+	}
+
+	nfserr = nfserr_layoutunavailable;
+	ops = nfsd4_layout_verify(exp, gdp->gd_layout_type);
+	if (!ops)
+		goto out;
+
+	nfserr = nfs_ok;
+	if (gdp->gd_maxcount != 0)
+		nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
+
+	gdp->gd_notify_types &= ops->notify_types;
+	exp_put(exp);
+out:
+	return nfserr;
+}
+
+static __be32
+nfsd4_layoutget(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		struct nfsd4_layoutget *lgp)
+{
+	struct svc_fh *current_fh = &cstate->current_fh;
+	const struct nfsd4_layout_ops *ops;
+	struct nfs4_layout_stateid *ls;
+	__be32 nfserr;
+	int accmode;
+
+	switch (lgp->lg_seg.iomode) {
+	case IOMODE_READ:
+		accmode = NFSD_MAY_READ;
+		break;
+	case IOMODE_RW:
+		accmode = NFSD_MAY_READ | NFSD_MAY_WRITE;
+		break;
+	default:
+		dprintk("%s: invalid iomode %d\n",
+			__func__, lgp->lg_seg.iomode);
+		nfserr = nfserr_badiomode;
+		goto out;
+	}
+
+	nfserr = fh_verify(rqstp, current_fh, 0, accmode);
+	if (nfserr)
+		goto out;
+
+	nfserr = nfserr_layoutunavailable;
+	ops = nfsd4_layout_verify(current_fh->fh_export, lgp->lg_layout_type);
+	if (!ops)
+		goto out;
+
+	/*
+	 * Verify minlength and range as per RFC5661:
+	 *  o  If loga_length is less than loga_minlength,
+	 *     the metadata server MUST return NFS4ERR_INVAL.
+	 *  o  If the sum of loga_offset and loga_minlength exceeds
+	 *     NFS4_UINT64_MAX, and loga_minlength is not
+	 *     NFS4_UINT64_MAX, the error NFS4ERR_INVAL MUST result.
+	 *  o  If the sum of loga_offset and loga_length exceeds
+	 *     NFS4_UINT64_MAX, and loga_length is not NFS4_UINT64_MAX,
+	 *     the error NFS4ERR_INVAL MUST result.
+	 */
+	nfserr = nfserr_inval;
+	if (lgp->lg_seg.length < lgp->lg_minlength ||
+	    (lgp->lg_minlength != NFS4_MAX_UINT64 &&
+	     lgp->lg_minlength > NFS4_MAX_UINT64 - lgp->lg_seg.offset) ||
+	    (lgp->lg_seg.length != NFS4_MAX_UINT64 &&
+	     lgp->lg_seg.length > NFS4_MAX_UINT64 - lgp->lg_seg.offset))
+		goto out;
+	if (lgp->lg_seg.length == 0)
+		goto out;
+
+	nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid,
+						true, lgp->lg_layout_type, &ls);
+	if (nfserr)
+		goto out;
+
+	nfserr = ops->proc_layoutget(current_fh->fh_dentry->d_inode,
+				     current_fh, lgp);
+	if (nfserr)
+		goto out_put_stid;
+
+	nfserr = nfsd4_insert_layout(lgp, ls);
+
+out_put_stid:
+	nfs4_put_stid(&ls->ls_stid);
+out:
+	return nfserr;
+}
+
+static __be32
+nfsd4_layoutcommit(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		struct nfsd4_layoutcommit *lcp)
+{
+	const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
+	struct svc_fh *current_fh = &cstate->current_fh;
+	const struct nfsd4_layout_ops *ops;
+	loff_t new_size = lcp->lc_last_wr + 1;
+	struct inode *inode;
+	struct nfs4_layout_stateid *ls;
+	__be32 nfserr;
+
+	nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE);
+	if (nfserr)
+		goto out;
+
+	nfserr = nfserr_layoutunavailable;
+	ops = nfsd4_layout_verify(current_fh->fh_export, lcp->lc_layout_type);
+	if (!ops)
+		goto out;
+	inode = current_fh->fh_dentry->d_inode;
+
+	nfserr = nfserr_inval;
+	if (new_size <= seg->offset) {
+		dprintk("pnfsd: last write before layout segment\n");
+		goto out;
+	}
+	if (new_size > seg->offset + seg->length) {
+		dprintk("pnfsd: last write beyond layout segment\n");
+		goto out;
+	}
+	if (!lcp->lc_newoffset && new_size > i_size_read(inode)) {
+		dprintk("pnfsd: layoutcommit beyond EOF\n");
+		goto out;
+	}
+
+	nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid,
+						false, lcp->lc_layout_type,
+						&ls);
+	if (nfserr) {
+		/* fixup error code as per RFC5661 */
+		if (nfserr == nfserr_bad_stateid)
+			nfserr = nfserr_badlayout;
+		goto out;
+	}
+
+	nfserr = ops->proc_layoutcommit(inode, lcp);
+	if (nfserr)
+		goto out_put_stid;
+
+	if (new_size > i_size_read(inode)) {
+		lcp->lc_size_chg = 1;
+		lcp->lc_newsize = new_size;
+	} else {
+		lcp->lc_size_chg = 0;
+	}
+
+out_put_stid:
+	nfs4_put_stid(&ls->ls_stid);
+out:
+	return nfserr;
+}
+
+static __be32
+nfsd4_layoutreturn(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		struct nfsd4_layoutreturn *lrp)
+{
+	struct svc_fh *current_fh = &cstate->current_fh;
+	__be32 nfserr;
+
+	nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
+	if (nfserr)
+		goto out;
+
+	nfserr = nfserr_layoutunavailable;
+	if (!nfsd4_layout_verify(current_fh->fh_export, lrp->lr_layout_type))
+		goto out;
+
+	switch (lrp->lr_seg.iomode) {
+	case IOMODE_READ:
+	case IOMODE_RW:
+	case IOMODE_ANY:
+		break;
+	default:
+		dprintk("%s: invalid iomode %d\n", __func__,
+			lrp->lr_seg.iomode);
+		nfserr = nfserr_inval;
+		goto out;
+	}
+
+	switch (lrp->lr_return_type) {
+	case RETURN_FILE:
+		nfserr = nfsd4_return_file_layouts(rqstp, cstate, lrp);
+		break;
+	case RETURN_FSID:
+	case RETURN_ALL:
+		nfserr = nfsd4_return_client_layouts(rqstp, cstate, lrp);
+		break;
+	default:
+		dprintk("%s: invalid return_type %d\n", __func__,
+			lrp->lr_return_type);
+		nfserr = nfserr_inval;
+		break;
+	}
+out:
+	return nfserr;
+}
+#endif /* CONFIG_NFSD_PNFS */
+
 /*
  * NULL call.
  */
@@ -1679,6 +1926,36 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
 		op_encode_channel_attrs_maxsz) * sizeof(__be32);
 }
 
+#ifdef CONFIG_NFSD_PNFS
+/*
+ * At this stage we don't really know what layout driver will handle the request,
+ * so we need to define an arbitrary upper bound here.
+ */
+#define MAX_LAYOUT_SIZE		128
+static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+	return (op_encode_hdr_size +
+		1 /* logr_return_on_close */ +
+		op_encode_stateid_maxsz +
+		1 /* nr of layouts */ +
+		MAX_LAYOUT_SIZE) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+	return (op_encode_hdr_size +
+		1 /* locr_newsize */ +
+		2 /* ns_size */) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+	return (op_encode_hdr_size +
+		1 /* lrs_stateid */ +
+		op_encode_stateid_maxsz) * sizeof(__be32);
+}
+#endif /* CONFIG_NFSD_PNFS */
+
 static struct nfsd4_operation nfsd4_ops[] = {
 	[OP_ACCESS] = {
 		.op_func = (nfsd4op_func)nfsd4_access,
@@ -1966,6 +2243,31 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
 	},
+#ifdef CONFIG_NFSD_PNFS
+	[OP_GETDEVICEINFO] = {
+		.op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
+		.op_flags = ALLOWED_WITHOUT_FH,
+		.op_name = "OP_GETDEVICEINFO",
+	},
+	[OP_LAYOUTGET] = {
+		.op_func = (nfsd4op_func)nfsd4_layoutget,
+		.op_flags = OP_MODIFIES_SOMETHING,
+		.op_name = "OP_LAYOUTGET",
+		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutget_rsize,
+	},
+	[OP_LAYOUTCOMMIT] = {
+		.op_func = (nfsd4op_func)nfsd4_layoutcommit,
+		.op_flags = OP_MODIFIES_SOMETHING,
+		.op_name = "OP_LAYOUTCOMMIT",
+		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutcommit_rsize,
+	},
+	[OP_LAYOUTRETURN] = {
+		.op_func = (nfsd4op_func)nfsd4_layoutreturn,
+		.op_flags = OP_MODIFIES_SOMETHING,
+		.op_name = "OP_LAYOUTRETURN",
+		.op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutreturn_rsize,
+	},
+#endif /* CONFIG_NFSD_PNFS */
 
 	/* NFSv4.2 operations */
 	[OP_ALLOCATE] = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index eefd29ec43f2..c89f79dc69e2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -48,6 +48,7 @@
 #include "current_stateid.h"
 
 #include "netns.h"
+#include "pnfs.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -1539,6 +1540,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
 	INIT_LIST_HEAD(&clp->cl_lru);
 	INIT_LIST_HEAD(&clp->cl_callbacks);
 	INIT_LIST_HEAD(&clp->cl_revoked);
+#ifdef CONFIG_NFSD_PNFS
+	INIT_LIST_HEAD(&clp->cl_lo_states);
+#endif
 	spin_lock_init(&clp->cl_lock);
 	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
 	return clp;
@@ -1643,6 +1647,7 @@ __destroy_client(struct nfs4_client *clp)
 		nfs4_get_stateowner(&oo->oo_owner);
 		release_openowner(oo);
 	}
+	nfsd4_return_all_client_layouts(clp);
 	nfsd4_shutdown_callback(clp);
 	if (clp->cl_cb_conn.cb_xprt)
 		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
@@ -2126,8 +2131,11 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
 static void
 nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
 {
-	/* pNFS is not supported */
+#ifdef CONFIG_NFSD_PNFS
+	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS;
+#else
 	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
+#endif
 
 	/* Referrals are supported, Migration is not. */
 	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
@@ -3055,6 +3063,9 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
 	fp->fi_share_deny = 0;
 	memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
 	memset(fp->fi_access, 0, sizeof(fp->fi_access));
+#ifdef CONFIG_NFSD_PNFS
+	INIT_LIST_HEAD(&fp->fi_lo_states);
+#endif
 	hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
 }
 
@@ -4841,6 +4852,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	update_stateid(&stp->st_stid.sc_stateid);
 	memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
 
+	nfsd4_return_all_file_layouts(stp->st_stateowner->so_client,
+				      stp->st_stid.sc_file);
+
 	nfsd4_close_open_stateid(stp);
 
 	/* put reference from nfs4_preprocess_seqid_op */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 974533e5a427..df5e66caf100 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -47,6 +47,7 @@
 #include "state.h"
 #include "cache.h"
 #include "netns.h"
+#include "pnfs.h"
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #include <linux/security.h>
@@ -1522,6 +1523,127 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
 	DECODE_TAIL;
 }
 
+#ifdef CONFIG_NFSD_PNFS
+static __be32
+nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
+		struct nfsd4_getdeviceinfo *gdev)
+{
+	DECODE_HEAD;
+	u32 num, i;
+
+	READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4);
+	COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid));
+	gdev->gd_layout_type = be32_to_cpup(p++);
+	gdev->gd_maxcount = be32_to_cpup(p++);
+	num = be32_to_cpup(p++);
+	if (num) {
+		READ_BUF(4 * num);
+		gdev->gd_notify_types = be32_to_cpup(p++);
+		for (i = 1; i < num; i++) {
+			if (be32_to_cpup(p++)) {
+				status = nfserr_inval;
+				goto out;
+			}
+		}
+	}
+	DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
+		struct nfsd4_layoutget *lgp)
+{
+	DECODE_HEAD;
+
+	READ_BUF(36);
+	lgp->lg_signal = be32_to_cpup(p++);
+	lgp->lg_layout_type = be32_to_cpup(p++);
+	lgp->lg_seg.iomode = be32_to_cpup(p++);
+	p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
+	p = xdr_decode_hyper(p, &lgp->lg_seg.length);
+	p = xdr_decode_hyper(p, &lgp->lg_minlength);
+	nfsd4_decode_stateid(argp, &lgp->lg_sid);
+	READ_BUF(4);
+	lgp->lg_maxcount = be32_to_cpup(p++);
+
+	DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
+		struct nfsd4_layoutcommit *lcp)
+{
+	DECODE_HEAD;
+	u32 timechange;
+
+	READ_BUF(20);
+	p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
+	p = xdr_decode_hyper(p, &lcp->lc_seg.length);
+	lcp->lc_reclaim = be32_to_cpup(p++);
+	nfsd4_decode_stateid(argp, &lcp->lc_sid);
+	READ_BUF(4);
+	lcp->lc_newoffset = be32_to_cpup(p++);
+	if (lcp->lc_newoffset) {
+		READ_BUF(8);
+		p = xdr_decode_hyper(p, &lcp->lc_last_wr);
+	} else
+		lcp->lc_last_wr = 0;
+	READ_BUF(4);
+	timechange = be32_to_cpup(p++);
+	if (timechange) {
+		status = nfsd4_decode_time(argp, &lcp->lc_mtime);
+		if (status)
+			return status;
+	} else {
+		lcp->lc_mtime.tv_nsec = UTIME_NOW;
+	}
+	READ_BUF(8);
+	lcp->lc_layout_type = be32_to_cpup(p++);
+
+	/*
+	 * Save the layout update in XDR format and let the layout driver deal
+	 * with it later.
+	 */
+	lcp->lc_up_len = be32_to_cpup(p++);
+	if (lcp->lc_up_len > 0) {
+		READ_BUF(lcp->lc_up_len);
+		READMEM(lcp->lc_up_layout, lcp->lc_up_len);
+	}
+
+	DECODE_TAIL;
+}
+
+static __be32
+nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
+		struct nfsd4_layoutreturn *lrp)
+{
+	DECODE_HEAD;
+
+	READ_BUF(16);
+	lrp->lr_reclaim = be32_to_cpup(p++);
+	lrp->lr_layout_type = be32_to_cpup(p++);
+	lrp->lr_seg.iomode = be32_to_cpup(p++);
+	lrp->lr_return_type = be32_to_cpup(p++);
+	if (lrp->lr_return_type == RETURN_FILE) {
+		READ_BUF(16);
+		p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
+		p = xdr_decode_hyper(p, &lrp->lr_seg.length);
+		nfsd4_decode_stateid(argp, &lrp->lr_sid);
+		READ_BUF(4);
+		lrp->lrf_body_len = be32_to_cpup(p++);
+		if (lrp->lrf_body_len > 0) {
+			READ_BUF(lrp->lrf_body_len);
+			READMEM(lrp->lrf_body, lrp->lrf_body_len);
+		}
+	} else {
+		lrp->lr_seg.offset = 0;
+		lrp->lr_seg.length = NFS4_MAX_UINT64;
+	}
+
+	DECODE_TAIL;
+}
+#endif /* CONFIG_NFSD_PNFS */
+
 static __be32
 nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
 		       struct nfsd4_fallocate *fallocate)
@@ -1616,11 +1738,19 @@ static nfsd4_dec nfsd4_dec_ops[] = {
 	[OP_DESTROY_SESSION]	= (nfsd4_dec)nfsd4_decode_destroy_session,
 	[OP_FREE_STATEID]	= (nfsd4_dec)nfsd4_decode_free_stateid,
 	[OP_GET_DIR_DELEGATION]	= (nfsd4_dec)nfsd4_decode_notsupp,
+#ifdef CONFIG_NFSD_PNFS
+	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_getdeviceinfo,
+	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_notsupp,
+	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_layoutcommit,
+	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_layoutget,
+	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_layoutreturn,
+#else
 	[OP_GETDEVICEINFO]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_GETDEVICELIST]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_LAYOUTCOMMIT]	= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_LAYOUTGET]		= (nfsd4_dec)nfsd4_decode_notsupp,
 	[OP_LAYOUTRETURN]	= (nfsd4_dec)nfsd4_decode_notsupp,
+#endif
 	[OP_SECINFO_NO_NAME]	= (nfsd4_dec)nfsd4_decode_secinfo_no_name,
 	[OP_SEQUENCE]		= (nfsd4_dec)nfsd4_decode_sequence,
 	[OP_SET_SSV]		= (nfsd4_dec)nfsd4_decode_notsupp,
@@ -2548,6 +2678,30 @@ out_acl:
 			get_parent_attributes(exp, &stat);
 		p = xdr_encode_hyper(p, stat.ino);
 	}
+#ifdef CONFIG_NFSD_PNFS
+	if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) ||
+	    (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) {
+		if (exp->ex_layout_type) {
+			p = xdr_reserve_space(xdr, 8);
+			if (!p)
+				goto out_resource;
+			*p++ = cpu_to_be32(1);
+			*p++ = cpu_to_be32(exp->ex_layout_type);
+		} else {
+			p = xdr_reserve_space(xdr, 4);
+			if (!p)
+				goto out_resource;
+			*p++ = cpu_to_be32(0);
+		}
+	}
+
+	if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
+			goto out_resource;
+		*p++ = cpu_to_be32(stat.blksize);
+	}
+#endif /* CONFIG_NFSD_PNFS */
 	if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
 		status = nfsd4_encode_security_label(xdr, rqstp, context,
 								contextlen);
@@ -3824,6 +3978,156 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
 	return nfserr;
 }
 
+#ifdef CONFIG_NFSD_PNFS
+static __be32
+nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+		struct nfsd4_getdeviceinfo *gdev)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	const struct nfsd4_layout_ops *ops =
+		nfsd4_layout_ops[gdev->gd_layout_type];
+	u32 starting_len = xdr->buf->len, needed_len;
+	__be32 *p;
+
+	dprintk("%s: err %d\n", __func__, nfserr);
+	if (nfserr)
+		goto out;
+
+	nfserr = nfserr_resource;
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		goto out;
+
+	*p++ = cpu_to_be32(gdev->gd_layout_type);
+
+	/* If maxcount is 0 then just update notifications */
+	if (gdev->gd_maxcount != 0) {
+		nfserr = ops->encode_getdeviceinfo(xdr, gdev);
+		if (nfserr) {
+			/*
+			 * We don't bother to burden the layout drivers with
+			 * enforcing gd_maxcount, just tell the client to
+			 * come back with a bigger buffer if it's not enough.
+			 */
+			if (xdr->buf->len + 4 > gdev->gd_maxcount)
+				goto toosmall;
+			goto out;
+		}
+	}
+
+	nfserr = nfserr_resource;
+	if (gdev->gd_notify_types) {
+		p = xdr_reserve_space(xdr, 4 + 4);
+		if (!p)
+			goto out;
+		*p++ = cpu_to_be32(1);			/* bitmap length */
+		*p++ = cpu_to_be32(gdev->gd_notify_types);
+	} else {
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
+			goto out;
+		*p++ = 0;
+	}
+
+	nfserr = 0;
+out:
+	kfree(gdev->gd_device);
+	dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr));
+	return nfserr;
+
+toosmall:
+	dprintk("%s: maxcount too small\n", __func__);
+	needed_len = xdr->buf->len + 4 /* notifications */;
+	xdr_truncate_encode(xdr, starting_len);
+	p = xdr_reserve_space(xdr, 4);
+	if (!p) {
+		nfserr = nfserr_resource;
+	} else {
+		*p++ = cpu_to_be32(needed_len);
+		nfserr = nfserr_toosmall;
+	}
+	goto out;
+}
+
+static __be32
+nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
+		struct nfsd4_layoutget *lgp)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	const struct nfsd4_layout_ops *ops =
+		nfsd4_layout_ops[lgp->lg_layout_type];
+	__be32 *p;
+
+	dprintk("%s: err %d\n", __func__, nfserr);
+	if (nfserr)
+		goto out;
+
+	nfserr = nfserr_resource;
+	p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t));
+	if (!p)
+		goto out;
+
+	*p++ = cpu_to_be32(1);	/* we always set return-on-close */
+	*p++ = cpu_to_be32(lgp->lg_sid.si_generation);
+	p = xdr_encode_opaque_fixed(p, &lgp->lg_sid.si_opaque,
+				    sizeof(stateid_opaque_t));
+
+	*p++ = cpu_to_be32(1);	/* we always return a single layout */
+	p = xdr_encode_hyper(p, lgp->lg_seg.offset);
+	p = xdr_encode_hyper(p, lgp->lg_seg.length);
+	*p++ = cpu_to_be32(lgp->lg_seg.iomode);
+	*p++ = cpu_to_be32(lgp->lg_layout_type);
+
+	nfserr = ops->encode_layoutget(xdr, lgp);
+out:
+	kfree(lgp->lg_content);
+	return nfserr;
+}
+
+static __be32
+nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
+			  struct nfsd4_layoutcommit *lcp)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	__be32 *p;
+
+	if (nfserr)
+		return nfserr;
+
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		return nfserr_resource;
+	*p++ = cpu_to_be32(lcp->lc_size_chg);
+	if (lcp->lc_size_chg) {
+		p = xdr_reserve_space(xdr, 8);
+		if (!p)
+			return nfserr_resource;
+		p = xdr_encode_hyper(p, lcp->lc_newsize);
+	}
+
+	return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
+		struct nfsd4_layoutreturn *lrp)
+{
+	struct xdr_stream *xdr = &resp->xdr;
+	__be32 *p;
+
+	if (nfserr)
+		return nfserr;
+
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		return nfserr_resource;
+	*p++ = cpu_to_be32(lrp->lrs_present);
+	if (lrp->lrs_present)
+		nfsd4_encode_stateid(xdr, &lrp->lr_sid);
+	return nfs_ok;
+}
+#endif /* CONFIG_NFSD_PNFS */
+
 static __be32
 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
 		  struct nfsd4_seek *seek)
@@ -3900,11 +4204,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
 	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_GET_DIR_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,
+#ifdef CONFIG_NFSD_PNFS
+	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_getdeviceinfo,
+	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_layoutcommit,
+	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_layoutget,
+	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_layoutreturn,
+#else
 	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_LAYOUTCOMMIT]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_LAYOUTGET]		= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_LAYOUTRETURN]	= (nfsd4_enc)nfsd4_encode_noop,
+#endif
 	[OP_SECINFO_NO_NAME]	= (nfsd4_enc)nfsd4_encode_secinfo_no_name,
 	[OP_SEQUENCE]		= (nfsd4_enc)nfsd4_encode_sequence,
 	[OP_SET_SSV]		= (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 19ace74d35f6..aa47d75ddb26 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -21,6 +21,7 @@
 #include "cache.h"
 #include "state.h"
 #include "netns.h"
+#include "pnfs.h"
 
 /*
  *	We have a single directory with several nodes in it.
@@ -1258,9 +1259,12 @@ static int __init init_nfsd(void)
 	retval = nfsd4_init_slabs();
 	if (retval)
 		goto out_unregister_pernet;
-	retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
+	retval = nfsd4_init_pnfs();
 	if (retval)
 		goto out_free_slabs;
+	retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
+	if (retval)
+		goto out_exit_pnfs;
 	nfsd_stat_init();	/* Statistics */
 	retval = nfsd_reply_cache_init();
 	if (retval)
@@ -1282,6 +1286,8 @@ out_free_lockd:
 out_free_stat:
 	nfsd_stat_shutdown();
 	nfsd_fault_inject_cleanup();
+out_exit_pnfs:
+	nfsd4_exit_pnfs();
 out_free_slabs:
 	nfsd4_free_slabs();
 out_unregister_pernet:
@@ -1299,6 +1305,7 @@ static void __exit exit_nfsd(void)
 	nfsd_stat_shutdown();
 	nfsd_lockd_shutdown();
 	nfsd4_free_slabs();
+	nfsd4_exit_pnfs();
 	nfsd_fault_inject_cleanup();
 	unregister_filesystem(&nfsd_fs_type);
 	unregister_pernet_subsys(&nfsd_net_ops);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 33a46a8dfaf7..565c4da1a9eb 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -325,15 +325,27 @@ void		nfsd_lockd_shutdown(void);
 
 #define NFSD4_SUPPORTED_ATTRS_WORD2 0
 
+/* 4.1 */
+#ifdef CONFIG_NFSD_PNFS
+#define PNFSD_SUPPORTED_ATTRS_WORD1	FATTR4_WORD1_FS_LAYOUT_TYPES
+#define PNFSD_SUPPORTED_ATTRS_WORD2 \
+(FATTR4_WORD2_LAYOUT_BLKSIZE	| FATTR4_WORD2_LAYOUT_TYPES)
+#else
+#define PNFSD_SUPPORTED_ATTRS_WORD1	0
+#define PNFSD_SUPPORTED_ATTRS_WORD2	0
+#endif /* CONFIG_NFSD_PNFS */
+
 #define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
 	NFSD4_SUPPORTED_ATTRS_WORD0
 
 #define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
-	NFSD4_SUPPORTED_ATTRS_WORD1
+	(NFSD4_SUPPORTED_ATTRS_WORD1	| PNFSD_SUPPORTED_ATTRS_WORD1)
 
 #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
-	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
+	(NFSD4_SUPPORTED_ATTRS_WORD2	| PNFSD_SUPPORTED_ATTRS_WORD2 | \
+	 FATTR4_WORD2_SUPPATTR_EXCLCREAT)
 
+/* 4.2 */
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
 #define NFSD4_2_SECURITY_ATTRS		FATTR4_WORD2_SECURITY_LABEL
 #else
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
new file mode 100644
index 000000000000..a9616a4e13cd
--- /dev/null
+++ b/fs/nfsd/pnfs.h
@@ -0,0 +1,80 @@
+#ifndef _FS_NFSD_PNFS_H
+#define _FS_NFSD_PNFS_H 1
+
+#include <linux/exportfs.h>
+#include <linux/nfsd/export.h>
+
+#include "state.h"
+#include "xdr4.h"
+
+struct xdr_stream;
+
+struct nfsd4_deviceid_map {
+	struct list_head	hash;
+	u64			idx;
+	int			fsid_type;
+	u32			fsid[];
+};
+
+struct nfsd4_layout_ops {
+	u32		notify_types;
+
+	__be32 (*proc_getdeviceinfo)(struct super_block *sb,
+			struct nfsd4_getdeviceinfo *gdevp);
+	__be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
+			struct nfsd4_getdeviceinfo *gdevp);
+
+	__be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
+			struct nfsd4_layoutget *lgp);
+	__be32 (*encode_layoutget)(struct xdr_stream *,
+			struct nfsd4_layoutget *lgp);
+
+	__be32 (*proc_layoutcommit)(struct inode *inode,
+			struct nfsd4_layoutcommit *lcp);
+};
+
+extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
+
+__be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate, stateid_t *stateid,
+		bool create, u32 layout_type, struct nfs4_layout_stateid **lsp);
+__be32 nfsd4_insert_layout(struct nfsd4_layoutget *lgp,
+		struct nfs4_layout_stateid *ls);
+__be32 nfsd4_return_file_layouts(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		struct nfsd4_layoutreturn *lrp);
+__be32 nfsd4_return_client_layouts(struct svc_rqst *rqstp,
+		struct nfsd4_compound_state *cstate,
+		struct nfsd4_layoutreturn *lrp);
+int nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
+		u32 device_generation);
+struct nfsd4_deviceid_map *nfsd4_find_devid_map(int idx);
+
+#ifdef CONFIG_NFSD_PNFS
+void nfsd4_setup_layout_type(struct svc_export *exp);
+void nfsd4_return_all_client_layouts(struct nfs4_client *);
+void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
+		struct nfs4_file *fp);
+int nfsd4_init_pnfs(void);
+void nfsd4_exit_pnfs(void);
+#else
+static inline void nfsd4_setup_layout_type(struct svc_export *exp)
+{
+}
+
+static inline void nfsd4_return_all_client_layouts(struct nfs4_client *clp)
+{
+}
+static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
+		struct nfs4_file *fp)
+{
+}
+static inline void nfsd4_exit_pnfs(void)
+{
+}
+static inline int nfsd4_init_pnfs(void)
+{
+	return 0;
+}
+#endif /* CONFIG_NFSD_PNFS */
+#endif /* _FS_NFSD_PNFS_H */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 38ebb1268b59..5f66b7fd0297 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -92,6 +92,7 @@ struct nfs4_stid {
 /* For a deleg stateid kept around only to process free_stateid's: */
 #define NFS4_REVOKED_DELEG_STID 16
 #define NFS4_CLOSED_DELEG_STID 32
+#define NFS4_LAYOUT_STID 64
 	unsigned char sc_type;
 	stateid_t sc_stateid;
 	struct nfs4_client *sc_client;
@@ -297,6 +298,9 @@ struct nfs4_client {
 	struct list_head	cl_delegations;
 	struct list_head	cl_revoked;	/* unacknowledged, revoked 4.1 state */
 	struct list_head        cl_lru;         /* tail queue */
+#ifdef CONFIG_NFSD_PNFS
+	struct list_head	cl_lo_states;	/* outstanding layout states */
+#endif
 	struct xdr_netobj	cl_name; 	/* id generated by client */
 	nfs4_verifier		cl_verifier; 	/* generated by client */
 	time_t                  cl_time;        /* time of last lease renewal */
@@ -496,6 +500,9 @@ struct nfs4_file {
 	int			fi_delegees;
 	struct knfsd_fh		fi_fhandle;
 	bool			fi_had_conflict;
+#ifdef CONFIG_NFSD_PNFS
+	struct list_head	fi_lo_states;
+#endif
 };
 
 /*
@@ -528,6 +535,20 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
 	return container_of(s, struct nfs4_ol_stateid, st_stid);
 }
 
+struct nfs4_layout_stateid {
+	struct nfs4_stid		ls_stid;
+	struct list_head		ls_perclnt;
+	struct list_head		ls_perfile;
+	spinlock_t			ls_lock;
+	struct list_head		ls_layouts;
+	u32				ls_layout_type;
+};
+
+static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
+{
+	return container_of(s, struct nfs4_layout_stateid, ls_stid);
+}
+
 /* flags for preprocess_seqid_op() */
 #define RD_STATE	        0x00000010
 #define WR_STATE	        0x00000020
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 90a5925bd6ab..0bda93e58e1b 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -428,6 +428,61 @@ struct nfsd4_reclaim_complete {
 	u32 rca_one_fs;
 };
 
+struct nfsd4_deviceid {
+	u64			fsid_idx;
+	u32			generation;
+	u32			pad;
+};
+
+struct nfsd4_layout_seg {
+	u32			iomode;
+	u64			offset;
+	u64			length;
+};
+
+struct nfsd4_getdeviceinfo {
+	struct nfsd4_deviceid	gd_devid;	/* request */
+	u32			gd_layout_type;	/* request */
+	u32			gd_maxcount;	/* request */
+	u32			gd_notify_types;/* request - response */
+	void			*gd_device;	/* response */
+};
+
+struct nfsd4_layoutget {
+	u64			lg_minlength;	/* request */
+	u32			lg_signal;	/* request */
+	u32			lg_layout_type;	/* request */
+	u32			lg_maxcount;	/* request */
+	stateid_t		lg_sid;		/* request/response */
+	struct nfsd4_layout_seg	lg_seg;		/* request/response */
+	void			*lg_content;	/* response */
+};
+
+struct nfsd4_layoutcommit {
+	stateid_t		lc_sid;		/* request */
+	struct nfsd4_layout_seg	lc_seg;		/* request */
+	u32			lc_reclaim;	/* request */
+	u32			lc_newoffset;	/* request */
+	u64			lc_last_wr;	/* request */
+	struct timespec		lc_mtime;	/* request */
+	u32			lc_layout_type;	/* request */
+	u32			lc_up_len;	/* layout length */
+	void			*lc_up_layout;	/* decoded by callback */
+	u32			lc_size_chg;	/* boolean for response */
+	u64			lc_newsize;	/* response */
+};
+
+struct nfsd4_layoutreturn {
+	u32			lr_return_type;	/* request */
+	u32			lr_layout_type;	/* request */
+	struct nfsd4_layout_seg	lr_seg;		/* request */
+	u32			lr_reclaim;	/* request */
+	u32			lrf_body_len;	/* request */
+	void			*lrf_body;	/* request */
+	stateid_t		lr_sid;		/* request/response */
+	u32			lrs_present;	/* response */
+};
+
 struct nfsd4_fallocate {
 	/* request */
 	stateid_t	falloc_stateid;
@@ -491,6 +546,10 @@ struct nfsd4_op {
 		struct nfsd4_reclaim_complete	reclaim_complete;
 		struct nfsd4_test_stateid	test_stateid;
 		struct nfsd4_free_stateid	free_stateid;
+		struct nfsd4_getdeviceinfo	getdeviceinfo;
+		struct nfsd4_layoutget		layoutget;
+		struct nfsd4_layoutcommit	layoutcommit;
+		struct nfsd4_layoutreturn	layoutreturn;
 
 		/* NFSv4.2 */
 		struct nfsd4_fallocate		allocate;
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 8a3589c2542c..bc10d687f2ce 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -411,6 +411,7 @@ enum lock_type4 {
 #define FATTR4_WORD1_TIME_MODIFY_SET    (1UL << 22)
 #define FATTR4_WORD1_MOUNTED_ON_FILEID  (1UL << 23)
 #define FATTR4_WORD1_FS_LAYOUT_TYPES    (1UL << 30)
+#define FATTR4_WORD2_LAYOUT_TYPES       (1UL << 0)
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
diff --git a/include/uapi/linux/nfsd/debug.h b/include/uapi/linux/nfsd/debug.h
index 1fdc95bb2375..0bf130a1c58d 100644
--- a/include/uapi/linux/nfsd/debug.h
+++ b/include/uapi/linux/nfsd/debug.h
@@ -32,6 +32,7 @@
 #define NFSDDBG_REPCACHE	0x0080
 #define NFSDDBG_XDR		0x0100
 #define NFSDDBG_LOCKD		0x0200
+#define NFSDDBG_PNFS		0x0400
 #define NFSDDBG_ALL		0x7FFF
 #define NFSDDBG_NOCHANGE	0xFFFF
 
diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h
index 584b6ef3a5e8..4742f2cb42f2 100644
--- a/include/uapi/linux/nfsd/export.h
+++ b/include/uapi/linux/nfsd/export.h
@@ -47,8 +47,10 @@
  * exported filesystem.
  */
 #define	NFSEXP_V4ROOT		0x10000
+#define NFSEXP_NOPNFS		0x20000
+
 /* All flags that we claim to support.  (Note we don't support NOACL.) */
-#define NFSEXP_ALLFLAGS		0x1FE7F
+#define NFSEXP_ALLFLAGS		0x3FE7F
 
 /* The flags that may vary depending on security flavor: */
 #define NFSEXP_SECINFO_FLAGS	(NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
-- 
cgit v1.2.3-71-gd317


From b7396a38fb28db4ebbbf35da1057eb5206b4ad6c Mon Sep 17 00:00:00 2001
From: Chunyan Zhang <chunyan.zhang@spreadtrum.com>
Date: Wed, 28 Jan 2015 19:08:44 +0800
Subject: tty/serial: Add Spreadtrum sc9836-uart driver support

Add a full sc9836-uart driver for SC9836 SoC which is based on the
spreadtrum sharkl64 platform.
This driver also support earlycon.

Originally-by: Lanqing Liu <lanqing.liu@spreadtrum.com>
Signed-off-by: Orson Zhai <orson.zhai@spreadtrum.com>
Signed-off-by: Chunyan Zhang <chunyan.zhang@spreadtrum.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig       |  18 +
 drivers/tty/serial/Makefile      |   1 +
 drivers/tty/serial/sprd_serial.c | 793 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/serial_core.h |   3 +
 4 files changed, 815 insertions(+)
 create mode 100644 drivers/tty/serial/sprd_serial.c

(limited to 'include/uapi/linux')

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index c63317e87c33..ddcc0a4c659c 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1583,6 +1583,24 @@ config SERIAL_MEN_Z135
 	  This driver can also be build as a module. If so, the module will be called
 	  men_z135_uart.ko
 
+config SERIAL_SPRD
+	tristate "Support for Spreadtrum serial"
+	depends on ARCH_SPRD
+	select SERIAL_CORE
+	help
+	  This enables the driver for the Spreadtrum's serial.
+
+config SERIAL_SPRD_CONSOLE
+	bool "Spreadtrum UART console support"
+	depends on SERIAL_SPRD=y
+	select SERIAL_CORE_CONSOLE
+	select SERIAL_EARLYCON
+	help
+	  Support for early debug console using Spreadtrum's serial. This enables
+	  the console before standard serial driver is probed. This is enabled
+	  with "earlycon" on the kernel command line. The console is
+	  enabled when early_param is processed.
+
 endmenu
 
 config SERIAL_MCTRL_GPIO
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index a1c1085ef75c..431879003ccd 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -93,6 +93,7 @@ obj-$(CONFIG_SERIAL_RP2)	+= rp2.o
 obj-$(CONFIG_SERIAL_FSL_LPUART)	+= fsl_lpuart.o
 obj-$(CONFIG_SERIAL_CONEXANT_DIGICOLOR)	+= digicolor-usart.o
 obj-$(CONFIG_SERIAL_MEN_Z135)	+= men_z135_uart.o
+obj-$(CONFIG_SERIAL_SPRD) += sprd_serial.o
 
 # GPIOLIB helpers for modem control lines
 obj-$(CONFIG_SERIAL_MCTRL_GPIO)	+= serial_mctrl_gpio.o
diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c
new file mode 100644
index 000000000000..594b63331ef4
--- /dev/null
+++ b/drivers/tty/serial/sprd_serial.c
@@ -0,0 +1,793 @@
+/*
+ * Copyright (C) 2012-2015 Spreadtrum Communications Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#if defined(CONFIG_SERIAL_SPRD_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+#define SUPPORT_SYSRQ
+#endif
+
+#include <linux/clk.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
+#include <linux/slab.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+
+/* device name */
+#define UART_NR_MAX		8
+#define SPRD_TTY_NAME		"ttyS"
+#define SPRD_FIFO_SIZE		128
+#define SPRD_DEF_RATE		26000000
+#define SPRD_BAUD_IO_LIMIT	3000000
+#define SPRD_TIMEOUT		256
+
+/* the offset of serial registers and BITs for them */
+/* data registers */
+#define SPRD_TXD		0x0000
+#define SPRD_RXD		0x0004
+
+/* line status register and its BITs  */
+#define SPRD_LSR		0x0008
+#define SPRD_LSR_OE		BIT(4)
+#define SPRD_LSR_FE		BIT(3)
+#define SPRD_LSR_PE		BIT(2)
+#define SPRD_LSR_BI		BIT(7)
+#define SPRD_LSR_TX_OVER	BIT(15)
+
+/* data number in TX and RX fifo */
+#define SPRD_STS1		0x000C
+
+/* interrupt enable register and its BITs */
+#define SPRD_IEN		0x0010
+#define SPRD_IEN_RX_FULL	BIT(0)
+#define SPRD_IEN_TX_EMPTY	BIT(1)
+#define SPRD_IEN_BREAK_DETECT	BIT(7)
+#define SPRD_IEN_TIMEOUT	BIT(13)
+
+/* interrupt clear register */
+#define SPRD_ICLR		0x0014
+
+/* line control register */
+#define SPRD_LCR		0x0018
+#define SPRD_LCR_STOP_1BIT	0x10
+#define SPRD_LCR_STOP_2BIT	0x30
+#define SPRD_LCR_DATA_LEN	(BIT(2) | BIT(3))
+#define SPRD_LCR_DATA_LEN5	0x0
+#define SPRD_LCR_DATA_LEN6	0x4
+#define SPRD_LCR_DATA_LEN7	0x8
+#define SPRD_LCR_DATA_LEN8	0xc
+#define SPRD_LCR_PARITY	(BIT(0) | BIT(1))
+#define SPRD_LCR_PARITY_EN	0x2
+#define SPRD_LCR_EVEN_PAR	0x0
+#define SPRD_LCR_ODD_PAR	0x1
+
+/* control register 1 */
+#define SPRD_CTL1			0x001C
+#define RX_HW_FLOW_CTL_THLD	BIT(6)
+#define RX_HW_FLOW_CTL_EN	BIT(7)
+#define TX_HW_FLOW_CTL_EN	BIT(8)
+#define RX_TOUT_THLD_DEF	0x3E00
+#define RX_HFC_THLD_DEF	0x40
+
+/* fifo threshold register */
+#define SPRD_CTL2		0x0020
+#define THLD_TX_EMPTY	0x40
+#define THLD_RX_FULL	0x40
+
+/* config baud rate register */
+#define SPRD_CLKD0		0x0024
+#define SPRD_CLKD1		0x0028
+
+/* interrupt mask status register */
+#define SPRD_IMSR			0x002C
+#define SPRD_IMSR_RX_FIFO_FULL		BIT(0)
+#define SPRD_IMSR_TX_FIFO_EMPTY	BIT(1)
+#define SPRD_IMSR_BREAK_DETECT		BIT(7)
+#define SPRD_IMSR_TIMEOUT		BIT(13)
+
+struct reg_backup {
+	u32 ien;
+	u32 ctrl0;
+	u32 ctrl1;
+	u32 ctrl2;
+	u32 clkd0;
+	u32 clkd1;
+	u32 dspwait;
+};
+
+struct sprd_uart_port {
+	struct uart_port port;
+	struct reg_backup reg_bak;
+	char name[16];
+};
+
+static struct sprd_uart_port *sprd_port[UART_NR_MAX];
+static int sprd_ports_num;
+
+static inline unsigned int serial_in(struct uart_port *port, int offset)
+{
+	return readl_relaxed(port->membase + offset);
+}
+
+static inline void serial_out(struct uart_port *port, int offset, int value)
+{
+	writel_relaxed(value, port->membase + offset);
+}
+
+static unsigned int sprd_tx_empty(struct uart_port *port)
+{
+	if (serial_in(port, SPRD_STS1) & 0xff00)
+		return 0;
+	else
+		return TIOCSER_TEMT;
+}
+
+static unsigned int sprd_get_mctrl(struct uart_port *port)
+{
+	return TIOCM_DSR | TIOCM_CTS;
+}
+
+static void sprd_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	/* nothing to do */
+}
+
+static void sprd_stop_tx(struct uart_port *port)
+{
+	unsigned int ien, iclr;
+
+	iclr = serial_in(port, SPRD_ICLR);
+	ien = serial_in(port, SPRD_IEN);
+
+	iclr |= SPRD_IEN_TX_EMPTY;
+	ien &= ~SPRD_IEN_TX_EMPTY;
+
+	serial_out(port, SPRD_ICLR, iclr);
+	serial_out(port, SPRD_IEN, ien);
+}
+
+static void sprd_start_tx(struct uart_port *port)
+{
+	unsigned int ien;
+
+	ien = serial_in(port, SPRD_IEN);
+	if (!(ien & SPRD_IEN_TX_EMPTY)) {
+		ien |= SPRD_IEN_TX_EMPTY;
+		serial_out(port, SPRD_IEN, ien);
+	}
+}
+
+static void sprd_stop_rx(struct uart_port *port)
+{
+	unsigned int ien, iclr;
+
+	iclr = serial_in(port, SPRD_ICLR);
+	ien = serial_in(port, SPRD_IEN);
+
+	ien &= ~(SPRD_IEN_RX_FULL | SPRD_IEN_BREAK_DETECT);
+	iclr |= SPRD_IEN_RX_FULL | SPRD_IEN_BREAK_DETECT;
+
+	serial_out(port, SPRD_IEN, ien);
+	serial_out(port, SPRD_ICLR, iclr);
+}
+
+/* The Sprd serial does not support this function. */
+static void sprd_break_ctl(struct uart_port *port, int break_state)
+{
+	/* nothing to do */
+}
+
+static int handle_lsr_errors(struct uart_port *port,
+			     unsigned int *flag,
+			     unsigned int *lsr)
+{
+	int ret = 0;
+
+	/* statistics */
+	if (*lsr & SPRD_LSR_BI) {
+		*lsr &= ~(SPRD_LSR_FE | SPRD_LSR_PE);
+		port->icount.brk++;
+		ret = uart_handle_break(port);
+		if (ret)
+			return ret;
+	} else if (*lsr & SPRD_LSR_PE)
+		port->icount.parity++;
+	else if (*lsr & SPRD_LSR_FE)
+		port->icount.frame++;
+	if (*lsr & SPRD_LSR_OE)
+		port->icount.overrun++;
+
+	/* mask off conditions which should be ignored */
+	*lsr &= port->read_status_mask;
+	if (*lsr & SPRD_LSR_BI)
+		*flag = TTY_BREAK;
+	else if (*lsr & SPRD_LSR_PE)
+		*flag = TTY_PARITY;
+	else if (*lsr & SPRD_LSR_FE)
+		*flag = TTY_FRAME;
+
+	return ret;
+}
+
+static inline void sprd_rx(struct uart_port *port)
+{
+	struct tty_port *tty = &port->state->port;
+	unsigned int ch, flag, lsr, max_count = SPRD_TIMEOUT;
+
+	while ((serial_in(port, SPRD_STS1) & 0x00ff) && max_count--) {
+		lsr = serial_in(port, SPRD_LSR);
+		ch = serial_in(port, SPRD_RXD);
+		flag = TTY_NORMAL;
+		port->icount.rx++;
+
+		if (lsr & (SPRD_LSR_BI | SPRD_LSR_PE |
+			SPRD_LSR_FE | SPRD_LSR_OE))
+			if (handle_lsr_errors(port, &lsr, &flag))
+				continue;
+		if (uart_handle_sysrq_char(port, ch))
+			continue;
+
+		uart_insert_char(port, lsr, SPRD_LSR_OE, ch, flag);
+	}
+
+	tty_flip_buffer_push(tty);
+}
+
+static inline void sprd_tx(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->state->xmit;
+	int count;
+
+	if (port->x_char) {
+		serial_out(port, SPRD_TXD, port->x_char);
+		port->icount.tx++;
+		port->x_char = 0;
+		return;
+	}
+
+	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
+		sprd_stop_tx(port);
+		return;
+	}
+
+	count = THLD_TX_EMPTY;
+	do {
+		serial_out(port, SPRD_TXD, xmit->buf[xmit->tail]);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+		if (uart_circ_empty(xmit))
+			break;
+	} while (--count > 0);
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+
+	if (uart_circ_empty(xmit))
+		sprd_stop_tx(port);
+}
+
+/* this handles the interrupt from one port */
+static irqreturn_t sprd_handle_irq(int irq, void *dev_id)
+{
+	struct uart_port *port = dev_id;
+	unsigned int ims;
+
+	spin_lock(&port->lock);
+
+	ims = serial_in(port, SPRD_IMSR);
+
+	if (!ims)
+		return IRQ_NONE;
+
+	serial_out(port, SPRD_ICLR, ~0);
+
+	if (ims & (SPRD_IMSR_RX_FIFO_FULL |
+		SPRD_IMSR_BREAK_DETECT | SPRD_IMSR_TIMEOUT))
+		sprd_rx(port);
+
+	if (ims & SPRD_IMSR_TX_FIFO_EMPTY)
+		sprd_tx(port);
+
+	spin_unlock(&port->lock);
+
+	return IRQ_HANDLED;
+}
+
+static int sprd_startup(struct uart_port *port)
+{
+	int ret = 0;
+	unsigned int ien, fc;
+	unsigned int timeout;
+	struct sprd_uart_port *sp;
+	unsigned long flags;
+
+	serial_out(port, SPRD_CTL2, ((THLD_TX_EMPTY << 8) | THLD_RX_FULL));
+
+	/* clear rx fifo */
+	timeout = SPRD_TIMEOUT;
+	while (timeout-- && serial_in(port, SPRD_STS1) & 0x00ff)
+		serial_in(port, SPRD_RXD);
+
+	/* clear tx fifo */
+	timeout = SPRD_TIMEOUT;
+	while (timeout-- && serial_in(port, SPRD_STS1) & 0xff00)
+		cpu_relax();
+
+	/* clear interrupt */
+	serial_out(port, SPRD_IEN, 0);
+	serial_out(port, SPRD_ICLR, ~0);
+
+	/* allocate irq */
+	sp = container_of(port, struct sprd_uart_port, port);
+	snprintf(sp->name, sizeof(sp->name), "sprd_serial%d", port->line);
+	ret = devm_request_irq(port->dev, port->irq, sprd_handle_irq,
+				IRQF_SHARED, sp->name, port);
+	if (ret) {
+		dev_err(port->dev, "fail to request serial irq %d, ret=%d\n",
+			port->irq, ret);
+		return ret;
+	}
+	fc = serial_in(port, SPRD_CTL1);
+	fc |= RX_TOUT_THLD_DEF | RX_HFC_THLD_DEF;
+	serial_out(port, SPRD_CTL1, fc);
+
+	/* enable interrupt */
+	spin_lock_irqsave(&port->lock, flags);
+	ien = serial_in(port, SPRD_IEN);
+	ien |= SPRD_IEN_RX_FULL | SPRD_IEN_BREAK_DETECT | SPRD_IEN_TIMEOUT;
+	serial_out(port, SPRD_IEN, ien);
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return 0;
+}
+
+static void sprd_shutdown(struct uart_port *port)
+{
+	serial_out(port, SPRD_IEN, 0);
+	serial_out(port, SPRD_ICLR, ~0);
+	devm_free_irq(port->dev, port->irq, port);
+}
+
+static void sprd_set_termios(struct uart_port *port,
+				    struct ktermios *termios,
+				    struct ktermios *old)
+{
+	unsigned int baud, quot;
+	unsigned int lcr = 0, fc;
+	unsigned long flags;
+
+	/* ask the core to calculate the divisor for us */
+	baud = uart_get_baud_rate(port, termios, old, 0, SPRD_BAUD_IO_LIMIT);
+
+	quot = (unsigned int)((port->uartclk + baud / 2) / baud);
+
+	/* set data length */
+	switch (termios->c_cflag & CSIZE) {
+	case CS5:
+		lcr |= SPRD_LCR_DATA_LEN5;
+		break;
+	case CS6:
+		lcr |= SPRD_LCR_DATA_LEN6;
+		break;
+	case CS7:
+		lcr |= SPRD_LCR_DATA_LEN7;
+		break;
+	case CS8:
+	default:
+		lcr |= SPRD_LCR_DATA_LEN8;
+		break;
+	}
+
+	/* calculate stop bits */
+	lcr &= ~(SPRD_LCR_STOP_1BIT | SPRD_LCR_STOP_2BIT);
+	if (termios->c_cflag & CSTOPB)
+		lcr |= SPRD_LCR_STOP_2BIT;
+	else
+		lcr |= SPRD_LCR_STOP_1BIT;
+
+	/* calculate parity */
+	lcr &= ~SPRD_LCR_PARITY;
+	termios->c_cflag &= ~CMSPAR;	/* no support mark/space */
+	if (termios->c_cflag & PARENB) {
+		lcr |= SPRD_LCR_PARITY_EN;
+		if (termios->c_cflag & PARODD)
+			lcr |= SPRD_LCR_ODD_PAR;
+		else
+			lcr |= SPRD_LCR_EVEN_PAR;
+	}
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* update the per-port timeout */
+	uart_update_timeout(port, termios->c_cflag, baud);
+
+	port->read_status_mask = SPRD_LSR_OE;
+	if (termios->c_iflag & INPCK)
+		port->read_status_mask |= SPRD_LSR_FE | SPRD_LSR_PE;
+	if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK))
+		port->read_status_mask |= SPRD_LSR_BI;
+
+	/* characters to ignore */
+	port->ignore_status_mask = 0;
+	if (termios->c_iflag & IGNPAR)
+		port->ignore_status_mask |= SPRD_LSR_PE | SPRD_LSR_FE;
+	if (termios->c_iflag & IGNBRK) {
+		port->ignore_status_mask |= SPRD_LSR_BI;
+		/*
+		 * If we're ignoring parity and break indicators,
+		 * ignore overruns too (for real raw support).
+		 */
+		if (termios->c_iflag & IGNPAR)
+			port->ignore_status_mask |= SPRD_LSR_OE;
+	}
+
+	/* flow control */
+	fc = serial_in(port, SPRD_CTL1);
+	fc &= ~(RX_HW_FLOW_CTL_THLD | RX_HW_FLOW_CTL_EN | TX_HW_FLOW_CTL_EN);
+	if (termios->c_cflag & CRTSCTS) {
+		fc |= RX_HW_FLOW_CTL_THLD;
+		fc |= RX_HW_FLOW_CTL_EN;
+		fc |= TX_HW_FLOW_CTL_EN;
+	}
+
+	/* clock divider bit0~bit15 */
+	serial_out(port, SPRD_CLKD0, quot & 0xffff);
+
+	/* clock divider bit16~bit20 */
+	serial_out(port, SPRD_CLKD1, (quot & 0x1f0000) >> 16);
+	serial_out(port, SPRD_LCR, lcr);
+	fc |= RX_TOUT_THLD_DEF | RX_HFC_THLD_DEF;
+	serial_out(port, SPRD_CTL1, fc);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	/* Don't rewrite B0 */
+	if (tty_termios_baud_rate(termios))
+		tty_termios_encode_baud_rate(termios, baud, baud);
+}
+
+static const char *sprd_type(struct uart_port *port)
+{
+	return "SPX";
+}
+
+static void sprd_release_port(struct uart_port *port)
+{
+	/* nothing to do */
+}
+
+static int sprd_request_port(struct uart_port *port)
+{
+	return 0;
+}
+
+static void sprd_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE)
+		port->type = PORT_SPRD;
+}
+
+static int sprd_verify_port(struct uart_port *port,
+				   struct serial_struct *ser)
+{
+	if (ser->type != PORT_SPRD)
+		return -EINVAL;
+	if (port->irq != ser->irq)
+		return -EINVAL;
+	return 0;
+}
+
+static struct uart_ops serial_sprd_ops = {
+	.tx_empty = sprd_tx_empty,
+	.get_mctrl = sprd_get_mctrl,
+	.set_mctrl = sprd_set_mctrl,
+	.stop_tx = sprd_stop_tx,
+	.start_tx = sprd_start_tx,
+	.stop_rx = sprd_stop_rx,
+	.break_ctl = sprd_break_ctl,
+	.startup = sprd_startup,
+	.shutdown = sprd_shutdown,
+	.set_termios = sprd_set_termios,
+	.type = sprd_type,
+	.release_port = sprd_release_port,
+	.request_port = sprd_request_port,
+	.config_port = sprd_config_port,
+	.verify_port = sprd_verify_port,
+};
+
+#ifdef CONFIG_SERIAL_SPRD_CONSOLE
+static inline void wait_for_xmitr(struct uart_port *port)
+{
+	unsigned int status, tmout = 10000;
+
+	/* wait up to 10ms for the character(s) to be sent */
+	do {
+		status = serial_in(port, SPRD_STS1);
+		if (--tmout == 0)
+			break;
+		udelay(1);
+	} while (status & 0xff00);
+}
+
+static void sprd_console_putchar(struct uart_port *port, int ch)
+{
+	wait_for_xmitr(port);
+	serial_out(port, SPRD_TXD, ch);
+}
+
+static void sprd_console_write(struct console *co, const char *s,
+				      unsigned int count)
+{
+	struct uart_port *port = &sprd_port[co->index]->port;
+	int locked = 1;
+	unsigned long flags;
+
+	if (port->sysrq)
+		locked = 0;
+	else if (oops_in_progress)
+		locked = spin_trylock_irqsave(&port->lock, flags);
+	else
+		spin_lock_irqsave(&port->lock, flags);
+
+	uart_console_write(port, s, count, sprd_console_putchar);
+
+	/* wait for transmitter to become empty */
+	wait_for_xmitr(port);
+
+	if (locked)
+		spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static int __init sprd_console_setup(struct console *co, char *options)
+{
+	struct uart_port *port;
+	int baud = 115200;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+
+	if (co->index >= UART_NR_MAX || co->index < 0)
+		co->index = 0;
+
+	port = &sprd_port[co->index]->port;
+	if (port == NULL) {
+		pr_info("serial port %d not yet initialized\n", co->index);
+		return -ENODEV;
+	}
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	return uart_set_options(port, co, baud, parity, bits, flow);
+}
+
+static struct uart_driver sprd_uart_driver;
+static struct console sprd_console = {
+	.name = SPRD_TTY_NAME,
+	.write = sprd_console_write,
+	.device = uart_console_device,
+	.setup = sprd_console_setup,
+	.flags = CON_PRINTBUFFER,
+	.index = -1,
+	.data = &sprd_uart_driver,
+};
+
+#define SPRD_CONSOLE	(&sprd_console)
+
+/* Support for earlycon */
+static void sprd_putc(struct uart_port *port, int c)
+{
+	unsigned int timeout = SPRD_TIMEOUT;
+
+	while (timeout-- &&
+		   !(readl(port->membase + SPRD_LSR) & SPRD_LSR_TX_OVER))
+		cpu_relax();
+
+	writeb(c, port->membase + SPRD_TXD);
+}
+
+static void sprd_early_write(struct console *con, const char *s,
+				    unsigned n)
+{
+	struct earlycon_device *dev = con->data;
+
+	uart_console_write(&dev->port, s, n, sprd_putc);
+}
+
+static int __init sprd_early_console_setup(
+				struct earlycon_device *device,
+				const char *opt)
+{
+	if (!device->port.membase)
+		return -ENODEV;
+
+	device->con->write = sprd_early_write;
+	return 0;
+}
+
+EARLYCON_DECLARE(sprd_serial, sprd_early_console_setup);
+OF_EARLYCON_DECLARE(sprd_serial, "sprd,sc9836-uart",
+		    sprd_early_console_setup);
+
+#else /* !CONFIG_SERIAL_SPRD_CONSOLE */
+#define SPRD_CONSOLE		NULL
+#endif
+
+static struct uart_driver sprd_uart_driver = {
+	.owner = THIS_MODULE,
+	.driver_name = "sprd_serial",
+	.dev_name = SPRD_TTY_NAME,
+	.major = 0,
+	.minor = 0,
+	.nr = UART_NR_MAX,
+	.cons = SPRD_CONSOLE,
+};
+
+static int sprd_probe_dt_alias(int index, struct device *dev)
+{
+	struct device_node *np;
+	int ret = index;
+
+	if (!IS_ENABLED(CONFIG_OF))
+		return ret;
+
+	np = dev->of_node;
+	if (!np)
+		return ret;
+
+	ret = of_alias_get_id(np, "serial");
+	if (IS_ERR_VALUE(ret))
+		ret = index;
+	else if (ret >= ARRAY_SIZE(sprd_port) || sprd_port[ret] != NULL) {
+		dev_warn(dev, "requested serial port %d not available.\n", ret);
+		ret = index;
+	}
+
+	return ret;
+}
+
+static int sprd_remove(struct platform_device *dev)
+{
+	struct sprd_uart_port *sup = platform_get_drvdata(dev);
+
+	if (sup) {
+		uart_remove_one_port(&sprd_uart_driver, &sup->port);
+		sprd_port[sup->port.line] = NULL;
+		sprd_ports_num--;
+	}
+
+	if (!sprd_ports_num)
+		uart_unregister_driver(&sprd_uart_driver);
+
+	return 0;
+}
+
+static int sprd_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct uart_port *up;
+	struct clk *clk;
+	int irq;
+	int index;
+	int ret;
+
+	for (index = 0; index < ARRAY_SIZE(sprd_port); index++)
+		if (sprd_port[index] == NULL)
+			break;
+
+	if (index == ARRAY_SIZE(sprd_port))
+		return -EBUSY;
+
+	index = sprd_probe_dt_alias(index, &pdev->dev);
+
+	sprd_port[index] = devm_kzalloc(&pdev->dev,
+		sizeof(*sprd_port[index]), GFP_KERNEL);
+	if (!sprd_port[index])
+		return -ENOMEM;
+
+	up = &sprd_port[index]->port;
+	up->dev = &pdev->dev;
+	up->line = index;
+	up->type = PORT_SPRD;
+	up->iotype = SERIAL_IO_PORT;
+	up->uartclk = SPRD_DEF_RATE;
+	up->fifosize = SPRD_FIFO_SIZE;
+	up->ops = &serial_sprd_ops;
+	up->flags = UPF_BOOT_AUTOCONF;
+
+	clk = devm_clk_get(&pdev->dev, NULL);
+	if (!IS_ERR(clk))
+		up->uartclk = clk_get_rate(clk);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "not provide mem resource\n");
+		return -ENODEV;
+	}
+	up->mapbase = res->start;
+	up->membase = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(up->membase))
+		return PTR_ERR(up->membase);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "not provide irq resource\n");
+		return -ENODEV;
+	}
+	up->irq = irq;
+
+	if (!sprd_ports_num) {
+		ret = uart_register_driver(&sprd_uart_driver);
+		if (ret < 0) {
+			pr_err("Failed to register SPRD-UART driver\n");
+			return ret;
+		}
+	}
+	sprd_ports_num++;
+
+	ret = uart_add_one_port(&sprd_uart_driver, up);
+	if (ret) {
+		sprd_port[index] = NULL;
+		sprd_remove(pdev);
+	}
+
+	platform_set_drvdata(pdev, up);
+
+	return ret;
+}
+
+static int sprd_suspend(struct device *dev)
+{
+	struct sprd_uart_port *sup = dev_get_drvdata(dev);
+
+	uart_suspend_port(&sprd_uart_driver, &sup->port);
+
+	return 0;
+}
+
+static int sprd_resume(struct device *dev)
+{
+	struct sprd_uart_port *sup = dev_get_drvdata(dev);
+
+	uart_resume_port(&sprd_uart_driver, &sup->port);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(sprd_pm_ops, sprd_suspend, sprd_resume);
+
+static const struct of_device_id serial_ids[] = {
+	{.compatible = "sprd,sc9836-uart",},
+	{}
+};
+
+static struct platform_driver sprd_platform_driver = {
+	.probe		= sprd_probe,
+	.remove		= sprd_remove,
+	.driver		= {
+		.name	= "sprd_serial",
+		.of_match_table = of_match_ptr(serial_ids),
+		.pm	= &sprd_pm_ops,
+	},
+};
+
+module_platform_driver(sprd_platform_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Spreadtrum SoC serial driver series");
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index ee628c489e47..55da91e763bb 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -252,4 +252,7 @@
 /* Conexant Digicolor */
 #define PORT_DIGICOLOR	110
 
+/* SPRD SERIAL  */
+#define PORT_SPRD	111
+
 #endif /* _UAPILINUX_SERIAL_CORE_H */
-- 
cgit v1.2.3-71-gd317


From 447b27c4f29b510b98e99395120d635f009ed563 Mon Sep 17 00:00:00 2001
From: Christophe Ricard <christophe.ricard@gmail.com>
Date: Sun, 1 Feb 2015 22:26:16 +0100
Subject: NFC: Forward NFC_EVT_TRANSACTION to user space

NFC_EVT_TRANSACTION is sent through netlink in order for a
specific application running on a secure element to notify
userspace of an event. Typically the secure element application
counterpart on the host could interpret that event and act
upon it.

Forwarded information contains:
- SE host generating the event
- Application IDentifier doing the operation
- Applications parameters

Signed-off-by: Christophe Ricard <christophe-h.ricard@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/nfc.h    | 27 +++++++++++++++++++++++++++
 include/uapi/linux/nfc.h |  1 +
 net/nfc/core.c           | 21 +++++++++++++++++++++
 net/nfc/netlink.c        | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 net/nfc/nfc.h            |  2 ++
 5 files changed, 98 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 12adb817c27a..73190e65d5c1 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -135,6 +135,31 @@ struct nfc_se {
 	u16 state;
 };
 
+/**
+ * nfc_evt_transaction - A struct for NFC secure element event transaction.
+ *
+ * @aid: The application identifier triggering the event
+ *
+ * @aid_len: The application identifier length [5:16]
+ *
+ * @params: The application parameters transmitted during the transaction
+ *
+ * @params_len: The applications parameters length [0:255]
+ *
+ */
+#define NFC_MIN_AID_LENGTH	5
+#define	NFC_MAX_AID_LENGTH	16
+#define NFC_MAX_PARAMS_LENGTH	255
+
+#define NFC_EVT_TRANSACTION_AID_TAG	0x81
+#define NFC_EVT_TRANSACTION_PARAMS_TAG	0x82
+struct nfc_evt_transaction {
+	u32 aid_len;
+	u8 aid[NFC_MAX_AID_LENGTH];
+	u8 params_len;
+	u8 params[NFC_MAX_PARAMS_LENGTH];
+} __packed;
+
 struct nfc_genl_data {
 	u32 poll_req_portid;
 	struct mutex genl_data_mutex;
@@ -262,6 +287,8 @@ int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb);
 
 void nfc_driver_failure(struct nfc_dev *dev, int err);
 
+int nfc_se_transaction(struct nfc_dev *dev, u8 se_idx,
+		       struct nfc_evt_transaction *evt_transaction);
 int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type);
 int nfc_remove_se(struct nfc_dev *dev, u32 se_idx);
 struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx);
diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index 8119255feae4..c1e2e63cf9b5 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -183,6 +183,7 @@ enum nfc_attrs {
 	NFC_ATTR_SE_APDU,
 	NFC_ATTR_TARGET_ISO15693_DSFID,
 	NFC_ATTR_TARGET_ISO15693_UID,
+	NFC_ATTR_SE_PARAMS,
 /* private: internal use only */
 	__NFC_ATTR_AFTER_LAST
 };
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 7f1b6351755c..cff3f1614ad4 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -932,6 +932,27 @@ int nfc_remove_se(struct nfc_dev *dev, u32 se_idx)
 }
 EXPORT_SYMBOL(nfc_remove_se);
 
+int nfc_se_transaction(struct nfc_dev *dev, u8 se_idx,
+		       struct nfc_evt_transaction *evt_transaction)
+{
+	int rc;
+
+	pr_debug("transaction: %x\n", se_idx);
+
+	device_lock(&dev->dev);
+
+	if (!evt_transaction) {
+		rc = -EPROTO;
+		goto out;
+	}
+
+	rc = nfc_genl_se_transaction(dev, se_idx, evt_transaction);
+out:
+	device_unlock(&dev->dev);
+	return rc;
+}
+EXPORT_SYMBOL(nfc_se_transaction);
+
 static void nfc_release(struct device *d)
 {
 	struct nfc_dev *dev = to_nfc_dev(d);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index be387e6219a0..14a2d11581da 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -497,6 +497,53 @@ free_msg:
 	return -EMSGSIZE;
 }
 
+int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx,
+			    struct nfc_evt_transaction *evt_transaction)
+{
+	struct nfc_se *se;
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
+			  NFC_EVENT_SE_TRANSACTION);
+	if (!hdr)
+		goto free_msg;
+
+	se = nfc_find_se(dev, se_idx);
+	if (!se)
+		goto free_msg;
+
+	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
+	    nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) ||
+	    nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type) ||
+	    nla_put(msg, NFC_ATTR_SE_AID, evt_transaction->aid_len,
+		    evt_transaction->aid) ||
+	    nla_put(msg, NFC_ATTR_SE_PARAMS, evt_transaction->params_len,
+		    evt_transaction->params))
+		goto nla_put_failure;
+
+	/* evt_transaction is no more used */
+	devm_kfree(&dev->dev, evt_transaction);
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+free_msg:
+	/* evt_transaction is no more used */
+	devm_kfree(&dev->dev, evt_transaction);
+	nlmsg_free(msg);
+	return -EMSGSIZE;
+}
+
 static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
 				u32 portid, u32 seq,
 				struct netlink_callback *cb,
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 88d60064890e..a8ce80b47720 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -100,6 +100,8 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list);
 
 int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type);
 int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx);
+int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx,
+			    struct nfc_evt_transaction *evt_transaction);
 
 struct nfc_dev *nfc_get_device(unsigned int idx);
 
-- 
cgit v1.2.3-71-gd317


From 49ca0d8bfaf3bc46d5eef60ce67b00eb195bd392 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 30 Jan 2015 13:29:31 -0500
Subject: net-timestamp: no-payload option

Add timestamping option SOF_TIMESTAMPING_OPT_TSONLY. For transmit
timestamps, this loops timestamps on top of empty packets.

Doing so reduces the pressure on SO_RCVBUF. Payload inspection and
cmsg reception (aside from timestamps) are no longer possible. This
works together with a follow on patch that allows administrators to
only allow tx timestamping if it does not loop payload or metadata.

Signed-off-by: Willem de Bruijn <willemb@google.com>

----

Changes (rfc -> v1)
  - add documentation
  - remove unnecessary skb->len test (thanks to Richard Cochran)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/timestamping.txt | 21 +++++++++++++++++++++
 include/uapi/linux/net_tstamp.h           |  3 ++-
 net/core/skbuff.c                         | 19 ++++++++++++++-----
 net/ipv4/ip_sockglue.c                    |  7 ++++---
 net/ipv6/datagram.c                       |  5 ++---
 net/rxrpc/ar-error.c                      |  5 +++++
 6 files changed, 48 insertions(+), 12 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index a5c784c89312..5f0922613f1a 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -162,6 +162,27 @@ SOF_TIMESTAMPING_OPT_CMSG:
   option IP_PKTINFO simultaneously.
 
 
+SOF_TIMESTAMPING_OPT_TSONLY:
+
+  Applies to transmit timestamps only. Makes the kernel return the
+  timestamp as a cmsg alongside an empty packet, as opposed to
+  alongside the original packet. This reduces the amount of memory
+  charged to the socket's receive budget (SO_RCVBUF) and delivers
+  the timestamp even if sysctl net.core.tstamp_allow_data is 0.
+  This option disables SOF_TIMESTAMPING_OPT_CMSG.
+
+
+New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
+disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
+regardless of the setting of sysctl net.core.tstamp_allow_data.
+
+An exception is when a process needs additional cmsg data, for
+instance SOL_IP/IP_PKTINFO to detect the egress network interface.
+Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on
+having access to the contents of the original packet, so cannot be
+combined with SOF_TIMESTAMPING_OPT_TSONLY.
+
+
 1.4 Bytestream Timestamps
 
 The SO_TIMESTAMPING interface supports timestamping of bytes in a
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index edbc888ceb51..6d1abea9746e 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -24,8 +24,9 @@ enum {
 	SOF_TIMESTAMPING_TX_SCHED = (1<<8),
 	SOF_TIMESTAMPING_TX_ACK = (1<<9),
 	SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
+	SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
 
-	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG,
+	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY,
 	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 				 SOF_TIMESTAMPING_LAST
 };
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 56db472e9b86..65a3798f43e6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3710,19 +3710,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 		     struct sock *sk, int tstype)
 {
 	struct sk_buff *skb;
+	bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
 
 	if (!sk)
 		return;
 
-	if (hwtstamps)
-		*skb_hwtstamps(orig_skb) = *hwtstamps;
+	if (tsonly)
+		skb = alloc_skb(0, GFP_ATOMIC);
 	else
-		orig_skb->tstamp = ktime_get_real();
-
-	skb = skb_clone(orig_skb, GFP_ATOMIC);
+		skb = skb_clone(orig_skb, GFP_ATOMIC);
 	if (!skb)
 		return;
 
+	if (tsonly) {
+		skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
+		skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
+	}
+
+	if (hwtstamps)
+		*skb_hwtstamps(skb) = *hwtstamps;
+	else
+		skb->tstamp = ktime_get_real();
+
 	__skb_complete_tx_timestamp(skb, sk, tstype);
 }
 EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index db5e0f81ce0a..31d8c71986b4 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -483,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
 	serr = SKB_EXT_ERR(skb);
 
-	if (sin) {
+	if (sin && skb->len) {
 		sin->sin_family = AF_INET;
 		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
 						   serr->addr_offset);
@@ -496,8 +496,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 	sin = &errhdr.offender;
 	memset(sin, 0, sizeof(*sin));
 
-	if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
-	    ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) {
+	if (skb->len &&
+	    (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+	     ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) {
 		sin->sin_family = AF_INET;
 		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		if (inet_sk(sk)->cmsg_flags)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 49f5e73db122..c215be70cac0 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -369,7 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
 	serr = SKB_EXT_ERR(skb);
 
-	if (sin) {
+	if (sin && skb->len) {
 		const unsigned char *nh = skb_network_header(skb);
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
@@ -394,8 +394,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
 	sin = &errhdr.offender;
 	memset(sin, 0, sizeof(*sin));
-
-	if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
+	if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) {
 		sin->sin6_family = AF_INET6;
 		if (np->rxopt.all) {
 			if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index 74c0fcd36838..5394b6be46ec 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -42,6 +42,11 @@ void rxrpc_UDP_error_report(struct sock *sk)
 		_leave("UDP socket errqueue empty");
 		return;
 	}
+	if (!skb->len) {
+		_leave("UDP empty message");
+		kfree_skb(skb);
+		return;
+	}
 
 	rxrpc_new_skb(skb);
 
-- 
cgit v1.2.3-71-gd317


From eb710b152003ea74561512b3f55c1e3c71dcef39 Mon Sep 17 00:00:00 2001
From: Satoru Takeuchi <takeuchi_satoru@jp.fujitsu.com>
Date: Wed, 24 Dec 2014 14:52:04 +0900
Subject: Btrfs: Remove unnecessary placeholder in btrfs_err_code

"notused" is not necessary. Set 1 to the first entry is enough.

Signed-off-by: Takeuchi Satoru <takeuchi_satoru@jp.fujitsu.com
Cc: Gui Hecheng <guihc.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <clm@fb.com>
---
 include/uapi/linux/btrfs.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 611e1c5893b4..b6dec05c7196 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -495,8 +495,7 @@ struct btrfs_ioctl_send_args {
 
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
-	notused,
-	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
+	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
 	BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
 	BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
 	BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
-- 
cgit v1.2.3-71-gd317


From 06eb395fa9856b5a87cf7d80baee2a0ed3cdb9d7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 4 Feb 2015 21:30:40 -0800
Subject: pkt_sched: fq: better control of DDOS traffic

FQ has a fast path for skb attached to a socket, as it does not
have to compute a flow hash. But for other packets, FQ being non
stochastic means that hosts exposed to random Internet traffic
can allocate million of flows structure (104 bytes each) pretty
easily. Not only host can OOM, but lookup in RB trees can take
too much cpu and memory resources.

This patch adds a new attribute, orphan_mask, that is adding
possibility of having a stochastic hash for orphaned skb.

Its default value is 1024 slots, to mimic SFQ behavior.

Note: This does not apply to locally generated TCP traffic,
and no locally generated traffic will share a flow structure
with another perfect or stochastic flow.

This patch also handles the specific case of SYNACK messages:

They are attached to the listener socket, and therefore all map
to a single hash bucket. If listener have set SO_MAX_PACING_RATE,
hoping to have new accepted socket inherit this rate, SYNACK
might be paced and even dropped.

This is very similar to an internal patch Google have used more
than one year.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h |  2 ++
 net/sched/sch_fq.c             | 19 +++++++++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index d62316baae94..534b84710745 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -774,6 +774,8 @@ enum {
 
 	TCA_FQ_FLOW_REFILL_DELAY,	/* flow credit refill delay in usec */
 
+	TCA_FQ_ORPHAN_MASK,	/* mask applied to orphaned skb hashes */
+
 	__TCA_FQ_MAX
 };
 
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 69a3dbf55c60..a00c43043001 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -93,6 +93,7 @@ struct fq_sched_data {
 	u32		flow_refill_delay;
 	u32		flow_max_rate;	/* optional max rate per flow */
 	u32		flow_plimit;	/* max packets per flow */
+	u32		orphan_mask;	/* mask for orphaned skb */
 	struct rb_root	*fq_root;
 	u8		rate_enable;
 	u8		fq_trees_log;
@@ -223,11 +224,20 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
 	if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
 		return &q->internal;
 
-	if (unlikely(!sk)) {
+	/* SYNACK messages are attached to a listener socket.
+	 * 1) They are not part of a 'flow' yet
+	 * 2) We do not want to rate limit them (eg SYNFLOOD attack),
+	 *    especially if the listener set SO_MAX_PACING_RATE
+	 * 3) We pretend they are orphaned
+	 */
+	if (!sk || sk->sk_state == TCP_LISTEN) {
+		unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
+
 		/* By forcing low order bit to 1, we make sure to not
 		 * collide with a local flow (socket pointers are word aligned)
 		 */
-		sk = (struct sock *)(skb_get_hash(skb) | 1L);
+		sk = (struct sock *)((hash << 1) | 1UL);
+		skb_orphan(skb);
 	}
 
 	root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
@@ -704,6 +714,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 		q->flow_refill_delay = usecs_to_jiffies(usecs_delay);
 	}
 
+	if (tb[TCA_FQ_ORPHAN_MASK])
+		q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
+
 	if (!err) {
 		sch_tree_unlock(sch);
 		err = fq_resize(sch, fq_log);
@@ -749,6 +762,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->delayed		= RB_ROOT;
 	q->fq_root		= NULL;
 	q->fq_trees_log		= ilog2(1024);
+	q->orphan_mask		= 1024 - 1;
 	qdisc_watchdog_init(&q->watchdog, sch);
 
 	if (opt)
@@ -778,6 +792,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	    nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
 			jiffies_to_usecs(q->flow_refill_delay)) ||
+	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
 	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
 		goto nla_put_failure;
 
-- 
cgit v1.2.3-71-gd317


From 0ae45f63d4ef8d8eeec49c7d8b44a1775fff13e8 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 2 Feb 2015 00:37:00 -0500
Subject: vfs: add support for a lazytime mount option

Add a new mount option which enables a new "lazytime" mode.  This mode
causes atime, mtime, and ctime updates to only be made to the
in-memory version of the inode.  The on-disk times will only get
updated when (a) if the inode needs to be updated for some non-time
related change, (b) if userspace calls fsync(), syncfs() or sync(), or
(c) just before an undeleted inode is evicted from memory.

This is OK according to POSIX because there are no guarantees after a
crash unless userspace explicitly requests via a fsync(2) call.

For workloads which feature a large number of random write to a
preallocated file, the lazytime mount option significantly reduces
writes to the inode table.  The repeated 4k writes to a single block
will result in undesirable stress on flash devices and SMR disk
drives.  Even on conventional HDD's, the repeated writes to the inode
table block will trigger Adjacent Track Interference (ATI) remediation
latencies, which very negatively impact long tail latencies --- which
is a very big deal for web serving tiers (for example).

Google-Bug-Id: 18297052

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/inode.c                  |  6 ++++
 fs/fs-writeback.c                | 62 +++++++++++++++++++++++++++++++++-------
 fs/gfs2/file.c                   |  4 +--
 fs/inode.c                       | 56 +++++++++++++++++++++++++-----------
 fs/jfs/file.c                    |  2 +-
 fs/libfs.c                       |  2 +-
 fs/proc_namespace.c              |  1 +
 fs/sync.c                        |  8 ++++++
 include/linux/backing-dev.h      |  1 +
 include/linux/fs.h               |  5 ++++
 include/trace/events/writeback.h | 60 +++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/fs.h          |  4 ++-
 mm/backing-dev.c                 | 10 +++++--
 13 files changed, 186 insertions(+), 35 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5653fa42930b..628df5ba44a6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4840,11 +4840,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
  * If the inode is marked synchronous, we don't honour that here - doing
  * so would cause a commit on atime updates, which we don't bother doing.
  * We handle synchronous inodes at the highest possible level.
+ *
+ * If only the I_DIRTY_TIME flag is set, we can skip everything.  If
+ * I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need
+ * to copy into the on-disk inode structure are the timestamp files.
  */
 void ext4_dirty_inode(struct inode *inode, int flags)
 {
 	handle_t *handle;
 
+	if (flags == I_DIRTY_TIME)
+		return;
 	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
 	if (IS_ERR(handle))
 		goto out;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 2d609a5fbfea..004686191354 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -247,14 +247,19 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
 	return ret;
 }
 
+#define EXPIRE_DIRTY_ATIME 0x0001
+
 /*
  * Move expired (dirtied before work->older_than_this) dirty inodes from
  * @delaying_queue to @dispatch_queue.
  */
 static int move_expired_inodes(struct list_head *delaying_queue,
 			       struct list_head *dispatch_queue,
+			       int flags,
 			       struct wb_writeback_work *work)
 {
+	unsigned long *older_than_this = NULL;
+	unsigned long expire_time;
 	LIST_HEAD(tmp);
 	struct list_head *pos, *node;
 	struct super_block *sb = NULL;
@@ -262,13 +267,21 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 	int do_sb_sort = 0;
 	int moved = 0;
 
+	if ((flags & EXPIRE_DIRTY_ATIME) == 0)
+		older_than_this = work->older_than_this;
+	else if ((work->reason == WB_REASON_SYNC) == 0) {
+		expire_time = jiffies - (HZ * 86400);
+		older_than_this = &expire_time;
+	}
 	while (!list_empty(delaying_queue)) {
 		inode = wb_inode(delaying_queue->prev);
-		if (work->older_than_this &&
-		    inode_dirtied_after(inode, *work->older_than_this))
+		if (older_than_this &&
+		    inode_dirtied_after(inode, *older_than_this))
 			break;
 		list_move(&inode->i_wb_list, &tmp);
 		moved++;
+		if (flags & EXPIRE_DIRTY_ATIME)
+			set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
 		if (sb_is_blkdev_sb(inode->i_sb))
 			continue;
 		if (sb && sb != inode->i_sb)
@@ -309,9 +322,12 @@ out:
 static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
 {
 	int moved;
+
 	assert_spin_locked(&wb->list_lock);
 	list_splice_init(&wb->b_more_io, &wb->b_io);
-	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
+	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
+	moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
+				     EXPIRE_DIRTY_ATIME, work);
 	trace_writeback_queue_io(wb, work, moved);
 }
 
@@ -435,6 +451,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
 		 * updates after data IO completion.
 		 */
 		redirty_tail(inode, wb);
+	} else if (inode->i_state & I_DIRTY_TIME) {
+		list_move(&inode->i_wb_list, &wb->b_dirty_time);
 	} else {
 		/* The inode is clean. Remove from writeback lists. */
 		list_del_init(&inode->i_wb_list);
@@ -481,7 +499,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	spin_lock(&inode->i_lock);
 
 	dirty = inode->i_state & I_DIRTY;
-	inode->i_state &= ~I_DIRTY;
+	if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
+	     (inode->i_state & I_DIRTY_TIME)) ||
+	    (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
+		dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
+		trace_writeback_lazytime(inode);
+	}
+	inode->i_state &= ~dirty;
 
 	/*
 	 * Paired with smp_mb() in __mark_inode_dirty().  This allows
@@ -501,8 +525,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
 	spin_unlock(&inode->i_lock);
 
+	if (dirty & I_DIRTY_TIME)
+		mark_inode_dirty_sync(inode);
 	/* Don't write the inode if only I_DIRTY_PAGES was set */
-	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+	if (dirty & ~I_DIRTY_PAGES) {
 		int err = write_inode(inode, wbc);
 		if (ret == 0)
 			ret = err;
@@ -550,7 +576,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	 * make sure inode is on some writeback list and leave it there unless
 	 * we have completely cleaned the inode.
 	 */
-	if (!(inode->i_state & I_DIRTY) &&
+	if (!(inode->i_state & I_DIRTY_ALL) &&
 	    (wbc->sync_mode != WB_SYNC_ALL ||
 	     !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
 		goto out;
@@ -565,7 +591,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 	 * If inode is clean, remove it from writeback lists. Otherwise don't
 	 * touch it. See comment above for explanation.
 	 */
-	if (!(inode->i_state & I_DIRTY))
+	if (!(inode->i_state & I_DIRTY_ALL))
 		list_del_init(&inode->i_wb_list);
 	spin_unlock(&wb->list_lock);
 	inode_sync_complete(inode);
@@ -707,7 +733,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 		wrote += write_chunk - wbc.nr_to_write;
 		spin_lock(&wb->list_lock);
 		spin_lock(&inode->i_lock);
-		if (!(inode->i_state & I_DIRTY))
+		if (!(inode->i_state & I_DIRTY_ALL))
 			wrote++;
 		requeue_inode(inode, wb, &wbc);
 		inode_sync_complete(inode);
@@ -1145,16 +1171,20 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
  * page->mapping->host, so the page-dirtying time is recorded in the internal
  * blockdev inode.
  */
+#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
 void __mark_inode_dirty(struct inode *inode, int flags)
 {
 	struct super_block *sb = inode->i_sb;
 	struct backing_dev_info *bdi = NULL;
+	int dirtytime;
+
+	trace_writeback_mark_inode_dirty(inode, flags);
 
 	/*
 	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
 	 * dirty the inode itself
 	 */
-	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) {
 		trace_writeback_dirty_inode_start(inode, flags);
 
 		if (sb->s_op->dirty_inode)
@@ -1162,6 +1192,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 
 		trace_writeback_dirty_inode(inode, flags);
 	}
+	if (flags & I_DIRTY_INODE)
+		flags &= ~I_DIRTY_TIME;
+	dirtytime = flags & I_DIRTY_TIME;
 
 	/*
 	 * Paired with smp_mb() in __writeback_single_inode() for the
@@ -1169,16 +1202,21 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 	 */
 	smp_mb();
 
-	if ((inode->i_state & flags) == flags)
+	if (((inode->i_state & flags) == flags) ||
+	    (dirtytime && (inode->i_state & I_DIRTY_INODE)))
 		return;
 
 	if (unlikely(block_dump))
 		block_dump___mark_inode_dirty(inode);
 
 	spin_lock(&inode->i_lock);
+	if (dirtytime && (inode->i_state & I_DIRTY_INODE))
+		goto out_unlock_inode;
 	if ((inode->i_state & flags) != flags) {
 		const int was_dirty = inode->i_state & I_DIRTY;
 
+		if (flags & I_DIRTY_INODE)
+			inode->i_state &= ~I_DIRTY_TIME;
 		inode->i_state |= flags;
 
 		/*
@@ -1225,8 +1263,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			}
 
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+			list_move(&inode->i_wb_list, dirtytime ?
+				  &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
 			spin_unlock(&bdi->wb.list_lock);
+			trace_writeback_dirty_inode_enqueue(inode);
 
 			if (wakeup_bdi)
 				bdi_wakeup_thread_delayed(bdi);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6e600abf694a..15c44cf457cc 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -655,7 +655,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
 {
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
-	int sync_state = inode->i_state & I_DIRTY;
+	int sync_state = inode->i_state & I_DIRTY_ALL;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	int ret = 0, ret1 = 0;
 
@@ -668,7 +668,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
 	if (!gfs2_is_jdata(ip))
 		sync_state &= ~I_DIRTY_PAGES;
 	if (datasync)
-		sync_state &= ~I_DIRTY_SYNC;
+		sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);
 
 	if (sync_state) {
 		ret = sync_inode_metadata(inode, 1);
diff --git a/fs/inode.c b/fs/inode.c
index aa149e7262ac..4feb85cc125f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -18,6 +18,7 @@
 #include <linux/buffer_head.h> /* for inode_has_buffers */
 #include <linux/ratelimit.h>
 #include <linux/list_lru.h>
+#include <trace/events/writeback.h>
 #include "internal.h"
 
 /*
@@ -30,7 +31,7 @@
  * inode_sb_list_lock protects:
  *   sb->s_inodes, inode->i_sb_list
  * bdi->wb.list_lock protects:
- *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
+ *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
  * inode_hash_lock protects:
  *   inode_hashtable, inode->i_hash
  *
@@ -416,7 +417,8 @@ static void inode_lru_list_add(struct inode *inode)
  */
 void inode_add_lru(struct inode *inode)
 {
-	if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
+	if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
+				I_FREEING | I_WILL_FREE)) &&
 	    !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
 		inode_lru_list_add(inode);
 }
@@ -647,7 +649,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
 			spin_unlock(&inode->i_lock);
 			continue;
 		}
-		if (inode->i_state & I_DIRTY && !kill_dirty) {
+		if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
 			spin_unlock(&inode->i_lock);
 			busy = 1;
 			continue;
@@ -1432,11 +1434,20 @@ static void iput_final(struct inode *inode)
  */
 void iput(struct inode *inode)
 {
-	if (inode) {
-		BUG_ON(inode->i_state & I_CLEAR);
-
-		if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
-			iput_final(inode);
+	if (!inode)
+		return;
+	BUG_ON(inode->i_state & I_CLEAR);
+retry:
+	if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
+		if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
+			atomic_inc(&inode->i_count);
+			inode->i_state &= ~I_DIRTY_TIME;
+			spin_unlock(&inode->i_lock);
+			trace_writeback_lazytime_iput(inode);
+			mark_inode_dirty_sync(inode);
+			goto retry;
+		}
+		iput_final(inode);
 	}
 }
 EXPORT_SYMBOL(iput);
@@ -1495,14 +1506,9 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
 	return 0;
 }
 
-/*
- * This does the actual work of updating an inodes time or version.  Must have
- * had called mnt_want_write() before calling this.
- */
-static int update_time(struct inode *inode, struct timespec *time, int flags)
+int generic_update_time(struct inode *inode, struct timespec *time, int flags)
 {
-	if (inode->i_op->update_time)
-		return inode->i_op->update_time(inode, time, flags);
+	int iflags = I_DIRTY_TIME;
 
 	if (flags & S_ATIME)
 		inode->i_atime = *time;
@@ -1512,9 +1518,27 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
 		inode->i_ctime = *time;
 	if (flags & S_MTIME)
 		inode->i_mtime = *time;
-	mark_inode_dirty_sync(inode);
+
+	if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION))
+		iflags |= I_DIRTY_SYNC;
+	__mark_inode_dirty(inode, iflags);
 	return 0;
 }
+EXPORT_SYMBOL(generic_update_time);
+
+/*
+ * This does the actual work of updating an inodes time or version.  Must have
+ * had called mnt_want_write() before calling this.
+ */
+static int update_time(struct inode *inode, struct timespec *time, int flags)
+{
+	int (*update_time)(struct inode *, struct timespec *, int);
+
+	update_time = inode->i_op->update_time ? inode->i_op->update_time :
+		generic_update_time;
+
+	return update_time(inode, time, flags);
+}
 
 /**
  *	touch_atime	-	update the access time
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 33aa0cc1f8b8..10815f8dfd8b 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -39,7 +39,7 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 		return rc;
 
 	mutex_lock(&inode->i_mutex);
-	if (!(inode->i_state & I_DIRTY) ||
+	if (!(inode->i_state & I_DIRTY_ALL) ||
 	    (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
 		/* Make sure committed changes hit the disk */
 		jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
diff --git a/fs/libfs.c b/fs/libfs.c
index 005843ce5dbd..b2ffdb045be4 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -948,7 +948,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
 
 	mutex_lock(&inode->i_mutex);
 	ret = sync_mapping_buffers(inode->i_mapping);
-	if (!(inode->i_state & I_DIRTY))
+	if (!(inode->i_state & I_DIRTY_ALL))
 		goto out;
 	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
 		goto out;
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 0f96f71ab32b..8db932da4009 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -44,6 +44,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb)
 		{ MS_SYNCHRONOUS, ",sync" },
 		{ MS_DIRSYNC, ",dirsync" },
 		{ MS_MANDLOCK, ",mand" },
+		{ MS_LAZYTIME, ",lazytime" },
 		{ 0, NULL }
 	};
 	const struct proc_fs_info *fs_infop;
diff --git a/fs/sync.c b/fs/sync.c
index 01d9f18a70b5..fbc98ee62044 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -177,8 +177,16 @@ SYSCALL_DEFINE1(syncfs, int, fd)
  */
 int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
 {
+	struct inode *inode = file->f_mapping->host;
+
 	if (!file->f_op->fsync)
 		return -EINVAL;
+	if (!datasync && (inode->i_state & I_DIRTY_TIME)) {
+		spin_lock(&inode->i_lock);
+		inode->i_state &= ~I_DIRTY_TIME;
+		spin_unlock(&inode->i_lock);
+		mark_inode_dirty_sync(inode);
+	}
 	return file->f_op->fsync(file, start, end, datasync);
 }
 EXPORT_SYMBOL(vfs_fsync_range);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 5da6012b7a14..4cdf7336f64a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -55,6 +55,7 @@ struct bdi_writeback {
 	struct list_head b_dirty;	/* dirty inodes */
 	struct list_head b_io;		/* parked for writeback */
 	struct list_head b_more_io;	/* parked for more writeback */
+	struct list_head b_dirty_time;	/* time stamps are dirty */
 	spinlock_t list_lock;		/* protects the b_* lists */
 };
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 42efe13077b6..cd027ce2c705 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1746,8 +1746,12 @@ struct super_operations {
 #define __I_DIO_WAKEUP		9
 #define I_DIO_WAKEUP		(1 << I_DIO_WAKEUP)
 #define I_LINKABLE		(1 << 10)
+#define I_DIRTY_TIME		(1 << 11)
+#define __I_DIRTY_TIME_EXPIRED	12
+#define I_DIRTY_TIME_EXPIRED	(1 << __I_DIRTY_TIME_EXPIRED)
 
 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
+#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
 
 extern void __mark_inode_dirty(struct inode *, int);
 static inline void mark_inode_dirty(struct inode *inode)
@@ -1910,6 +1914,7 @@ extern int current_umask(void);
 
 extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
+extern int generic_update_time(struct inode *, struct timespec *, int);
 
 static inline struct inode *file_inode(const struct file *f)
 {
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index cee02d65ab3f..5ecb4c234625 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -18,6 +18,8 @@
 		{I_FREEING,		"I_FREEING"},		\
 		{I_CLEAR,		"I_CLEAR"},		\
 		{I_SYNC,		"I_SYNC"},		\
+		{I_DIRTY_TIME,		"I_DIRTY_TIME"},	\
+		{I_DIRTY_TIME_EXPIRED,	"I_DIRTY_TIME_EXPIRED"}, \
 		{I_REFERENCED,		"I_REFERENCED"}		\
 	)
 
@@ -68,6 +70,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
 	TP_STRUCT__entry (
 		__array(char, name, 32)
 		__field(unsigned long, ino)
+		__field(unsigned long, state)
 		__field(unsigned long, flags)
 	),
 
@@ -78,16 +81,25 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
 		strncpy(__entry->name,
 			bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
 		__entry->ino		= inode->i_ino;
+		__entry->state		= inode->i_state;
 		__entry->flags		= flags;
 	),
 
-	TP_printk("bdi %s: ino=%lu flags=%s",
+	TP_printk("bdi %s: ino=%lu state=%s flags=%s",
 		__entry->name,
 		__entry->ino,
+		show_inode_state(__entry->state),
 		show_inode_state(__entry->flags)
 	)
 );
 
+DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty,
+
+	TP_PROTO(struct inode *inode, int flags),
+
+	TP_ARGS(inode, flags)
+);
+
 DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start,
 
 	TP_PROTO(struct inode *inode, int flags),
@@ -598,6 +610,52 @@ DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode,
 	TP_ARGS(inode, wbc, nr_to_write)
 );
 
+DECLARE_EVENT_CLASS(writeback_lazytime_template,
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(unsigned long,	ino			)
+		__field(unsigned long,	state			)
+		__field(	__u16, mode			)
+		__field(unsigned long, dirtied_when		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->ino	= inode->i_ino;
+		__entry->state	= inode->i_state;
+		__entry->mode	= inode->i_mode;
+		__entry->dirtied_when = inode->dirtied_when;
+	),
+
+	TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino, __entry->dirtied_when,
+		  show_inode_state(__entry->state), __entry->mode)
+);
+
+DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime,
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode)
+);
+
+DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime_iput,
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode)
+);
+
+DEFINE_EVENT(writeback_lazytime_template, writeback_dirty_inode_enqueue,
+
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode)
+);
+
 #endif /* _TRACE_WRITEBACK_H */
 
 /* This part must be outside protection */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 3735fa0a6784..9b964a5920af 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -90,6 +90,7 @@ struct inodes_stat_t {
 #define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */
 #define MS_I_VERSION	(1<<23) /* Update inode I_version field */
 #define MS_STRICTATIME	(1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME	(1<<25) /* Update the on-disk [acm]times lazily */
 
 /* These sb flags are internal to the kernel */
 #define MS_NOSEC	(1<<28)
@@ -100,7 +101,8 @@ struct inodes_stat_t {
 /*
  * Superblock flags that can be altered by MS_REMOUNT
  */
-#define MS_RMT_MASK	(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)
+#define MS_RMT_MASK	(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+			 MS_LAZYTIME)
 
 /*
  * Old magic mount flag and mask
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0ae0df55000b..915feea94c66 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -69,10 +69,10 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
-	unsigned long nr_dirty, nr_io, nr_more_io;
+	unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
 	struct inode *inode;
 
-	nr_dirty = nr_io = nr_more_io = 0;
+	nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
 	spin_lock(&wb->list_lock);
 	list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
 		nr_dirty++;
@@ -80,6 +80,9 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 		nr_io++;
 	list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
 		nr_more_io++;
+	list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list)
+		if (inode->i_state & I_DIRTY_TIME)
+			nr_dirty_time++;
 	spin_unlock(&wb->list_lock);
 
 	global_dirty_limits(&background_thresh, &dirty_thresh);
@@ -98,6 +101,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 		   "b_dirty:            %10lu\n"
 		   "b_io:               %10lu\n"
 		   "b_more_io:          %10lu\n"
+		   "b_dirty_time:       %10lu\n"
 		   "bdi_list:           %10u\n"
 		   "state:              %10lx\n",
 		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
@@ -111,6 +115,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 		   nr_dirty,
 		   nr_io,
 		   nr_more_io,
+		   nr_dirty_time,
 		   !list_empty(&bdi->bdi_list), bdi->state);
 #undef K
 
@@ -418,6 +423,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
 	INIT_LIST_HEAD(&wb->b_dirty);
 	INIT_LIST_HEAD(&wb->b_io);
 	INIT_LIST_HEAD(&wb->b_more_io);
+	INIT_LIST_HEAD(&wb->b_dirty_time);
 	spin_lock_init(&wb->list_lock);
 	INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
 }
-- 
cgit v1.2.3-71-gd317


From 692132b5b1c5ce97076915d4aed0c61513e18b03 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <nks@flawful.org>
Date: Mon, 2 Feb 2015 23:19:20 +0100
Subject: serial: driver for ETRAX FS UART

This is the last missing piece to get a kernel booting to a prompt in qemu-cris.

Signed-off-by: Niklas Cassel <nks@flawful.org>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig        |  10 +
 drivers/tty/serial/Makefile       |   1 +
 drivers/tty/serial/etraxfs-uart.c | 996 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/serial_core.h  |   3 +
 4 files changed, 1010 insertions(+)
 create mode 100644 drivers/tty/serial/etraxfs-uart.c

(limited to 'include/uapi/linux')

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index ddcc0a4c659c..5d916c7a216b 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1085,6 +1085,16 @@ config SERIAL_VT8500_CONSOLE
 	depends on SERIAL_VT8500=y
 	select SERIAL_CORE_CONSOLE
 
+config SERIAL_ETRAXFS
+	bool "ETRAX FS serial port support"
+	depends on ETRAX_ARCH_V32 && OF
+	select SERIAL_CORE
+
+config SERIAL_ETRAXFS_CONSOLE
+	bool "ETRAX FS serial console support"
+	depends on SERIAL_ETRAXFS
+	select SERIAL_CORE_CONSOLE
+
 config SERIAL_NETX
 	tristate "NetX serial port support"
 	depends on ARCH_NETX
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 431879003ccd..599be4b05a26 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_SERIAL_MPSC) += mpsc.o
 obj-$(CONFIG_SERIAL_MESON) += meson_uart.o
 obj-$(CONFIG_SERIAL_SB1250_DUART) += sb1250-duart.o
 obj-$(CONFIG_ETRAX_SERIAL) += crisv10.o
+obj-$(CONFIG_SERIAL_ETRAXFS) += etraxfs-uart.o
 obj-$(CONFIG_SERIAL_SCCNXP) += sccnxp.o
 obj-$(CONFIG_SERIAL_SC16IS7XX) += sc16is7xx.o
 obj-$(CONFIG_SERIAL_JSM) += jsm/
diff --git a/drivers/tty/serial/etraxfs-uart.c b/drivers/tty/serial/etraxfs-uart.c
new file mode 100644
index 000000000000..a57301a6fe42
--- /dev/null
+++ b/drivers/tty/serial/etraxfs-uart.c
@@ -0,0 +1,996 @@
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/platform_device.h>
+#include <linux/serial_core.h>
+#include <linux/tty_flip.h>
+#include <linux/of.h>
+#include <linux/gpio.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <hwregs/ser_defs.h>
+
+#define DRV_NAME "etraxfs-uart"
+#define UART_NR CONFIG_ETRAX_SERIAL_PORTS
+
+#define MODIFY_REG(instance, reg, var)				\
+	do {							\
+		if (REG_RD_INT(ser, instance, reg) !=		\
+		    REG_TYPE_CONV(int, reg_ser_##reg, var))	\
+			REG_WR(ser, instance, reg, var);	\
+	} while (0)
+
+struct uart_cris_port {
+	struct uart_port port;
+
+	int initialized;
+	int irq;
+
+	void __iomem *regi_ser;
+
+	struct gpio_desc *dtr_pin;
+	struct gpio_desc *dsr_pin;
+	struct gpio_desc *ri_pin;
+	struct gpio_desc *cd_pin;
+
+	int write_ongoing;
+};
+
+static struct uart_driver etraxfs_uart_driver;
+static struct uart_port *console_port;
+static int console_baud = 115200;
+static struct uart_cris_port *etraxfs_uart_ports[UART_NR];
+
+static void cris_serial_port_init(struct uart_port *port, int line);
+static void etraxfs_uart_stop_rx(struct uart_port *port);
+static inline void etraxfs_uart_start_tx_bottom(struct uart_port *port);
+
+#ifdef CONFIG_SERIAL_ETRAXFS_CONSOLE
+static void
+cris_console_write(struct console *co, const char *s, unsigned int count)
+{
+	struct uart_cris_port *up;
+	int i;
+	reg_ser_r_stat_din stat;
+	reg_ser_rw_tr_dma_en tr_dma_en, old;
+
+	up = etraxfs_uart_ports[co->index];
+
+	if (!up)
+		return;
+
+	/* Switch to manual mode. */
+	tr_dma_en = old = REG_RD(ser, up->regi_ser, rw_tr_dma_en);
+	if (tr_dma_en.en == regk_ser_yes) {
+		tr_dma_en.en = regk_ser_no;
+		REG_WR(ser, up->regi_ser, rw_tr_dma_en, tr_dma_en);
+	}
+
+	/* Send data. */
+	for (i = 0; i < count; i++) {
+		/* LF -> CRLF */
+		if (s[i] == '\n') {
+			do {
+				stat = REG_RD(ser, up->regi_ser, r_stat_din);
+			} while (!stat.tr_rdy);
+			REG_WR_INT(ser, up->regi_ser, rw_dout, '\r');
+		}
+		/* Wait until transmitter is ready and send. */
+		do {
+			stat = REG_RD(ser, up->regi_ser, r_stat_din);
+		} while (!stat.tr_rdy);
+		REG_WR_INT(ser, up->regi_ser, rw_dout, s[i]);
+	}
+
+	/* Restore mode. */
+	if (tr_dma_en.en != old.en)
+		REG_WR(ser, up->regi_ser, rw_tr_dma_en, old);
+}
+
+static int __init
+cris_console_setup(struct console *co, char *options)
+{
+	struct uart_port *port;
+	int baud = 115200;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+
+	if (co->index < 0 || co->index >= UART_NR)
+		co->index = 0;
+	port = &etraxfs_uart_ports[co->index]->port;
+	console_port = port;
+
+	co->flags |= CON_CONSDEV;
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+	console_baud = baud;
+	cris_serial_port_init(port, co->index);
+	uart_set_options(port, co, baud, parity, bits, flow);
+
+	return 0;
+}
+
+static struct tty_driver *cris_console_device(struct console *co, int *index)
+{
+	struct uart_driver *p = co->data;
+	*index = co->index;
+	return p->tty_driver;
+}
+
+static struct console cris_console = {
+	.name = "ttyS",
+	.write = cris_console_write,
+	.device = cris_console_device,
+	.setup = cris_console_setup,
+	.flags = CON_PRINTBUFFER,
+	.index = -1,
+	.data = &etraxfs_uart_driver,
+};
+#endif /* CONFIG_SERIAL_ETRAXFS_CONSOLE */
+
+static struct uart_driver etraxfs_uart_driver = {
+	.owner = THIS_MODULE,
+	.driver_name = "serial",
+	.dev_name = "ttyS",
+	.major = TTY_MAJOR,
+	.minor = 64,
+	.nr = UART_NR,
+#ifdef CONFIG_SERIAL_ETRAXFS_CONSOLE
+	.cons = &cris_console,
+#endif /* CONFIG_SERIAL_ETRAXFS_CONSOLE */
+};
+
+static inline int crisv32_serial_get_rts(struct uart_cris_port *up)
+{
+	void __iomem *regi_ser = up->regi_ser;
+	/*
+	 * Return what the user has controlled rts to or
+	 * what the pin is? (if auto_rts is used it differs during tx)
+	 */
+	reg_ser_r_stat_din rstat = REG_RD(ser, regi_ser, r_stat_din);
+
+	return !(rstat.rts_n == regk_ser_active);
+}
+
+/*
+ * A set = 0 means 3.3V on the pin, bitvalue: 0=active, 1=inactive
+ *                                            0=0V    , 1=3.3V
+ */
+static inline void crisv32_serial_set_rts(struct uart_cris_port *up,
+					  int set, int force)
+{
+	void __iomem *regi_ser = up->regi_ser;
+
+	unsigned long flags;
+	reg_ser_rw_rec_ctrl rec_ctrl;
+
+	local_irq_save(flags);
+	rec_ctrl = REG_RD(ser, regi_ser, rw_rec_ctrl);
+
+	if (set)
+		rec_ctrl.rts_n = regk_ser_active;
+	else
+		rec_ctrl.rts_n = regk_ser_inactive;
+	REG_WR(ser, regi_ser, rw_rec_ctrl, rec_ctrl);
+	local_irq_restore(flags);
+}
+
+static inline int crisv32_serial_get_cts(struct uart_cris_port *up)
+{
+	void __iomem *regi_ser = up->regi_ser;
+	reg_ser_r_stat_din rstat = REG_RD(ser, regi_ser, r_stat_din);
+
+	return (rstat.cts_n == regk_ser_active);
+}
+
+/*
+ * Send a single character for XON/XOFF purposes.  We do it in this separate
+ * function instead of the alternative support port.x_char, in the ...start_tx
+ * function, so we don't mix up this case with possibly enabling transmission
+ * of queued-up data (in case that's disabled after *receiving* an XOFF or
+ * negative CTS).  This function is used for both DMA and non-DMA case; see HW
+ * docs specifically blessing sending characters manually when DMA for
+ * transmission is enabled and running.  We may be asked to transmit despite
+ * the transmitter being disabled by a ..._stop_tx call so we need to enable
+ * it temporarily but restore the state afterwards.
+ */
+static void etraxfs_uart_send_xchar(struct uart_port *port, char ch)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+	reg_ser_rw_dout dout = { .data = ch };
+	reg_ser_rw_ack_intr ack_intr = { .tr_rdy = regk_ser_yes };
+	reg_ser_r_stat_din rstat;
+	reg_ser_rw_tr_ctrl prev_tr_ctrl, tr_ctrl;
+	void __iomem *regi_ser = up->regi_ser;
+	unsigned long flags;
+
+	/*
+	 * Wait for tr_rdy in case a character is already being output.  Make
+	 * sure we have integrity between the register reads and the writes
+	 * below, but don't busy-wait with interrupts off and the port lock
+	 * taken.
+	 */
+	spin_lock_irqsave(&port->lock, flags);
+	do {
+		spin_unlock_irqrestore(&port->lock, flags);
+		spin_lock_irqsave(&port->lock, flags);
+		prev_tr_ctrl = tr_ctrl = REG_RD(ser, regi_ser, rw_tr_ctrl);
+		rstat = REG_RD(ser, regi_ser, r_stat_din);
+	} while (!rstat.tr_rdy);
+
+	/*
+	 * Ack an interrupt if one was just issued for the previous character
+	 * that was output.  This is required for non-DMA as the interrupt is
+	 * used as the only indicator that the transmitter is ready and it
+	 * isn't while this x_char is being transmitted.
+	 */
+	REG_WR(ser, regi_ser, rw_ack_intr, ack_intr);
+
+	/* Enable the transmitter in case it was disabled. */
+	tr_ctrl.stop = 0;
+	REG_WR(ser, regi_ser, rw_tr_ctrl, tr_ctrl);
+
+	/*
+	 * Finally, send the blessed character; nothing should stop it now,
+	 * except for an xoff-detected state, which we'll handle below.
+	 */
+	REG_WR(ser, regi_ser, rw_dout, dout);
+	up->port.icount.tx++;
+
+	/* There might be an xoff state to clear. */
+	rstat = REG_RD(ser, up->regi_ser, r_stat_din);
+
+	/*
+	 * Clear any xoff state that *may* have been there to
+	 * inhibit transmission of the character.
+	 */
+	if (rstat.xoff_detect) {
+		reg_ser_rw_xoff_clr xoff_clr = { .clr = 1 };
+		reg_ser_rw_tr_dma_en tr_dma_en;
+
+		REG_WR(ser, regi_ser, rw_xoff_clr, xoff_clr);
+		tr_dma_en = REG_RD(ser, regi_ser, rw_tr_dma_en);
+
+		/*
+		 * If we had an xoff state but cleared it, instead sneak in a
+		 * disabled state for the transmitter, after the character we
+		 * sent.  Thus we keep the port disabled, just as if the xoff
+		 * state was still in effect (or actually, as if stop_tx had
+		 * been called, as we stop DMA too).
+		 */
+		prev_tr_ctrl.stop = 1;
+
+		tr_dma_en.en = 0;
+		REG_WR(ser, regi_ser, rw_tr_dma_en, tr_dma_en);
+	}
+
+	/* Restore "previous" enabled/disabled state of the transmitter. */
+	REG_WR(ser, regi_ser, rw_tr_ctrl, prev_tr_ctrl);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/*
+ * Do not spin_lock_irqsave or disable interrupts by other means here; it's
+ * already done by the caller.
+ */
+static void etraxfs_uart_start_tx(struct uart_port *port)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+
+	/* we have already done below if a write is ongoing */
+	if (up->write_ongoing)
+		return;
+
+	/* Signal that write is ongoing */
+	up->write_ongoing = 1;
+
+	etraxfs_uart_start_tx_bottom(port);
+}
+
+static inline void etraxfs_uart_start_tx_bottom(struct uart_port *port)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+	void __iomem *regi_ser = up->regi_ser;
+	reg_ser_rw_tr_ctrl tr_ctrl;
+	reg_ser_rw_intr_mask intr_mask;
+
+	tr_ctrl = REG_RD(ser, regi_ser, rw_tr_ctrl);
+	tr_ctrl.stop = regk_ser_no;
+	REG_WR(ser, regi_ser, rw_tr_ctrl, tr_ctrl);
+	intr_mask = REG_RD(ser, regi_ser, rw_intr_mask);
+	intr_mask.tr_rdy = regk_ser_yes;
+	REG_WR(ser, regi_ser, rw_intr_mask, intr_mask);
+}
+
+/*
+ * This function handles both the DMA and non-DMA case by ordering the
+ * transmitter to stop of after the current character.  We don't need to wait
+ * for any such character to be completely transmitted; we do that where it
+ * matters, like in etraxfs_uart_set_termios.  Don't busy-wait here; see
+ * Documentation/serial/driver: this function is called within
+ * spin_lock_irq{,save} and thus separate ones would be disastrous (when SMP).
+ * There's no documented need to set the txd pin to any particular value;
+ * break setting is controlled solely by etraxfs_uart_break_ctl.
+ */
+static void etraxfs_uart_stop_tx(struct uart_port *port)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+	void __iomem *regi_ser = up->regi_ser;
+	reg_ser_rw_tr_ctrl tr_ctrl;
+	reg_ser_rw_intr_mask intr_mask;
+	reg_ser_rw_tr_dma_en tr_dma_en = {0};
+	reg_ser_rw_xoff_clr xoff_clr = {0};
+
+	/*
+	 * For the non-DMA case, we'd get a tr_rdy interrupt that we're not
+	 * interested in as we're not transmitting any characters.  For the
+	 * DMA case, that interrupt is already turned off, but no reason to
+	 * waste code on conditionals here.
+	 */
+	intr_mask = REG_RD(ser, regi_ser, rw_intr_mask);
+	intr_mask.tr_rdy = regk_ser_no;
+	REG_WR(ser, regi_ser, rw_intr_mask, intr_mask);
+
+	tr_ctrl = REG_RD(ser, regi_ser, rw_tr_ctrl);
+	tr_ctrl.stop = 1;
+	REG_WR(ser, regi_ser, rw_tr_ctrl, tr_ctrl);
+
+	/*
+	 * Always clear possible hardware xoff-detected state here, no need to
+	 * unnecessary consider mctrl settings and when they change.  We clear
+	 * it here rather than in start_tx: both functions are called as the
+	 * effect of XOFF processing, but start_tx is also called when upper
+	 * levels tell the driver that there are more characters to send, so
+	 * avoid adding code there.
+	 */
+	xoff_clr.clr = 1;
+	REG_WR(ser, regi_ser, rw_xoff_clr, xoff_clr);
+
+	/*
+	 * Disable transmitter DMA, so that if we're in XON/XOFF, we can send
+	 * those single characters without also giving go-ahead for queued up
+	 * DMA data.
+	 */
+	tr_dma_en.en = 0;
+	REG_WR(ser, regi_ser, rw_tr_dma_en, tr_dma_en);
+
+	/*
+	 * Make sure that write_ongoing is reset when stopping tx.
+	 */
+	up->write_ongoing = 0;
+}
+
+static void etraxfs_uart_stop_rx(struct uart_port *port)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+	void __iomem *regi_ser = up->regi_ser;
+	reg_ser_rw_rec_ctrl rec_ctrl = REG_RD(ser, regi_ser, rw_rec_ctrl);
+
+	rec_ctrl.en = regk_ser_no;
+	REG_WR(ser, regi_ser, rw_rec_ctrl, rec_ctrl);
+}
+
+static void etraxfs_uart_enable_ms(struct uart_port *port)
+{
+}
+
+static void check_modem_status(struct uart_cris_port *up)
+{
+}
+
+static unsigned int etraxfs_uart_tx_empty(struct uart_port *port)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+	unsigned long flags;
+	unsigned int ret;
+	reg_ser_r_stat_din rstat = {0};
+
+	spin_lock_irqsave(&up->port.lock, flags);
+
+	rstat = REG_RD(ser, up->regi_ser, r_stat_din);
+	ret = rstat.tr_empty ? TIOCSER_TEMT : 0;
+
+	spin_unlock_irqrestore(&up->port.lock, flags);
+	return ret;
+}
+static unsigned int etraxfs_uart_get_mctrl(struct uart_port *port)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+	unsigned int ret;
+
+	ret = 0;
+	if (crisv32_serial_get_rts(up))
+		ret |= TIOCM_RTS;
+	/* DTR is active low */
+	if (up->dtr_pin && !gpiod_get_raw_value(up->dtr_pin))
+		ret |= TIOCM_DTR;
+	/* CD is active low */
+	if (up->cd_pin && !gpiod_get_raw_value(up->cd_pin))
+		ret |= TIOCM_CD;
+	/* RI is active low */
+	if (up->ri_pin && !gpiod_get_raw_value(up->ri_pin))
+		ret |= TIOCM_RI;
+	/* DSR is active low */
+	if (up->dsr_pin && !gpiod_get_raw_value(up->dsr_pin))
+		ret |= TIOCM_DSR;
+	if (crisv32_serial_get_cts(up))
+		ret |= TIOCM_CTS;
+	return ret;
+}
+
+static void etraxfs_uart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+
+	crisv32_serial_set_rts(up, mctrl & TIOCM_RTS ? 1 : 0, 0);
+	/* DTR is active low */
+	if (up->dtr_pin)
+		gpiod_set_raw_value(up->dtr_pin, mctrl & TIOCM_DTR ? 0 : 1);
+	/* RI is active low */
+	if (up->ri_pin)
+		gpiod_set_raw_value(up->ri_pin, mctrl & TIOCM_RNG ? 0 : 1);
+	/* CD is active low */
+	if (up->cd_pin)
+		gpiod_set_raw_value(up->cd_pin, mctrl & TIOCM_CD ? 0 : 1);
+}
+
+static void etraxfs_uart_break_ctl(struct uart_port *port, int break_state)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+	unsigned long flags;
+	reg_ser_rw_tr_ctrl tr_ctrl;
+	reg_ser_rw_tr_dma_en tr_dma_en;
+	reg_ser_rw_intr_mask intr_mask;
+
+	spin_lock_irqsave(&up->port.lock, flags);
+	tr_ctrl = REG_RD(ser, up->regi_ser, rw_tr_ctrl);
+	tr_dma_en = REG_RD(ser, up->regi_ser, rw_tr_dma_en);
+	intr_mask = REG_RD(ser, up->regi_ser, rw_intr_mask);
+
+	if (break_state != 0) { /* Send break */
+		/*
+		 * We need to disable DMA (if used) or tr_rdy interrupts if no
+		 * DMA.  No need to make this conditional on use of DMA;
+		 * disabling will be a no-op for the other mode.
+		 */
+		intr_mask.tr_rdy = regk_ser_no;
+		tr_dma_en.en = 0;
+
+		/*
+		 * Stop transmission and set the txd pin to 0 after the
+		 * current character.  The txd setting will take effect after
+		 * any current transmission has completed.
+		 */
+		tr_ctrl.stop = 1;
+		tr_ctrl.txd = 0;
+	} else {
+		/* Re-enable the serial interrupt. */
+		intr_mask.tr_rdy = regk_ser_yes;
+
+		tr_ctrl.stop = 0;
+		tr_ctrl.txd = 1;
+	}
+	REG_WR(ser, up->regi_ser, rw_tr_ctrl, tr_ctrl);
+	REG_WR(ser, up->regi_ser, rw_tr_dma_en, tr_dma_en);
+	REG_WR(ser, up->regi_ser, rw_intr_mask, intr_mask);
+
+	spin_unlock_irqrestore(&up->port.lock, flags);
+}
+
+static void
+transmit_chars_no_dma(struct uart_cris_port *up)
+{
+	int max_count;
+	struct circ_buf *xmit = &up->port.state->xmit;
+
+	void __iomem *regi_ser = up->regi_ser;
+	reg_ser_r_stat_din rstat;
+	reg_ser_rw_ack_intr ack_intr = { .tr_rdy = regk_ser_yes };
+
+	if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) {
+		/* No more to send, so disable the interrupt. */
+		reg_ser_rw_intr_mask intr_mask;
+
+		intr_mask = REG_RD(ser, regi_ser, rw_intr_mask);
+		intr_mask.tr_rdy = 0;
+		intr_mask.tr_empty = 0;
+		REG_WR(ser, regi_ser, rw_intr_mask, intr_mask);
+		up->write_ongoing = 0;
+		return;
+	}
+
+	/* If the serport is fast, we send up to max_count bytes before
+	   exiting the loop.  */
+	max_count = 64;
+	do {
+		reg_ser_rw_dout dout = { .data = xmit->buf[xmit->tail] };
+
+		REG_WR(ser, regi_ser, rw_dout, dout);
+		REG_WR(ser, regi_ser, rw_ack_intr, ack_intr);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE-1);
+		up->port.icount.tx++;
+		if (xmit->head == xmit->tail)
+			break;
+		rstat = REG_RD(ser, regi_ser, r_stat_din);
+	} while ((--max_count > 0) && rstat.tr_rdy);
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&up->port);
+}
+
+static void receive_chars_no_dma(struct uart_cris_port *up)
+{
+	reg_ser_rs_stat_din stat_din;
+	reg_ser_r_stat_din rstat;
+	struct tty_port *port;
+	struct uart_icount *icount;
+	int max_count = 16;
+	char flag;
+	reg_ser_rw_ack_intr ack_intr = { 0 };
+
+	rstat = REG_RD(ser, up->regi_ser, r_stat_din);
+	icount = &up->port.icount;
+	port = &up->port.state->port;
+
+	do {
+		stat_din = REG_RD(ser, up->regi_ser, rs_stat_din);
+
+		flag = TTY_NORMAL;
+		ack_intr.dav = 1;
+		REG_WR(ser, up->regi_ser, rw_ack_intr, ack_intr);
+		icount->rx++;
+
+		if (stat_din.framing_err | stat_din.par_err | stat_din.orun) {
+			if (stat_din.data == 0x00 &&
+			    stat_din.framing_err) {
+				/* Most likely a break. */
+				flag = TTY_BREAK;
+				icount->brk++;
+			} else if (stat_din.par_err) {
+				flag = TTY_PARITY;
+				icount->parity++;
+			} else if (stat_din.orun) {
+				flag = TTY_OVERRUN;
+				icount->overrun++;
+			} else if (stat_din.framing_err) {
+				flag = TTY_FRAME;
+				icount->frame++;
+			}
+		}
+
+		/*
+		 * If this becomes important, we probably *could* handle this
+		 * gracefully by keeping track of the unhandled character.
+		 */
+		if (!tty_insert_flip_char(port, stat_din.data, flag))
+			panic("%s: No tty buffer space", __func__);
+		rstat = REG_RD(ser, up->regi_ser, r_stat_din);
+	} while (rstat.dav && (max_count-- > 0));
+	spin_unlock(&up->port.lock);
+	tty_flip_buffer_push(port);
+	spin_lock(&up->port.lock);
+}
+
+static irqreturn_t
+ser_interrupt(int irq, void *dev_id)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)dev_id;
+	void __iomem *regi_ser;
+	int handled = 0;
+
+	spin_lock(&up->port.lock);
+
+	regi_ser = up->regi_ser;
+
+	if (regi_ser) {
+		reg_ser_r_masked_intr masked_intr;
+
+		masked_intr = REG_RD(ser, regi_ser, r_masked_intr);
+		/*
+		 * Check what interrupts are active before taking
+		 * actions. If DMA is used the interrupt shouldn't
+		 * be enabled.
+		 */
+		if (masked_intr.dav) {
+			receive_chars_no_dma(up);
+			handled = 1;
+		}
+		check_modem_status(up);
+
+		if (masked_intr.tr_rdy) {
+			transmit_chars_no_dma(up);
+			handled = 1;
+		}
+	}
+	spin_unlock(&up->port.lock);
+	return IRQ_RETVAL(handled);
+}
+
+#ifdef CONFIG_CONSOLE_POLL
+static int etraxfs_uart_get_poll_char(struct uart_port *port)
+{
+	reg_ser_rs_stat_din stat;
+	reg_ser_rw_ack_intr ack_intr = { 0 };
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+
+	do {
+		stat = REG_RD(ser, up->regi_ser, rs_stat_din);
+	} while (!stat.dav);
+
+	/* Ack the data_avail interrupt. */
+	ack_intr.dav = 1;
+	REG_WR(ser, up->regi_ser, rw_ack_intr, ack_intr);
+
+	return stat.data;
+}
+
+static void etraxfs_uart_put_poll_char(struct uart_port *port,
+					unsigned char c)
+{
+	reg_ser_r_stat_din stat;
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+
+	do {
+		stat = REG_RD(ser, up->regi_ser, r_stat_din);
+	} while (!stat.tr_rdy);
+	REG_WR_INT(ser, up->regi_ser, rw_dout, c);
+}
+#endif /* CONFIG_CONSOLE_POLL */
+
+static int etraxfs_uart_startup(struct uart_port *port)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+	unsigned long flags;
+	reg_ser_rw_intr_mask ser_intr_mask = {0};
+
+	ser_intr_mask.dav = regk_ser_yes;
+
+	if (request_irq(etraxfs_uart_ports[port->line]->irq, ser_interrupt,
+			0, DRV_NAME, etraxfs_uart_ports[port->line]))
+		panic("irq ser%d", port->line);
+
+	spin_lock_irqsave(&up->port.lock, flags);
+
+	REG_WR(ser, up->regi_ser, rw_intr_mask, ser_intr_mask);
+
+	etraxfs_uart_set_mctrl(&up->port, up->port.mctrl);
+
+	spin_unlock_irqrestore(&up->port.lock, flags);
+
+	return 0;
+}
+
+static void etraxfs_uart_shutdown(struct uart_port *port)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+	unsigned long flags;
+
+	spin_lock_irqsave(&up->port.lock, flags);
+
+	etraxfs_uart_stop_tx(port);
+	etraxfs_uart_stop_rx(port);
+
+	free_irq(etraxfs_uart_ports[port->line]->irq,
+		 etraxfs_uart_ports[port->line]);
+
+	etraxfs_uart_set_mctrl(&up->port, up->port.mctrl);
+
+	spin_unlock_irqrestore(&up->port.lock, flags);
+
+}
+
+static void
+etraxfs_uart_set_termios(struct uart_port *port, struct ktermios *termios,
+			 struct ktermios *old)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+	unsigned long flags;
+	reg_ser_rw_xoff xoff;
+	reg_ser_rw_xoff_clr xoff_clr = {0};
+	reg_ser_rw_tr_ctrl tx_ctrl = {0};
+	reg_ser_rw_tr_dma_en tx_dma_en = {0};
+	reg_ser_rw_rec_ctrl rx_ctrl = {0};
+	reg_ser_rw_tr_baud_div tx_baud_div = {0};
+	reg_ser_rw_rec_baud_div rx_baud_div = {0};
+	int baud;
+
+	if (old &&
+	    termios->c_cflag == old->c_cflag &&
+	    termios->c_iflag == old->c_iflag)
+		return;
+
+	/* Tx: 8 bit, no/even parity, 1 stop bit, no cts. */
+	tx_ctrl.base_freq = regk_ser_f29_493;
+	tx_ctrl.en = 0;
+	tx_ctrl.stop = 0;
+	tx_ctrl.auto_rts = regk_ser_no;
+	tx_ctrl.txd = 1;
+	tx_ctrl.auto_cts = 0;
+	/* Rx: 8 bit, no/even parity. */
+	rx_ctrl.dma_err = regk_ser_stop;
+	rx_ctrl.sampling = regk_ser_majority;
+	rx_ctrl.timeout = 1;
+
+	rx_ctrl.rts_n = regk_ser_inactive;
+
+	/* Common for tx and rx: 8N1. */
+	tx_ctrl.data_bits = regk_ser_bits8;
+	rx_ctrl.data_bits = regk_ser_bits8;
+	tx_ctrl.par = regk_ser_even;
+	rx_ctrl.par = regk_ser_even;
+	tx_ctrl.par_en = regk_ser_no;
+	rx_ctrl.par_en = regk_ser_no;
+
+	tx_ctrl.stop_bits = regk_ser_bits1;
+
+	/*
+	 * Change baud-rate and write it to the hardware.
+	 *
+	 * baud_clock = base_freq / (divisor*8)
+	 * divisor = base_freq / (baud_clock * 8)
+	 * base_freq is either:
+	 * off, ext, 29.493MHz, 32.000 MHz, 32.768 MHz or 100 MHz
+	 * 20.493MHz is used for standard baudrates
+	 */
+
+	/*
+	 * For the console port we keep the original baudrate here.  Not very
+	 * beautiful.
+	 */
+	if ((port != console_port) || old)
+		baud = uart_get_baud_rate(port, termios, old, 0,
+					  port->uartclk / 8);
+	else
+		baud = console_baud;
+
+	tx_baud_div.div = 29493000 / (8 * baud);
+	/* Rx uses same as tx. */
+	rx_baud_div.div = tx_baud_div.div;
+	rx_ctrl.base_freq = tx_ctrl.base_freq;
+
+	if ((termios->c_cflag & CSIZE) == CS7) {
+		/* Set 7 bit mode. */
+		tx_ctrl.data_bits = regk_ser_bits7;
+		rx_ctrl.data_bits = regk_ser_bits7;
+	}
+
+	if (termios->c_cflag & CSTOPB) {
+		/* Set 2 stop bit mode. */
+		tx_ctrl.stop_bits = regk_ser_bits2;
+	}
+
+	if (termios->c_cflag & PARENB) {
+		/* Enable parity. */
+		tx_ctrl.par_en = regk_ser_yes;
+		rx_ctrl.par_en = regk_ser_yes;
+	}
+
+	if (termios->c_cflag & CMSPAR) {
+		if (termios->c_cflag & PARODD) {
+			/* Set mark parity if PARODD and CMSPAR. */
+			tx_ctrl.par = regk_ser_mark;
+			rx_ctrl.par = regk_ser_mark;
+		} else {
+			tx_ctrl.par = regk_ser_space;
+			rx_ctrl.par = regk_ser_space;
+		}
+	} else {
+		if (termios->c_cflag & PARODD) {
+			/* Set odd parity. */
+		       tx_ctrl.par = regk_ser_odd;
+		       rx_ctrl.par = regk_ser_odd;
+		}
+	}
+
+	if (termios->c_cflag & CRTSCTS) {
+		/* Enable automatic CTS handling. */
+		tx_ctrl.auto_cts = regk_ser_yes;
+	}
+
+	/* Make sure the tx and rx are enabled. */
+	tx_ctrl.en = regk_ser_yes;
+	rx_ctrl.en = regk_ser_yes;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	tx_dma_en.en = 0;
+	REG_WR(ser, up->regi_ser, rw_tr_dma_en, tx_dma_en);
+
+	/* Actually write the control regs (if modified) to the hardware. */
+	uart_update_timeout(port, termios->c_cflag, port->uartclk/8);
+	MODIFY_REG(up->regi_ser, rw_rec_baud_div, rx_baud_div);
+	MODIFY_REG(up->regi_ser, rw_rec_ctrl, rx_ctrl);
+
+	MODIFY_REG(up->regi_ser, rw_tr_baud_div, tx_baud_div);
+	MODIFY_REG(up->regi_ser, rw_tr_ctrl, tx_ctrl);
+
+	tx_dma_en.en = 0;
+	REG_WR(ser, up->regi_ser, rw_tr_dma_en, tx_dma_en);
+
+	xoff = REG_RD(ser, up->regi_ser, rw_xoff);
+
+	if (up->port.state && up->port.state->port.tty &&
+	    (up->port.state->port.tty->termios.c_iflag & IXON)) {
+		xoff.chr = STOP_CHAR(up->port.state->port.tty);
+		xoff.automatic = regk_ser_yes;
+	} else
+		xoff.automatic = regk_ser_no;
+
+	MODIFY_REG(up->regi_ser, rw_xoff, xoff);
+
+	/*
+	 * Make sure we don't start in an automatically shut-off state due to
+	 * a previous early exit.
+	 */
+	xoff_clr.clr = 1;
+	REG_WR(ser, up->regi_ser, rw_xoff_clr, xoff_clr);
+
+	etraxfs_uart_set_mctrl(&up->port, up->port.mctrl);
+	spin_unlock_irqrestore(&up->port.lock, flags);
+}
+
+static const char *
+etraxfs_uart_type(struct uart_port *port)
+{
+	return "CRISv32";
+}
+
+static void etraxfs_uart_release_port(struct uart_port *port)
+{
+}
+
+static int etraxfs_uart_request_port(struct uart_port *port)
+{
+	return 0;
+}
+
+static void etraxfs_uart_config_port(struct uart_port *port, int flags)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+
+	up->port.type = PORT_CRIS;
+}
+
+static const struct uart_ops etraxfs_uart_pops = {
+	.tx_empty = etraxfs_uart_tx_empty,
+	.set_mctrl = etraxfs_uart_set_mctrl,
+	.get_mctrl = etraxfs_uart_get_mctrl,
+	.stop_tx = etraxfs_uart_stop_tx,
+	.start_tx = etraxfs_uart_start_tx,
+	.send_xchar = etraxfs_uart_send_xchar,
+	.stop_rx = etraxfs_uart_stop_rx,
+	.enable_ms = etraxfs_uart_enable_ms,
+	.break_ctl = etraxfs_uart_break_ctl,
+	.startup = etraxfs_uart_startup,
+	.shutdown = etraxfs_uart_shutdown,
+	.set_termios = etraxfs_uart_set_termios,
+	.type = etraxfs_uart_type,
+	.release_port = etraxfs_uart_release_port,
+	.request_port = etraxfs_uart_request_port,
+	.config_port = etraxfs_uart_config_port,
+#ifdef CONFIG_CONSOLE_POLL
+	.poll_get_char = etraxfs_uart_get_poll_char,
+	.poll_put_char = etraxfs_uart_put_poll_char,
+#endif
+};
+
+static void cris_serial_port_init(struct uart_port *port, int line)
+{
+	struct uart_cris_port *up = (struct uart_cris_port *)port;
+
+	if (up->initialized)
+		return;
+	up->initialized = 1;
+	port->line = line;
+	spin_lock_init(&port->lock);
+	port->ops = &etraxfs_uart_pops;
+	port->irq = up->irq;
+	port->iobase = (unsigned long) up->regi_ser;
+	port->uartclk = 29493000;
+
+	/*
+	 * We can't fit any more than 255 here (unsigned char), though
+	 * actually UART_XMIT_SIZE characters could be pending output.
+	 * At time of this writing, the definition of "fifosize" is here the
+	 * amount of characters that can be pending output after a start_tx call
+	 * until tx_empty returns 1: see serial_core.c:uart_wait_until_sent.
+	 * This matters for timeout calculations unfortunately, but keeping
+	 * larger amounts at the DMA wouldn't win much so let's just play nice.
+	 */
+	port->fifosize = 255;
+	port->flags = UPF_BOOT_AUTOCONF;
+}
+
+static int etraxfs_uart_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct uart_cris_port *up;
+	int dev_id;
+
+	if (!np)
+		return -ENODEV;
+
+	dev_id = of_alias_get_id(np, "serial");
+	if (dev_id < 0)
+		dev_id = 0;
+
+	if (dev_id >= UART_NR)
+		return -EINVAL;
+
+	if (etraxfs_uart_ports[dev_id])
+		return -EBUSY;
+
+	up = devm_kzalloc(&pdev->dev, sizeof(struct uart_cris_port),
+			  GFP_KERNEL);
+	if (!up)
+		return -ENOMEM;
+
+	up->irq = irq_of_parse_and_map(np, 0);
+	up->regi_ser = of_iomap(np, 0);
+	up->dtr_pin = devm_gpiod_get_optional(&pdev->dev, "dtr");
+	up->dsr_pin = devm_gpiod_get_optional(&pdev->dev, "dsr");
+	up->ri_pin = devm_gpiod_get_optional(&pdev->dev, "ri");
+	up->cd_pin = devm_gpiod_get_optional(&pdev->dev, "cd");
+	up->port.dev = &pdev->dev;
+	cris_serial_port_init(&up->port, dev_id);
+
+	etraxfs_uart_ports[dev_id] = up;
+	platform_set_drvdata(pdev, &up->port);
+	uart_add_one_port(&etraxfs_uart_driver, &up->port);
+
+	return 0;
+}
+
+static int etraxfs_uart_remove(struct platform_device *pdev)
+{
+	struct uart_port *port;
+
+	port = platform_get_drvdata(pdev);
+	uart_remove_one_port(&etraxfs_uart_driver, port);
+	etraxfs_uart_ports[pdev->id] = NULL;
+
+	return 0;
+}
+
+static const struct of_device_id etraxfs_uart_dt_ids[] = {
+	{ .compatible = "axis,etraxfs-uart" },
+	{ /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(of, etraxfs_uart_dt_ids);
+
+static struct platform_driver etraxfs_uart_platform_driver = {
+	.driver = {
+		.name   = DRV_NAME,
+		.of_match_table	= of_match_ptr(etraxfs_uart_dt_ids),
+	},
+	.probe          = etraxfs_uart_probe,
+	.remove         = etraxfs_uart_remove,
+};
+
+static int __init etraxfs_uart_init(void)
+{
+	int ret;
+
+	ret = uart_register_driver(&etraxfs_uart_driver);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&etraxfs_uart_platform_driver);
+	if (ret)
+		uart_unregister_driver(&etraxfs_uart_driver);
+
+	return ret;
+}
+
+static void __exit etraxfs_uart_exit(void)
+{
+	platform_driver_unregister(&etraxfs_uart_platform_driver);
+	uart_unregister_driver(&etraxfs_uart_driver);
+}
+
+module_init(etraxfs_uart_init);
+module_exit(etraxfs_uart_exit);
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 55da91e763bb..b2122813f18a 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -255,4 +255,7 @@
 /* SPRD SERIAL  */
 #define PORT_SPRD	111
 
+/* Cris v10 / v32 SoC */
+#define PORT_CRIS	112
+
 #endif /* _UAPILINUX_SERIAL_CORE_H */
-- 
cgit v1.2.3-71-gd317


From 83d2b9ba1abca241df44a502b6da950a25856b5b Mon Sep 17 00:00:00 2001
From: Jarno Rajahalme <jrajahalme@nicira.com>
Date: Thu, 5 Feb 2015 13:40:49 -0800
Subject: net: openvswitch: Support masked set actions.

OVS userspace already probes the openvswitch kernel module for
OVS_ACTION_ATTR_SET_MASKED support.  This patch adds the kernel module
implementation of masked set actions.

The existing set action sets many fields at once.  When only a subset
of the IP header fields, for example, should be modified, all the IP
fields need to be exact matched so that the other field values can be
copied to the set action.  A masked set action allows modification of
an arbitrary subset of the supported header bits without requiring the
rest to be matched.

Masked set action is now supported for all writeable key types, except
for the tunnel key.  The set tunnel action is an exception as any
input tunnel info is cleared before action processing starts, so there
is no tunnel info to mask.

The kernel module converts all (non-tunnel) set actions to masked set
actions.  This makes action processing more uniform, and results in
less branching and duplicating the action processing code.  When
returning actions to userspace, the fully masked set actions are
converted back to normal set actions.  We use a kernel internal action
code to be able to tell the userspace provided and converted masked
set actions apart.

Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/openvswitch.h |  22 ++-
 net/openvswitch/actions.c        | 373 ++++++++++++++++++++++++---------------
 net/openvswitch/flow_netlink.c   | 161 +++++++++++++----
 3 files changed, 383 insertions(+), 173 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 7a8785a99243..bbd49a0c46c7 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -599,6 +599,12 @@ struct ovs_action_hash {
  * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header.  The
  * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
  * value.
+ * @OVS_ACTION_ATTR_SET_MASKED: Replaces the contents of an existing header.  A
+ * nested %OVS_KEY_ATTR_* attribute specifies a header to modify, its value,
+ * and a mask.  For every bit set in the mask, the corresponding bit value
+ * is copied from the value to the packet header field, rest of the bits are
+ * left unchanged.  The non-masked value bits must be passed in as zeroes.
+ * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute.
  * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
  * packet.
  * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
@@ -617,6 +623,9 @@ struct ovs_action_hash {
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
  * type may not be changed.
+ *
+ * @OVS_ACTION_ATTR_SET_TO_MASKED: Kernel internal masked set action translated
+ * from the @OVS_ACTION_ATTR_SET.
  */
 
 enum ovs_action_attr {
@@ -631,8 +640,19 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_HASH,	      /* struct ovs_action_hash. */
 	OVS_ACTION_ATTR_PUSH_MPLS,    /* struct ovs_action_push_mpls. */
 	OVS_ACTION_ATTR_POP_MPLS,     /* __be16 ethertype. */
+	OVS_ACTION_ATTR_SET_MASKED,   /* One nested OVS_KEY_ATTR_* including
+				       * data immediately followed by a mask.
+				       * The data must be zero for the unmasked
+				       * bits. */
+
+	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
+				       * from userspace. */
 
-	__OVS_ACTION_ATTR_MAX
+#ifdef __KERNEL__
+	OVS_ACTION_ATTR_SET_TO_MASKED, /* Kernel module internal masked
+					* set action converted from
+					* OVS_ACTION_ATTR_SET. */
+#endif
 };
 
 #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index b4cffe686126..b491c1c296fe 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -185,10 +185,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 	return 0;
 }
 
-static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
-		    const __be32 *mpls_lse)
+/* 'KEY' must not have any bits set outside of the 'MASK' */
+#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
+#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK))
+
+static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const __be32 *mpls_lse, const __be32 *mask)
 {
 	__be32 *stack;
+	__be32 lse;
 	int err;
 
 	err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
@@ -196,14 +201,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 		return err;
 
 	stack = (__be32 *)skb_mpls_header(skb);
+	lse = MASKED(*stack, *mpls_lse, *mask);
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		__be32 diff[] = { ~(*stack), *mpls_lse };
+		__be32 diff[] = { ~(*stack), lse };
+
 		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
 					  ~skb->csum);
 	}
 
-	*stack = *mpls_lse;
-	key->mpls.top_lse = *mpls_lse;
+	*stack = lse;
+	flow_key->mpls.top_lse = lse;
 	return 0;
 }
 
@@ -230,23 +237,39 @@ static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
 			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
 }
 
-static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key,
-			const struct ovs_key_ethernet *eth_key)
+/* 'src' is already properly masked. */
+static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
+{
+	u16 *dst = (u16 *)dst_;
+	const u16 *src = (const u16 *)src_;
+	const u16 *mask = (const u16 *)mask_;
+
+	SET_MASKED(dst[0], src[0], mask[0]);
+	SET_MASKED(dst[1], src[1], mask[1]);
+	SET_MASKED(dst[2], src[2], mask[2]);
+}
+
+static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
+			const struct ovs_key_ethernet *key,
+			const struct ovs_key_ethernet *mask)
 {
 	int err;
+
 	err = skb_ensure_writable(skb, ETH_HLEN);
 	if (unlikely(err))
 		return err;
 
 	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
-	ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
-	ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);
+	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
+			       mask->eth_src);
+	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
+			       mask->eth_dst);
 
 	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
-	ether_addr_copy(key->eth.src, eth_key->eth_src);
-	ether_addr_copy(key->eth.dst, eth_key->eth_dst);
+	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
+	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
 	return 0;
 }
 
@@ -304,6 +327,15 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
 	}
 }
 
+static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
+			   const __be32 mask[4], __be32 masked[4])
+{
+	masked[0] = MASKED(old[0], addr[0], mask[0]);
+	masked[1] = MASKED(old[1], addr[1], mask[1]);
+	masked[2] = MASKED(old[2], addr[2], mask[2]);
+	masked[3] = MASKED(old[3], addr[3], mask[3]);
+}
+
 static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
 			  __be32 addr[4], const __be32 new_addr[4],
 			  bool recalculate_csum)
@@ -315,29 +347,29 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
 	memcpy(addr, new_addr, sizeof(__be32[4]));
 }
 
-static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
+static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
 {
-	nh->priority = tc >> 4;
-	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
+	/* Bits 21-24 are always unmasked, so this retains their values. */
+	SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
+	SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
+	SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
 }
 
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
+static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
+		       u8 mask)
 {
-	nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
-	nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
-	nh->flow_lbl[2] = fl & 0x000000FF;
-}
+	new_ttl = MASKED(nh->ttl, new_ttl, mask);
 
-static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
-{
 	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
 	nh->ttl = new_ttl;
 }
 
-static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
-		    const struct ovs_key_ipv4 *ipv4_key)
+static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const struct ovs_key_ipv4 *key,
+		    const struct ovs_key_ipv4 *mask)
 {
 	struct iphdr *nh;
+	__be32 new_addr;
 	int err;
 
 	err = skb_ensure_writable(skb, skb_network_offset(skb) +
@@ -347,36 +379,49 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
 
 	nh = ip_hdr(skb);
 
-	if (ipv4_key->ipv4_src != nh->saddr) {
-		set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
-		key->ipv4.addr.src = ipv4_key->ipv4_src;
-	}
+	/* Setting an IP addresses is typically only a side effect of
+	 * matching on them in the current userspace implementation, so it
+	 * makes sense to check if the value actually changed.
+	 */
+	if (mask->ipv4_src) {
+		new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
 
-	if (ipv4_key->ipv4_dst != nh->daddr) {
-		set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
-		key->ipv4.addr.dst = ipv4_key->ipv4_dst;
+		if (unlikely(new_addr != nh->saddr)) {
+			set_ip_addr(skb, nh, &nh->saddr, new_addr);
+			flow_key->ipv4.addr.src = new_addr;
+		}
 	}
+	if (mask->ipv4_dst) {
+		new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
 
-	if (ipv4_key->ipv4_tos != nh->tos) {
-		ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
-		key->ip.tos = nh->tos;
+		if (unlikely(new_addr != nh->daddr)) {
+			set_ip_addr(skb, nh, &nh->daddr, new_addr);
+			flow_key->ipv4.addr.dst = new_addr;
+		}
 	}
-
-	if (ipv4_key->ipv4_ttl != nh->ttl) {
-		set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
-		key->ip.ttl = ipv4_key->ipv4_ttl;
+	if (mask->ipv4_tos) {
+		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
+		flow_key->ip.tos = nh->tos;
+	}
+	if (mask->ipv4_ttl) {
+		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
+		flow_key->ip.ttl = nh->ttl;
 	}
 
 	return 0;
 }
 
-static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
-		    const struct ovs_key_ipv6 *ipv6_key)
+static bool is_ipv6_mask_nonzero(const __be32 addr[4])
+{
+	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
+}
+
+static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const struct ovs_key_ipv6 *key,
+		    const struct ovs_key_ipv6 *mask)
 {
 	struct ipv6hdr *nh;
 	int err;
-	__be32 *saddr;
-	__be32 *daddr;
 
 	err = skb_ensure_writable(skb, skb_network_offset(skb) +
 				  sizeof(struct ipv6hdr));
@@ -384,71 +429,77 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
 		return err;
 
 	nh = ipv6_hdr(skb);
-	saddr = (__be32 *)&nh->saddr;
-	daddr = (__be32 *)&nh->daddr;
-
-	if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
-		set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
-			      ipv6_key->ipv6_src, true);
-		memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src,
-		       sizeof(ipv6_key->ipv6_src));
-	}
 
-	if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
+	/* Setting an IP addresses is typically only a side effect of
+	 * matching on them in the current userspace implementation, so it
+	 * makes sense to check if the value actually changed.
+	 */
+	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
+		__be32 *saddr = (__be32 *)&nh->saddr;
+		__be32 masked[4];
+
+		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
+
+		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
+			set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
+				      true);
+			memcpy(&flow_key->ipv6.addr.src, masked,
+			       sizeof(flow_key->ipv6.addr.src));
+		}
+	}
+	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
 		unsigned int offset = 0;
 		int flags = IP6_FH_F_SKIP_RH;
 		bool recalc_csum = true;
-
-		if (ipv6_ext_hdr(nh->nexthdr))
-			recalc_csum = ipv6_find_hdr(skb, &offset,
-						    NEXTHDR_ROUTING, NULL,
-						    &flags) != NEXTHDR_ROUTING;
-
-		set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
-			      ipv6_key->ipv6_dst, recalc_csum);
-		memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst,
-		       sizeof(ipv6_key->ipv6_dst));
+		__be32 *daddr = (__be32 *)&nh->daddr;
+		__be32 masked[4];
+
+		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);
+
+		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
+			if (ipv6_ext_hdr(nh->nexthdr))
+				recalc_csum = (ipv6_find_hdr(skb, &offset,
+							     NEXTHDR_ROUTING,
+							     NULL, &flags)
+					       != NEXTHDR_ROUTING);
+
+			set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
+				      recalc_csum);
+			memcpy(&flow_key->ipv6.addr.dst, masked,
+			       sizeof(flow_key->ipv6.addr.dst));
+		}
+	}
+	if (mask->ipv6_tclass) {
+		ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+		flow_key->ip.tos = ipv6_get_dsfield(nh);
+	}
+	if (mask->ipv6_label) {
+		set_ipv6_fl(nh, ntohl(key->ipv6_label),
+			    ntohl(mask->ipv6_label));
+		flow_key->ipv6.label =
+		    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+	}
+	if (mask->ipv6_hlimit) {
+		SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
+		flow_key->ip.ttl = nh->hop_limit;
 	}
-
-	set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
-	key->ip.tos = ipv6_get_dsfield(nh);
-
-	set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
-	key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
-
-	nh->hop_limit = ipv6_key->ipv6_hlimit;
-	key->ip.ttl = ipv6_key->ipv6_hlimit;
 	return 0;
 }
 
 /* Must follow skb_ensure_writable() since that can move the skb data. */
 static void set_tp_port(struct sk_buff *skb, __be16 *port,
-			 __be16 new_port, __sum16 *check)
+			__be16 new_port, __sum16 *check)
 {
 	inet_proto_csum_replace2(check, skb, *port, new_port, 0);
 	*port = new_port;
-	skb_clear_hash(skb);
-}
-
-static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
-{
-	struct udphdr *uh = udp_hdr(skb);
-
-	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
-		set_tp_port(skb, port, new_port, &uh->check);
-
-		if (!uh->check)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		*port = new_port;
-		skb_clear_hash(skb);
-	}
 }
 
-static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
-		   const struct ovs_key_udp *udp_port_key)
+static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		   const struct ovs_key_udp *key,
+		   const struct ovs_key_udp *mask)
 {
 	struct udphdr *uh;
+	__be16 src, dst;
 	int err;
 
 	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
@@ -457,23 +508,40 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
 		return err;
 
 	uh = udp_hdr(skb);
-	if (udp_port_key->udp_src != uh->source) {
-		set_udp_port(skb, &uh->source, udp_port_key->udp_src);
-		key->tp.src = udp_port_key->udp_src;
-	}
+	/* Either of the masks is non-zero, so do not bother checking them. */
+	src = MASKED(uh->source, key->udp_src, mask->udp_src);
+	dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);
 
-	if (udp_port_key->udp_dst != uh->dest) {
-		set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
-		key->tp.dst = udp_port_key->udp_dst;
+	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
+		if (likely(src != uh->source)) {
+			set_tp_port(skb, &uh->source, src, &uh->check);
+			flow_key->tp.src = src;
+		}
+		if (likely(dst != uh->dest)) {
+			set_tp_port(skb, &uh->dest, dst, &uh->check);
+			flow_key->tp.dst = dst;
+		}
+
+		if (unlikely(!uh->check))
+			uh->check = CSUM_MANGLED_0;
+	} else {
+		uh->source = src;
+		uh->dest = dst;
+		flow_key->tp.src = src;
+		flow_key->tp.dst = dst;
 	}
 
+	skb_clear_hash(skb);
+
 	return 0;
 }
 
-static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
-		   const struct ovs_key_tcp *tcp_port_key)
+static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		   const struct ovs_key_tcp *key,
+		   const struct ovs_key_tcp *mask)
 {
 	struct tcphdr *th;
+	__be16 src, dst;
 	int err;
 
 	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
@@ -482,50 +550,49 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
 		return err;
 
 	th = tcp_hdr(skb);
-	if (tcp_port_key->tcp_src != th->source) {
-		set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
-		key->tp.src = tcp_port_key->tcp_src;
+	src = MASKED(th->source, key->tcp_src, mask->tcp_src);
+	if (likely(src != th->source)) {
+		set_tp_port(skb, &th->source, src, &th->check);
+		flow_key->tp.src = src;
 	}
-
-	if (tcp_port_key->tcp_dst != th->dest) {
-		set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
-		key->tp.dst = tcp_port_key->tcp_dst;
+	dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
+	if (likely(dst != th->dest)) {
+		set_tp_port(skb, &th->dest, dst, &th->check);
+		flow_key->tp.dst = dst;
 	}
+	skb_clear_hash(skb);
 
 	return 0;
 }
 
-static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key,
-		    const struct ovs_key_sctp *sctp_port_key)
+static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+		    const struct ovs_key_sctp *key,
+		    const struct ovs_key_sctp *mask)
 {
+	unsigned int sctphoff = skb_transport_offset(skb);
 	struct sctphdr *sh;
+	__le32 old_correct_csum, new_csum, old_csum;
 	int err;
-	unsigned int sctphoff = skb_transport_offset(skb);
 
 	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
 	if (unlikely(err))
 		return err;
 
 	sh = sctp_hdr(skb);
-	if (sctp_port_key->sctp_src != sh->source ||
-	    sctp_port_key->sctp_dst != sh->dest) {
-		__le32 old_correct_csum, new_csum, old_csum;
+	old_csum = sh->checksum;
+	old_correct_csum = sctp_compute_cksum(skb, sctphoff);
 
-		old_csum = sh->checksum;
-		old_correct_csum = sctp_compute_cksum(skb, sctphoff);
+	sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
+	sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
 
-		sh->source = sctp_port_key->sctp_src;
-		sh->dest = sctp_port_key->sctp_dst;
+	new_csum = sctp_compute_cksum(skb, sctphoff);
 
-		new_csum = sctp_compute_cksum(skb, sctphoff);
+	/* Carry any checksum errors through. */
+	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
 
-		/* Carry any checksum errors through. */
-		sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
-
-		skb_clear_hash(skb);
-		key->tp.src = sctp_port_key->sctp_src;
-		key->tp.dst = sctp_port_key->sctp_dst;
-	}
+	skb_clear_hash(skb);
+	flow_key->tp.src = sh->source;
+	flow_key->tp.dst = sh->dest;
 
 	return 0;
 }
@@ -653,52 +720,77 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
 	key->ovs_flow_hash = hash;
 }
 
-static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
-			      const struct nlattr *nested_attr)
+static int execute_set_action(struct sk_buff *skb,
+			      struct sw_flow_key *flow_key,
+			      const struct nlattr *a)
+{
+	/* Only tunnel set execution is supported without a mask. */
+	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
+		OVS_CB(skb)->egress_tun_info = nla_data(a);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Mask is at the midpoint of the data. */
+#define get_mask(a, type) ((const type)nla_data(a) + 1)
+
+static int execute_masked_set_action(struct sk_buff *skb,
+				     struct sw_flow_key *flow_key,
+				     const struct nlattr *a)
 {
 	int err = 0;
 
-	switch (nla_type(nested_attr)) {
+	switch (nla_type(a)) {
 	case OVS_KEY_ATTR_PRIORITY:
-		skb->priority = nla_get_u32(nested_attr);
-		key->phy.priority = skb->priority;
+		SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *));
+		flow_key->phy.priority = skb->priority;
 		break;
 
 	case OVS_KEY_ATTR_SKB_MARK:
-		skb->mark = nla_get_u32(nested_attr);
-		key->phy.skb_mark = skb->mark;
+		SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
+		flow_key->phy.skb_mark = skb->mark;
 		break;
 
 	case OVS_KEY_ATTR_TUNNEL_INFO:
-		OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
+		/* Masked data not supported for tunnel. */
+		err = -EINVAL;
 		break;
 
 	case OVS_KEY_ATTR_ETHERNET:
-		err = set_eth_addr(skb, key, nla_data(nested_attr));
+		err = set_eth_addr(skb, flow_key, nla_data(a),
+				   get_mask(a, struct ovs_key_ethernet *));
 		break;
 
 	case OVS_KEY_ATTR_IPV4:
-		err = set_ipv4(skb, key, nla_data(nested_attr));
+		err = set_ipv4(skb, flow_key, nla_data(a),
+			       get_mask(a, struct ovs_key_ipv4 *));
 		break;
 
 	case OVS_KEY_ATTR_IPV6:
-		err = set_ipv6(skb, key, nla_data(nested_attr));
+		err = set_ipv6(skb, flow_key, nla_data(a),
+			       get_mask(a, struct ovs_key_ipv6 *));
 		break;
 
 	case OVS_KEY_ATTR_TCP:
-		err = set_tcp(skb, key, nla_data(nested_attr));
+		err = set_tcp(skb, flow_key, nla_data(a),
+			      get_mask(a, struct ovs_key_tcp *));
 		break;
 
 	case OVS_KEY_ATTR_UDP:
-		err = set_udp(skb, key, nla_data(nested_attr));
+		err = set_udp(skb, flow_key, nla_data(a),
+			      get_mask(a, struct ovs_key_udp *));
 		break;
 
 	case OVS_KEY_ATTR_SCTP:
-		err = set_sctp(skb, key, nla_data(nested_attr));
+		err = set_sctp(skb, flow_key, nla_data(a),
+			       get_mask(a, struct ovs_key_sctp *));
 		break;
 
 	case OVS_KEY_ATTR_MPLS:
-		err = set_mpls(skb, key, nla_data(nested_attr));
+		err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
+								    __be32 *));
 		break;
 	}
 
@@ -818,6 +910,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			err = execute_set_action(skb, key, nla_data(a));
 			break;
 
+		case OVS_ACTION_ATTR_SET_MASKED:
+		case OVS_ACTION_ATTR_SET_TO_MASKED:
+			err = execute_masked_set_action(skb, key, nla_data(a));
+			break;
+
 		case OVS_ACTION_ATTR_SAMPLE:
 			err = sample(dp, skb, key, a);
 			break;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 8b9a612b39d1..993281e6278d 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1695,16 +1695,6 @@ static int validate_and_copy_sample(const struct nlattr *attr,
 	return 0;
 }
 
-static int validate_tp_port(const struct sw_flow_key *flow_key,
-			    __be16 eth_type)
-{
-	if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
-	    (flow_key->tp.src || flow_key->tp.dst))
-		return 0;
-
-	return -EINVAL;
-}
-
 void ovs_match_init(struct sw_flow_match *match,
 		    struct sw_flow_key *key,
 		    struct sw_flow_mask *mask)
@@ -1805,23 +1795,45 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	return err;
 }
 
+/* Return false if there are any non-masked bits set.
+ * Mask follows data immediately, before any netlink padding.
+ */
+static bool validate_masked(u8 *data, int len)
+{
+	u8 *mask = data + len;
+
+	while (len--)
+		if (*data++ & ~*mask++)
+			return false;
+
+	return true;
+}
+
 static int validate_set(const struct nlattr *a,
 			const struct sw_flow_key *flow_key,
 			struct sw_flow_actions **sfa,
-			bool *set_tun, __be16 eth_type, bool log)
+			bool *skip_copy, __be16 eth_type, bool masked, bool log)
 {
 	const struct nlattr *ovs_key = nla_data(a);
 	int key_type = nla_type(ovs_key);
+	size_t key_len;
 
 	/* There can be only one key in a action */
 	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
 		return -EINVAL;
 
+	key_len = nla_len(ovs_key);
+	if (masked)
+		key_len /= 2;
+
 	if (key_type > OVS_KEY_ATTR_MAX ||
-	    (ovs_key_lens[key_type].len != nla_len(ovs_key) &&
+	    (ovs_key_lens[key_type].len != key_len &&
 	     ovs_key_lens[key_type].len != OVS_ATTR_NESTED))
 		return -EINVAL;
 
+	if (masked && !validate_masked(nla_data(ovs_key), key_len))
+		return -EINVAL;
+
 	switch (key_type) {
 	const struct ovs_key_ipv4 *ipv4_key;
 	const struct ovs_key_ipv6 *ipv6_key;
@@ -1836,7 +1848,10 @@ static int validate_set(const struct nlattr *a,
 		if (eth_p_mpls(eth_type))
 			return -EINVAL;
 
-		*set_tun = true;
+		if (masked)
+			return -EINVAL; /* Masked tunnel set not supported. */
+
+		*skip_copy = true;
 		err = validate_and_copy_set_tun(a, sfa, log);
 		if (err)
 			return err;
@@ -1846,48 +1861,66 @@ static int validate_set(const struct nlattr *a,
 		if (eth_type != htons(ETH_P_IP))
 			return -EINVAL;
 
-		if (!flow_key->ip.proto)
-			return -EINVAL;
-
 		ipv4_key = nla_data(ovs_key);
-		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
-			return -EINVAL;
 
-		if (ipv4_key->ipv4_frag != flow_key->ip.frag)
-			return -EINVAL;
+		if (masked) {
+			const struct ovs_key_ipv4 *mask = ipv4_key + 1;
 
+			/* Non-writeable fields. */
+			if (mask->ipv4_proto || mask->ipv4_frag)
+				return -EINVAL;
+		} else {
+			if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+				return -EINVAL;
+
+			if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+				return -EINVAL;
+		}
 		break;
 
 	case OVS_KEY_ATTR_IPV6:
 		if (eth_type != htons(ETH_P_IPV6))
 			return -EINVAL;
 
-		if (!flow_key->ip.proto)
-			return -EINVAL;
-
 		ipv6_key = nla_data(ovs_key);
-		if (ipv6_key->ipv6_proto != flow_key->ip.proto)
-			return -EINVAL;
 
-		if (ipv6_key->ipv6_frag != flow_key->ip.frag)
-			return -EINVAL;
+		if (masked) {
+			const struct ovs_key_ipv6 *mask = ipv6_key + 1;
+
+			/* Non-writeable fields. */
+			if (mask->ipv6_proto || mask->ipv6_frag)
+				return -EINVAL;
+
+			/* Invalid bits in the flow label mask? */
+			if (ntohl(mask->ipv6_label) & 0xFFF00000)
+				return -EINVAL;
+		} else {
+			if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+				return -EINVAL;
 
+			if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+				return -EINVAL;
+		}
 		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
 			return -EINVAL;
 
 		break;
 
 	case OVS_KEY_ATTR_TCP:
-		if (flow_key->ip.proto != IPPROTO_TCP)
+		if ((eth_type != htons(ETH_P_IP) &&
+		     eth_type != htons(ETH_P_IPV6)) ||
+		    flow_key->ip.proto != IPPROTO_TCP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key, eth_type);
+		break;
 
 	case OVS_KEY_ATTR_UDP:
-		if (flow_key->ip.proto != IPPROTO_UDP)
+		if ((eth_type != htons(ETH_P_IP) &&
+		     eth_type != htons(ETH_P_IPV6)) ||
+		    flow_key->ip.proto != IPPROTO_UDP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key, eth_type);
+		break;
 
 	case OVS_KEY_ATTR_MPLS:
 		if (!eth_p_mpls(eth_type))
@@ -1895,15 +1928,45 @@ static int validate_set(const struct nlattr *a,
 		break;
 
 	case OVS_KEY_ATTR_SCTP:
-		if (flow_key->ip.proto != IPPROTO_SCTP)
+		if ((eth_type != htons(ETH_P_IP) &&
+		     eth_type != htons(ETH_P_IPV6)) ||
+		    flow_key->ip.proto != IPPROTO_SCTP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key, eth_type);
+		break;
 
 	default:
 		return -EINVAL;
 	}
 
+	/* Convert non-masked non-tunnel set actions to masked set actions. */
+	if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
+		int start, len = key_len * 2;
+		struct nlattr *at;
+
+		*skip_copy = true;
+
+		start = add_nested_action_start(sfa,
+						OVS_ACTION_ATTR_SET_TO_MASKED,
+						log);
+		if (start < 0)
+			return start;
+
+		at = __add_action(sfa, key_type, NULL, len, log);
+		if (IS_ERR(at))
+			return PTR_ERR(at);
+
+		memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
+		memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
+		/* Clear non-writeable bits from otherwise writeable fields. */
+		if (key_type == OVS_KEY_ATTR_IPV6) {
+			struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
+
+			mask->ipv6_label &= htonl(0x000FFFFF);
+		}
+		add_nested_action_end(*sfa, start);
+	}
+
 	return 0;
 }
 
@@ -1965,6 +2028,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
 			[OVS_ACTION_ATTR_POP_VLAN] = 0,
 			[OVS_ACTION_ATTR_SET] = (u32)-1,
+			[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
 			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
 			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
 		};
@@ -2060,7 +2124,14 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 
 		case OVS_ACTION_ATTR_SET:
 			err = validate_set(a, key, sfa,
-					   &skip_copy, eth_type, log);
+					   &skip_copy, eth_type, false, log);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_SET_MASKED:
+			err = validate_set(a, key, sfa,
+					   &skip_copy, eth_type, true, log);
 			if (err)
 				return err;
 			break;
@@ -2090,6 +2161,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
 	return 0;
 }
 
+/* 'key' must be the masked key. */
 int ovs_nla_copy_actions(const struct nlattr *attr,
 			 const struct sw_flow_key *key,
 			 struct sw_flow_actions **sfa, bool log)
@@ -2177,6 +2249,21 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 	return 0;
 }
 
+static int masked_set_action_to_set_action_attr(const struct nlattr *a,
+						struct sk_buff *skb)
+{
+	const struct nlattr *ovs_key = nla_data(a);
+	size_t key_len = nla_len(ovs_key) / 2;
+
+	/* Revert the conversion we did from a non-masked set action to
+	 * masked set action.
+	 */
+	if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 {
 	const struct nlattr *a;
@@ -2192,6 +2279,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_SET_TO_MASKED:
+			err = masked_set_action_to_set_action_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
 		case OVS_ACTION_ATTR_SAMPLE:
 			err = sample_action_to_attr(a, skb);
 			if (err)
-- 
cgit v1.2.3-71-gd317


From 032ee4236954eb214651cb9bfc1b38ffa8fd7a01 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Fri, 6 Feb 2015 16:04:38 -0500
Subject: tcp: helpers to mitigate ACK loops by rate-limiting out-of-window
 dupacks

Helpers for mitigating ACK loops by rate-limiting dupacks sent in
response to incoming out-of-window packets.

This patch includes:

- rate-limiting logic
- sysctl to control how often we allow dupacks to out-of-window packets
- SNMP counter for cases where we rate-limited our dupack sending

The rate-limiting logic in this patch decides to not send dupacks in
response to out-of-window segments if (a) they are SYNs or pure ACKs
and (b) the remote endpoint is sending them faster than the configured
rate limit.

We rate-limit our responses rather than blocking them entirely or
resetting the connection, because legitimate connections can rely on
dupacks in response to some out-of-window segments. For example, zero
window probes are typically sent with a sequence number that is below
the current window, and ZWPs thus expect to thus elicit a dupack in
response.

We allow dupacks in response to TCP segments with data, because these
may be spurious retransmissions for which the remote endpoint wants to
receive DSACKs. This is safe because segments with data can't
realistically be part of ACK loops, which by their nature consist of
each side sending pure/data-less ACKs to each other.

The dupack interval is controlled by a new sysctl knob,
tcp_invalid_ratelimit, given in milliseconds, in case an administrator
needs to dial this upward in the face of a high-rate DoS attack. The
name and units are chosen to be analogous to the existing analogous
knob for ICMP, icmp_ratelimit.

The default value for tcp_invalid_ratelimit is 500ms, which allows at
most one such dupack per 500ms. This is chosen to be 2x faster than
the 1-second minimum RTO interval allowed by RFC 6298 (section 2, rule
2.4). We allow the extra 2x factor because network delay variations
can cause packets sent at 1 second intervals to be compressed and
arrive much closer.

Reported-by: Avery Fay <avery@mixpanel.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 22 ++++++++++++++++++++++
 include/net/tcp.h                      | 32 ++++++++++++++++++++++++++++++++
 include/uapi/linux/snmp.h              |  6 ++++++
 net/ipv4/proc.c                        |  6 ++++++
 net/ipv4/sysctl_net_ipv4.c             |  7 +++++++
 net/ipv4/tcp_input.c                   |  1 +
 6 files changed, 74 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index a5e4c813f17f..1b8c964b0d17 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -290,6 +290,28 @@ tcp_frto - INTEGER
 
 	By default it's enabled with a non-zero value. 0 disables F-RTO.
 
+tcp_invalid_ratelimit - INTEGER
+	Limit the maximal rate for sending duplicate acknowledgments
+	in response to incoming TCP packets that are for an existing
+	connection but that are invalid due to any of these reasons:
+
+	  (a) out-of-window sequence number,
+	  (b) out-of-window acknowledgment number, or
+	  (c) PAWS (Protection Against Wrapped Sequence numbers) check failure
+
+	This can help mitigate simple "ack loop" DoS attacks, wherein
+	a buggy or malicious middlebox or man-in-the-middle can
+	rewrite TCP header fields in manner that causes each endpoint
+	to think that the other is sending invalid TCP segments, thus
+	causing each side to send an unterminating stream of duplicate
+	acknowledgments for invalid segments.
+
+	Using 0 disables rate-limiting of dupacks in response to
+	invalid segments; otherwise this value specifies the minimal
+	space between sending such dupacks, in milliseconds.
+
+	Default: 500 (milliseconds).
+
 tcp_keepalive_time - INTEGER
 	How often TCP sends out keepalive messages when keepalive is enabled.
 	Default: 2hours.
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 28e9bd3abceb..b81f45c67b2e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -274,6 +274,7 @@ extern int sysctl_tcp_challenge_ack_limit;
 extern unsigned int sysctl_tcp_notsent_lowat;
 extern int sysctl_tcp_min_tso_segs;
 extern int sysctl_tcp_autocorking;
+extern int sysctl_tcp_invalid_ratelimit;
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
@@ -1236,6 +1237,37 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
 	return true;
 }
 
+/* Return true if we're currently rate-limiting out-of-window ACKs and
+ * thus shouldn't send a dupack right now. We rate-limit dupacks in
+ * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
+ * attacks that send repeated SYNs or ACKs for the same connection. To
+ * do this, we do not send a duplicate SYNACK or ACK if the remote
+ * endpoint is sending out-of-window SYNs or pure ACKs at a high rate.
+ */
+static inline bool tcp_oow_rate_limited(struct net *net,
+					const struct sk_buff *skb,
+					int mib_idx, u32 *last_oow_ack_time)
+{
+	/* Data packets without SYNs are not likely part of an ACK loop. */
+	if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
+	    !tcp_hdr(skb)->syn)
+		goto not_rate_limited;
+
+	if (*last_oow_ack_time) {
+		s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+			NET_INC_STATS_BH(net, mib_idx);
+			return true;	/* rate-limited: don't send yet! */
+		}
+	}
+
+	*last_oow_ack_time = tcp_time_stamp;
+
+not_rate_limited:
+	return false;	/* not rate-limited: go ahead, send dupack now! */
+}
+
 static inline void tcp_mib_init(struct net *net)
 {
 	/* See RFC 2012 */
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index b22224100011..6a6fb747c78d 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -270,6 +270,12 @@ enum
 	LINUX_MIB_TCPHYSTARTTRAINCWND,		/* TCPHystartTrainCwnd */
 	LINUX_MIB_TCPHYSTARTDELAYDETECT,	/* TCPHystartDelayDetect */
 	LINUX_MIB_TCPHYSTARTDELAYCWND,		/* TCPHystartDelayCwnd */
+	LINUX_MIB_TCPACKSKIPPEDSYNRECV,		/* TCPACKSkippedSynRecv */
+	LINUX_MIB_TCPACKSKIPPEDPAWS,		/* TCPACKSkippedPAWS */
+	LINUX_MIB_TCPACKSKIPPEDSEQ,		/* TCPACKSkippedSeq */
+	LINUX_MIB_TCPACKSKIPPEDFINWAIT2,	/* TCPACKSkippedFinWait2 */
+	LINUX_MIB_TCPACKSKIPPEDTIMEWAIT,	/* TCPACKSkippedTimeWait */
+	LINUX_MIB_TCPACKSKIPPEDCHALLENGE,	/* TCPACKSkippedChallenge */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 8f9cd200ce20..d8953ef0770c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -292,6 +292,12 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPHystartTrainCwnd", LINUX_MIB_TCPHYSTARTTRAINCWND),
 	SNMP_MIB_ITEM("TCPHystartDelayDetect", LINUX_MIB_TCPHYSTARTDELAYDETECT),
 	SNMP_MIB_ITEM("TCPHystartDelayCwnd", LINUX_MIB_TCPHYSTARTDELAYCWND),
+	SNMP_MIB_ITEM("TCPACKSkippedSynRecv", LINUX_MIB_TCPACKSKIPPEDSYNRECV),
+	SNMP_MIB_ITEM("TCPACKSkippedPAWS", LINUX_MIB_TCPACKSKIPPEDPAWS),
+	SNMP_MIB_ITEM("TCPACKSkippedSeq", LINUX_MIB_TCPACKSKIPPEDSEQ),
+	SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2),
+	SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT),
+	SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e0ee384a448f..82601a68cf90 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -728,6 +728,13 @@ static struct ctl_table ipv4_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "tcp_invalid_ratelimit",
+		.data		= &sysctl_tcp_invalid_ratelimit,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+	},
 	{
 		.procname	= "icmp_msgs_per_sec",
 		.data		= &sysctl_icmp_msgs_per_sec,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d3dfff78fa19..9401aa43b814 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,6 +100,7 @@ int sysctl_tcp_thin_dupack __read_mostly;
 
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 int sysctl_tcp_early_retrans __read_mostly = 3;
+int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
-- 
cgit v1.2.3-71-gd317


From e5863d9ad754926e7d3f38b43ac8bd48ef73b097 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Wed, 17 Dec 2014 21:08:12 -0500
Subject: dm: allocate requests in target when stacking on blk-mq devices

For blk-mq request-based DM the responsibility of allocating a cloned
request is transfered from DM core to the target type.  Doing so
enables the cloned request to be allocated from the appropriate
blk-mq request_queue's pool (only the DM target, e.g. multipath, can
know which block device to send a given cloned request to).

Care was taken to preserve compatibility with old-style block request
completion that requires request-based DM _not_ acquire the clone
request's queue lock in the completion path.  As such, there are now 2
different request-based DM target_type interfaces:
1) the original .map_rq() interface will continue to be used for
   non-blk-mq devices -- the preallocated clone request is passed in
   from DM core.
2) a new .clone_and_map_rq() and .release_clone_rq() will be used for
   blk-mq devices -- blk_get_request() and blk_put_request() are used
   respectively from these hooks.

dm_table_set_type() was updated to detect if the request-based target is
being stacked on blk-mq devices, if so DM_TYPE_MQ_REQUEST_BASED is set.
DM core disallows switching the DM table's type after it is set.  This
means that there is no mixing of non-blk-mq and blk-mq devices within
the same request-based DM table.

[This patch was started by Keith and later heavily modified by Mike]

Tested-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-mpath.c         |  51 ++++++++++++++++---
 drivers/md/dm-table.c         |  34 +++++++++++--
 drivers/md/dm-target.c        |  15 +++++-
 drivers/md/dm.c               | 114 +++++++++++++++++++++++++++++++-----------
 drivers/md/dm.h               |   8 +--
 include/linux/device-mapper.h |   7 +++
 include/uapi/linux/dm-ioctl.h |   4 +-
 7 files changed, 185 insertions(+), 48 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 2552b88f8953..863fc8c1ac06 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -11,6 +11,7 @@
 #include "dm-path-selector.h"
 #include "dm-uevent.h"
 
+#include <linux/blkdev.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/mempool.h>
@@ -378,12 +379,13 @@ static int __must_push_back(struct multipath *m)
 /*
  * Map cloned requests
  */
-static int multipath_map(struct dm_target *ti, struct request *clone,
-			 union map_info *map_context)
+static int __multipath_map(struct dm_target *ti, struct request *clone,
+			   union map_info *map_context,
+			   struct request *rq, struct request **__clone)
 {
 	struct multipath *m = (struct multipath *) ti->private;
 	int r = DM_MAPIO_REQUEUE;
-	size_t nr_bytes = blk_rq_bytes(clone);
+	size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
 	struct pgpath *pgpath;
 	struct block_device *bdev;
 	struct dm_mpath_io *mpio;
@@ -416,12 +418,25 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
 
 	bdev = pgpath->path.dev->bdev;
 
-	clone->q = bdev_get_queue(bdev);
-	clone->rq_disk = bdev->bd_disk;
-	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
-
 	spin_unlock_irq(&m->lock);
 
+	if (clone) {
+		/* Old request-based interface: allocated clone is passed in */
+		clone->q = bdev_get_queue(bdev);
+		clone->rq_disk = bdev->bd_disk;
+		clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+	} else {
+		/* blk-mq request-based interface */
+		*__clone = blk_get_request(bdev_get_queue(bdev),
+					   rq_data_dir(rq), GFP_KERNEL);
+		if (IS_ERR(*__clone))
+			/* ENOMEM, requeue */
+			return r;
+		(*__clone)->bio = (*__clone)->biotail = NULL;
+		(*__clone)->rq_disk = bdev->bd_disk;
+		(*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+	}
+
 	if (pgpath->pg->ps.type->start_io)
 		pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
 					      &pgpath->path,
@@ -434,6 +449,24 @@ out_unlock:
 	return r;
 }
 
+static int multipath_map(struct dm_target *ti, struct request *clone,
+			 union map_info *map_context)
+{
+	return __multipath_map(ti, clone, map_context, NULL, NULL);
+}
+
+static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
+				   union map_info *map_context,
+				   struct request **clone)
+{
+	return __multipath_map(ti, NULL, map_context, rq, clone);
+}
+
+static void multipath_release_clone(struct request *clone)
+{
+	blk_put_request(clone);
+}
+
 /*
  * If we run out of usable paths, should we queue I/O or error it?
  */
@@ -1670,11 +1703,13 @@ out:
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
 	.name = "multipath",
-	.version = {1, 7, 0},
+	.version = {1, 8, 0},
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,
 	.map_rq = multipath_map,
+	.clone_and_map_rq = multipath_clone_and_map,
+	.release_clone_rq = multipath_release_clone,
 	.rq_end_io = multipath_end_io,
 	.presuspend = multipath_presuspend,
 	.postsuspend = multipath_postsuspend,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 3afae9e062f8..2d7e373955f3 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -827,6 +827,7 @@ static int dm_table_set_type(struct dm_table *t)
 {
 	unsigned i;
 	unsigned bio_based = 0, request_based = 0, hybrid = 0;
+	bool use_blk_mq = false;
 	struct dm_target *tgt;
 	struct dm_dev_internal *dd;
 	struct list_head *devices;
@@ -872,11 +873,26 @@ static int dm_table_set_type(struct dm_table *t)
 	/* Non-request-stackable devices can't be used for request-based dm */
 	devices = dm_table_get_devices(t);
 	list_for_each_entry(dd, devices, list) {
-		if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev->bdev))) {
-			DMWARN("table load rejected: including"
-			       " non-request-stackable devices");
+		struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
+
+		if (!blk_queue_stackable(q)) {
+			DMERR("table load rejected: including"
+			      " non-request-stackable devices");
 			return -EINVAL;
 		}
+
+		if (q->mq_ops)
+			use_blk_mq = true;
+	}
+
+	if (use_blk_mq) {
+		/* verify _all_ devices in the table are blk-mq devices */
+		list_for_each_entry(dd, devices, list)
+			if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) {
+				DMERR("table load rejected: not all devices"
+				      " are blk-mq request-stackable");
+				return -EINVAL;
+			}
 	}
 
 	/*
@@ -890,7 +906,7 @@ static int dm_table_set_type(struct dm_table *t)
 		return -EINVAL;
 	}
 
-	t->type = DM_TYPE_REQUEST_BASED;
+	t->type = !use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED;
 
 	return 0;
 }
@@ -907,7 +923,15 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
 
 bool dm_table_request_based(struct dm_table *t)
 {
-	return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
+	unsigned table_type = dm_table_get_type(t);
+
+	return (table_type == DM_TYPE_REQUEST_BASED ||
+		table_type == DM_TYPE_MQ_REQUEST_BASED);
+}
+
+bool dm_table_mq_request_based(struct dm_table *t)
+{
+	return dm_table_get_type(t) == DM_TYPE_MQ_REQUEST_BASED;
 }
 
 static int dm_table_alloc_md_mempools(struct dm_table *t)
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 242e3cec397a..925ec1b15e75 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -137,13 +137,26 @@ static int io_err_map_rq(struct dm_target *ti, struct request *clone,
 	return -EIO;
 }
 
+static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
+				   union map_info *map_context,
+				   struct request **clone)
+{
+	return -EIO;
+}
+
+static void io_err_release_clone_rq(struct request *clone)
+{
+}
+
 static struct target_type error_target = {
 	.name = "error",
-	.version = {1, 2, 0},
+	.version = {1, 3, 0},
 	.ctr  = io_err_ctr,
 	.dtr  = io_err_dtr,
 	.map  = io_err_map,
 	.map_rq = io_err_map_rq,
+	.clone_and_map_rq = io_err_clone_and_map_rq,
+	.release_clone_rq = io_err_release_clone_rq,
 };
 
 int __init dm_target_init(void)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ae1219893948..549b815999a1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1044,7 +1044,10 @@ static void free_rq_clone(struct request *clone)
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
 	blk_rq_unprep_clone(clone);
-	free_clone_request(tio->md, clone);
+	if (clone->q && clone->q->mq_ops)
+		tio->ti->type->release_clone_rq(clone);
+	else
+		free_clone_request(tio->md, clone);
 	free_rq_tio(tio);
 }
 
@@ -1086,7 +1089,8 @@ static void dm_unprep_request(struct request *rq)
 	rq->special = NULL;
 	rq->cmd_flags &= ~REQ_DONTPREP;
 
-	free_rq_clone(clone);
+	if (clone)
+		free_rq_clone(clone);
 }
 
 /*
@@ -1185,6 +1189,13 @@ static void dm_softirq_done(struct request *rq)
 	struct dm_rq_target_io *tio = rq->special;
 	struct request *clone = tio->clone;
 
+	if (!clone) {
+		blk_end_request_all(rq, tio->error);
+		rq_completed(tio->md, rq_data_dir(rq), false);
+		free_rq_tio(tio);
+		return;
+	}
+
 	if (rq->cmd_flags & REQ_FAILED)
 		mapped = false;
 
@@ -1207,7 +1218,7 @@ static void dm_complete_request(struct request *rq, int error)
  * Complete the not-mapped clone and the original request with the error status
  * through softirq context.
  * Target's rq_end_io() function isn't called.
- * This may be used when the target's map_rq() function fails.
+ * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
  */
 static void dm_kill_unmapped_request(struct request *rq, int error)
 {
@@ -1222,13 +1233,15 @@ static void end_clone_request(struct request *clone, int error)
 {
 	struct dm_rq_target_io *tio = clone->end_io_data;
 
-	/*
-	 * For just cleaning up the information of the queue in which
-	 * the clone was dispatched.
-	 * The clone is *NOT* freed actually here because it is alloced from
-	 * dm own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
-	 */
-	__blk_put_request(clone->q, clone);
+	if (!clone->q->mq_ops) {
+		/*
+		 * For just cleaning up the information of the queue in which
+		 * the clone was dispatched.
+		 * The clone is *NOT* freed actually here because it is alloced
+		 * from dm own mempool (REQ_ALLOCED isn't set).
+		 */
+		__blk_put_request(clone->q, clone);
+	}
 
 	/*
 	 * Actual request completion is done in a softirq context which doesn't
@@ -1789,6 +1802,8 @@ static struct dm_rq_target_io *prep_tio(struct request *rq,
 					struct mapped_device *md, gfp_t gfp_mask)
 {
 	struct dm_rq_target_io *tio;
+	int srcu_idx;
+	struct dm_table *table;
 
 	tio = alloc_rq_tio(md, gfp_mask);
 	if (!tio)
@@ -1802,10 +1817,15 @@ static struct dm_rq_target_io *prep_tio(struct request *rq,
 	memset(&tio->info, 0, sizeof(tio->info));
 	init_kthread_work(&tio->work, map_tio_request);
 
-	if (!clone_rq(rq, md, tio, gfp_mask)) {
-		free_rq_tio(tio);
-		return NULL;
+	table = dm_get_live_table(md, &srcu_idx);
+	if (!dm_table_mq_request_based(table)) {
+		if (!clone_rq(rq, md, tio, gfp_mask)) {
+			dm_put_live_table(md, srcu_idx);
+			free_rq_tio(tio);
+			return NULL;
+		}
 	}
+	dm_put_live_table(md, srcu_idx);
 
 	return tio;
 }
@@ -1835,17 +1855,36 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
 
 /*
  * Returns:
- * 0  : the request has been processed (not requeued)
- * !0 : the request has been requeued
+ * 0                : the request has been processed
+ * DM_MAPIO_REQUEUE : the original request needs to be requeued
+ * < 0              : the request was completed due to failure
  */
 static int map_request(struct dm_target *ti, struct request *rq,
 		       struct mapped_device *md)
 {
-	int r, requeued = 0;
+	int r;
 	struct dm_rq_target_io *tio = rq->special;
-	struct request *clone = tio->clone;
+	struct request *clone = NULL;
+
+	if (tio->clone) {
+		clone = tio->clone;
+		r = ti->type->map_rq(ti, clone, &tio->info);
+	} else {
+		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
+		if (r < 0) {
+			/* The target wants to complete the I/O */
+			dm_kill_unmapped_request(rq, r);
+			return r;
+		}
+		if (IS_ERR(clone))
+			return DM_MAPIO_REQUEUE;
+		if (setup_clone(clone, rq, tio, GFP_KERNEL)) {
+			/* -ENOMEM */
+			ti->type->release_clone_rq(clone);
+			return DM_MAPIO_REQUEUE;
+		}
+	}
 
-	r = ti->type->map_rq(ti, clone, &tio->info);
 	switch (r) {
 	case DM_MAPIO_SUBMITTED:
 		/* The target has taken the I/O to submit by itself later */
@@ -1859,7 +1898,6 @@ static int map_request(struct dm_target *ti, struct request *rq,
 	case DM_MAPIO_REQUEUE:
 		/* The target wants to requeue the I/O */
 		dm_requeue_unmapped_request(clone);
-		requeued = 1;
 		break;
 	default:
 		if (r > 0) {
@@ -1869,17 +1907,20 @@ static int map_request(struct dm_target *ti, struct request *rq,
 
 		/* The target wants to complete the I/O */
 		dm_kill_unmapped_request(rq, r);
-		break;
+		return r;
 	}
 
-	return requeued;
+	return 0;
 }
 
 static void map_tio_request(struct kthread_work *work)
 {
 	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
+	struct request *rq = tio->orig;
+	struct mapped_device *md = tio->md;
 
-	map_request(tio->ti, tio->orig, tio->md);
+	if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE)
+		dm_requeue_unmapped_original_request(md, rq);
 }
 
 static void dm_start_request(struct mapped_device *md, struct request *orig)
@@ -2459,6 +2500,14 @@ unsigned dm_get_md_type(struct mapped_device *md)
 	return md->type;
 }
 
+static bool dm_md_type_request_based(struct mapped_device *md)
+{
+	unsigned table_type = dm_get_md_type(md);
+
+	return (table_type == DM_TYPE_REQUEST_BASED ||
+		table_type == DM_TYPE_MQ_REQUEST_BASED);
+}
+
 struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
 {
 	return md->immutable_target_type;
@@ -2511,8 +2560,7 @@ static int dm_init_request_based_queue(struct mapped_device *md)
  */
 int dm_setup_md_queue(struct mapped_device *md)
 {
-	if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) &&
-	    !dm_init_request_based_queue(md)) {
+	if (dm_md_type_request_based(md) && !dm_init_request_based_queue(md)) {
 		DMWARN("Cannot initialize queue for request-based mapped device");
 		return -EINVAL;
 	}
@@ -3184,27 +3232,35 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
 {
 	struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
 	struct kmem_cache *cachep;
-	unsigned int pool_size;
+	unsigned int pool_size = 0;
 	unsigned int front_pad;
 
 	if (!pools)
 		return NULL;
 
-	if (type == DM_TYPE_BIO_BASED) {
+	switch (type) {
+	case DM_TYPE_BIO_BASED:
 		cachep = _io_cache;
 		pool_size = dm_get_reserved_bio_based_ios();
 		front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
-	} else if (type == DM_TYPE_REQUEST_BASED) {
-		cachep = _rq_tio_cache;
+		break;
+	case DM_TYPE_REQUEST_BASED:
 		pool_size = dm_get_reserved_rq_based_ios();
 		pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
 		if (!pools->rq_pool)
 			goto out;
+		/* fall through to setup remaining rq-based pools */
+	case DM_TYPE_MQ_REQUEST_BASED:
+		cachep = _rq_tio_cache;
+		if (!pool_size)
+			pool_size = dm_get_reserved_rq_based_ios();
 		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
 		/* per_bio_data_size is not used. See __bind_mempools(). */
 		WARN_ON(per_bio_data_size != 0);
-	} else
+		break;
+	default:
 		goto out;
+	}
 
 	pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
 	if (!pools->io_pool)
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 84b0f9e4ba6c..84d79784b866 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -34,9 +34,10 @@
 /*
  * Type of table and mapped_device's mempool
  */
-#define DM_TYPE_NONE		0
-#define DM_TYPE_BIO_BASED	1
-#define DM_TYPE_REQUEST_BASED	2
+#define DM_TYPE_NONE			0
+#define DM_TYPE_BIO_BASED		1
+#define DM_TYPE_REQUEST_BASED		2
+#define DM_TYPE_MQ_REQUEST_BASED	3
 
 /*
  * List of devices that a metadevice uses and should open/close.
@@ -73,6 +74,7 @@ int dm_table_any_busy_target(struct dm_table *t);
 unsigned dm_table_get_type(struct dm_table *t);
 struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
+bool dm_table_mq_request_based(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 19296fba58e8..2646aed1d3fe 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -48,6 +48,11 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
 typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
 typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
 				  union map_info *map_context);
+typedef int (*dm_clone_and_map_request_fn) (struct dm_target *ti,
+					    struct request *rq,
+					    union map_info *map_context,
+					    struct request **clone);
+typedef void (*dm_release_clone_request_fn) (struct request *clone);
 
 /*
  * Returns:
@@ -143,6 +148,8 @@ struct target_type {
 	dm_dtr_fn dtr;
 	dm_map_fn map;
 	dm_map_request_fn map_rq;
+	dm_clone_and_map_request_fn clone_and_map_rq;
+	dm_release_clone_request_fn release_clone_rq;
 	dm_endio_fn end_io;
 	dm_request_endio_fn rq_end_io;
 	dm_presuspend_fn presuspend;
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index a570d7b5796c..889f3a5b7b18 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	29
+#define DM_VERSION_MINOR	30
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2014-10-28)"
+#define DM_VERSION_EXTRA	"-ioctl (2014-12-22)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v1.2.3-71-gd317


From d0796d1ef63deb38147729664691ba3090930b26 Mon Sep 17 00:00:00 2001
From: Richard Alpe <richard.alpe@ericsson.com>
Date: Mon, 9 Feb 2015 09:50:04 +0100
Subject: tipc: convert legacy nl bearer dump to nl compat

Introduce a framework for dumping netlink data from the new netlink
API and formatting it to the old legacy API format. This is done by
looping the dump data and calling a format handler for each entity, in
this case a bearer.

We dump until either all data is dumped or we reach the limited buffer
size of the legacy API. Remember, the legacy API doesn't scale.

In this commit we convert TIPC_CMD_GET_BEARER_NAMES to use the compat
layer.

Signed-off-by: Richard Alpe <richard.alpe@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_config.h |   5 +
 net/tipc/bearer.c                |  29 -----
 net/tipc/bearer.h                |   1 -
 net/tipc/config.c                |   3 -
 net/tipc/netlink_compat.c        | 274 ++++++++++++++++++++++++++++++++++++++-
 5 files changed, 278 insertions(+), 34 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index 876d0a14863c..e1f4f05f4c5c 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -272,6 +272,11 @@ static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type)
 		(ntohs(((struct tlv_desc *)tlv)->tlv_type) == exp_type);
 }
 
+static inline int TLV_GET_LEN(struct tlv_desc *tlv)
+{
+	return ntohs(tlv->tlv_len);
+}
+
 static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len)
 {
 	struct tlv_desc *tlv_ptr;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 35d400e8c2e5..7a9e29641e61 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -205,35 +205,6 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name)
 	return NULL;
 }
 
-/**
- * tipc_bearer_get_names - record names of bearers in buffer
- */
-struct sk_buff *tipc_bearer_get_names(struct net *net)
-{
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct sk_buff *buf;
-	struct tipc_bearer *b;
-	int i, j;
-
-	buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME));
-	if (!buf)
-		return NULL;
-
-	for (i = 0; media_info_array[i] != NULL; i++) {
-		for (j = 0; j < MAX_BEARERS; j++) {
-			b = rtnl_dereference(tn->bearer_list[j]);
-			if (!b)
-				continue;
-			if (b->media == media_info_array[i]) {
-				tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME,
-						    b->name,
-						    strlen(b->name) + 1);
-			}
-		}
-	}
-	return buf;
-}
-
 void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index c035e3e24764..956858276d93 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -205,7 +205,6 @@ void tipc_disable_l2_media(struct tipc_bearer *b);
 int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
 		     struct tipc_bearer *b, struct tipc_media_addr *dest);
 
-struct sk_buff *tipc_bearer_get_names(struct net *net);
 void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest);
 void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest);
 struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name);
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 6873360cda53..52e84b0ac48a 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -252,9 +252,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd,
 		rep_tlv_buf = tipc_nametbl_get(net, req_tlv_area,
 					       req_tlv_space);
 		break;
-	case TIPC_CMD_GET_BEARER_NAMES:
-		rep_tlv_buf = tipc_bearer_get_names(net);
-		break;
 	case TIPC_CMD_GET_MEDIA_NAMES:
 		rep_tlv_buf = tipc_media_get_names();
 		break;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index f752854c8b10..bd75ea290e3a 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -33,9 +33,265 @@
 
 #include "core.h"
 #include "config.h"
+#include "bearer.h"
 #include <net/genetlink.h>
 #include <linux/tipc_config.h>
 
+/* The legacy API had an artificial message length limit called
+ * ULTRA_STRING_MAX_LEN.
+ */
+#define ULTRA_STRING_MAX_LEN 32768
+
+#define TIPC_SKB_MAX TLV_SPACE(ULTRA_STRING_MAX_LEN)
+
+#define REPLY_TRUNCATED "<truncated>\n"
+
+struct tipc_nl_compat_msg {
+	u16 cmd;
+	int rep_size;
+	struct sk_buff *rep;
+	struct tlv_desc *req;
+	struct sock *dst_sk;
+};
+
+struct tipc_nl_compat_cmd_dump {
+	int (*dumpit)(struct sk_buff *, struct netlink_callback *);
+	int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs);
+};
+
+static int tipc_skb_tailroom(struct sk_buff *skb)
+{
+	int tailroom;
+	int limit;
+
+	tailroom = skb_tailroom(skb);
+	limit = TIPC_SKB_MAX - skb->len;
+
+	if (tailroom < limit)
+		return tailroom;
+
+	return limit;
+}
+
+static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len)
+{
+	struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb);
+
+	if (tipc_skb_tailroom(skb) < TLV_SPACE(len))
+		return -EMSGSIZE;
+
+	skb_put(skb, TLV_SPACE(len));
+	tlv->tlv_type = htons(type);
+	tlv->tlv_len = htons(TLV_LENGTH(len));
+	if (len && data)
+		memcpy(TLV_DATA(tlv), data, len);
+
+	return 0;
+}
+
+static struct sk_buff *tipc_tlv_alloc(int size)
+{
+	int hdr_len;
+	struct sk_buff *buf;
+
+	size = TLV_SPACE(size);
+	hdr_len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN);
+
+	buf = alloc_skb(hdr_len + size, GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	skb_reserve(buf, hdr_len);
+
+	return buf;
+}
+
+static struct sk_buff *tipc_get_err_tlv(char *str)
+{
+	int str_len = strlen(str) + 1;
+	struct sk_buff *buf;
+
+	buf = tipc_tlv_alloc(TLV_SPACE(str_len));
+	if (buf)
+		tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len);
+
+	return buf;
+}
+
+static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
+				   struct tipc_nl_compat_msg *msg,
+				   struct sk_buff *arg)
+{
+	int len = 0;
+	int err;
+	struct sk_buff *buf;
+	struct nlmsghdr *nlmsg;
+	struct netlink_callback cb;
+
+	memset(&cb, 0, sizeof(cb));
+	cb.nlh = (struct nlmsghdr *)arg->data;
+	cb.skb = arg;
+
+	buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->sk = msg->dst_sk;
+
+	do {
+		int rem;
+
+		len = (*cmd->dumpit)(buf, &cb);
+
+		nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) {
+			struct nlattr **attrs;
+
+			err = tipc_nlmsg_parse(nlmsg, &attrs);
+			if (err)
+				goto err_out;
+
+			err = (*cmd->format)(msg, attrs);
+			if (err)
+				goto err_out;
+
+			if (tipc_skb_tailroom(msg->rep) <= 1) {
+				err = -EMSGSIZE;
+				goto err_out;
+			}
+		}
+
+		skb_reset_tail_pointer(buf);
+		buf->len = 0;
+
+	} while (len);
+
+	err = 0;
+
+err_out:
+	kfree_skb(buf);
+
+	if (err == -EMSGSIZE) {
+		/* The legacy API only considered messages filling
+		 * "ULTRA_STRING_MAX_LEN" to be truncated.
+		 */
+		if ((TIPC_SKB_MAX - msg->rep->len) <= 1) {
+			char *tail = skb_tail_pointer(msg->rep);
+
+			if (*tail != '\0')
+				sprintf(tail - sizeof(REPLY_TRUNCATED) - 1,
+					REPLY_TRUNCATED);
+		}
+
+		return 0;
+	}
+
+	return err;
+}
+
+static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
+				 struct tipc_nl_compat_msg *msg)
+{
+	int err;
+	struct sk_buff *arg;
+
+	msg->rep = tipc_tlv_alloc(msg->rep_size);
+	if (!msg->rep)
+		return -ENOMEM;
+
+	arg = nlmsg_new(0, GFP_KERNEL);
+	if (!arg) {
+		kfree_skb(msg->rep);
+		return -ENOMEM;
+	}
+
+	err = __tipc_nl_compat_dumpit(cmd, msg, arg);
+	if (err)
+		kfree_skb(msg->rep);
+
+	kfree_skb(arg);
+
+	return err;
+}
+
+static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg,
+				      struct nlattr **attrs)
+{
+	struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1];
+
+	nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER],
+			 NULL);
+
+	return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME,
+			    nla_data(bearer[TIPC_NLA_BEARER_NAME]),
+			    nla_len(bearer[TIPC_NLA_BEARER_NAME]));
+}
+
+static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
+{
+	struct tipc_nl_compat_cmd_dump dump;
+
+	memset(&dump, 0, sizeof(dump));
+
+	switch (msg->cmd) {
+	case TIPC_CMD_GET_BEARER_NAMES:
+		msg->rep_size = MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME);
+		dump.dumpit = tipc_nl_bearer_dump;
+		dump.format = tipc_nl_compat_bearer_dump;
+		return tipc_nl_compat_dumpit(&dump, msg);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
+{
+	int err;
+	int len;
+	struct tipc_nl_compat_msg msg;
+	struct nlmsghdr *req_nlh;
+	struct nlmsghdr *rep_nlh;
+	struct tipc_genlmsghdr *req_userhdr = info->userhdr;
+	struct net *net = genl_info_net(info);
+
+	memset(&msg, 0, sizeof(msg));
+
+	req_nlh = (struct nlmsghdr *)skb->data;
+	msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN;
+	msg.cmd = req_userhdr->cmd;
+	msg.dst_sk = info->dst_sk;
+
+	if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) {
+		msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN);
+		err = -EACCES;
+		goto send;
+	}
+
+	len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN);
+	if (TLV_GET_LEN(msg.req) && !TLV_OK(msg.req, len)) {
+		msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED);
+		err = -EOPNOTSUPP;
+		goto send;
+	}
+
+	err = tipc_nl_compat_handle(&msg);
+	if (err == -EOPNOTSUPP)
+		msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED);
+	else if (err == -EINVAL)
+		msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR);
+send:
+	if (!msg.rep)
+		return err;
+
+	len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN);
+	skb_push(msg.rep, len);
+	rep_nlh = nlmsg_hdr(msg.rep);
+	memcpy(rep_nlh, info->nlhdr, len);
+	rep_nlh->nlmsg_len = msg.rep->len;
+	genlmsg_unicast(net, msg.rep, NETLINK_CB(skb).portid);
+
+	return err;
+}
+
 static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
 {
 	struct net *net = genl_info_net(info);
@@ -69,6 +325,22 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+/* Temporary function to keep functionality throughout the patchset
+ * without having to mess with the global variables and other trickery
+ * of the old API.
+ */
+static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info)
+{
+	struct tipc_genlmsghdr *req = info->userhdr;
+
+	switch (req->cmd) {
+	case TIPC_CMD_GET_BEARER_NAMES:
+		return tipc_nl_compat_recv(skb, info);
+	}
+
+	return handle_cmd(skb, info);
+}
+
 static struct genl_family tipc_genl_compat_family = {
 	.id		= GENL_ID_GENERATE,
 	.name		= TIPC_GENL_NAME,
@@ -81,7 +353,7 @@ static struct genl_family tipc_genl_compat_family = {
 static struct genl_ops tipc_genl_compat_ops[] = {
 	{
 		.cmd		= TIPC_GENL_CMD,
-		.doit		= handle_cmd,
+		.doit		= tipc_nl_compat_tmp_wrap,
 	},
 };
 
-- 
cgit v1.2.3-71-gd317


From 9ab154658a7ff2c5076607e02f18581c6859fc2a Mon Sep 17 00:00:00 2001
From: Richard Alpe <richard.alpe@ericsson.com>
Date: Mon, 9 Feb 2015 09:50:05 +0100
Subject: tipc: convert legacy nl bearer enable/disable to nl compat

Introduce a framework for transcoding legacy nl action into actions
(.doit) calls from the new nl API. This is done by converting the
incoming TLV data into netlink data with nested netlink attributes.
Unfortunately due to the randomness of the legacy API we can't do this
generically so each legacy netlink command requires a specific
transcoding recipe. In this case for bearer enable and bearer disable.

Convert TIPC_CMD_ENABLE_BEARER and TIPC_CMD_DISABLE_BEARER into doit
compat calls.

Signed-off-by: Richard Alpe <richard.alpe@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_config.h |   5 ++
 net/tipc/bearer.c                |  26 ++-----
 net/tipc/bearer.h                |   3 -
 net/tipc/config.c                |  33 ---------
 net/tipc/netlink_compat.c        | 144 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 154 insertions(+), 57 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index e1f4f05f4c5c..f9226566c1b8 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -277,6 +277,11 @@ static inline int TLV_GET_LEN(struct tlv_desc *tlv)
 	return ntohs(tlv->tlv_len);
 }
 
+static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv,  __u16 type)
+{
+	return (ntohs(tlv->tlv_type) == type);
+}
+
 static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len)
 {
 	struct tlv_desc *tlv_ptr;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 7a9e29641e61..de1c800ef806 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -236,8 +236,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
 /**
  * tipc_enable_bearer - enable bearer with the given name
  */
-int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain,
-		       u32 priority)
+static int tipc_enable_bearer(struct net *net, const char *name,
+			      u32 disc_domain, u32 priority)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_bearer *b_ptr;
@@ -393,22 +393,6 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr,
 	kfree_rcu(b_ptr, rcu);
 }
 
-int tipc_disable_bearer(struct net *net, const char *name)
-{
-	struct tipc_bearer *b_ptr;
-	int res;
-
-	b_ptr = tipc_bearer_find(net, name);
-	if (b_ptr == NULL) {
-		pr_warn("Attempt to disable unknown bearer <%s>\n", name);
-		res = -EINVAL;
-	} else {
-		bearer_disable(net, b_ptr, false);
-		res = 0;
-	}
-	return res;
-}
-
 int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b)
 {
 	struct net_device *dev;
@@ -756,7 +740,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
 	char *name;
 	struct tipc_bearer *bearer;
 	struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
-	struct net *net = genl_info_net(info);
+	struct net *net = sock_net(skb->sk);
 
 	if (!info->attrs[TIPC_NLA_BEARER])
 		return -EINVAL;
@@ -787,11 +771,11 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
 
 int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
 {
-	struct net *net = genl_info_net(info);
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	int err;
 	char *bearer;
 	struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+	struct net *net = sock_net(skb->sk);
+	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	u32 domain;
 	u32 prio;
 
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 956858276d93..06f25d144871 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -173,9 +173,6 @@ struct tipc_bearer_names {
  */
 
 void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr);
-int tipc_enable_bearer(struct net *net, const char *bearer_name,
-		       u32 disc_domain, u32 priority);
-int tipc_disable_bearer(struct net *net, const char *name);
 
 /*
  * Routines made available to TIPC by supported media types
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 52e84b0ac48a..f8cd5e1b545f 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -134,33 +134,6 @@ static struct sk_buff *tipc_show_stats(void)
 	return buf;
 }
 
-static struct sk_buff *cfg_enable_bearer(struct net *net)
-{
-	struct tipc_bearer_config *args;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_CONFIG))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
-	args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area);
-	if (tipc_enable_bearer(net, args->name,
-			       ntohl(args->disc_domain),
-			       ntohl(args->priority)))
-		return tipc_cfg_reply_error_string("unable to enable bearer");
-
-	return tipc_cfg_reply_none();
-}
-
-static struct sk_buff *cfg_disable_bearer(struct net *net)
-{
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_NAME))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
-	if (tipc_disable_bearer(net, (char *)TLV_DATA(req_tlv_area)))
-		return tipc_cfg_reply_error_string("unable to disable bearer");
-
-	return tipc_cfg_reply_none();
-}
-
 static struct sk_buff *cfg_set_own_addr(struct net *net)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
@@ -267,12 +240,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd,
 		rep_tlv_buf = tipc_link_cmd_config(net, req_tlv_area,
 						   req_tlv_space, cmd);
 		break;
-	case TIPC_CMD_ENABLE_BEARER:
-		rep_tlv_buf = cfg_enable_bearer(net);
-		break;
-	case TIPC_CMD_DISABLE_BEARER:
-		rep_tlv_buf = cfg_disable_bearer(net);
-		break;
 	case TIPC_CMD_SET_NODE_ADDR:
 		rep_tlv_buf = cfg_set_own_addr(net);
 		break;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index bd75ea290e3a..12b0f4424797 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -49,6 +49,7 @@
 struct tipc_nl_compat_msg {
 	u16 cmd;
 	int rep_size;
+	int req_type;
 	struct sk_buff *rep;
 	struct tlv_desc *req;
 	struct sock *dst_sk;
@@ -59,6 +60,11 @@ struct tipc_nl_compat_cmd_dump {
 	int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs);
 };
 
+struct tipc_nl_compat_cmd_doit {
+	int (*doit)(struct sk_buff *skb, struct genl_info *info);
+	int (*transcode)(struct sk_buff *skb, struct tipc_nl_compat_msg *msg);
+};
+
 static int tipc_skb_tailroom(struct sk_buff *skb)
 {
 	int tailroom;
@@ -213,6 +219,78 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
 	return err;
 }
 
+static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
+				 struct tipc_nl_compat_msg *msg)
+{
+	int err;
+	struct sk_buff *doit_buf;
+	struct sk_buff *trans_buf;
+	struct nlattr **attrbuf;
+	struct genl_info info;
+
+	trans_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!trans_buf)
+		return -ENOMEM;
+
+	err = (*cmd->transcode)(trans_buf, msg);
+	if (err)
+		goto trans_out;
+
+	attrbuf = kmalloc((tipc_genl_family.maxattr + 1) *
+			sizeof(struct nlattr *), GFP_KERNEL);
+	if (!attrbuf) {
+		err = -ENOMEM;
+		goto trans_out;
+	}
+
+	err = nla_parse(attrbuf, tipc_genl_family.maxattr,
+			(const struct nlattr *)trans_buf->data,
+			trans_buf->len, NULL);
+	if (err)
+		goto parse_out;
+
+	doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!doit_buf) {
+		err = -ENOMEM;
+		goto parse_out;
+	}
+
+	doit_buf->sk = msg->dst_sk;
+
+	memset(&info, 0, sizeof(info));
+	info.attrs = attrbuf;
+
+	err = (*cmd->doit)(doit_buf, &info);
+
+	kfree_skb(doit_buf);
+parse_out:
+	kfree(attrbuf);
+trans_out:
+	kfree_skb(trans_buf);
+
+	return err;
+}
+
+static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd,
+			       struct tipc_nl_compat_msg *msg)
+{
+	int err;
+
+	if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type))
+		return -EINVAL;
+
+	err = __tipc_nl_compat_doit(cmd, msg);
+	if (err)
+		return err;
+
+	/* The legacy API considered an empty message a success message */
+	msg->rep = tipc_tlv_alloc(0);
+	if (!msg->rep)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg,
 				      struct nlattr **attrs)
 {
@@ -226,11 +304,65 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg,
 			    nla_len(bearer[TIPC_NLA_BEARER_NAME]));
 }
 
+static int tipc_nl_compat_bearer_enable(struct sk_buff *skb,
+					struct tipc_nl_compat_msg *msg)
+{
+	struct nlattr *prop;
+	struct nlattr *bearer;
+	struct tipc_bearer_config *b;
+
+	b = (struct tipc_bearer_config *)TLV_DATA(msg->req);
+
+	bearer = nla_nest_start(skb, TIPC_NLA_BEARER);
+	if (!bearer)
+		return -EMSGSIZE;
+
+	if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name))
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, TIPC_NLA_BEARER_DOMAIN, ntohl(b->disc_domain)))
+		return -EMSGSIZE;
+
+	if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) {
+		prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP);
+		if (!prop)
+			return -EMSGSIZE;
+		if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority)))
+			return -EMSGSIZE;
+		nla_nest_end(skb, prop);
+	}
+	nla_nest_end(skb, bearer);
+
+	return 0;
+}
+
+static int tipc_nl_compat_bearer_disable(struct sk_buff *skb,
+					 struct tipc_nl_compat_msg *msg)
+{
+	char *name;
+	struct nlattr *bearer;
+
+	name = (char *)TLV_DATA(msg->req);
+
+	bearer = nla_nest_start(skb, TIPC_NLA_BEARER);
+	if (!bearer)
+		return -EMSGSIZE;
+
+	if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name))
+		return -EMSGSIZE;
+
+	nla_nest_end(skb, bearer);
+
+	return 0;
+}
+
 static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
 {
 	struct tipc_nl_compat_cmd_dump dump;
+	struct tipc_nl_compat_cmd_doit doit;
 
 	memset(&dump, 0, sizeof(dump));
+	memset(&doit, 0, sizeof(doit));
 
 	switch (msg->cmd) {
 	case TIPC_CMD_GET_BEARER_NAMES:
@@ -238,6 +370,16 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
 		dump.dumpit = tipc_nl_bearer_dump;
 		dump.format = tipc_nl_compat_bearer_dump;
 		return tipc_nl_compat_dumpit(&dump, msg);
+	case TIPC_CMD_ENABLE_BEARER:
+		msg->req_type = TIPC_TLV_BEARER_CONFIG;
+		doit.doit = tipc_nl_bearer_enable;
+		doit.transcode = tipc_nl_compat_bearer_enable;
+		return tipc_nl_compat_doit(&doit, msg);
+	case TIPC_CMD_DISABLE_BEARER:
+		msg->req_type = TIPC_TLV_BEARER_NAME;
+		doit.doit = tipc_nl_bearer_disable;
+		doit.transcode = tipc_nl_compat_bearer_disable;
+		return tipc_nl_compat_doit(&doit, msg);
 	}
 
 	return -EOPNOTSUPP;
@@ -335,6 +477,8 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info)
 
 	switch (req->cmd) {
 	case TIPC_CMD_GET_BEARER_NAMES:
+	case TIPC_CMD_ENABLE_BEARER:
+	case TIPC_CMD_DISABLE_BEARER:
 		return tipc_nl_compat_recv(skb, info);
 	}
 
-- 
cgit v1.2.3-71-gd317


From f2b3b2d4ccbf9666f5f42a21347cd1aaa532b2fa Mon Sep 17 00:00:00 2001
From: Richard Alpe <richard.alpe@ericsson.com>
Date: Mon, 9 Feb 2015 09:50:06 +0100
Subject: tipc: convert legacy nl link stat to nl compat

Add functionality for safely appending string data to a TLV without
keeping write count in the caller.

Convert TIPC_CMD_SHOW_LINK_STATS to compat dumpit.

Signed-off-by: Richard Alpe <richard.alpe@ericsson.com>
Reviewed-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_config.h |  10 ++
 net/tipc/bcast.c                 |  43 --------
 net/tipc/bcast.h                 |   1 -
 net/tipc/config.c                |   4 -
 net/tipc/link.c                  | 141 ---------------------------
 net/tipc/link.h                  |   3 -
 net/tipc/netlink_compat.c        | 205 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 215 insertions(+), 192 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index f9226566c1b8..087b0ef82c07 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -277,11 +277,21 @@ static inline int TLV_GET_LEN(struct tlv_desc *tlv)
 	return ntohs(tlv->tlv_len);
 }
 
+static inline void TLV_SET_LEN(struct tlv_desc *tlv, __u16 len)
+{
+	tlv->tlv_len = htons(len);
+}
+
 static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv,  __u16 type)
 {
 	return (ntohs(tlv->tlv_type) == type);
 }
 
+static inline void TLV_SET_TYPE(struct tlv_desc *tlv, __u16 type)
+{
+	tlv->tlv_type = htons(type);
+}
+
 static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len)
 {
 	struct tlv_desc *tlv_ptr;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index e96fd6a6d5c2..3e41704832de 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -860,49 +860,6 @@ msg_full:
 	return -EMSGSIZE;
 }
 
-int tipc_bclink_stats(struct net *net, char *buf, const u32 buf_size)
-{
-	int ret;
-	struct tipc_stats *s;
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
-	struct tipc_link *bcl = tn->bcl;
-
-	if (!bcl)
-		return 0;
-
-	tipc_bclink_lock(net);
-
-	s = &bcl->stats;
-
-	ret = tipc_snprintf(buf, buf_size, "Link <%s>\n"
-			    "  Window:%u packets\n",
-			    bcl->name, bcl->queue_limit[0]);
-	ret += tipc_snprintf(buf + ret, buf_size - ret,
-			     "  RX packets:%u fragments:%u/%u bundles:%u/%u\n",
-			     s->recv_info, s->recv_fragments,
-			     s->recv_fragmented, s->recv_bundles,
-			     s->recv_bundled);
-	ret += tipc_snprintf(buf + ret, buf_size - ret,
-			     "  TX packets:%u fragments:%u/%u bundles:%u/%u\n",
-			     s->sent_info, s->sent_fragments,
-			     s->sent_fragmented, s->sent_bundles,
-			     s->sent_bundled);
-	ret += tipc_snprintf(buf + ret, buf_size - ret,
-			     "  RX naks:%u defs:%u dups:%u\n",
-			     s->recv_nacks, s->deferred_recv, s->duplicates);
-	ret += tipc_snprintf(buf + ret, buf_size - ret,
-			     "  TX naks:%u acks:%u dups:%u\n",
-			     s->sent_nacks, s->sent_acks, s->retransmitted);
-	ret += tipc_snprintf(buf + ret, buf_size - ret,
-			     "  Congestion link:%u  Send queue max:%u avg:%u\n",
-			     s->link_congs, s->max_queue_sz,
-			     s->queue_sz_counts ?
-			     (s->accu_queue_sz / s->queue_sz_counts) : 0);
-
-	tipc_bclink_unlock(net);
-	return ret;
-}
-
 int tipc_bclink_reset_stats(struct net *net)
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index a910c0b9f249..43f397fbac55 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -127,7 +127,6 @@ u32  tipc_bclink_get_last_sent(struct net *net);
 u32  tipc_bclink_acks_missing(struct tipc_node *n_ptr);
 void tipc_bclink_update_link_state(struct tipc_node *node,
 				   u32 last_sent);
-int  tipc_bclink_stats(struct net *net, char *stats_buf, const u32 buf_size);
 int  tipc_bclink_reset_stats(struct net *net);
 int  tipc_bclink_set_queue_limits(struct net *net, u32 limit);
 void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr,
diff --git a/net/tipc/config.c b/net/tipc/config.c
index f8cd5e1b545f..67b76ee847f3 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -213,10 +213,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd,
 		rep_tlv_buf = tipc_node_get_links(net, req_tlv_area,
 						  req_tlv_space);
 		break;
-	case TIPC_CMD_SHOW_LINK_STATS:
-		rep_tlv_buf = tipc_link_cmd_show_stats(net, req_tlv_area,
-						       req_tlv_space);
-		break;
 	case TIPC_CMD_RESET_LINK_STATS:
 		rep_tlv_buf = tipc_link_cmd_reset_stats(net, req_tlv_area,
 							req_tlv_space);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 466f28fcf215..2622fb99344a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2146,147 +2146,6 @@ struct sk_buff *tipc_link_cmd_reset_stats(struct net *net,
 	return tipc_cfg_reply_none();
 }
 
-/**
- * percent - convert count to a percentage of total (rounding up or down)
- */
-static u32 percent(u32 count, u32 total)
-{
-	return (count * 100 + (total / 2)) / total;
-}
-
-/**
- * tipc_link_stats - print link statistics
- * @net: the applicable net namespace
- * @name: link name
- * @buf: print buffer area
- * @buf_size: size of print buffer area
- *
- * Returns length of print buffer data string (or 0 if error)
- */
-static int tipc_link_stats(struct net *net, const char *name, char *buf,
-			   const u32 buf_size)
-{
-	struct tipc_link *l;
-	struct tipc_stats *s;
-	struct tipc_node *node;
-	char *status;
-	u32 profile_total = 0;
-	unsigned int bearer_id;
-	int ret;
-
-	if (!strcmp(name, tipc_bclink_name))
-		return tipc_bclink_stats(net, buf, buf_size);
-
-	node = tipc_link_find_owner(net, name, &bearer_id);
-	if (!node)
-		return 0;
-
-	tipc_node_lock(node);
-
-	l = node->links[bearer_id];
-	if (!l) {
-		tipc_node_unlock(node);
-		return 0;
-	}
-
-	s = &l->stats;
-
-	if (tipc_link_is_active(l))
-		status = "ACTIVE";
-	else if (tipc_link_is_up(l))
-		status = "STANDBY";
-	else
-		status = "DEFUNCT";
-
-	ret = tipc_snprintf(buf, buf_size, "Link <%s>\n"
-			    "  %s  MTU:%u  Priority:%u  Tolerance:%u ms"
-			    "  Window:%u packets\n",
-			    l->name, status, l->max_pkt, l->priority,
-			    l->tolerance, l->queue_limit[0]);
-
-	ret += tipc_snprintf(buf + ret, buf_size - ret,
-			     "  RX packets:%u fragments:%u/%u bundles:%u/%u\n",
-			     l->next_in_no - s->recv_info, s->recv_fragments,
-			     s->recv_fragmented, s->recv_bundles,
-			     s->recv_bundled);
-
-	ret += tipc_snprintf(buf + ret, buf_size - ret,
-			     "  TX packets:%u fragments:%u/%u bundles:%u/%u\n",
-			     l->next_out_no - s->sent_info, s->sent_fragments,
-			     s->sent_fragmented, s->sent_bundles,
-			     s->sent_bundled);
-
-	profile_total = s->msg_length_counts;
-	if (!profile_total)
-		profile_total = 1;
-
-	ret += tipc_snprintf(buf + ret, buf_size - ret,
-			     "  TX profile sample:%u packets  average:%u octets\n"
-			     "  0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% "
-			     "-16384:%u%% -32768:%u%% -66000:%u%%\n",
-			     s->msg_length_counts,
-			     s->msg_lengths_total / profile_total,
-			     percent(s->msg_length_profile[0], profile_total),
-			     percent(s->msg_length_profile[1], profile_total),
-			     percent(s->msg_length_profile[2], profile_total),
-			     percent(s->msg_length_profile[3], profile_total),
-			     percent(s->msg_length_profile[4], profile_total),
-			     percent(s->msg_length_profile[5], profile_total),
-			     percent(s->msg_length_profile[6], profile_total));
-
-	ret += tipc_snprintf(buf + ret, buf_size - ret,
-			     "  RX states:%u probes:%u naks:%u defs:%u"
-			     " dups:%u\n", s->recv_states, s->recv_probes,
-			     s->recv_nacks, s->deferred_recv, s->duplicates);
-
-	ret += tipc_snprintf(buf + ret, buf_size - ret,
-			     "  TX states:%u probes:%u naks:%u acks:%u"
-			     " dups:%u\n", s->sent_states, s->sent_probes,
-			     s->sent_nacks, s->sent_acks, s->retransmitted);
-
-	ret += tipc_snprintf(buf + ret, buf_size - ret,
-			     "  Congestion link:%u  Send queue"
-			     " max:%u avg:%u\n", s->link_congs,
-			     s->max_queue_sz, s->queue_sz_counts ?
-			     (s->accu_queue_sz / s->queue_sz_counts) : 0);
-
-	tipc_node_unlock(node);
-	return ret;
-}
-
-struct sk_buff *tipc_link_cmd_show_stats(struct net *net,
-					 const void *req_tlv_area,
-					 int req_tlv_space)
-{
-	struct sk_buff *buf;
-	struct tlv_desc *rep_tlv;
-	int str_len;
-	int pb_len;
-	char *pb;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
-	buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN));
-	if (!buf)
-		return NULL;
-
-	rep_tlv = (struct tlv_desc *)buf->data;
-	pb = TLV_DATA(rep_tlv);
-	pb_len = ULTRA_STRING_MAX_LEN;
-	str_len = tipc_link_stats(net, (char *)TLV_DATA(req_tlv_area),
-				  pb, pb_len);
-	if (!str_len) {
-		kfree_skb(buf);
-		return tipc_cfg_reply_error_string("link not found");
-	}
-	str_len += 1;	/* for "\0" */
-	skb_put(buf, TLV_SPACE(str_len));
-	TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
-
-	return buf;
-}
-
 static void link_print(struct tipc_link *l_ptr, const char *str)
 {
 	struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id);
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 34d3f55c4cea..8c8340cf991f 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -217,9 +217,6 @@ int tipc_link_is_active(struct tipc_link *l_ptr);
 void tipc_link_purge_queues(struct tipc_link *l_ptr);
 struct sk_buff *tipc_link_cmd_config(struct net *net, const void *req_tlv_area,
 				     int req_tlv_space, u16 cmd);
-struct sk_buff *tipc_link_cmd_show_stats(struct net *net,
-					 const void *req_tlv_area,
-					 int req_tlv_space);
 struct sk_buff *tipc_link_cmd_reset_stats(struct net *net,
 					  const void *req_tlv_area,
 					  int req_tlv_space);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 12b0f4424797..899bd94da467 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -34,6 +34,7 @@
 #include "core.h"
 #include "config.h"
 #include "bearer.h"
+#include "link.h"
 #include <net/genetlink.h>
 #include <linux/tipc_config.h>
 
@@ -48,6 +49,7 @@
 
 struct tipc_nl_compat_msg {
 	u16 cmd;
+	int rep_type;
 	int rep_size;
 	int req_type;
 	struct sk_buff *rep;
@@ -95,6 +97,40 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len)
 	return 0;
 }
 
+static void tipc_tlv_init(struct sk_buff *skb, u16 type)
+{
+	struct tlv_desc *tlv = (struct tlv_desc *)skb->data;
+
+	TLV_SET_LEN(tlv, 0);
+	TLV_SET_TYPE(tlv, type);
+	skb_put(skb, sizeof(struct tlv_desc));
+}
+
+static int tipc_tlv_sprintf(struct sk_buff *skb, const char *fmt, ...)
+{
+	int n;
+	u16 len;
+	u32 rem;
+	char *buf;
+	struct tlv_desc *tlv;
+	va_list args;
+
+	rem = tipc_skb_tailroom(skb);
+
+	tlv = (struct tlv_desc *)skb->data;
+	len = TLV_GET_LEN(tlv);
+	buf = TLV_DATA(tlv) + len;
+
+	va_start(args, fmt);
+	n = vscnprintf(buf, rem, fmt, args);
+	va_end(args);
+
+	TLV_SET_LEN(tlv, n + len);
+	skb_put(skb, n);
+
+	return n;
+}
+
 static struct sk_buff *tipc_tlv_alloc(int size)
 {
 	int hdr_len;
@@ -200,10 +236,16 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
 	int err;
 	struct sk_buff *arg;
 
+	if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type))
+		return -EINVAL;
+
 	msg->rep = tipc_tlv_alloc(msg->rep_size);
 	if (!msg->rep)
 		return -ENOMEM;
 
+	if (msg->rep_type)
+		tipc_tlv_init(msg->rep, msg->rep_type);
+
 	arg = nlmsg_new(0, GFP_KERNEL);
 	if (!arg) {
 		kfree_skb(msg->rep);
@@ -356,6 +398,161 @@ static int tipc_nl_compat_bearer_disable(struct sk_buff *skb,
 	return 0;
 }
 
+static inline u32 perc(u32 count, u32 total)
+{
+	return (count * 100 + (total / 2)) / total;
+}
+
+static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg,
+				struct nlattr *prop[], struct nlattr *stats[])
+{
+	tipc_tlv_sprintf(msg->rep, "  Window:%u packets\n",
+			 nla_get_u32(prop[TIPC_NLA_PROP_WIN]));
+
+	tipc_tlv_sprintf(msg->rep,
+			 "  RX packets:%u fragments:%u/%u bundles:%u/%u\n",
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED]));
+
+	tipc_tlv_sprintf(msg->rep,
+			 "  TX packets:%u fragments:%u/%u bundles:%u/%u\n",
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED]));
+
+	tipc_tlv_sprintf(msg->rep, "  RX naks:%u defs:%u dups:%u\n",
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES]));
+
+	tipc_tlv_sprintf(msg->rep, "  TX naks:%u acks:%u dups:%u\n",
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED]));
+
+	tipc_tlv_sprintf(msg->rep,
+			 "  Congestion link:%u  Send queue max:%u avg:%u",
+			 nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE]));
+}
+
+static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
+					 struct nlattr **attrs)
+{
+	char *name;
+	struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
+	struct nlattr *prop[TIPC_NLA_PROP_MAX + 1];
+	struct nlattr *stats[TIPC_NLA_STATS_MAX + 1];
+
+	nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL);
+
+	nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP],
+			 NULL);
+
+	nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS],
+			 NULL);
+
+	name = (char *)TLV_DATA(msg->req);
+	if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0)
+		return 0;
+
+	tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n",
+			 nla_data(link[TIPC_NLA_LINK_NAME]));
+
+	if (link[TIPC_NLA_LINK_BROADCAST]) {
+		__fill_bc_link_stat(msg, prop, stats);
+		return 0;
+	}
+
+	if (link[TIPC_NLA_LINK_ACTIVE])
+		tipc_tlv_sprintf(msg->rep, "  ACTIVE");
+	else if (link[TIPC_NLA_LINK_UP])
+		tipc_tlv_sprintf(msg->rep, "  STANDBY");
+	else
+		tipc_tlv_sprintf(msg->rep, "  DEFUNCT");
+
+	tipc_tlv_sprintf(msg->rep, "  MTU:%u  Priority:%u",
+			 nla_get_u32(link[TIPC_NLA_LINK_MTU]),
+			 nla_get_u32(prop[TIPC_NLA_PROP_PRIO]));
+
+	tipc_tlv_sprintf(msg->rep, "  Tolerance:%u ms  Window:%u packets\n",
+			 nla_get_u32(prop[TIPC_NLA_PROP_TOL]),
+			 nla_get_u32(prop[TIPC_NLA_PROP_WIN]));
+
+	tipc_tlv_sprintf(msg->rep,
+			 "  RX packets:%u fragments:%u/%u bundles:%u/%u\n",
+			 nla_get_u32(link[TIPC_NLA_LINK_RX]) -
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED]));
+
+	tipc_tlv_sprintf(msg->rep,
+			 "  TX packets:%u fragments:%u/%u bundles:%u/%u\n",
+			 nla_get_u32(link[TIPC_NLA_LINK_TX]) -
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED]));
+
+	tipc_tlv_sprintf(msg->rep,
+			 "  TX profile sample:%u packets  average:%u octets\n",
+			 nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_CNT]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_TOT]) /
+			 nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]));
+
+	tipc_tlv_sprintf(msg->rep,
+			 "  0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% ",
+			 perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P0]),
+			      nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])),
+			 perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P1]),
+			      nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])),
+			 perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P2]),
+			      nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])),
+			 perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P3]),
+			      nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])));
+
+	tipc_tlv_sprintf(msg->rep, "-16384:%u%% -32768:%u%% -66000:%u%%\n",
+			 perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P4]),
+			      nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])),
+			 perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P5]),
+			      nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])),
+			 perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P6]),
+			      nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])));
+
+	tipc_tlv_sprintf(msg->rep,
+			 "  RX states:%u probes:%u naks:%u defs:%u dups:%u\n",
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_STATES]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_PROBES]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES]));
+
+	tipc_tlv_sprintf(msg->rep,
+			 "  TX states:%u probes:%u naks:%u acks:%u dups:%u\n",
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_STATES]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_PROBES]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED]));
+
+	tipc_tlv_sprintf(msg->rep,
+			 "  Congestion link:%u  Send queue max:%u avg:%u",
+			 nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]),
+			 nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE]));
+
+	return 0;
+}
+
 static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
 {
 	struct tipc_nl_compat_cmd_dump dump;
@@ -380,6 +577,13 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg)
 		doit.doit = tipc_nl_bearer_disable;
 		doit.transcode = tipc_nl_compat_bearer_disable;
 		return tipc_nl_compat_doit(&doit, msg);
+	case TIPC_CMD_SHOW_LINK_STATS:
+		msg->req_type = TIPC_TLV_LINK_NAME;
+		msg->rep_size = ULTRA_STRING_MAX_LEN;
+		msg->rep_type = TIPC_TLV_ULTRA_STRING;
+		dump.dumpit = tipc_nl_link_dump;
+		dump.format = tipc_nl_compat_link_stat_dump;
+		return tipc_nl_compat_dumpit(&dump, msg);
 	}
 
 	return -EOPNOTSUPP;
@@ -479,6 +683,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info)
 	case TIPC_CMD_GET_BEARER_NAMES:
 	case TIPC_CMD_ENABLE_BEARER:
 	case TIPC_CMD_DISABLE_BEARER:
+	case TIPC_CMD_SHOW_LINK_STATS:
 		return tipc_nl_compat_recv(skb, info);
 	}
 
-- 
cgit v1.2.3-71-gd317


From a4505152044c31f7b8e108a87ab2009901d96d0e Mon Sep 17 00:00:00 2001
From: Hariprasad Shenai <hariprasad@chelsio.com>
Date: Mon, 9 Feb 2015 12:07:29 +0530
Subject: ethtool: rename reserved1 memeber in ethtool_drvinfo for expansion
 ROM version

Renamed the reserved1 member of struct ethtool_drvinfo to erom_version to get
expansion/option ROM version of the adapter if present.

Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 5f66d9c2889d..2e49fc880d29 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -139,6 +139,7 @@ static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep)
 
 #define ETHTOOL_FWVERS_LEN	32
 #define ETHTOOL_BUSINFO_LEN	32
+#define ETHTOOL_EROMVERS_LEN	32
 
 /**
  * struct ethtool_drvinfo - general driver and device information
@@ -148,6 +149,7 @@ static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep)
  *	not be an empty string.
  * @version: Driver version string; may be an empty string
  * @fw_version: Firmware version string; may be an empty string
+ * @erom_version: Expansion ROM version string; may be an empty string
  * @bus_info: Device bus address.  This should match the dev_name()
  *	string for the underlying bus device, if there is one.  May be
  *	an empty string.
@@ -176,7 +178,7 @@ struct ethtool_drvinfo {
 	char	version[32];
 	char	fw_version[ETHTOOL_FWVERS_LEN];
 	char	bus_info[ETHTOOL_BUSINFO_LEN];
-	char	reserved1[32];
+	char	erom_version[ETHTOOL_EROMVERS_LEN];
 	char	reserved2[12];
 	__u32	n_priv_flags;
 	__u32	n_stats;
-- 
cgit v1.2.3-71-gd317


From 6140a8f5623820cec7f56c63444b9551d8d35775 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 6 Feb 2015 15:05:08 -0700
Subject: vfio-pci: Add device request interface

Userspace can opt to receive a device request notification,
indicating that the device should be released.  This is setup
the same way as the error IRQ and also supports eventfd signaling.
Future support may forcefully remove the device from the user if
the request is ignored.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/vfio_pci.c         | 21 ++++++++++++++++++++-
 drivers/vfio/pci/vfio_pci_intrs.c   | 16 ++++++++++++++++
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 include/uapi/linux/vfio.h           |  1 +
 4 files changed, 38 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 7cc0122a18ce..f8a186381ae8 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -239,9 +239,12 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
 
 			return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
 		}
-	} else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX)
+	} else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX) {
 		if (pci_is_pcie(vdev->pdev))
 			return 1;
+	} else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {
+		return 1;
+	}
 
 	return 0;
 }
@@ -464,6 +467,7 @@ static long vfio_pci_ioctl(void *device_data,
 
 		switch (info.index) {
 		case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
+		case VFIO_PCI_REQ_IRQ_INDEX:
 			break;
 		case VFIO_PCI_ERR_IRQ_INDEX:
 			if (pci_is_pcie(vdev->pdev))
@@ -828,6 +832,20 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
 			       req_len, vma->vm_page_prot);
 }
 
+static void vfio_pci_request(void *device_data, unsigned int count)
+{
+	struct vfio_pci_device *vdev = device_data;
+
+	mutex_lock(&vdev->igate);
+
+	if (vdev->req_trigger) {
+		dev_dbg(&vdev->pdev->dev, "Requesting device from user\n");
+		eventfd_signal(vdev->req_trigger, 1);
+	}
+
+	mutex_unlock(&vdev->igate);
+}
+
 static const struct vfio_device_ops vfio_pci_ops = {
 	.name		= "vfio-pci",
 	.open		= vfio_pci_open,
@@ -836,6 +854,7 @@ static const struct vfio_device_ops vfio_pci_ops = {
 	.read		= vfio_pci_read,
 	.write		= vfio_pci_write,
 	.mmap		= vfio_pci_mmap,
+	.request	= vfio_pci_request,
 };
 
 static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index b134befbfd0e..f88bfdf5b6a0 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -817,6 +817,16 @@ static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
 	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, flags, data);
 }
 
+static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
+				    unsigned index, unsigned start,
+				    unsigned count, uint32_t flags, void *data)
+{
+	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count != 1)
+		return -EINVAL;
+
+	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, flags, data);
+}
+
 int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
 			    unsigned index, unsigned start, unsigned count,
 			    void *data)
@@ -858,6 +868,12 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
 				func = vfio_pci_set_err_trigger;
 			break;
 		}
+	case VFIO_PCI_REQ_IRQ_INDEX:
+		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+		case VFIO_IRQ_SET_ACTION_TRIGGER:
+			func = vfio_pci_set_req_trigger;
+			break;
+		}
 	}
 
 	if (!func)
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 671c17a6e6d0..c9f9b323f152 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -58,6 +58,7 @@ struct vfio_pci_device {
 	struct pci_saved_state	*pci_saved_state;
 	int			refcnt;
 	struct eventfd_ctx	*err_trigger;
+	struct eventfd_ctx	*req_trigger;
 };
 
 #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 29715d27548f..82889c30f4f5 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -333,6 +333,7 @@ enum {
 	VFIO_PCI_MSI_IRQ_INDEX,
 	VFIO_PCI_MSIX_IRQ_INDEX,
 	VFIO_PCI_ERR_IRQ_INDEX,
+	VFIO_PCI_REQ_IRQ_INDEX,
 	VFIO_PCI_NUM_IRQS
 };
 
-- 
cgit v1.2.3-71-gd317


From e6a02746e0a9cdda5114db912fe2aadfed289aae Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 11 Feb 2015 15:01:14 +1030
Subject: virtio: define VIRTIO_PCI_CAP_PCI_CFG in header.

This provides backdoor access to the device MMIOs, and every device should
have one.  From the virtio 1.0 spec (CS03):

  4.1.4.7.1 Device Requirements: PCI configuration access capability

  The device MUST present at least one VIRTIO_PCI_CAP_PCI_CFG capability.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_pci.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index 3b7e4d2765fb..75301468359f 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -109,8 +109,10 @@
 #define VIRTIO_PCI_CAP_NOTIFY_CFG	2
 /* ISR access */
 #define VIRTIO_PCI_CAP_ISR_CFG		3
-/* Device specific confiuration */
+/* Device specific configuration */
 #define VIRTIO_PCI_CAP_DEVICE_CFG	4
+/* PCI configuration access */
+#define VIRTIO_PCI_CAP_PCI_CFG		5
 
 /* This is the PCI capability header: */
 struct virtio_pci_cap {
-- 
cgit v1.2.3-71-gd317


From 527100a4ee744bbfc90f1609ee4a0144883b3e4a Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 11 Feb 2015 15:01:14 +1030
Subject: virtio: Don't expose legacy block features when VIRTIO_BLK_NO_LEGACY
 defined.

This allows modern implementations to ensure they don't use legacy
feature bits or SCSI commands (which are not used in v1.0 non-legacy).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_blk.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 247c8ba8544a..3c53eec4ae22 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -31,22 +31,25 @@
 #include <linux/virtio_types.h>
 
 /* Feature bits */
-#define VIRTIO_BLK_F_BARRIER	0	/* Does host support barriers? */
 #define VIRTIO_BLK_F_SIZE_MAX	1	/* Indicates maximum segment size */
 #define VIRTIO_BLK_F_SEG_MAX	2	/* Indicates maximum # of segments */
 #define VIRTIO_BLK_F_GEOMETRY	4	/* Legacy geometry available  */
 #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
-#define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
-#define VIRTIO_BLK_F_WCE	9	/* Writeback mode enabled after reset */
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
-#define VIRTIO_BLK_F_CONFIG_WCE	11	/* Writeback mode available in config */
 #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
 
+/* Legacy feature bits */
+#ifndef VIRTIO_BLK_NO_LEGACY
+#define VIRTIO_BLK_F_BARRIER	0	/* Does host support barriers? */
+#define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
+#define VIRTIO_BLK_F_WCE	9	/* Writeback mode enabled after reset */
+#define VIRTIO_BLK_F_CONFIG_WCE	11	/* Writeback mode available in config */
 #ifndef __KERNEL__
 /* Old (deprecated) name for VIRTIO_BLK_F_WCE. */
 #define VIRTIO_BLK_F_FLUSH VIRTIO_BLK_F_WCE
 #endif
+#endif /* !VIRTIO_BLK_NO_LEGACY */
 
 #define VIRTIO_BLK_ID_BYTES	20	/* ID string length */
 
@@ -100,8 +103,10 @@ struct virtio_blk_config {
 #define VIRTIO_BLK_T_IN		0
 #define VIRTIO_BLK_T_OUT	1
 
+#ifndef VIRTIO_BLK_NO_LEGACY
 /* This bit says it's a scsi command, not an actual read or write. */
 #define VIRTIO_BLK_T_SCSI_CMD	2
+#endif /* VIRTIO_BLK_NO_LEGACY */
 
 /* Cache flush command */
 #define VIRTIO_BLK_T_FLUSH	4
@@ -109,8 +114,10 @@ struct virtio_blk_config {
 /* Get device ID command */
 #define VIRTIO_BLK_T_GET_ID    8
 
+#ifndef VIRTIO_BLK_NO_LEGACY
 /* Barrier before this op. */
 #define VIRTIO_BLK_T_BARRIER	0x80000000
+#endif /* !VIRTIO_BLK_NO_LEGACY */
 
 /* This is the first element of the read scatter-gather list. */
 struct virtio_blk_outhdr {
@@ -122,12 +129,14 @@ struct virtio_blk_outhdr {
 	__virtio64 sector;
 };
 
+#ifndef VIRTIO_BLK_NO_LEGACY
 struct virtio_scsi_inhdr {
 	__virtio32 errors;
 	__virtio32 data_len;
 	__virtio32 sense_len;
 	__virtio32 residual;
 };
+#endif /* !VIRTIO_BLK_NO_LEGACY */
 
 /* And this is the final byte of the write scatter-gather list. */
 #define VIRTIO_BLK_S_OK		0
-- 
cgit v1.2.3-71-gd317


From 6d96ee98b1d08bcf0f90a6bf2c6766dda6b3a010 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 11 Feb 2015 15:01:14 +1030
Subject: virtio: Don't expose legacy config features when
 VIRTIO_CONFIG_NO_LEGACY defined.

The VIRTIO_F_ANY_LAYOUT and VIRTIO_F_NOTIFY_ON_EMPTY features are pre-1.0
only.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_config.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index a6d0cdeaacd4..c18264df9504 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -49,12 +49,14 @@
 #define VIRTIO_TRANSPORT_F_START	28
 #define VIRTIO_TRANSPORT_F_END		33
 
+#ifndef VIRTIO_CONFIG_NO_LEGACY
 /* Do we get callbacks when the ring is completely used, even if we've
  * suppressed them? */
 #define VIRTIO_F_NOTIFY_ON_EMPTY	24
 
 /* Can the device handle any descriptor layout? */
 #define VIRTIO_F_ANY_LAYOUT		27
+#endif /* VIRTIO_CONFIG_NO_LEGACY */
 
 /* v1.0 compliant. */
 #define VIRTIO_F_VERSION_1		32
-- 
cgit v1.2.3-71-gd317


From 0ace2ca89cbd6bcdf2b9d2df1fa0fa24ea9d1653 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 10 Feb 2015 16:30:32 -0800
Subject: vxlan: Use checksum partial with remote checksum offload

Change remote checksum handling to set checksum partial as default
behavior. Added an iflink parameter to configure not using
checksum partial (calling csum_partial to update checksum).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c          | 25 +++++++++++++++++++------
 include/net/vxlan.h          |  4 +++-
 include/uapi/linux/if_link.h |  1 +
 3 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 4f04443cfd33..1e0a775ea882 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -555,7 +555,8 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
 static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
 					  unsigned int off,
 					  struct vxlanhdr *vh, size_t hdrlen,
-					  u32 data, struct gro_remcsum *grc)
+					  u32 data, struct gro_remcsum *grc,
+					  bool nopartial)
 {
 	size_t start, offset, plen;
 
@@ -580,7 +581,7 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
 	}
 
 	skb_gro_remcsum_process(skb, (void *)vh + hdrlen,
-				start, offset, grc, true);
+				start, offset, grc, nopartial);
 
 	skb->remcsum_offload = 1;
 
@@ -618,7 +619,9 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
 
 	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
 		vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
-				       ntohl(vh->vx_vni), &grc);
+				       ntohl(vh->vx_vni), &grc,
+				       !!(vs->flags &
+					  VXLAN_F_REMCSUM_NOPARTIAL));
 
 		if (!vh)
 			goto out;
@@ -1155,7 +1158,7 @@ static void vxlan_igmp_leave(struct work_struct *work)
 }
 
 static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
-				      size_t hdrlen, u32 data)
+				      size_t hdrlen, u32 data, bool nopartial)
 {
 	size_t start, offset, plen;
 
@@ -1171,7 +1174,8 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
 
 	vh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
 
-	skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset, true);
+	skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset,
+			    nopartial);
 
 	return vh;
 }
@@ -1208,7 +1212,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 		goto drop;
 
 	if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
-		vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni);
+		vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni,
+				    !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL));
 		if (!vxh)
 			goto drop;
 
@@ -2437,6 +2442,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_REMCSUM_TX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },
+	[IFLA_VXLAN_REMCSUM_NOPARTIAL]	= { .type = NLA_FLAG },
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -2760,6 +2766,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 	if (data[IFLA_VXLAN_GBP])
 		vxlan->flags |= VXLAN_F_GBP;
 
+	if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
+		vxlan->flags |= VXLAN_F_REMCSUM_NOPARTIAL;
+
 	if (vxlan_find_vni(src_net, vni, use_ipv6 ? AF_INET6 : AF_INET,
 			   vxlan->dst_port, vxlan->flags)) {
 		pr_info("duplicate VNI %u\n", vni);
@@ -2909,6 +2918,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_flag(skb, IFLA_VXLAN_GBP))
 		goto nla_put_failure;
 
+	if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL &&
+	    nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 2927d6244481..eabd3a038674 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -128,13 +128,15 @@ struct vxlan_sock {
 #define VXLAN_F_REMCSUM_TX		0x200
 #define VXLAN_F_REMCSUM_RX		0x400
 #define VXLAN_F_GBP			0x800
+#define VXLAN_F_REMCSUM_NOPARTIAL	0x1000
 
 /* Flags that are used in the receive patch. These flags must match in
  * order for a socket to be shareable
  */
 #define VXLAN_F_RCV_FLAGS		(VXLAN_F_GBP |			\
 					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
-					 VXLAN_F_REMCSUM_RX)
+					 VXLAN_F_REMCSUM_RX |		\
+					 VXLAN_F_REMCSUM_NOPARTIAL)
 
 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
 				  vxlan_rcv_t *rcv, void *data,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 0deee3eeddbf..dfd0bb22e554 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -374,6 +374,7 @@ enum {
 	IFLA_VXLAN_REMCSUM_TX,
 	IFLA_VXLAN_REMCSUM_RX,
 	IFLA_VXLAN_GBP,
+	IFLA_VXLAN_REMCSUM_NOPARTIAL,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
-- 
cgit v1.2.3-71-gd317


From fe881ef11cf0220f118816181930494d484c4883 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 10 Feb 2015 16:30:33 -0800
Subject: gue: Use checksum partial with remote checksum offload

Change remote checksum handling to set checksum partial as default
behavior. Added an iflink parameter to configure not using
checksum partial (calling csum_partial to update checksum).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/fou.h |  1 +
 net/ipv4/fou.c           | 28 ++++++++++++++++++++++------
 2 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
index 8df06894da23..c303588bb767 100644
--- a/include/uapi/linux/fou.h
+++ b/include/uapi/linux/fou.h
@@ -14,6 +14,7 @@ enum {
 	FOU_ATTR_AF,				/* u8 */
 	FOU_ATTR_IPPROTO,			/* u8 */
 	FOU_ATTR_TYPE,				/* u8 */
+	FOU_ATTR_REMCSUM_NOPARTIAL,		/* flag */
 
 	__FOU_ATTR_MAX,
 };
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index d320f575cc62..ff069f6597ac 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -22,14 +22,18 @@ static LIST_HEAD(fou_list);
 struct fou {
 	struct socket *sock;
 	u8 protocol;
+	u8 flags;
 	u16 port;
 	struct udp_offload udp_offloads;
 	struct list_head list;
 };
 
+#define FOU_F_REMCSUM_NOPARTIAL BIT(0)
+
 struct fou_cfg {
 	u16 type;
 	u8 protocol;
+	u8 flags;
 	struct udp_port_cfg udp_config;
 };
 
@@ -64,7 +68,8 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
 }
 
 static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
-				  void *data, size_t hdrlen, u8 ipproto)
+				  void *data, size_t hdrlen, u8 ipproto,
+				  bool nopartial)
 {
 	__be16 *pd = data;
 	size_t start = ntohs(pd[0]);
@@ -75,7 +80,8 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
 		return NULL;
 	guehdr = (struct guehdr *)&udp_hdr(skb)[1];
 
-	skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset, true);
+	skb_remcsum_process(skb, (void *)guehdr + hdrlen,
+			    start, offset, nopartial);
 
 	return guehdr;
 }
@@ -136,7 +142,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
 
 		if (flags & GUE_PFLAG_REMCSUM) {
 			guehdr = gue_remcsum(skb, guehdr, data + doffset,
-					     hdrlen, guehdr->proto_ctype);
+					     hdrlen, guehdr->proto_ctype,
+					     !!(fou->flags &
+						FOU_F_REMCSUM_NOPARTIAL));
 			if (!guehdr)
 				goto drop;
 
@@ -209,7 +217,7 @@ out_unlock:
 static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 				      struct guehdr *guehdr, void *data,
 				      size_t hdrlen, u8 ipproto,
-				      struct gro_remcsum *grc)
+				      struct gro_remcsum *grc, bool nopartial)
 {
 	__be16 *pd = data;
 	size_t start = ntohs(pd[0]);
@@ -230,7 +238,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 	}
 
 	skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen,
-				start, offset, grc, true);
+				start, offset, grc, nopartial);
 
 	skb->remcsum_offload = 1;
 
@@ -250,6 +258,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
 	void *data;
 	u16 doffset = 0;
 	int flush = 1;
+	struct fou *fou = container_of(uoff, struct fou, udp_offloads);
 	struct gro_remcsum grc;
 
 	skb_gro_remcsum_init(&grc);
@@ -294,7 +303,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
 		if (flags & GUE_PFLAG_REMCSUM) {
 			guehdr = gue_gro_remcsum(skb, off, guehdr,
 						 data + doffset, hdrlen,
-						 guehdr->proto_ctype, &grc);
+						 guehdr->proto_ctype, &grc,
+						 !!(fou->flags &
+						    FOU_F_REMCSUM_NOPARTIAL));
 			if (!guehdr)
 				goto out;
 
@@ -455,6 +466,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
 
 	sk = sock->sk;
 
+	fou->flags = cfg->flags;
 	fou->port = cfg->udp_config.local_udp_port;
 
 	/* Initial for fou type */
@@ -541,6 +553,7 @@ static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
 	[FOU_ATTR_AF] = { .type = NLA_U8, },
 	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
 	[FOU_ATTR_TYPE] = { .type = NLA_U8, },
+	[FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
 };
 
 static int parse_nl_config(struct genl_info *info,
@@ -571,6 +584,9 @@ static int parse_nl_config(struct genl_info *info,
 	if (info->attrs[FOU_ATTR_TYPE])
 		cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);
 
+	if (info->attrs[FOU_ATTR_REMCSUM_NOPARTIAL])
+		cfg->flags |= FOU_F_REMCSUM_NOPARTIAL;
+
 	return 0;
 }
 
-- 
cgit v1.2.3-71-gd317


From 56873f43abdcd574b25105867a990f067747b2f4 Mon Sep 17 00:00:00 2001
From: "Wang, Yalin" <Yalin.Wang@sonymobile.com>
Date: Wed, 11 Feb 2015 15:24:51 -0800
Subject: mm:add KPF_ZERO_PAGE flag for /proc/kpageflags

Add KPF_ZERO_PAGE flag for zero_page, so that userspace processes can
detect zero_page in /proc/kpageflags, and then do memory analysis more
accurately.

Signed-off-by: Yalin Wang <yalin.wang@sonymobile.com>
Acked-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/pagemap.txt           |  8 ++++++++
 fs/proc/page.c                         | 16 +++++++++++++---
 include/linux/huge_mm.h                | 12 ++++++++++++
 include/uapi/linux/kernel-page-flags.h |  1 +
 mm/huge_memory.c                       |  7 +------
 tools/vm/page-types.c                  |  1 +
 6 files changed, 36 insertions(+), 9 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
index 5948e455c4d2..6fbd55ef6b45 100644
--- a/Documentation/vm/pagemap.txt
+++ b/Documentation/vm/pagemap.txt
@@ -62,6 +62,8 @@ There are three components to pagemap:
     20. NOPAGE
     21. KSM
     22. THP
+    23. BALLOON
+    24. ZERO_PAGE
 
 Short descriptions to the page flags:
 
@@ -102,6 +104,12 @@ Short descriptions to the page flags:
 22. THP
     contiguous pages which construct transparent hugepages
 
+23. BALLOON
+    balloon compaction page
+
+24. ZERO_PAGE
+    zero page for pfn_zero or huge_zero page
+
     [IO related page flags]
  1. ERROR     IO error occurred
  3. UPTODATE  page has up-to-date data
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 1e3187da1fed..7eee2d8b97d9 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -5,6 +5,7 @@
 #include <linux/ksm.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
+#include <linux/huge_mm.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/hugetlb.h>
@@ -121,9 +122,18 @@ u64 stable_page_flags(struct page *page)
 	 * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
 	 * to make sure a given page is a thp, not a non-huge compound page.
 	 */
-	else if (PageTransCompound(page) && (PageLRU(compound_head(page)) ||
-					     PageAnon(compound_head(page))))
-		u |= 1 << KPF_THP;
+	else if (PageTransCompound(page)) {
+		struct page *head = compound_head(page);
+
+		if (PageLRU(head) || PageAnon(head))
+			u |= 1 << KPF_THP;
+		else if (is_huge_zero_page(head)) {
+			u |= 1 << KPF_ZERO_PAGE;
+			u |= 1 << KPF_THP;
+		}
+	} else if (is_zero_pfn(page_to_pfn(page)))
+		u |= 1 << KPF_ZERO_PAGE;
+
 
 	/*
 	 * Caveats on high order pages: page->_count will only be set
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ad9051bab267..f10b20f05159 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -157,6 +157,13 @@ static inline int hpage_nr_pages(struct page *page)
 extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pmd_t pmd, pmd_t *pmdp);
 
+extern struct page *huge_zero_page;
+
+static inline bool is_huge_zero_page(struct page *page)
+{
+	return ACCESS_ONCE(huge_zero_page) == page;
+}
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -206,6 +213,11 @@ static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_str
 	return 0;
 }
 
+static inline bool is_huge_zero_page(struct page *page)
+{
+	return false;
+}
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
index 2f96d233c980..a6c4962e5d46 100644
--- a/include/uapi/linux/kernel-page-flags.h
+++ b/include/uapi/linux/kernel-page-flags.h
@@ -32,6 +32,7 @@
 #define KPF_KSM			21
 #define KPF_THP			22
 #define KPF_BALLOON		23
+#define KPF_ZERO_PAGE		24
 
 
 #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 817a875f2b8c..889713180980 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -171,12 +171,7 @@ static int start_khugepaged(void)
 }
 
 static atomic_t huge_zero_refcount;
-static struct page *huge_zero_page __read_mostly;
-
-static inline bool is_huge_zero_page(struct page *page)
-{
-	return ACCESS_ONCE(huge_zero_page) == page;
-}
+struct page *huge_zero_page __read_mostly;
 
 static inline bool is_huge_zero_pmd(pmd_t pmd)
 {
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 264fbc297e0b..8bdf16b8ba60 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -133,6 +133,7 @@ static const char * const page_flag_names[] = {
 	[KPF_KSM]		= "x:ksm",
 	[KPF_THP]		= "t:thp",
 	[KPF_BALLOON]		= "o:balloon",
+	[KPF_ZERO_PAGE]		= "z:zero_page",
 
 	[KPF_RESERVED]		= "r:reserved",
 	[KPF_MLOCKED]		= "m:mlocked",
-- 
cgit v1.2.3-71-gd317


From 9791554b45a2acc28247f66a5fd5bbc212a6b8c8 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 8 Jan 2015 12:17:37 +0000
Subject: MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS

Userland code may be built using an ABI which permits linking to objects
that have more restrictive floating point requirements. For example,
userland code may be built to target the O32 FPXX ABI. Such code may be
linked with other FPXX code, or code built for either one of the more
restrictive FP32 or FP64. When linking with more restrictive code, the
overall requirement of the process becomes that of the more restrictive
code. The kernel has no way to know in advance which mode the process
will need to be executed in, and indeed it may need to change during
execution. The dynamic loader is the only code which will know the
overall required mode, and so it needs to have a means to instruct the
kernel to switch the FP mode of the process.

This patch introduces 2 new options to the prctl syscall which provide
such a capability. The FP mode of the process is represented as a
simple bitmask combining a number of mode bits mirroring those present
in the hardware. Userland can either retrieve the current FP mode of
the process:

  mode = prctl(PR_GET_FP_MODE);

or modify the current FP mode of the process:

  err = prctl(PR_SET_FP_MODE, new_mode);

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: Matthew Fortune <matthew.fortune@imgtec.com>
Cc: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/8899/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/mmu.h         |  3 ++
 arch/mips/include/asm/mmu_context.h |  2 +
 arch/mips/include/asm/processor.h   | 11 +++++
 arch/mips/kernel/process.c          | 92 +++++++++++++++++++++++++++++++++++++
 arch/mips/kernel/traps.c            | 19 ++++++++
 include/uapi/linux/prctl.h          |  5 ++
 kernel/sys.c                        | 12 +++++
 7 files changed, 144 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h
index c436138945a8..1afa1f986df8 100644
--- a/arch/mips/include/asm/mmu.h
+++ b/arch/mips/include/asm/mmu.h
@@ -1,9 +1,12 @@
 #ifndef __ASM_MMU_H
 #define __ASM_MMU_H
 
+#include <linux/atomic.h>
+
 typedef struct {
 	unsigned long asid[NR_CPUS];
 	void *vdso;
+	atomic_t fp_mode_switching;
 } mm_context_t;
 
 #endif /* __ASM_MMU_H */
diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h
index 2f82568a3ee4..87f11072f557 100644
--- a/arch/mips/include/asm/mmu_context.h
+++ b/arch/mips/include/asm/mmu_context.h
@@ -132,6 +132,8 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	for_each_possible_cpu(i)
 		cpu_context(i, mm) = 0;
 
+	atomic_set(&mm->context.fp_mode_switching, 0);
+
 	return 0;
 }
 
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index f1df4cb4a286..9daa38608cd8 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -399,4 +399,15 @@ unsigned long get_wchan(struct task_struct *p);
 
 #endif
 
+/*
+ * Functions & macros implementing the PR_GET_FP_MODE & PR_SET_FP_MODE options
+ * to the prctl syscall.
+ */
+extern int mips_get_process_fp_mode(struct task_struct *task);
+extern int mips_set_process_fp_mode(struct task_struct *task,
+				    unsigned int value);
+
+#define GET_FP_MODE(task)		mips_get_process_fp_mode(task)
+#define SET_FP_MODE(task,value)		mips_set_process_fp_mode(task, value)
+
 #endif /* _ASM_PROCESSOR_H */
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index eb76434828e8..4677b4c67da6 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -25,6 +25,7 @@
 #include <linux/completion.h>
 #include <linux/kallsyms.h>
 #include <linux/random.h>
+#include <linux/prctl.h>
 
 #include <asm/asm.h>
 #include <asm/bootinfo.h>
@@ -550,3 +551,94 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
 {
 	smp_call_function(arch_dump_stack, NULL, 1);
 }
+
+int mips_get_process_fp_mode(struct task_struct *task)
+{
+	int value = 0;
+
+	if (!test_tsk_thread_flag(task, TIF_32BIT_FPREGS))
+		value |= PR_FP_MODE_FR;
+	if (test_tsk_thread_flag(task, TIF_HYBRID_FPREGS))
+		value |= PR_FP_MODE_FRE;
+
+	return value;
+}
+
+int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
+{
+	const unsigned int known_bits = PR_FP_MODE_FR | PR_FP_MODE_FRE;
+	unsigned long switch_count;
+	struct task_struct *t;
+
+	/* Check the value is valid */
+	if (value & ~known_bits)
+		return -EOPNOTSUPP;
+
+	/* Avoid inadvertently triggering emulation */
+	if ((value & PR_FP_MODE_FR) && cpu_has_fpu &&
+	    !(current_cpu_data.fpu_id & MIPS_FPIR_F64))
+		return -EOPNOTSUPP;
+	if ((value & PR_FP_MODE_FRE) && cpu_has_fpu && !cpu_has_fre)
+		return -EOPNOTSUPP;
+
+	/* Save FP & vector context, then disable FPU & MSA */
+	if (task->signal == current->signal)
+		lose_fpu(1);
+
+	/* Prevent any threads from obtaining live FP context */
+	atomic_set(&task->mm->context.fp_mode_switching, 1);
+	smp_mb__after_atomic();
+
+	/*
+	 * If there are multiple online CPUs then wait until all threads whose
+	 * FP mode is about to change have been context switched. This approach
+	 * allows us to only worry about whether an FP mode switch is in
+	 * progress when FP is first used in a tasks time slice. Pretty much all
+	 * of the mode switch overhead can thus be confined to cases where mode
+	 * switches are actually occuring. That is, to here. However for the
+	 * thread performing the mode switch it may take a while...
+	 */
+	if (num_online_cpus() > 1) {
+		spin_lock_irq(&task->sighand->siglock);
+
+		for_each_thread(task, t) {
+			if (t == current)
+				continue;
+
+			switch_count = t->nvcsw + t->nivcsw;
+
+			do {
+				spin_unlock_irq(&task->sighand->siglock);
+				cond_resched();
+				spin_lock_irq(&task->sighand->siglock);
+			} while ((t->nvcsw + t->nivcsw) == switch_count);
+		}
+
+		spin_unlock_irq(&task->sighand->siglock);
+	}
+
+	/*
+	 * There are now no threads of the process with live FP context, so it
+	 * is safe to proceed with the FP mode switch.
+	 */
+	for_each_thread(task, t) {
+		/* Update desired FP register width */
+		if (value & PR_FP_MODE_FR) {
+			clear_tsk_thread_flag(t, TIF_32BIT_FPREGS);
+		} else {
+			set_tsk_thread_flag(t, TIF_32BIT_FPREGS);
+			clear_tsk_thread_flag(t, TIF_MSA_CTX_LIVE);
+		}
+
+		/* Update desired FP single layout */
+		if (value & PR_FP_MODE_FRE)
+			set_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
+		else
+			clear_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
+	}
+
+	/* Allow threads to use FP again */
+	atomic_set(&task->mm->context.fp_mode_switching, 0);
+
+	return 0;
+}
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index ad3d2031c327..d5fbfb51b9da 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1134,10 +1134,29 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action,
 	return NOTIFY_OK;
 }
 
+static int wait_on_fp_mode_switch(atomic_t *p)
+{
+	/*
+	 * The FP mode for this task is currently being switched. That may
+	 * involve modifications to the format of this tasks FP context which
+	 * make it unsafe to proceed with execution for the moment. Instead,
+	 * schedule some other task.
+	 */
+	schedule();
+	return 0;
+}
+
 static int enable_restore_fp_context(int msa)
 {
 	int err, was_fpu_owner, prior_msa;
 
+	/*
+	 * If an FP mode switch is currently underway, wait for it to
+	 * complete before proceeding.
+	 */
+	wait_on_atomic_t(&current->mm->context.fp_mode_switching,
+			 wait_on_fp_mode_switch, TASK_KILLABLE);
+
 	if (!used_math()) {
 		/* First time FP context user. */
 		preempt_disable();
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 89f63503f903..31891d9535e2 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -185,4 +185,9 @@ struct prctl_mm_map {
 #define PR_MPX_ENABLE_MANAGEMENT  43
 #define PR_MPX_DISABLE_MANAGEMENT 44
 
+#define PR_SET_FP_MODE		45
+#define PR_GET_FP_MODE		46
+# define PR_FP_MODE_FR		(1 << 0)	/* 64b FP registers */
+# define PR_FP_MODE_FRE		(1 << 1)	/* 32b compatibility */
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index a8c9f5a7dda6..08b16bbdcdf0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -97,6 +97,12 @@
 #ifndef MPX_DISABLE_MANAGEMENT
 # define MPX_DISABLE_MANAGEMENT(a)	(-EINVAL)
 #endif
+#ifndef GET_FP_MODE
+# define GET_FP_MODE(a)		(-EINVAL)
+#endif
+#ifndef SET_FP_MODE
+# define SET_FP_MODE(a,b)	(-EINVAL)
+#endif
 
 /*
  * this is where the system-wide overflow UID and GID are defined, for
@@ -2215,6 +2221,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_MPX_DISABLE_MANAGEMENT:
 		error = MPX_DISABLE_MANAGEMENT(me);
 		break;
+	case PR_SET_FP_MODE:
+		error = SET_FP_MODE(me, arg2);
+		break;
+	case PR_GET_FP_MODE:
+		error = GET_FP_MODE(me);
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
cgit v1.2.3-71-gd317


From 8a0516ed8b90c95ffa1363b420caa37418149f21 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Thu, 12 Feb 2015 14:58:22 -0800
Subject: mm: convert p[te|md]_numa users to p[te|md]_protnone_numa

Convert existing users of pte_numa and friends to the new helper.  Note
that the kernel is broken after this patch is applied until the other page
table modifiers are also altered.  This patch layout is to make review
easier.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |  2 +-
 arch/powerpc/mm/fault.c             |  5 -----
 arch/powerpc/mm/pgtable.c           | 11 ++++++++---
 arch/powerpc/mm/pgtable_64.c        |  3 ++-
 arch/x86/mm/gup.c                   |  4 ++--
 include/uapi/linux/mempolicy.h      |  2 +-
 mm/gup.c                            | 10 +++++-----
 mm/huge_memory.c                    | 16 ++++++++--------
 mm/memory.c                         |  4 ++--
 mm/mprotect.c                       | 38 ++++++++++---------------------------
 mm/pgtable-generic.c                |  2 +-
 11 files changed, 40 insertions(+), 57 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 510bdfbc4073..625407e4d3b0 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -212,7 +212,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	/* Look up the Linux PTE for the backing page */
 	pte_size = psize;
 	pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
-	if (pte_present(pte) && !pte_numa(pte)) {
+	if (pte_present(pte) && !pte_protnone(pte)) {
 		if (writing && !pte_write(pte))
 			/* make the actual HPTE be read-only */
 			ptel = hpte_make_readonly(ptel);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 6154b0a2b063..f38327b95f76 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -398,8 +398,6 @@ good_area:
 		 * processors use the same I/D cache coherency mechanism
 		 * as embedded.
 		 */
-		if (error_code & DSISR_PROTFAULT)
-			goto bad_area;
 #endif /* CONFIG_PPC_STD_MMU */
 
 		/*
@@ -423,9 +421,6 @@ good_area:
 		flags |= FAULT_FLAG_WRITE;
 	/* a read */
 	} else {
-		/* protection fault */
-		if (error_code & 0x08000000)
-			goto bad_area;
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 			goto bad_area;
 	}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index c90e602677c9..83dfcb55ffef 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -172,9 +172,14 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
 void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 		pte_t pte)
 {
-#ifdef CONFIG_DEBUG_VM
-	WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
-#endif
+	/*
+	 * When handling numa faults, we already have the pte marked
+	 * _PAGE_PRESENT, but we can be sure that it is not in hpte.
+	 * Hence we can use set_pte_at for them.
+	 */
+	VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
+		(_PAGE_PRESENT | _PAGE_USER));
+
 	/* Note: mm->context.id might not yet have been assigned as
 	 * this context might not have been activated yet when this
 	 * is called.
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 4fe5f64cc179..91bb8836825a 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -718,7 +718,8 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		pmd_t *pmdp, pmd_t pmd)
 {
 #ifdef CONFIG_DEBUG_VM
-	WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);
+	WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) ==
+		(_PAGE_PRESENT | _PAGE_USER));
 	assert_spin_locked(&mm->page_table_lock);
 	WARN_ON(!pmd_trans_huge(pmd));
 #endif
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 89df70e0caa6..81bf3d2af3eb 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -84,7 +84,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
 		struct page *page;
 
 		/* Similar to the PMD case, NUMA hinting must take slow path */
-		if (pte_numa(pte)) {
+		if (pte_protnone(pte)) {
 			pte_unmap(ptep);
 			return 0;
 		}
@@ -178,7 +178,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 			 * slowpath for accounting purposes and so that they
 			 * can be serialised against THP migration.
 			 */
-			if (pmd_numa(pmd))
+			if (pmd_protnone(pmd))
 				return 0;
 			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
 				return 0;
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 0d11c3dcd3a1..9cd8b21dddbe 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -67,7 +67,7 @@ enum mpol_rebind_step {
 #define MPOL_F_LOCAL   (1 << 1)	/* preferred local allocation */
 #define MPOL_F_REBINDING (1 << 2)	/* identify policies in rebinding */
 #define MPOL_F_MOF	(1 << 3) /* this policy wants migrate on fault */
-#define MPOL_F_MORON	(1 << 4) /* Migrate On pte_numa Reference On Node */
+#define MPOL_F_MORON	(1 << 4) /* Migrate On protnone Reference On Node */
 
 
 #endif /* _UAPI_LINUX_MEMPOLICY_H */
diff --git a/mm/gup.c b/mm/gup.c
index c2da1163986a..51bf0b06ca7b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -64,7 +64,7 @@ retry:
 		migration_entry_wait(mm, pmd, address);
 		goto retry;
 	}
-	if ((flags & FOLL_NUMA) && pte_numa(pte))
+	if ((flags & FOLL_NUMA) && pte_protnone(pte))
 		goto no_page;
 	if ((flags & FOLL_WRITE) && !pte_write(pte)) {
 		pte_unmap_unlock(ptep, ptl);
@@ -184,7 +184,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 			return page;
 		return no_page_table(vma, flags);
 	}
-	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+	if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
 		return no_page_table(vma, flags);
 	if (pmd_trans_huge(*pmd)) {
 		if (flags & FOLL_SPLIT) {
@@ -906,10 +906,10 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 
 		/*
 		 * Similar to the PMD case below, NUMA hinting must take slow
-		 * path
+		 * path using the pte_protnone check.
 		 */
 		if (!pte_present(pte) || pte_special(pte) ||
-			pte_numa(pte) || (write && !pte_write(pte)))
+			pte_protnone(pte) || (write && !pte_write(pte)))
 			goto pte_unmap;
 
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -1104,7 +1104,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 			 * slowpath for accounting purposes and so that they
 			 * can be serialised against THP migration.
 			 */
-			if (pmd_numa(pmd))
+			if (pmd_protnone(pmd))
 				return 0;
 
 			if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c6921362c5fc..915941c45169 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1211,7 +1211,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 		return ERR_PTR(-EFAULT);
 
 	/* Full NUMA hinting faults to serialise migration in fault paths */
-	if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
+	if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
 		goto out;
 
 	page = pmd_page(*pmd);
@@ -1342,7 +1342,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/*
 	 * Migrate the THP to the requested node, returns with page unlocked
-	 * and pmd_numa cleared.
+	 * and access rights restored.
 	 */
 	spin_unlock(ptl);
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
@@ -1357,7 +1357,7 @@ clear_pmdnuma:
 	BUG_ON(!PageLocked(page));
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
-	VM_BUG_ON(pmd_numa(*pmdp));
+	VM_BUG_ON(pmd_protnone(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
 	unlock_page(page);
 out_unlock:
@@ -1483,7 +1483,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 		ret = 1;
 		if (!prot_numa) {
 			entry = pmdp_get_and_clear_notify(mm, addr, pmd);
-			if (pmd_numa(entry))
+			if (pmd_protnone(entry))
 				entry = pmd_mknonnuma(entry);
 			entry = pmd_modify(entry, newprot);
 			ret = HPAGE_PMD_NR;
@@ -1499,7 +1499,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			 * local vs remote hits on the zero page.
 			 */
 			if (!is_huge_zero_page(page) &&
-			    !pmd_numa(*pmd)) {
+			    !pmd_protnone(*pmd)) {
 				pmdp_set_numa(mm, addr, pmd);
 				ret = HPAGE_PMD_NR;
 			}
@@ -1767,9 +1767,9 @@ static int __split_huge_page_map(struct page *page,
 			pte_t *pte, entry;
 			BUG_ON(PageCompound(page+i));
 			/*
-			 * Note that pmd_numa is not transferred deliberately
-			 * to avoid any possibility that pte_numa leaks to
-			 * a PROT_NONE VMA by accident.
+			 * Note that NUMA hinting access restrictions are not
+			 * transferred to avoid any possibility of altering
+			 * permissions across VMAs.
 			 */
 			entry = mk_pte(page + i, vma->vm_page_prot);
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
diff --git a/mm/memory.c b/mm/memory.c
index bbe6a73a899d..92e6a6299e86 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3124,7 +3124,7 @@ static int handle_pte_fault(struct mm_struct *mm,
 					pte, pmd, flags, entry);
 	}
 
-	if (pte_numa(entry))
+	if (pte_protnone(entry))
 		return do_numa_page(mm, vma, address, entry, pte, pmd);
 
 	ptl = pte_lockptr(mm, pmd);
@@ -3202,7 +3202,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			if (pmd_trans_splitting(orig_pmd))
 				return 0;
 
-			if (pmd_numa(orig_pmd))
+			if (pmd_protnone(orig_pmd))
 				return do_huge_pmd_numa_page(mm, vma, address,
 							     orig_pmd, pmd);
 
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 33121662f08b..44ffa698484d 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -75,36 +75,18 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		oldpte = *pte;
 		if (pte_present(oldpte)) {
 			pte_t ptent;
-			bool updated = false;
 
-			if (!prot_numa) {
-				ptent = ptep_modify_prot_start(mm, addr, pte);
-				if (pte_numa(ptent))
-					ptent = pte_mknonnuma(ptent);
-				ptent = pte_modify(ptent, newprot);
-				/*
-				 * Avoid taking write faults for pages we
-				 * know to be dirty.
-				 */
-				if (dirty_accountable && pte_dirty(ptent) &&
-				    (pte_soft_dirty(ptent) ||
-				     !(vma->vm_flags & VM_SOFTDIRTY)))
-					ptent = pte_mkwrite(ptent);
-				ptep_modify_prot_commit(mm, addr, pte, ptent);
-				updated = true;
-			} else {
-				struct page *page;
-
-				page = vm_normal_page(vma, addr, oldpte);
-				if (page && !PageKsm(page)) {
-					if (!pte_numa(oldpte)) {
-						ptep_set_numa(mm, addr, pte);
-						updated = true;
-					}
-				}
+			ptent = ptep_modify_prot_start(mm, addr, pte);
+			ptent = pte_modify(ptent, newprot);
+
+			/* Avoid taking write faults for known dirty pages */
+			if (dirty_accountable && pte_dirty(ptent) &&
+					(pte_soft_dirty(ptent) ||
+					 !(vma->vm_flags & VM_SOFTDIRTY))) {
+				ptent = pte_mkwrite(ptent);
 			}
-			if (updated)
-				pages++;
+			ptep_modify_prot_commit(mm, addr, pte, ptent);
+			pages++;
 		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index dfb79e028ecb..4b8ad760dde3 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -193,7 +193,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
 	pmd_t entry = *pmdp;
-	if (pmd_numa(entry))
+	if (pmd_protnone(entry))
 		entry = pmd_mknonnuma(entry);
 	set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry));
 	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
-- 
cgit v1.2.3-71-gd317


From ed9ecb0415b97b5f9f91f146e1977bb372c74c6d Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 13 Feb 2015 17:13:44 +1030
Subject: virtio: Don't expose legacy net features when VIRTIO_NET_NO_LEGACY
 defined.

In particular, the virtio header always has the u16 num_buffers field.
We define a new 'struct virtio_net_hdr_v1' for this (rather than
simply calling it 'struct virtio_net_hdr', to avoid nasty type errors
if some parts of a project define VIRTIO_NET_NO_LEGACY and some don't.

Transitional devices (which can't define VIRTIO_NET_NO_LEGACY) will
have to keep using struct virtio_net_hdr_mrg_rxbuf, which has the same
byte layout as struct virtio_net_hdr_v1.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/uapi/linux/virtio_net.h | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index b5f1677b291c..4a9b58113d6e 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -35,7 +35,6 @@
 #define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
 #define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial csum */
 #define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
-#define VIRTIO_NET_F_GSO	6	/* Host handles pkts w/ any GSO type */
 #define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
 #define VIRTIO_NET_F_GUEST_TSO6	8	/* Guest can handle TSOv6 in. */
 #define VIRTIO_NET_F_GUEST_ECN	9	/* Guest can handle TSO[6] w/ ECN in. */
@@ -56,6 +55,10 @@
 					 * Steering */
 #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
 
+#ifndef VIRTIO_NET_NO_LEGACY
+#define VIRTIO_NET_F_GSO	6	/* Host handles pkts w/ any GSO type */
+#endif /* VIRTIO_NET_NO_LEGACY */
+
 #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
 #define VIRTIO_NET_S_ANNOUNCE	2	/* Announcement is needed */
 
@@ -71,8 +74,9 @@ struct virtio_net_config {
 	__u16 max_virtqueue_pairs;
 } __attribute__((packed));
 
+#ifndef VIRTIO_NET_NO_LEGACY
 /* This header comes first in the scatter-gather list.
- * If VIRTIO_F_ANY_LAYOUT is not negotiated, it must
+ * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must
  * be the first element of the scatter-gather list.  If you don't
  * specify GSO or CSUM features, you can simply ignore the header. */
 struct virtio_net_hdr {
@@ -97,6 +101,30 @@ struct virtio_net_hdr_mrg_rxbuf {
 	struct virtio_net_hdr hdr;
 	__virtio16 num_buffers;	/* Number of merged rx buffers */
 };
+#else /* ... VIRTIO_NET_NO_LEGACY */
+/*
+ * This header comes first in the scatter-gather list.  If you don't
+ * specify GSO or CSUM features, you can simply ignore the header.
+ *
+ * This is bitwise-equivalent to the legacy struct virtio_net_hdr_mrg_rxbuf.
+ */
+struct virtio_net_hdr_v1 {
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	/* Use csum_start, csum_offset */
+#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
+	__u8 flags;
+#define VIRTIO_NET_HDR_GSO_NONE		0	/* Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4	1	/* GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP		3	/* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6	4	/* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN		0x80	/* TCP has ECN set */
+	__u8 gso_type;
+	__virtio16 hdr_len;	/* Ethernet + IP + tcp/udp hdrs */
+	__virtio16 gso_size;	/* Bytes to append to hdr_len per frame */
+	__virtio16 csum_start;	/* Position to start checksumming from */
+	__virtio16 csum_offset;	/* Offset after that to place checksum */
+	__virtio16 num_buffers;	/* Number of merged rx buffers */
+};
+#endif /* ...VIRTIO_NET_NO_LEGACY */
 
 /*
  * Control virtqueue data structures
-- 
cgit v1.2.3-71-gd317


From e68c48f97547979c91de04b487d79dc0d3be7015 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 17 Feb 2015 16:12:43 +1030
Subject: virtio_net: unconditionally define struct virtio_net_hdr_v1.

This was introduced in commit ed9ecb0415b97b5f9f91f146e1977bb372c74c6d,
but only defined if !VIRTIO_NET_NO_LEGACY.  We should always define
it: easier for users to have conditional legacy code.

Suggested-by: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/uapi/linux/virtio_net.h | 54 +++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 29 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index 4a9b58113d6e..7bbee79ca293 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -74,39 +74,12 @@ struct virtio_net_config {
 	__u16 max_virtqueue_pairs;
 } __attribute__((packed));
 
-#ifndef VIRTIO_NET_NO_LEGACY
-/* This header comes first in the scatter-gather list.
- * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must
- * be the first element of the scatter-gather list.  If you don't
- * specify GSO or CSUM features, you can simply ignore the header. */
-struct virtio_net_hdr {
-#define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	// Use csum_start, csum_offset
-#define VIRTIO_NET_HDR_F_DATA_VALID	2	// Csum is valid
-	__u8 flags;
-#define VIRTIO_NET_HDR_GSO_NONE		0	// Not a GSO frame
-#define VIRTIO_NET_HDR_GSO_TCPV4	1	// GSO frame, IPv4 TCP (TSO)
-#define VIRTIO_NET_HDR_GSO_UDP		3	// GSO frame, IPv4 UDP (UFO)
-#define VIRTIO_NET_HDR_GSO_TCPV6	4	// GSO frame, IPv6 TCP
-#define VIRTIO_NET_HDR_GSO_ECN		0x80	// TCP has ECN set
-	__u8 gso_type;
-	__virtio16 hdr_len;		/* Ethernet + IP + tcp/udp hdrs */
-	__virtio16 gso_size;		/* Bytes to append to hdr_len per frame */
-	__virtio16 csum_start;	/* Position to start checksumming from */
-	__virtio16 csum_offset;	/* Offset after that to place checksum */
-};
-
-/* This is the version of the header to use when the MRG_RXBUF
- * feature has been negotiated. */
-struct virtio_net_hdr_mrg_rxbuf {
-	struct virtio_net_hdr hdr;
-	__virtio16 num_buffers;	/* Number of merged rx buffers */
-};
-#else /* ... VIRTIO_NET_NO_LEGACY */
 /*
  * This header comes first in the scatter-gather list.  If you don't
  * specify GSO or CSUM features, you can simply ignore the header.
  *
- * This is bitwise-equivalent to the legacy struct virtio_net_hdr_mrg_rxbuf.
+ * This is bitwise-equivalent to the legacy struct virtio_net_hdr_mrg_rxbuf,
+ * only flattened.
  */
 struct virtio_net_hdr_v1 {
 #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	/* Use csum_start, csum_offset */
@@ -124,6 +97,29 @@ struct virtio_net_hdr_v1 {
 	__virtio16 csum_offset;	/* Offset after that to place checksum */
 	__virtio16 num_buffers;	/* Number of merged rx buffers */
 };
+
+#ifndef VIRTIO_NET_NO_LEGACY
+/* This header comes first in the scatter-gather list.
+ * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must
+ * be the first element of the scatter-gather list.  If you don't
+ * specify GSO or CSUM features, you can simply ignore the header. */
+struct virtio_net_hdr {
+	/* See VIRTIO_NET_HDR_F_* */
+	__u8 flags;
+	/* See VIRTIO_NET_HDR_GSO_* */
+	__u8 gso_type;
+	__virtio16 hdr_len;		/* Ethernet + IP + tcp/udp hdrs */
+	__virtio16 gso_size;		/* Bytes to append to hdr_len per frame */
+	__virtio16 csum_start;	/* Position to start checksumming from */
+	__virtio16 csum_offset;	/* Offset after that to place checksum */
+};
+
+/* This is the version of the header to use when the MRG_RXBUF
+ * feature has been negotiated. */
+struct virtio_net_hdr_mrg_rxbuf {
+	struct virtio_net_hdr hdr;
+	__virtio16 num_buffers;	/* Number of merged rx buffers */
+};
 #endif /* ...VIRTIO_NET_NO_LEGACY */
 
 /*
-- 
cgit v1.2.3-71-gd317


From 35e88d5c22e1916c819b5a8756aed2f09a4aba18 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Tue, 17 Feb 2015 15:41:51 +0100
Subject: fs/binfmt_som: Drop kernel support for HP-UX SOM binaries

The parisc arch has been the only user of HP-UX SOM binaries.

Support for HP-UX executables was never finished and since we now drop support
for the HP-UX compat layer anyway, it does not makes sense to keep the
BINFMT_SOM support.

Cc: linux-fsdevel@vger.kernel.org
Cc: linux-parisc@vger.kernel.org
Signed-off-by: Helge Deller <deller@gmx.de>
---
 fs/Kconfig.binfmt         |   7 --
 fs/Makefile               |   1 -
 fs/binfmt_som.c           | 299 ----------------------------------------------
 include/uapi/linux/Kbuild |   1 -
 include/uapi/linux/som.h  | 154 ------------------------
 5 files changed, 462 deletions(-)
 delete mode 100644 fs/binfmt_som.c
 delete mode 100644 include/uapi/linux/som.h

(limited to 'include/uapi/linux')

diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index c055d56ec63d..270c48148f79 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -149,13 +149,6 @@ config BINFMT_EM86
 	  later load the module when you want to use a Linux/Intel binary. The
 	  module will be called binfmt_em86. If unsure, say Y.
 
-config BINFMT_SOM
-	tristate "Kernel support for SOM binaries"
-	depends on PARISC && HPUX
-	help
-	  SOM is a binary executable format inherited from HP/UX.  Say
-	  Y here to be able to load and execute SOM binaries directly.
-
 config BINFMT_MISC
 	tristate "Kernel support for MISC binaries"
 	---help---
diff --git a/fs/Makefile b/fs/Makefile
index bedff48e8fdc..b7e0cfe3af2f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -37,7 +37,6 @@ obj-$(CONFIG_BINFMT_SCRIPT)	+= binfmt_script.o
 obj-$(CONFIG_BINFMT_ELF)	+= binfmt_elf.o
 obj-$(CONFIG_COMPAT_BINFMT_ELF)	+= compat_binfmt_elf.o
 obj-$(CONFIG_BINFMT_ELF_FDPIC)	+= binfmt_elf_fdpic.o
-obj-$(CONFIG_BINFMT_SOM)	+= binfmt_som.o
 obj-$(CONFIG_BINFMT_FLAT)	+= binfmt_flat.o
 
 obj-$(CONFIG_FS_MBCACHE)	+= mbcache.o
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
deleted file mode 100644
index 4e00ed68d4a6..000000000000
--- a/fs/binfmt_som.c
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * linux/fs/binfmt_som.c
- *
- * These are the functions used to load SOM format executables as used
- * by HP-UX.  
- *
- * Copyright 1999 Matthew Wilcox <willy@bofh.ai>
- * based on binfmt_elf which is
- * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
- */
-
-#include <linux/module.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/binfmts.h>
-#include <linux/som.h>
-#include <linux/string.h>
-#include <linux/file.h>
-#include <linux/fcntl.h>
-#include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/shm.h>
-#include <linux/personality.h>
-#include <linux/init.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-
-
-#include <linux/elf.h>
-
-static int load_som_binary(struct linux_binprm * bprm);
-static int load_som_library(struct file *);
-
-/*
- * If we don't support core dumping, then supply a NULL so we
- * don't even try.
- */
-#if 0
-static int som_core_dump(struct coredump_params *cprm);
-#else
-#define som_core_dump	NULL
-#endif
-
-#define SOM_PAGESTART(_v) ((_v) & ~(unsigned long)(SOM_PAGESIZE-1))
-#define SOM_PAGEOFFSET(_v) ((_v) & (SOM_PAGESIZE-1))
-#define SOM_PAGEALIGN(_v) (((_v) + SOM_PAGESIZE - 1) & ~(SOM_PAGESIZE - 1))
-
-static struct linux_binfmt som_format = {
-	.module		= THIS_MODULE,
-	.load_binary	= load_som_binary,
-	.load_shlib	= load_som_library,
-	.core_dump	= som_core_dump,
-	.min_coredump	= SOM_PAGESIZE
-};
-
-/*
- * create_som_tables() parses the env- and arg-strings in new user
- * memory and creates the pointer tables from them, and puts their
- * addresses on the "stack", returning the new stack pointer value.
- */
-static void create_som_tables(struct linux_binprm *bprm)
-{
-	char **argv, **envp;
-	int argc = bprm->argc;
-	int envc = bprm->envc;
-	unsigned long p;
-	unsigned long *sp;
-
-	/* Word-align the stack pointer */
-	sp = (unsigned long *)((bprm->p + 3) & ~3);
-
-	envp = (char **) sp;
-	sp += envc + 1;
-	argv = (char **) sp;
-	sp += argc + 1;
-
-	__put_user((unsigned long) envp,++sp);
-	__put_user((unsigned long) argv,++sp);
-
-	__put_user(argc, ++sp);
-
-	bprm->p = (unsigned long) sp;
-
-	p = current->mm->arg_start;
-	while (argc-- > 0) {
-		__put_user((char *)p,argv++);
-		p += strlen_user((char *)p);
-	}
-	__put_user(NULL, argv);
-	current->mm->arg_end = current->mm->env_start = p;
-	while (envc-- > 0) {
-		__put_user((char *)p,envp++);
-		p += strlen_user((char *)p);
-	}
-	__put_user(NULL, envp);
-	current->mm->env_end = p;
-}
-
-static int check_som_header(struct som_hdr *som_ex)
-{
-	int *buf = (int *)som_ex;
-	int i, ck;
-
-	if (som_ex->system_id != SOM_SID_PARISC_1_0 &&
-	    som_ex->system_id != SOM_SID_PARISC_1_1 &&
-	    som_ex->system_id != SOM_SID_PARISC_2_0)
-		return -ENOEXEC;
-
-	if (som_ex->a_magic != SOM_EXEC_NONSHARE &&
-	    som_ex->a_magic != SOM_EXEC_SHARE &&
-	    som_ex->a_magic != SOM_EXEC_DEMAND)
-		return -ENOEXEC;
-
-	if (som_ex->version_id != SOM_ID_OLD &&
-	    som_ex->version_id != SOM_ID_NEW)
-		return -ENOEXEC;
-
-	ck = 0;
-	for (i=0; i<32; i++)
-		ck ^= buf[i];
-	if (ck != 0)
-		return -ENOEXEC;
-
-	return 0;
-}
-
-static int map_som_binary(struct file *file,
-		const struct som_exec_auxhdr *hpuxhdr)
-{
-	unsigned long code_start, code_size, data_start, data_size;
-	unsigned long bss_start, som_brk;
-	int retval;
-	int prot = PROT_READ | PROT_EXEC;
-	int flags = MAP_FIXED|MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE;
-
-	mm_segment_t old_fs = get_fs();
-	set_fs(get_ds());
-
-	code_start = SOM_PAGESTART(hpuxhdr->exec_tmem);
-	code_size = SOM_PAGEALIGN(hpuxhdr->exec_tsize);
-	current->mm->start_code = code_start;
-	current->mm->end_code = code_start + code_size;
-	retval = vm_mmap(file, code_start, code_size, prot,
-			flags, SOM_PAGESTART(hpuxhdr->exec_tfile));
-	if (retval < 0 && retval > -1024)
-		goto out;
-
-	data_start = SOM_PAGESTART(hpuxhdr->exec_dmem);
-	data_size = SOM_PAGEALIGN(hpuxhdr->exec_dsize);
-	current->mm->start_data = data_start;
-	current->mm->end_data = bss_start = data_start + data_size;
-	retval = vm_mmap(file, data_start, data_size,
-			prot | PROT_WRITE, flags,
-			SOM_PAGESTART(hpuxhdr->exec_dfile));
-	if (retval < 0 && retval > -1024)
-		goto out;
-
-	som_brk = bss_start + SOM_PAGEALIGN(hpuxhdr->exec_bsize);
-	current->mm->start_brk = current->mm->brk = som_brk;
-	retval = vm_mmap(NULL, bss_start, som_brk - bss_start,
-			prot | PROT_WRITE, MAP_FIXED | MAP_PRIVATE, 0);
-	if (retval > 0 || retval < -1024)
-		retval = 0;
-out:
-	set_fs(old_fs);
-	return retval;
-}
-
-
-/*
- * These are the functions used to load SOM executables and shared
- * libraries.  There is no binary dependent code anywhere else.
- */
-
-static int
-load_som_binary(struct linux_binprm * bprm)
-{
-	int retval;
-	unsigned int size;
-	unsigned long som_entry;
-	struct som_hdr *som_ex;
-	struct som_exec_auxhdr *hpuxhdr;
-	struct pt_regs *regs = current_pt_regs();
-
-	/* Get the exec-header */
-	som_ex = (struct som_hdr *) bprm->buf;
-
-	retval = check_som_header(som_ex);
-	if (retval != 0)
-		goto out;
-
-	/* Now read in the auxiliary header information */
-
-	retval = -ENOMEM;
-	size = som_ex->aux_header_size;
-	if (size > SOM_PAGESIZE)
-		goto out;
-	hpuxhdr = kmalloc(size, GFP_KERNEL);
-	if (!hpuxhdr)
-		goto out;
-
-	retval = kernel_read(bprm->file, som_ex->aux_header_location,
-			(char *) hpuxhdr, size);
-	if (retval != size) {
-		if (retval >= 0)
-			retval = -EIO;
-		goto out_free;
-	}
-
-	/* Flush all traces of the currently running executable */
-	retval = flush_old_exec(bprm);
-	if (retval)
-		goto out_free;
-
-	/* OK, This is the point of no return */
-	current->personality = PER_HPUX;
-	setup_new_exec(bprm);
-
-	/* Set the task size for HP-UX processes such that
-	 * the gateway page is outside the address space.
-	 * This can be fixed later, but for now, this is much
-	 * easier.
-	 */
-
-	current->thread.task_size = 0xc0000000;
-
-	/* Set map base to allow enough room for hp-ux heap growth */
-
-	current->thread.map_base = 0x80000000;
-
-	retval = map_som_binary(bprm->file, hpuxhdr);
-	if (retval < 0)
-		goto out_free;
-
-	som_entry = hpuxhdr->exec_entry;
-	kfree(hpuxhdr);
-
-	set_binfmt(&som_format);
-	install_exec_creds(bprm);
-	setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
-
-	create_som_tables(bprm);
-
-	current->mm->start_stack = bprm->p;
-
-#if 0
-	printk("(start_brk) %08lx\n" , (unsigned long) current->mm->start_brk);
-	printk("(end_code) %08lx\n" , (unsigned long) current->mm->end_code);
-	printk("(start_code) %08lx\n" , (unsigned long) current->mm->start_code);
-	printk("(end_data) %08lx\n" , (unsigned long) current->mm->end_data);
-	printk("(start_stack) %08lx\n" , (unsigned long) current->mm->start_stack);
-	printk("(brk) %08lx\n" , (unsigned long) current->mm->brk);
-#endif
-
-	map_hpux_gateway_page(current,current->mm);
-
-	start_thread_som(regs, som_entry, bprm->p);
-	return 0;
-
-	/* error cleanup */
-out_free:
-	kfree(hpuxhdr);
-out:
-	return retval;
-}
-
-static int load_som_library(struct file *f)
-{
-/* No lib support in SOM yet.  gizza chance.. */
-	return -ENOEXEC;
-}
-	/* Install the SOM loader.
-	 * N.B. We *rely* on the table being the right size with the
-	 * right number of free slots...
-	 */
-
-static int __init init_som_binfmt(void)
-{
-	register_binfmt(&som_format);
-	return 0;
-}
-
-static void __exit exit_som_binfmt(void)
-{
-	/* Remove the SOM loader. */
-	unregister_binfmt(&som_format);
-}
-
-core_initcall(init_som_binfmt);
-module_exit(exit_som_binfmt);
-
-MODULE_LICENSE("GPL");
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 7b8141bf59a7..68ceb97c458c 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -370,7 +370,6 @@ header-y += snmp.h
 header-y += sock_diag.h
 header-y += socket.h
 header-y += sockios.h
-header-y += som.h
 header-y += sonet.h
 header-y += sonypi.h
 header-y += soundcard.h
diff --git a/include/uapi/linux/som.h b/include/uapi/linux/som.h
deleted file mode 100644
index 166594e4e7be..000000000000
--- a/include/uapi/linux/som.h
+++ /dev/null
@@ -1,154 +0,0 @@
-#ifndef _LINUX_SOM_H
-#define _LINUX_SOM_H
-
-/* File format definition for SOM executables / shared libraries */
-
-/* we need struct timespec */
-#include <linux/time.h>
-
-#define SOM_PAGESIZE 4096
-
-/* this is the SOM header */
-struct som_hdr {
-	short		system_id;		/* magic number - system */
-	short		a_magic;		/* magic number - file type */
-	unsigned int	version_id;		/* versiod ID: YYMMDDHH */
-	struct timespec	file_time;		/* system clock */
-	unsigned int	entry_space;		/* space for entry point */
-	unsigned int	entry_subspace;		/* subspace for entry point */
-	unsigned int	entry_offset;		/* offset of entry point */
-	unsigned int	aux_header_location;	/* auxiliary header location */
-	unsigned int	aux_header_size;	/* auxiliary header size */
-	unsigned int	som_length;		/* length of entire SOM */
-	unsigned int	presumed_dp;		/* compiler's DP value */
-	unsigned int	space_location;		/* space dictionary location */
-	unsigned int	space_total;		/* number of space entries */
-	unsigned int	subspace_location;	/* subspace entries location */
-	unsigned int	subspace_total;		/* number of subspace entries */
-	unsigned int	loader_fixup_location;	/* MPE/iX loader fixup */
-	unsigned int	loader_fixup_total;	/* number of fixup records */
-	unsigned int	space_strings_location;	/* (sub)space names */
-	unsigned int	space_strings_size;	/* size of strings area */
-	unsigned int	init_array_location;	/* reserved */
-	unsigned int	init_array_total;	/* reserved */
-	unsigned int	compiler_location;	/* module dictionary */
-	unsigned int	compiler_total;		/* number of modules */
-	unsigned int	symbol_location;	/* symbol dictionary */
-	unsigned int	symbol_total;		/* number of symbols */
-	unsigned int	fixup_request_location;	/* fixup requests */
-	unsigned int	fixup_request_total;	/* number of fixup requests */
-	unsigned int	symbol_strings_location;/* module & symbol names area */
-	unsigned int	symbol_strings_size;	/* size of strings area */
-	unsigned int	unloadable_sp_location;	/* unloadable spaces location */
-	unsigned int	unloadable_sp_size;	/* size of data */
-	unsigned int	checksum;
-};
-
-/* values for system_id */
-
-#define SOM_SID_PARISC_1_0	0x020b
-#define SOM_SID_PARISC_1_1	0x0210
-#define SOM_SID_PARISC_2_0	0x0214
-
-/* values for a_magic */
-
-#define SOM_LIB_EXEC		0x0104
-#define SOM_RELOCATABLE		0x0106
-#define SOM_EXEC_NONSHARE	0x0107
-#define SOM_EXEC_SHARE		0x0108
-#define SOM_EXEC_DEMAND		0x010B
-#define SOM_LIB_DYN		0x010D
-#define SOM_LIB_SHARE		0x010E
-#define SOM_LIB_RELOC		0x0619
-
-/* values for version_id.  Decimal not hex, yes.  Grr. */
-
-#define SOM_ID_OLD		85082112
-#define SOM_ID_NEW		87102412
-
-struct aux_id {
-	unsigned int	mandatory :1;	/* the linker must understand this */
-	unsigned int	copy	  :1;	/* Must be copied by the linker */
-	unsigned int	append	  :1;	/* Must be merged by the linker */
-	unsigned int	ignore	  :1;	/* Discard section if unknown */
-	unsigned int	reserved  :12;
-	unsigned int	type	  :16;	/* Header type */
-	unsigned int	length;		/* length of _following_ data */
-};
-
-/* The Exec Auxiliary Header.  Called The HP-UX Header within HP apparently. */
-struct som_exec_auxhdr {
-	struct aux_id	som_auxhdr;
-	int		exec_tsize;	/* Text size in bytes */
-	int		exec_tmem;	/* Address to load text at */
-	int		exec_tfile;	/* Location of text in file */
-	int		exec_dsize;	/* Data size in bytes */
-	int		exec_dmem;	/* Address to load data at */
-	int		exec_dfile;	/* Location of data in file */
-	int		exec_bsize;	/* Uninitialised data (bss) */
-	int		exec_entry;	/* Address to start executing */
-	int		exec_flags;	/* loader flags */
-	int		exec_bfill;	/* initialisation value for bss */
-};
-
-/* Oh, the things people do to avoid casts.  Shame it'll break with gcc's
- * new aliasing rules really.
- */
-union name_pt {
-	char *		n_name;
-	unsigned int	n_strx;
-};
-
-/* The Space Dictionary */
-struct space_dictionary_record {
-	union name_pt	name;			/* index to subspace name */
-	unsigned int	is_loadable	:1;	/* loadable */
-	unsigned int	is_defined	:1;	/* defined within file */
-	unsigned int	is_private	:1;	/* not sharable */
-	unsigned int	has_intermediate_code :1; /* contains intermediate code */
-	unsigned int	is_tspecific	:1;	/* thread specific */
-	unsigned int	reserved	:11;	/* for future expansion */
-	unsigned int	sort_key	:8;	/* for linker */
-	unsigned int	reserved2	:8;	/* for future expansion */
-
-	int		space_number;		/* index */
-	int		subspace_index;		/* index into subspace dict */
-	unsigned int	subspace_quantity;	/* number of subspaces */
-	int		loader_fix_index;	/* for loader */
-	unsigned int	loader_fix_quantity;	/* for loader */
-	int		init_pointer_index;	/* data pointer array index */
-	unsigned int	init_pointer_quantity;	/* number of data pointers */
-};
-
-/* The Subspace Dictionary */
-struct subspace_dictionary_record {
-	int		space_index;
-	unsigned int	access_control_bits :7;
-	unsigned int	memory_resident	:1;
-	unsigned int	dup_common	:1;
-	unsigned int	is_common	:1;
-	unsigned int	quadrant	:2;
-	unsigned int	initially_frozen :1;
-	unsigned int	is_first	:1;
-	unsigned int	code_only	:1;
-	unsigned int	sort_key	:8;
-	unsigned int	replicate_init	:1;
-	unsigned int	continuation	:1;
-	unsigned int	is_tspecific	:1;
-	unsigned int	is_comdat	:1;
-	unsigned int	reserved	:4;
-
-	int		file_loc_init_value;
-	unsigned int	initialization_length;
-	unsigned int	subspace_start;
-	unsigned int	subspace_length;
-
-	unsigned int	reserved2	:5;
-	unsigned int	alignment	:27;
-
-	union name_pt	name;
-	int		fixup_request_index;
-	unsigned int	fixup_request_quantity;
-};
-
-#endif /* _LINUX_SOM_H */
-- 
cgit v1.2.3-71-gd317


From 9dc5c05f45ca8101025046cda7f8aca8835204f2 Mon Sep 17 00:00:00 2001
From: Geoff Levand <geoff@infradead.org>
Date: Tue, 17 Feb 2015 13:45:50 -0800
Subject: kexec: Fix make headers_check

Remove the unneded declaration for a kexec_load() routine.

Fixes errors like these when running 'make headers_check':

include/uapi/linux/kexec.h: userspace cannot reference function or variable defined in the kernel

Paul said:

: The kexec_load declaration isn't very useful for userspace, see the patch
: I submitted in http://lkml.kernel.org/r/1389791824.17407.9.camel@x220 .
: And After my attempt the export of that declaration has also been
: discussed in
: http://lkml.kernel.org/r/115373b6ac68ee7a305975896e1c4971e8e51d4c.1408731991.git.geoff@infradead.org
:
: In that last discussion no one has been able to point to an actual user of
: it.  So, as far as I can tell, no one actually uses it.  Which makes
: sense, because including this header by itself doesn't give one access to
: a useful definition of kexec_load.  So why bother with the declaration?

Signed-off-by: Geoff Levand <geoff@infradead.org>
Acked-by: Paul Bolle <pebolle@tiscali.nl>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Maximilian Attems <max@stro.at>
Cc: Michal Marek <mmarek@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/kexec.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 6925f5b42f89..99048e501b88 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -55,12 +55,6 @@ struct kexec_segment {
 	size_t memsz;
 };
 
-/* Load a new kernel image as described by the kexec_segment array
- * consisting of passed number of segments at the entry-point address.
- * The flags allow different useage types.
- */
-extern int kexec_load(void *, size_t, struct kexec_segment *,
-		unsigned long int);
 #endif /* __KERNEL__ */
 
 #endif /* _UAPILINUX_KEXEC_H */
-- 
cgit v1.2.3-71-gd317


From e1e5e5641e6f271321aec257ed26a72715e4a8c2 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 19 Feb 2015 13:39:03 -0700
Subject: NVMe: Metadata format support

Adds support for NVMe metadata formats and exposes block devices for
all namespaces regardless of their format. Namespace formats that are
unusable will have disk capacity set to 0, but a handle to the block
device is created to simplify device management. A namespace is not
usable when the format requires host interleave block and metadata in
single buffer, has no provisioned storage, or has better data but failed
to register with blk integrity.

The namespace has to be scanned in two phases to support separate
metadata formats. The first establishes the sector size and capacity
prior to invoking add_disk. If metadata is required, the capacity will
be temporarilly set to 0 until it can be revalidated and registered with
the integrity extenstions after add_disk completes.

The driver relies on the integrity extensions to provide the metadata
buffer. NVMe requires this be a single physically contiguous region,
so only one integrity segment is allowed per command. If the metadata
is used for T10 PI, the driver provides mappings to save and restore
the reftag physical block translation. The driver provides no-op
functions for generate and verify if metadata is not used for protection
information. This way the setup is always provided by the block layer.

If a request does not supply a required metadata buffer, the command
is failed with bad address. This could only happen if a user manually
disables verify/generate on such a disk. The only exception to where
this is okay is if the controller is capable of stripping/generating
the metadata, which is possible on some types of formats.

The metadata scatter gather list now occupies the spot in the nvme_iod
that used to be used to link retryable IOD's, but we don't do that
anymore, so the field was unused.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-core.c | 298 +++++++++++++++++++++++++++++++++++-----------
 include/linux/nvme.h      |   2 +
 include/uapi/linux/nvme.h |  16 +++
 3 files changed, 249 insertions(+), 67 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index cbdfbbf98392..3ffa57a932ea 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -37,6 +37,7 @@
 #include <linux/ptrace.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/t10-pi.h>
 #include <linux/types.h>
 #include <scsi/sg.h>
 #include <asm-generic/io-64-nonatomic-lo-hi.h>
@@ -482,6 +483,62 @@ static int nvme_error_status(u16 status)
 	}
 }
 
+static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
+{
+	if (be32_to_cpu(pi->ref_tag) == v)
+		pi->ref_tag = cpu_to_be32(p);
+}
+
+static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
+{
+	if (be32_to_cpu(pi->ref_tag) == p)
+		pi->ref_tag = cpu_to_be32(v);
+}
+
+/**
+ * nvme_dif_remap - remaps ref tags to bip seed and physical lba
+ *
+ * The virtual start sector is the one that was originally submitted by the
+ * block layer.	Due to partitioning, MD/DM cloning, etc. the actual physical
+ * start sector may be different. Remap protection information to match the
+ * physical LBA on writes, and back to the original seed on reads.
+ *
+ * Type 0 and 3 do not have a ref tag, so no remapping required.
+ */
+static void nvme_dif_remap(struct request *req,
+			void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
+{
+	struct nvme_ns *ns = req->rq_disk->private_data;
+	struct bio_integrity_payload *bip;
+	struct t10_pi_tuple *pi;
+	void *p, *pmap;
+	u32 i, nlb, ts, phys, virt;
+
+	if (!ns->pi_type || ns->pi_type == NVME_NS_DPS_PI_TYPE3)
+		return;
+
+	bip = bio_integrity(req->bio);
+	if (!bip)
+		return;
+
+	pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset;
+	if (!pmap)
+		return;
+
+	p = pmap;
+	virt = bip_get_seed(bip);
+	phys = nvme_block_nr(ns, blk_rq_pos(req));
+	nlb = (blk_rq_bytes(req) >> ns->lba_shift);
+	ts = ns->disk->integrity->tuple_size;
+
+	for (i = 0; i < nlb; i++, virt++, phys++) {
+		pi = (struct t10_pi_tuple *)p;
+		dif_swap(phys, virt, pi);
+		p += ts;
+	}
+	kunmap_atomic(pmap);
+}
+
 static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
@@ -512,9 +569,16 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 			"completing aborted command with status:%04x\n",
 			status);
 
-	if (iod->nents)
+	if (iod->nents) {
 		dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents,
 			rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		if (blk_integrity_rq(req)) {
+			if (!rq_data_dir(req))
+				nvme_dif_remap(req, nvme_dif_complete);
+			dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->meta_sg, 1,
+				rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		}
+	}
 	nvme_free_iod(nvmeq->dev, iod);
 
 	blk_mq_complete_request(req);
@@ -670,6 +734,24 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 	cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
 	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
 	cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+
+	if (blk_integrity_rq(req)) {
+		cmnd->rw.metadata = cpu_to_le64(sg_dma_address(iod->meta_sg));
+		switch (ns->pi_type) {
+		case NVME_NS_DPS_PI_TYPE3:
+			control |= NVME_RW_PRINFO_PRCHK_GUARD;
+			break;
+		case NVME_NS_DPS_PI_TYPE1:
+		case NVME_NS_DPS_PI_TYPE2:
+			control |= NVME_RW_PRINFO_PRCHK_GUARD |
+					NVME_RW_PRINFO_PRCHK_REF;
+			cmnd->rw.reftag = cpu_to_le32(
+					nvme_block_nr(ns, blk_rq_pos(req)));
+			break;
+		}
+	} else if (ns->ms)
+		control |= NVME_RW_PRINFO_PRACT;
+
 	cmnd->rw.control = cpu_to_le16(control);
 	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
 
@@ -690,6 +772,19 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_iod *iod;
 	enum dma_data_direction dma_dir;
 
+	/*
+	 * If formated with metadata, require the block layer provide a buffer
+	 * unless this namespace is formated such that the metadata can be
+	 * stripped/generated by the controller with PRACT=1.
+	 */
+	if (ns->ms && !blk_integrity_rq(req)) {
+		if (!(ns->pi_type && ns->ms == 8)) {
+			req->errors = -EFAULT;
+			blk_mq_complete_request(req);
+			return BLK_MQ_RQ_QUEUE_OK;
+		}
+	}
+
 	iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC);
 	if (!iod)
 		return BLK_MQ_RQ_QUEUE_BUSY;
@@ -725,6 +820,21 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 					iod->nents, dma_dir);
 			goto retry_cmd;
 		}
+		if (blk_integrity_rq(req)) {
+			if (blk_rq_count_integrity_sg(req->q, req->bio) != 1)
+				goto error_cmd;
+
+			sg_init_table(iod->meta_sg, 1);
+			if (blk_rq_map_integrity_sg(
+					req->q, req->bio, iod->meta_sg) != 1)
+				goto error_cmd;
+
+			if (rq_data_dir(req))
+				nvme_dif_remap(req, nvme_dif_prep);
+
+			if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir))
+				goto error_cmd;
+		}
 	}
 
 	nvme_set_info(cmd, iod, req_completion);
@@ -1875,13 +1985,61 @@ static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo)
 	return 0;
 }
 
+static void nvme_config_discard(struct nvme_ns *ns)
+{
+	u32 logical_block_size = queue_logical_block_size(ns->queue);
+	ns->queue->limits.discard_zeroes_data = 0;
+	ns->queue->limits.discard_alignment = logical_block_size;
+	ns->queue->limits.discard_granularity = logical_block_size;
+	ns->queue->limits.max_discard_sectors = 0xffffffff;
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+}
+
+static int nvme_noop_verify(struct blk_integrity_iter *iter)
+{
+	return 0;
+}
+
+static int nvme_noop_generate(struct blk_integrity_iter *iter)
+{
+	return 0;
+}
+
+struct blk_integrity nvme_meta_noop = {
+	.name			= "NVME_META_NOOP",
+	.generate_fn		= nvme_noop_generate,
+	.verify_fn		= nvme_noop_verify,
+};
+
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+	struct blk_integrity integrity;
+
+	switch (ns->pi_type) {
+	case NVME_NS_DPS_PI_TYPE3:
+		integrity = t10_pi_type3_crc;
+		break;
+	case NVME_NS_DPS_PI_TYPE1:
+	case NVME_NS_DPS_PI_TYPE2:
+		integrity = t10_pi_type1_crc;
+		break;
+	default:
+		integrity = nvme_meta_noop;
+		break;
+	}
+	integrity.tuple_size = ns->ms;
+	blk_integrity_register(ns->disk, &integrity);
+	blk_queue_max_integrity_segments(ns->queue, 1);
+}
+
 static int nvme_revalidate_disk(struct gendisk *disk)
 {
 	struct nvme_ns *ns = disk->private_data;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id;
 	dma_addr_t dma_addr;
-	int lbaf;
+	int lbaf, pi_type, old_ms;
+	unsigned short bs;
 
 	id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
 								GFP_KERNEL);
@@ -1890,16 +2048,50 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 								__func__);
 		return 0;
 	}
+	if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) {
+		dev_warn(&dev->pci_dev->dev,
+			"identify failed ns:%d, setting capacity to 0\n",
+			ns->ns_id);
+		memset(id, 0, sizeof(*id));
+	}
 
-	if (nvme_identify(dev, ns->ns_id, 0, dma_addr))
-		goto free;
-
-	lbaf = id->flbas & 0xf;
+	old_ms = ns->ms;
+	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
 	ns->lba_shift = id->lbaf[lbaf].ds;
+	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+
+	/*
+	 * If identify namespace failed, use default 512 byte block size so
+	 * block layer can use before failing read/write for 0 capacity.
+	 */
+	if (ns->lba_shift == 0)
+		ns->lba_shift = 9;
+	bs = 1 << ns->lba_shift;
+
+	/* XXX: PI implementation requires metadata equal t10 pi tuple size */
+	pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
+					id->dps & NVME_NS_DPS_PI_MASK : 0;
+
+	if (disk->integrity && (ns->pi_type != pi_type || ns->ms != old_ms ||
+				bs != queue_logical_block_size(disk->queue) ||
+				(ns->ms && id->flbas & NVME_NS_FLBAS_META_EXT)))
+		blk_integrity_unregister(disk);
+
+	ns->pi_type = pi_type;
+	blk_queue_logical_block_size(ns->queue, bs);
+
+	if (ns->ms && !disk->integrity && (disk->flags & GENHD_FL_UP) &&
+				!(id->flbas & NVME_NS_FLBAS_META_EXT))
+		nvme_init_integrity(ns);
+
+	if (id->ncap == 0 || (ns->ms && !disk->integrity))
+		set_capacity(disk, 0);
+	else
+		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+	if (dev->oncs & NVME_CTRL_ONCS_DSM)
+		nvme_config_discard(ns);
 
-	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
-	set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
- free:
 	dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
 	return 0;
 }
@@ -1956,30 +2148,16 @@ static int nvme_kthread(void *data)
 	return 0;
 }
 
-static void nvme_config_discard(struct nvme_ns *ns)
-{
-	u32 logical_block_size = queue_logical_block_size(ns->queue);
-	ns->queue->limits.discard_zeroes_data = 0;
-	ns->queue->limits.discard_alignment = logical_block_size;
-	ns->queue->limits.discard_granularity = logical_block_size;
-	ns->queue->limits.max_discard_sectors = 0xffffffff;
-	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
-}
-
-static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
-			struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
+static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 {
 	struct nvme_ns *ns;
 	struct gendisk *disk;
 	int node = dev_to_node(&dev->pci_dev->dev);
-	int lbaf;
-
-	if (rt->attributes & NVME_LBART_ATTRIB_HIDE)
-		return NULL;
 
 	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
 	if (!ns)
-		return NULL;
+		return;
+
 	ns->queue = blk_mq_init_queue(&dev->tagset);
 	if (IS_ERR(ns->queue))
 		goto out_free_ns;
@@ -1995,9 +2173,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
 
 	ns->ns_id = nsid;
 	ns->disk = disk;
-	lbaf = id->flbas & 0xf;
-	ns->lba_shift = id->lbaf[lbaf].ds;
-	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+	ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
+	list_add_tail(&ns->list, &dev->namespaces);
+
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	if (dev->max_hw_sectors)
 		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
@@ -2014,18 +2192,23 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
 	disk->driverfs_dev = &dev->pci_dev->dev;
 	disk->flags = GENHD_FL_EXT_DEVT;
 	sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid);
-	set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
-
-	if (dev->oncs & NVME_CTRL_ONCS_DSM)
-		nvme_config_discard(ns);
-
-	return ns;
 
+	/*
+	 * Initialize capacity to 0 until we establish the namespace format and
+	 * setup integrity extentions if necessary. The revalidate_disk after
+	 * add_disk allows the driver to register with integrity if the format
+	 * requires it.
+	 */
+	set_capacity(disk, 0);
+	nvme_revalidate_disk(ns->disk);
+	add_disk(ns->disk);
+	if (ns->ms)
+		revalidate_disk(ns->disk);
+	return;
  out_free_queue:
 	blk_cleanup_queue(ns->queue);
  out_free_ns:
 	kfree(ns);
-	return NULL;
 }
 
 static void nvme_create_io_queues(struct nvme_dev *dev)
@@ -2150,22 +2333,20 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	struct pci_dev *pdev = dev->pci_dev;
 	int res;
 	unsigned nn, i;
-	struct nvme_ns *ns;
 	struct nvme_id_ctrl *ctrl;
-	struct nvme_id_ns *id_ns;
 	void *mem;
 	dma_addr_t dma_addr;
 	int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
 
-	mem = dma_alloc_coherent(&pdev->dev, 8192, &dma_addr, GFP_KERNEL);
+	mem = dma_alloc_coherent(&pdev->dev, 4096, &dma_addr, GFP_KERNEL);
 	if (!mem)
 		return -ENOMEM;
 
 	res = nvme_identify(dev, 0, 1, dma_addr);
 	if (res) {
 		dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res);
-		res = -EIO;
-		goto out;
+		dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+		return -EIO;
 	}
 
 	ctrl = mem;
@@ -2191,6 +2372,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		} else
 			dev->max_hw_sectors = max_hw_sectors;
 	}
+	dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
 
 	dev->tagset.ops = &nvme_mq_ops;
 	dev->tagset.nr_hw_queues = dev->online_queues - 1;
@@ -2203,33 +2385,12 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	dev->tagset.driver_data = dev;
 
 	if (blk_mq_alloc_tag_set(&dev->tagset))
-		goto out;
-
-	id_ns = mem;
-	for (i = 1; i <= nn; i++) {
-		res = nvme_identify(dev, i, 0, dma_addr);
-		if (res)
-			continue;
-
-		if (id_ns->ncap == 0)
-			continue;
-
-		res = nvme_get_features(dev, NVME_FEAT_LBA_RANGE, i,
-							dma_addr + 4096, NULL);
-		if (res)
-			memset(mem + 4096, 0, 4096);
+		return 0;
 
-		ns = nvme_alloc_ns(dev, i, mem, mem + 4096);
-		if (ns)
-			list_add_tail(&ns->list, &dev->namespaces);
-	}
-	list_for_each_entry(ns, &dev->namespaces, list)
-		add_disk(ns->disk);
-	res = 0;
+	for (i = 1; i <= nn; i++)
+		nvme_alloc_ns(dev, i);
 
- out:
-	dma_free_coherent(&dev->pci_dev->dev, 8192, mem, dma_addr);
-	return res;
+	return 0;
 }
 
 static int nvme_dev_map(struct nvme_dev *dev)
@@ -2528,8 +2689,11 @@ static void nvme_dev_remove(struct nvme_dev *dev)
 	struct nvme_ns *ns;
 
 	list_for_each_entry(ns, &dev->namespaces, list) {
-		if (ns->disk->flags & GENHD_FL_UP)
+		if (ns->disk->flags & GENHD_FL_UP) {
+			if (ns->disk->integrity)
+				blk_integrity_unregister(ns->disk);
 			del_gendisk(ns->disk);
+		}
 		if (!blk_queue_dying(ns->queue)) {
 			blk_mq_abort_requeue_list(ns->queue);
 			blk_cleanup_queue(ns->queue);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 19a5d4b23209..cca264db2478 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -121,6 +121,7 @@ struct nvme_ns {
 	unsigned ns_id;
 	int lba_shift;
 	int ms;
+	int pi_type;
 	u64 mode_select_num_blocks;
 	u32 mode_select_block_len;
 };
@@ -138,6 +139,7 @@ struct nvme_iod {
 	int nents;		/* Used in scatterlist */
 	int length;		/* Of data, in bytes */
 	dma_addr_t first_dma;
+	struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */
 	struct scatterlist sg[0];
 };
 
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index 26386cf3db44..406bfc95652c 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -124,10 +124,22 @@ struct nvme_id_ns {
 
 enum {
 	NVME_NS_FEAT_THIN	= 1 << 0,
+	NVME_NS_FLBAS_LBA_MASK	= 0xf,
+	NVME_NS_FLBAS_META_EXT	= 0x10,
 	NVME_LBAF_RP_BEST	= 0,
 	NVME_LBAF_RP_BETTER	= 1,
 	NVME_LBAF_RP_GOOD	= 2,
 	NVME_LBAF_RP_DEGRADED	= 3,
+	NVME_NS_DPC_PI_LAST	= 1 << 4,
+	NVME_NS_DPC_PI_FIRST	= 1 << 3,
+	NVME_NS_DPC_PI_TYPE3	= 1 << 2,
+	NVME_NS_DPC_PI_TYPE2	= 1 << 1,
+	NVME_NS_DPC_PI_TYPE1	= 1 << 0,
+	NVME_NS_DPS_PI_FIRST	= 1 << 3,
+	NVME_NS_DPS_PI_MASK	= 0x7,
+	NVME_NS_DPS_PI_TYPE1	= 1,
+	NVME_NS_DPS_PI_TYPE2	= 2,
+	NVME_NS_DPS_PI_TYPE3	= 3,
 };
 
 struct nvme_smart_log {
@@ -261,6 +273,10 @@ enum {
 	NVME_RW_DSM_LATENCY_LOW		= 3 << 4,
 	NVME_RW_DSM_SEQ_REQ		= 1 << 6,
 	NVME_RW_DSM_COMPRESSED		= 1 << 7,
+	NVME_RW_PRINFO_PRCHK_REF	= 1 << 10,
+	NVME_RW_PRINFO_PRCHK_APP	= 1 << 11,
+	NVME_RW_PRINFO_PRCHK_GUARD	= 1 << 12,
+	NVME_RW_PRINFO_PRACT		= 1 << 13,
 };
 
 struct nvme_dsm_cmd {
-- 
cgit v1.2.3-71-gd317


From 4f1982b4e262c45475a91b4253e9bc7f7c991c13 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 19 Feb 2015 13:42:14 -0700
Subject: NVMe: Update SCSI Inquiry VPD 83h translation

The original translation created collisions on Inquiry VPD 83 for many
existing devices. Newer specifications provide other ways to translate
based on the device's version can be used to create unique identifiers.

Version 1.1 provides an EUI64 field that uniquely identifies each
namespace, and 1.2 added the longer NGUID field for the same reason.
Both follow the IEEE EUI format and readily translate to the SCSI device
identification EUI designator type 2h. For devices implementing either,
the translation will use this type, defaulting to the EUI64 8-byte type if
implemented then NGUID's 16 byte version if not. If neither are provided,
the 1.0 translation is used, and is updated to use the SCSI String format
to guarantee a unique identifier.

Knowing when to use the new fields depends on the nvme controller's
revision. The NVME_VS macro was not decoding this correctly, so that is
fixed in this patch and moved to a more appropriate place.

Since the Identify Namespace structure required an update for the NGUID
field, this patch adds the remaining new 1.2 fields to the structure.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/block/nvme-scsi.c | 94 ++++++++++++++++++++++++++---------------------
 include/linux/nvme.h      |  2 -
 include/uapi/linux/nvme.h | 10 ++++-
 3 files changed, 62 insertions(+), 44 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 5e78568026c3..5cc907648742 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -779,10 +779,8 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	struct nvme_dev *dev = ns->dev;
 	dma_addr_t dma_addr;
 	void *mem;
-	struct nvme_id_ctrl *id_ctrl;
 	int res = SNTI_TRANSLATION_SUCCESS;
 	int nvme_sc;
-	u8 ieee[4];
 	int xfer_len;
 	__be32 tmp_id = cpu_to_be32(ns->ns_id);
 
@@ -793,46 +791,60 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		goto out_dma;
 	}
 
-	/* nvme controller identify */
-	nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out_free;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_free;
-	}
-	id_ctrl = mem;
-
-	/* Since SCSI tried to save 4 bits... [SPC-4(r34) Table 591] */
-	ieee[0] = id_ctrl->ieee[0] << 4;
-	ieee[1] = id_ctrl->ieee[0] >> 4 | id_ctrl->ieee[1] << 4;
-	ieee[2] = id_ctrl->ieee[1] >> 4 | id_ctrl->ieee[2] << 4;
-	ieee[3] = id_ctrl->ieee[2] >> 4;
-
-	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
+	memset(inq_response, 0, alloc_len);
 	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;    /* Page Code */
-	inq_response[3] = 20;      /* Page Length */
-	/* Designation Descriptor start */
-	inq_response[4] = 0x01;    /* Proto ID=0h | Code set=1h */
-	inq_response[5] = 0x03;    /* PIV=0b | Asso=00b | Designator Type=3h */
-	inq_response[6] = 0x00;    /* Rsvd */
-	inq_response[7] = 16;      /* Designator Length */
-	/* Designator start */
-	inq_response[8] = 0x60 | ieee[3]; /* NAA=6h | IEEE ID MSB, High nibble*/
-	inq_response[9] = ieee[2];        /* IEEE ID */
-	inq_response[10] = ieee[1];       /* IEEE ID */
-	inq_response[11] = ieee[0];       /* IEEE ID| Vendor Specific ID... */
-	inq_response[12] = (dev->pci_dev->vendor & 0xFF00) >> 8;
-	inq_response[13] = (dev->pci_dev->vendor & 0x00FF);
-	inq_response[14] = dev->serial[0];
-	inq_response[15] = dev->serial[1];
-	inq_response[16] = dev->model[0];
-	inq_response[17] = dev->model[1];
-	memcpy(&inq_response[18], &tmp_id, sizeof(u32));
-	/* Last 2 bytes are zero */
+	if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) {
+		struct nvme_id_ns *id_ns = mem;
+		void *eui = id_ns->eui64;
+		int len = sizeof(id_ns->eui64);
 
-	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
+		nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+		res = nvme_trans_status_code(hdr, nvme_sc);
+		if (res)
+			goto out_free;
+		if (nvme_sc) {
+			res = nvme_sc;
+			goto out_free;
+		}
+
+		if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) {
+			if (bitmap_empty(eui, len * 8)) {
+				eui = id_ns->nguid;
+				len = sizeof(id_ns->nguid);
+			}
+		}
+		if (bitmap_empty(eui, len * 8))
+			goto scsi_string;
+
+		inq_response[3] = 4 + len; /* Page Length */
+		/* Designation Descriptor start */
+		inq_response[4] = 0x01;    /* Proto ID=0h | Code set=1h */
+		inq_response[5] = 0x02;    /* PIV=0b | Asso=00b | Designator Type=2h */
+		inq_response[6] = 0x00;    /* Rsvd */
+		inq_response[7] = len;     /* Designator Length */
+		memcpy(&inq_response[8], eui, len);
+	} else {
+ scsi_string:
+		if (alloc_len < 72) {
+			res = nvme_trans_completion(hdr,
+					SAM_STAT_CHECK_CONDITION,
+					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
+					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+			goto out_free;
+		}
+		inq_response[3] = 0x48;    /* Page Length */
+		/* Designation Descriptor start */
+		inq_response[4] = 0x03;    /* Proto ID=0h | Code set=3h */
+		inq_response[5] = 0x08;    /* PIV=0b | Asso=00b | Designator Type=8h */
+		inq_response[6] = 0x00;    /* Rsvd */
+		inq_response[7] = 0x44;    /* Designator Length */
+
+		sprintf(&inq_response[8], "%04x", dev->pci_dev->vendor);
+		memcpy(&inq_response[12], dev->model, sizeof(dev->model));
+		sprintf(&inq_response[52], "%04x", tmp_id);
+		memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
+	}
+	xfer_len = alloc_len;
 	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 
  out_free:
@@ -2222,7 +2234,7 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	page_code = GET_INQ_PAGE_CODE(cmd);
 	alloc_len = GET_INQ_ALLOC_LENGTH(cmd);
 
-	inq_response = kmalloc(STANDARD_INQUIRY_LENGTH, GFP_KERNEL);
+	inq_response = kmalloc(alloc_len, GFP_KERNEL);
 	if (inq_response == NULL) {
 		res = -ENOMEM;
 		goto out_mem;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index cca264db2478..1f062a9e521d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -62,8 +62,6 @@ enum {
 	NVME_CSTS_SHST_MASK	= 3 << 2,
 };
 
-#define NVME_VS(major, minor)	(major << 16 | minor)
-
 extern unsigned char nvme_io_timeout;
 #define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)
 
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index 406bfc95652c..aef9a81b2d75 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -115,7 +115,13 @@ struct nvme_id_ns {
 	__le16			nawun;
 	__le16			nawupf;
 	__le16			nacwu;
-	__u8			rsvd40[80];
+	__le16			nabsn;
+	__le16			nabo;
+	__le16			nabspf;
+	__u16			rsvd46;
+	__le64			nvmcap[2];
+	__u8			rsvd64[40];
+	__u8			nguid[16];
 	__u8			eui64[8];
 	struct nvme_lbaf	lbaf[16];
 	__u8			rsvd192[192];
@@ -565,6 +571,8 @@ struct nvme_passthru_cmd {
 	__u32	result;
 };
 
+#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8))
+
 #define nvme_admin_cmd nvme_passthru_cmd
 
 #define NVME_IOCTL_ID		_IO('N', 0x40)
-- 
cgit v1.2.3-71-gd317


From 30ff54765976e132674e3eae2071ed8ed494665c Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Mon, 23 Feb 2015 08:17:12 -0500
Subject: net: sched: export tc_connmark.h so it is uapi accessible

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tc_act/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild
index 19d5219b0b99..242cf0c6e33d 100644
--- a/include/uapi/linux/tc_act/Kbuild
+++ b/include/uapi/linux/tc_act/Kbuild
@@ -9,3 +9,4 @@ header-y += tc_pedit.h
 header-y += tc_skbedit.h
 header-y += tc_vlan.h
 header-y += tc_bpf.h
+header-y += tc_connmark.h
-- 
cgit v1.2.3-71-gd317


From 647f162b8e7e446c4bade031eb8a1a0a83d3de82 Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Sun, 1 Mar 2015 10:24:28 -0500
Subject: serial: uapi: Declare all userspace-visible io types

ioctl(TIOCGSERIAL|TIOCSSERIAL) report and can change the port->iotype.
UART drivers use the UPIO_* definitions, but the uapi header defines
parallel values and userspace uses these parallel values for ioctls;
thus the userspace values are definitive.

Define UPIO_* iotypes in terms of the uapi defines, SERIAL_IO_*;
extend the uapi defines to include all values in use by the serial
core.

Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serial_core.h | 14 +++++++-------
 include/uapi/linux/serial.h |  4 ++++
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 1094f2d9cadb..d10965f0d8a4 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -143,13 +143,13 @@ struct uart_port {
 	unsigned char		iotype;			/* io access style */
 	unsigned char		unused1;
 
-#define UPIO_PORT		(0)			/* 8b I/O port access */
-#define UPIO_HUB6		(1)			/* Hub6 ISA card */
-#define UPIO_MEM		(2)			/* 8b MMIO access */
-#define UPIO_MEM32		(3)			/* 32b little endian */
-#define UPIO_AU			(4)			/* Au1x00 and RT288x type IO */
-#define UPIO_TSI		(5)			/* Tsi108/109 type IO */
-#define UPIO_MEM32BE		(6)			/* 32b big endian */
+#define UPIO_PORT		(SERIAL_IO_PORT)	/* 8b I/O port access */
+#define UPIO_HUB6		(SERIAL_IO_HUB6)	/* Hub6 ISA card */
+#define UPIO_MEM		(SERIAL_IO_MEM)		/* 8b MMIO access */
+#define UPIO_MEM32		(SERIAL_IO_MEM32)	/* 32b little endian */
+#define UPIO_AU			(SERIAL_IO_AU)		/* Au1x00 and RT288x type IO */
+#define UPIO_TSI		(SERIAL_IO_TSI)		/* Tsi108/109 type IO */
+#define UPIO_MEM32BE		(SERIAL_IO_MEM32BE)	/* 32b big endian */
 
 	unsigned int		read_status_mask;	/* driver specific */
 	unsigned int		ignore_status_mask;	/* driver specific */
diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h
index 5e0d0ed61cf3..25331f9faa76 100644
--- a/include/uapi/linux/serial.h
+++ b/include/uapi/linux/serial.h
@@ -65,6 +65,10 @@ struct serial_struct {
 #define SERIAL_IO_PORT	0
 #define SERIAL_IO_HUB6	1
 #define SERIAL_IO_MEM	2
+#define SERIAL_IO_MEM32	  3
+#define SERIAL_IO_AU	  4
+#define SERIAL_IO_TSI	  5
+#define SERIAL_IO_MEM32BE 6
 
 #define UART_CLEAR_FIFO		0x01
 #define UART_USE_FIFO		0x02
-- 
cgit v1.2.3-71-gd317


From 7e41a9def062167b5405711a42c9ecfd163e31a9 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 6 Mar 2015 12:50:03 +1030
Subject: virtio_blk: typo fix

Now that QEmu reuses linux virtio headers, we noticed
a typo in the exported virtio block header. Fix it up.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/uapi/linux/virtio_blk.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 3c53eec4ae22..b695ba959186 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -60,7 +60,7 @@ struct virtio_blk_config {
 	__u32 size_max;
 	/* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */
 	__u32 seg_max;
-	/* geometry the device (if VIRTIO_BLK_F_GEOMETRY) */
+	/* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */
 	struct virtio_blk_geometry {
 		__u16 cylinders;
 		__u8 heads;
-- 
cgit v1.2.3-71-gd317


From 0fa2a56437d0b7ef5d86eef2778ad3469ca72d5a Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 6 Mar 2015 12:50:03 +1030
Subject: virtio_blk: fix comment for virtio 1.0

Fix up comment to match virtio 1.0 logic:
virtio_blk_outhdr isn't the first elements anymore,
the only requirement is that it comes first in
the s/g list.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/uapi/linux/virtio_blk.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index b695ba959186..19c66fcbab8a 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -119,7 +119,11 @@ struct virtio_blk_config {
 #define VIRTIO_BLK_T_BARRIER	0x80000000
 #endif /* !VIRTIO_BLK_NO_LEGACY */
 
-/* This is the first element of the read scatter-gather list. */
+/*
+ * This comes first in the read scatter-gather list.
+ * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated,
+ * this is the first element of the read scatter-gather list.
+ */
 struct virtio_blk_outhdr {
 	/* VIRTIO_BLK_T* */
 	__virtio32 type;
-- 
cgit v1.2.3-71-gd317


From a4994b810d52ccb26de922c8d231fe05d14610d4 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 13 Mar 2015 11:59:11 +1030
Subject: uapi/virtio_scsi: allow overriding CDB/SENSE size

QEMU wants to use virtio scsi structures with
a different VIRTIO_SCSI_CDB_SIZE/VIRTIO_SCSI_SENSE_SIZE,
let's add ifdefs to allow overriding them.

Keep the old defines under new names:
VIRTIO_SCSI_CDB_DEFAULT_SIZE/VIRTIO_SCSI_SENSE_DEFAULT_SIZE,
since that's what these values really are:
defaults for cdb/sense size fields.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/uapi/linux/virtio_scsi.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h
index 42b9370771b0..cc18ef8825c0 100644
--- a/include/uapi/linux/virtio_scsi.h
+++ b/include/uapi/linux/virtio_scsi.h
@@ -29,8 +29,16 @@
 
 #include <linux/virtio_types.h>
 
-#define VIRTIO_SCSI_CDB_SIZE   32
-#define VIRTIO_SCSI_SENSE_SIZE 96
+/* Default values of the CDB and sense data size configuration fields */
+#define VIRTIO_SCSI_CDB_DEFAULT_SIZE   32
+#define VIRTIO_SCSI_SENSE_DEFAULT_SIZE 96
+
+#ifndef VIRTIO_SCSI_CDB_SIZE
+#define VIRTIO_SCSI_CDB_SIZE VIRTIO_SCSI_CDB_DEFAULT_SIZE
+#endif
+#ifndef VIRTIO_SCSI_SENSE_SIZE
+#define VIRTIO_SCSI_SENSE_SIZE VIRTIO_SCSI_SENSE_DEFAULT_SIZE
+#endif
 
 /* SCSI command request, followed by data-out */
 struct virtio_scsi_cmd_req {
-- 
cgit v1.2.3-71-gd317