From 02d62e86fe892c59a1259d089d4d16ac76977a37 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 06:30:52 -0800 Subject: net: un-inline sk_busy_loop() There is really little gain from inlining this big function. We'll soon make it even bigger in following patches. This means we no longer need to export napi_by_id() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/busy_poll.h | 45 +-------------------------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) (limited to 'include/net') diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 1d67fb6b23a0..2fbeb1313c0f 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -72,50 +72,7 @@ static inline bool busy_loop_timeout(unsigned long end_time) return time_after(now, end_time); } -/* when used in sock_poll() nonblock is known at compile time to be true - * so the loop and end_time will be optimized out - */ -static inline bool sk_busy_loop(struct sock *sk, int nonblock) -{ - unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0; - const struct net_device_ops *ops; - struct napi_struct *napi; - int rc = false; - - /* - * rcu read lock for napi hash - * bh so we don't race with net_rx_action - */ - rcu_read_lock_bh(); - - napi = napi_by_id(sk->sk_napi_id); - if (!napi) - goto out; - - ops = napi->dev->netdev_ops; - if (!ops->ndo_busy_poll) - goto out; - - do { - rc = ops->ndo_busy_poll(napi); - - if (rc == LL_FLUSH_FAILED) - break; /* permanent failure */ - - if (rc > 0) - /* local bh are disabled so it is ok to use _BH */ - NET_ADD_STATS_BH(sock_net(sk), - LINUX_MIB_BUSYPOLLRXPACKETS, rc); - cpu_relax(); - - } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) && - !need_resched() && !busy_loop_timeout(end_time)); - - rc = !skb_queue_empty(&sk->sk_receive_queue); -out: - rcu_read_unlock_bh(); - return rc; -} +bool sk_busy_loop(struct sock *sk, int nonblock); /* used in the NIC receive handler to mark the skb */ static inline void skb_mark_napi_id(struct sk_buff *skb, -- cgit v1.2.3-71-gd317 From f5c4a42a7549cbc29dbac2b5ede05a3bc2bb4522 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 5 Nov 2015 07:09:59 +0100 Subject: Bluetooth: Add hci_skb_* helper wrappers for bt_cb(skb) access For all the HCI driver related variables accesssed via bt_cb(skb), provide helper wrappers. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 42844d7b154a..663e0ef1eaa0 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -316,6 +316,10 @@ struct bt_skb_cb { }; #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) +#define hci_skb_pkt_type(skb) bt_cb((skb))->pkt_type +#define hci_skb_expect(skb) bt_cb((skb))->expect +#define hci_skb_opcode(skb) bt_cb((skb))->hci.opcode + static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) { struct sk_buff *skb; -- cgit v1.2.3-71-gd317 From 5fcc86bd2695d4ccad2d87cb424f4c01a4e544f3 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 5 Nov 2015 09:31:39 +0200 Subject: Bluetooth: Remove redundant setting to zero of bt_cb The socket allocation functions will always memset skb->cb to zero so there's no need to make other initializations needing the same thing. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 663e0ef1eaa0..a85e6d3d75ef 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -325,10 +325,8 @@ static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) struct sk_buff *skb; skb = alloc_skb(len + BT_SKB_RESERVE, how); - if (skb) { + if (skb) skb_reserve(skb, BT_SKB_RESERVE); - bt_cb(skb)->incoming = 0; - } return skb; } @@ -338,10 +336,8 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, struct sk_buff *skb; skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err); - if (skb) { + if (skb) skb_reserve(skb, BT_SKB_RESERVE); - bt_cb(skb)->incoming = 0; - } if (!skb && *err) return NULL; -- cgit v1.2.3-71-gd317 From 44d271377479c4d4fe7f2d07d188656684773fbd Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 5 Nov 2015 09:31:40 +0200 Subject: Bluetooth: Compress the size of struct hci_ctrl We can reduce the size of the hci_ctrl struct by converting 'bool req_start' to 'u8 req_flags' and making the two function pointers a union (since only one is ever set at a time). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 11 ++++++++--- net/bluetooth/hci_core.c | 14 +++++++------- net/bluetooth/hci_request.c | 10 +++++++--- net/bluetooth/hci_sock.c | 2 +- 4 files changed, 23 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index a85e6d3d75ef..8d38f411009c 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -296,12 +296,17 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode); typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb); +#define HCI_REQ_START BIT(0) +#define HCI_REQ_SKB BIT(1) + struct hci_ctrl { __u16 opcode; - bool req_start; + u8 req_flags; u8 req_event; - hci_req_complete_t req_complete; - hci_req_complete_skb_t req_complete_skb; + union { + hci_req_complete_t req_complete; + hci_req_complete_skb_t req_complete_skb; + }; }; struct bt_skb_cb { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index db26cbd1cd9d..bc97fc6de876 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3695,7 +3695,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, /* Stand-alone HCI commands must be flagged as * single-command requests. */ - bt_cb(skb)->hci.req_start = true; + bt_cb(skb)->hci.req_flags |= HCI_REQ_START; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); @@ -4392,7 +4392,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev) if (!skb) return true; - return bt_cb(skb)->hci.req_start; + return (bt_cb(skb)->hci.req_flags & HCI_REQ_START); } static void hci_resend_last(struct hci_dev *hdev) @@ -4452,20 +4452,20 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, * callback would be found in hdev->sent_cmd instead of the * command queue (hdev->cmd_q). */ - if (bt_cb(hdev->sent_cmd)->hci.req_complete) { - *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; + if (bt_cb(hdev->sent_cmd)->hci.req_flags & HCI_REQ_SKB) { + *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; return; } - if (bt_cb(hdev->sent_cmd)->hci.req_complete_skb) { - *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; + if (bt_cb(hdev->sent_cmd)->hci.req_complete) { + *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; return; } /* Remove all pending commands belonging to this request */ spin_lock_irqsave(&hdev->cmd_q.lock, flags); while ((skb = __skb_dequeue(&hdev->cmd_q))) { - if (bt_cb(skb)->hci.req_start) { + if (bt_cb(skb)->hci.req_flags & HCI_REQ_START) { __skb_queue_head(&hdev->cmd_q, skb); break; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index bdb170995966..5ba27c30e8f2 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -56,8 +56,12 @@ static int req_run(struct hci_request *req, hci_req_complete_t complete, return -ENODATA; skb = skb_peek_tail(&req->cmd_q); - bt_cb(skb)->hci.req_complete = complete; - bt_cb(skb)->hci.req_complete_skb = complete_skb; + if (complete) { + bt_cb(skb)->hci.req_complete = complete; + } else if (complete_skb) { + bt_cb(skb)->hci.req_complete_skb = complete_skb; + bt_cb(skb)->hci.req_flags |= HCI_REQ_SKB; + } spin_lock_irqsave(&hdev->cmd_q.lock, flags); skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); @@ -128,7 +132,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, } if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->hci.req_start = true; + bt_cb(skb)->hci.req_flags |= HCI_REQ_START; bt_cb(skb)->hci.req_event = event; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 235ad0fa3571..19b23013c4f6 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1249,7 +1249,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, /* Stand-alone HCI commands must be flagged as * single-command requests. */ - bt_cb(skb)->hci.req_start = true; + bt_cb(skb)->hci.req_flags |= HCI_REQ_START; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); -- cgit v1.2.3-71-gd317 From dd31506d4aece48943802c2bca3f1f7d2e7266b4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 8 Nov 2015 07:47:12 +0100 Subject: Bluetooth: Add support for sending system notes to monitor channel The monitor channel can be used to send generic system notes as text strings for debugging purposes. This adds the system note monitor code and uses it for including kernel and subsystem version into traces. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 2 ++ include/net/bluetooth/hci_mon.h | 1 + net/bluetooth/af_bluetooth.c | 8 +++----- net/bluetooth/hci_sock.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 8d38f411009c..bfd1590821d6 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -29,6 +29,8 @@ #include #include +#define BT_SUBSYS_VERSION "2.21" + #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 #define PF_BLUETOOTH AF_BLUETOOTH diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 2b67567cf28d..c91bb23eb29e 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -43,6 +43,7 @@ struct hci_mon_hdr { #define HCI_MON_CLOSE_INDEX 9 #define HCI_MON_INDEX_INFO 10 #define HCI_MON_VENDOR_DIAG 11 +#define HCI_MON_SYSTEM_NOTE 12 struct hci_mon_new_index { __u8 type; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index a3bffd1ec2b4..34c53d5862f6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -33,8 +33,6 @@ #include "selftest.h" -#define VERSION "2.21" - /* Bluetooth sockets */ #define BT_MAX_PROTO 8 static const struct net_proto_family *bt_proto[BT_MAX_PROTO]; @@ -715,7 +713,7 @@ static int __init bt_init(void) sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); - BT_INFO("Core ver %s", VERSION); + BT_INFO("Core ver %s", BT_SUBSYS_VERSION); err = bt_selftest(); if (err < 0) @@ -789,7 +787,7 @@ subsys_initcall(bt_init); module_exit(bt_exit); MODULE_AUTHOR("Marcel Holtmann "); -MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); -MODULE_VERSION(VERSION); +MODULE_DESCRIPTION("Bluetooth Core ver " BT_SUBSYS_VERSION); +MODULE_VERSION(BT_SUBSYS_VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_BLUETOOTH); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 18a41eae295c..710265c35d16 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -26,6 +26,8 @@ #include #include +#include +#include #include #include @@ -383,6 +385,29 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return skb; } +static void send_monitor_note(struct sock *sk, const char *text) +{ + size_t len = strlen(text); + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(len + 1, GFP_ATOMIC); + if (!skb) + return; + + strcpy(skb_put(skb, len + 1), text); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_SYSTEM_NOTE); + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + if (sock_queue_rcv_skb(sk, skb)) + kfree_skb(skb); +} + static void send_monitor_replay(struct sock *sk) { struct hci_dev *hdev; @@ -872,6 +897,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, */ hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + send_monitor_note(sk, "Linux version " UTS_RELEASE + " (" UTS_MACHINE ")"); + send_monitor_note(sk, "Bluetooth subsystem version " + BT_SUBSYS_VERSION); send_monitor_replay(sk); atomic_inc(&monitor_promisc); -- cgit v1.2.3-71-gd317 From ac71494934c475e3f51e5e3e64a12f57618d82a4 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 8 Nov 2015 07:47:13 +0100 Subject: Bluetooth: Add support for controller specific logging To enable controller specific logging, the userspace daemon has to have the ability to log per controller. To facilitate this support, provide a dedicated logging channel. Messages in this channel will be included in the monitor queue and with that also forwarded to monitoring tools along with the actual hardware traces. All messages from the logging channel are timestamped and with that allow an easy correlation between userspace messages and hardware events. This will increase the ability to debug problems faster. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_mon.h | 1 + include/net/bluetooth/hci_sock.h | 1 + net/bluetooth/hci_sock.c | 102 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index c91bb23eb29e..587d0131b349 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -44,6 +44,7 @@ struct hci_mon_hdr { #define HCI_MON_INDEX_INFO 10 #define HCI_MON_VENDOR_DIAG 11 #define HCI_MON_SYSTEM_NOTE 12 +#define HCI_MON_USER_LOGGING 13 struct hci_mon_new_index { __u8 type; diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h index 9a46d665c1b5..8e9138acdae1 100644 --- a/include/net/bluetooth/hci_sock.h +++ b/include/net/bluetooth/hci_sock.h @@ -45,6 +45,7 @@ struct sockaddr_hci { #define HCI_CHANNEL_USER 1 #define HCI_CHANNEL_MONITOR 2 #define HCI_CHANNEL_CONTROL 3 +#define HCI_CHANNEL_LOGGING 4 struct hci_filter { unsigned long type_mask; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 710265c35d16..41f579ba447b 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -906,6 +906,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, atomic_inc(&monitor_promisc); break; + case HCI_CHANNEL_LOGGING: + if (haddr.hci_dev != HCI_DEV_NONE) { + err = -EINVAL; + goto done; + } + + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + goto done; + } + break; + default: if (!hci_mgmt_chan_find(haddr.hci_channel)) { err = -EINVAL; @@ -1033,6 +1045,9 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, if (flags & MSG_OOB) return -EOPNOTSUPP; + if (hci_pi(sk)->channel == HCI_CHANNEL_LOGGING) + return -EOPNOTSUPP; + if (sk->sk_state == BT_CLOSED) return 0; @@ -1179,6 +1194,90 @@ done: return err; } +static int hci_logging_frame(struct sock *sk, struct msghdr *msg, int len) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + struct hci_dev *hdev; + u16 index; + int err; + + /* The logging frame consists at minimum of the standard header, + * the priority byte, the ident length byte and at least one string + * terminator NUL byte. Anything shorter are invalid packets. + */ + if (len < sizeof(*hdr) + 3) + return -EINVAL; + + skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return err; + + if (memcpy_from_msg(skb_put(skb, len), msg, len)) { + err = -EFAULT; + goto drop; + } + + hdr = (void *)skb->data; + + if (__le16_to_cpu(hdr->len) != len - sizeof(*hdr)) { + err = -EINVAL; + goto drop; + } + + if (__le16_to_cpu(hdr->opcode) == 0x0000) { + __u8 priority = skb->data[sizeof(*hdr)]; + __u8 ident_len = skb->data[sizeof(*hdr) + 1]; + + /* Only the priorities 0-7 are valid and with that any other + * value results in an invalid packet. + * + * The priority byte is followed by an ident length byte and + * the NUL terminated ident string. Check that the ident + * length is not overflowing the packet and also that the + * ident string itself is NUL terminated. In case the ident + * length is zero, the length value actually doubles as NUL + * terminator identifier. + * + * The message follows the ident string (if present) and + * must be NUL terminated. Otherwise it is not a valid packet. + */ + if (priority > 7 || skb->data[len - 1] != 0x00 || + ident_len > len - sizeof(*hdr) - 3 || + skb->data[sizeof(*hdr) + ident_len + 1] != 0x00) { + err = -EINVAL; + goto drop; + } + } else { + err = -EINVAL; + goto drop; + } + + index = __le16_to_cpu(hdr->index); + + if (index != MGMT_INDEX_NONE) { + hdev = hci_dev_get(index); + if (!hdev) { + err = -ENODEV; + goto drop; + } + } else { + hdev = NULL; + } + + hdr->opcode = cpu_to_le16(HCI_MON_USER_LOGGING); + + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, HCI_SOCK_TRUSTED, NULL); + err = len; + + if (hdev) + hci_dev_put(hdev); + +drop: + kfree_skb(skb); + return err; +} + static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1208,6 +1307,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, case HCI_CHANNEL_MONITOR: err = -EOPNOTSUPP; goto done; + case HCI_CHANNEL_LOGGING: + err = hci_logging_frame(sk, msg, len); + goto done; default: mutex_lock(&mgmt_chan_list_lock); chan = __hci_mgmt_chan_find(hci_pi(sk)->channel); -- cgit v1.2.3-71-gd317 From 030e7f8141a262e32dc064d7cf12377d769d45c2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 10 Nov 2015 09:44:53 +0200 Subject: Bluetooth: Remove unnecessary call to hci_update_background_scan The hci_conn_params_clear_all() function is only called from hci_unregister_dev() at which point it's completely futile to try to do any LE scanning updates. Simply remove this unnecessary function call. At the same time we can make the function static since it's only accessed from within the same c-file. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_core.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1878d0a96333..15e6a2bffc2b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1036,7 +1036,6 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); -void hci_conn_params_clear_all(struct hci_dev *hdev); void hci_conn_params_clear_disabled(struct hci_dev *hdev); struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bc97fc6de876..ea648e9913f9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3070,15 +3070,13 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev) } /* This function requires the caller holds hdev->lock */ -void hci_conn_params_clear_all(struct hci_dev *hdev) +static void hci_conn_params_clear_all(struct hci_dev *hdev) { struct hci_conn_params *params, *tmp; list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) hci_conn_params_free(params); - hci_update_background_scan(hdev); - BT_DBG("All LE connection parameters were removed"); } -- cgit v1.2.3-71-gd317 From 2e93e53b8f86fb38a9a3c3bd08e539c40b3f8d89 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:17 +0200 Subject: Bluetooth: Run all background scan updates through req_workqueue Instead of firing off a simple async request queue all background scan updates through req_workqueue and use hci_req_sync() there to ensure that no two updates overlap with each other. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 ++ net/bluetooth/hci_request.c | 39 +++++++++++++++++---------------------- net/bluetooth/hci_request.h | 6 +++++- net/bluetooth/mgmt.c | 2 +- 4 files changed, 25 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 15e6a2bffc2b..c2ca6a58d1e0 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -327,6 +327,8 @@ struct hci_dev { struct work_struct cmd_work; struct work_struct tx_work; + struct work_struct bg_scan_update; + struct sk_buff_head rx_q; struct sk_buff_head raw_q; struct sk_buff_head cmd_q; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index d48206277fe4..0adbb59ec2f0 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -731,28 +731,6 @@ void __hci_update_background_scan(struct hci_request *req) } } -static void update_background_scan_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - if (status) - BT_DBG("HCI request failed to update background scanning: " - "status 0x%2.2x", status); -} - -void hci_update_background_scan(struct hci_dev *hdev) -{ - int err; - struct hci_request req; - - hci_req_init(&req, hdev); - - __hci_update_background_scan(&req); - - err = hci_req_run(&req, update_background_scan_complete); - if (err && err != -ENODATA) - BT_ERR("Failed to run HCI request: err %d", err); -} - void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { @@ -846,10 +824,27 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) return 0; } +static void update_bg_scan(struct hci_request *req, unsigned long opt) +{ + hci_dev_lock(req->hdev); + __hci_update_background_scan(req); + hci_dev_unlock(req->hdev); +} + +static void bg_scan_update(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + bg_scan_update); + + hci_req_sync(hdev, update_bg_scan, 0, HCI_CMD_TIMEOUT); +} + void hci_request_setup(struct hci_dev *hdev) { + INIT_WORK(&hdev->bg_scan_update, bg_scan_update); } void hci_request_cancel_all(struct hci_dev *hdev) { + cancel_work_sync(&hdev->bg_scan_update); } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 9759b7175f8e..983e687fee22 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -64,12 +64,16 @@ void __hci_update_page_scan(struct hci_request *req); int hci_update_random_address(struct hci_request *req, bool require_privacy, u8 *own_addr_type); -void hci_update_background_scan(struct hci_dev *hdev); void __hci_update_background_scan(struct hci_request *req); int hci_abort_conn(struct hci_conn *conn, u8 reason); void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason); +static inline void hci_update_background_scan(struct hci_dev *hdev) +{ + queue_work(hdev->req_workqueue, &hdev->bg_scan_update); +} + void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7f22119276f3..29c9fec814b4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2510,8 +2510,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) hci_req_init(&req, hdev); update_adv_data(&req); update_scan_rsp_data(&req); - __hci_update_background_scan(&req); hci_req_run(&req, NULL); + hci_update_background_scan(hdev); } unlock: -- cgit v1.2.3-71-gd317 From 4ebeee2dff9815619be6ff9a845d33716f48468c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:19 +0200 Subject: Bluetooth: Add HCI status return parameter to hci_req_sync() In some cases it may be important to get the exact HCI status rather than the converted HCI-to-errno value. Add an optional return parameter to the hci_req_sync() API to allow for this. Since there are no good HCI translation candidates for cancelation and timeout, use the "unknown" status code for those cases. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 3 ++- net/bluetooth/hci_core.c | 26 +++++++++++++------------- net/bluetooth/hci_request.c | 12 +++++++++--- net/bluetooth/hci_request.h | 4 ++-- 4 files changed, 26 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 0205b80cc90b..cc2216727655 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -452,7 +452,8 @@ enum { #define HCI_ERROR_REMOTE_POWER_OFF 0x15 #define HCI_ERROR_LOCAL_HOST_TERM 0x16 #define HCI_ERROR_PAIRING_NOT_ALLOWED 0x18 -#define HCI_ERROR_INVALID_LL_PARAMS 0x1E +#define HCI_ERROR_INVALID_LL_PARAMS 0x1e +#define HCI_ERROR_UNSPECIFIED 0x1f #define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c /* Flow control modes */ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 965bc01a0d91..029d7798cffa 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -768,14 +768,14 @@ static int __hci_init(struct hci_dev *hdev) { int err; - err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; if (hci_dev_test_flag(hdev, HCI_SETUP)) hci_debugfs_create_basic(hdev); - err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; @@ -786,11 +786,11 @@ static int __hci_init(struct hci_dev *hdev) if (hdev->dev_type != HCI_BREDR) return 0; - err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; - err = __hci_req_sync(hdev, hci_init4_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init4_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; @@ -846,7 +846,7 @@ static int __hci_unconf_init(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return 0; - err = __hci_req_sync(hdev, hci_init0_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init0_req, 0, HCI_INIT_TIMEOUT, NULL); if (err < 0) return err; @@ -1204,7 +1204,7 @@ int hci_inquiry(void __user *arg) if (do_inquiry) { err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir, - timeo); + timeo, NULL); if (err < 0) goto done; @@ -1570,7 +1570,7 @@ int hci_dev_do_close(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) && !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { set_bit(HCI_INIT, &hdev->flags); - __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); + __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT, NULL); clear_bit(HCI_INIT, &hdev->flags); } @@ -1667,7 +1667,7 @@ static int hci_dev_do_reset(struct hci_dev *hdev) atomic_set(&hdev->cmd_cnt, 1); hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; - ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); + ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT, NULL); hci_req_sync_unlock(hdev); return ret; @@ -1802,7 +1802,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) switch (cmd) { case HCISETAUTH: err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); break; case HCISETENCRYPT: @@ -1814,18 +1814,18 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!test_bit(HCI_AUTH, &hdev->flags)) { /* Auth must be enabled first */ err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); if (err) break; } err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); break; case HCISETSCAN: err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); /* Ensure that the connectable and discoverable states * get correctly modified as this was a non-mgmt change. @@ -1836,7 +1836,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) case HCISETLINKPOL: err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt, - HCI_INIT_TIMEOUT); + HCI_INIT_TIMEOUT, NULL); break; case HCISETLINKMODE: diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 0adbb59ec2f0..b1d4d5bba7c1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -186,7 +186,7 @@ EXPORT_SYMBOL(__hci_cmd_sync); /* Execute request and wait for completion. */ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout) + unsigned long opt, u32 timeout, u8 *hci_status) { struct hci_request req; DECLARE_WAITQUEUE(wait, current); @@ -231,14 +231,20 @@ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, switch (hdev->req_status) { case HCI_REQ_DONE: err = -bt_to_errno(hdev->req_result); + if (hci_status) + *hci_status = hdev->req_result; break; case HCI_REQ_CANCELED: err = -hdev->req_result; + if (hci_status) + *hci_status = HCI_ERROR_UNSPECIFIED; break; default: err = -ETIMEDOUT; + if (hci_status) + *hci_status = HCI_ERROR_UNSPECIFIED; break; } @@ -251,7 +257,7 @@ int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout) + unsigned long opt, u32 timeout, u8 *hci_status) { int ret; @@ -260,7 +266,7 @@ int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, /* Serialize all requests */ hci_req_sync_lock(hdev); - ret = __hci_req_sync(hdev, req, opt, timeout); + ret = __hci_req_sync(hdev, req, opt, timeout, hci_status); hci_req_sync_unlock(hdev); return ret; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 983e687fee22..8441d12a62dd 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -46,10 +46,10 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, int hci_req_sync(struct hci_dev *hdev, void (*req)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout); + unsigned long opt, u32 timeout, u8 *hci_status); int __hci_req_sync(struct hci_dev *hdev, void (*func)(struct hci_request *req, unsigned long opt), - unsigned long opt, __u32 timeout); + unsigned long opt, u32 timeout, u8 *hci_status); void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, -- cgit v1.2.3-71-gd317 From 7c1fbed23981faff2840ddc8909e7c78d80ade30 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:11:23 +0200 Subject: Bluetooth: Move LE scan disable/restart behind req_workqueue To avoid any risks of races, place also these LE scan modification work callbacks behind the same work queue as the other LE scan changes. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 5 +- net/bluetooth/hci_core.c | 168 ------------------------------------ net/bluetooth/hci_request.c | 179 +++++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 4 +- 4 files changed, 183 insertions(+), 173 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c2ca6a58d1e0..1f75aebbd8c4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -328,6 +328,8 @@ struct hci_dev { struct work_struct tx_work; struct work_struct bg_scan_update; + struct delayed_work le_scan_disable; + struct delayed_work le_scan_restart; struct sk_buff_head rx_q; struct sk_buff_head raw_q; @@ -372,9 +374,6 @@ struct hci_dev { DECLARE_BITMAP(dev_flags, __HCI_NUM_FLAGS); - struct delayed_work le_scan_disable; - struct delayed_work le_scan_restart; - __s8 adv_tx_power; __u8 adv_data[HCI_MAX_AD_LENGTH]; __u8 adv_data_len; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 029d7798cffa..0655521dd8bc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1527,9 +1527,6 @@ int hci_dev_do_close(struct hci_dev *hdev) if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE)) cancel_delayed_work(&hdev->service_cache); - cancel_delayed_work_sync(&hdev->le_scan_disable); - cancel_delayed_work_sync(&hdev->le_scan_restart); - if (hci_dev_test_flag(hdev, HCI_MGMT)) cancel_delayed_work_sync(&hdev->rpa_expired); @@ -2889,169 +2886,6 @@ static void hci_conn_params_clear_all(struct hci_dev *hdev) BT_DBG("All LE connection parameters were removed"); } -static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - if (status) { - BT_ERR("Failed to start inquiry: status %d", status); - - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - return; - } -} - -static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - struct hci_cp_inquiry cp; - int err; - - if (status) { - BT_ERR("Failed to disable LE scanning: status %d", status); - return; - } - - hdev->discovery.scan_start = 0; - - switch (hdev->discovery.type) { - case DISCOV_TYPE_LE: - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - break; - - case DISCOV_TYPE_INTERLEAVED: - hci_dev_lock(hdev); - - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, - &hdev->quirks)) { - /* If we were running LE only scan, change discovery - * state. If we were running both LE and BR/EDR inquiry - * simultaneously, and BR/EDR inquiry is already - * finished, stop discovery, otherwise BR/EDR inquiry - * will stop discovery when finished. If we will resolve - * remote device name, do not change discovery state. - */ - if (!test_bit(HCI_INQUIRY, &hdev->flags) && - hdev->discovery.state != DISCOVERY_RESOLVING) - hci_discovery_set_state(hdev, - DISCOVERY_STOPPED); - } else { - struct hci_request req; - - hci_inquiry_cache_flush(hdev); - - hci_req_init(&req, hdev); - - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; - hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); - - err = hci_req_run(&req, inquiry_complete); - if (err) { - BT_ERR("Inquiry request failed: err %d", err); - hci_discovery_set_state(hdev, - DISCOVERY_STOPPED); - } - } - - hci_dev_unlock(hdev); - break; - } -} - -static void le_scan_disable_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_disable.work); - struct hci_request req; - int err; - - BT_DBG("%s", hdev->name); - - cancel_delayed_work_sync(&hdev->le_scan_restart); - - hci_req_init(&req, hdev); - - hci_req_add_le_scan_disable(&req); - - err = hci_req_run(&req, le_scan_disable_work_complete); - if (err) - BT_ERR("Disable LE scanning request failed: err %d", err); -} - -static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - unsigned long timeout, duration, scan_start, now; - - BT_DBG("%s", hdev->name); - - if (status) { - BT_ERR("Failed to restart LE scan: status %d", status); - return; - } - - if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || - !hdev->discovery.scan_start) - return; - - /* When the scan was started, hdev->le_scan_disable has been queued - * after duration from scan_start. During scan restart this job - * has been canceled, and we need to queue it again after proper - * timeout, to make sure that scan does not run indefinitely. - */ - duration = hdev->discovery.scan_duration; - scan_start = hdev->discovery.scan_start; - now = jiffies; - if (now - scan_start <= duration) { - int elapsed; - - if (now >= scan_start) - elapsed = now - scan_start; - else - elapsed = ULONG_MAX - scan_start + now; - - timeout = duration - elapsed; - } else { - timeout = 0; - } - queue_delayed_work(hdev->workqueue, - &hdev->le_scan_disable, timeout); -} - -static void le_scan_restart_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_restart.work); - struct hci_request req; - struct hci_cp_le_set_scan_enable cp; - int err; - - BT_DBG("%s", hdev->name); - - /* If controller is not scanning we are done. */ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return; - - hci_req_init(&req, hdev); - - hci_req_add_le_scan_disable(&req); - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_ENABLE; - cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - - err = hci_req_run(&req, le_scan_restart_work_complete); - if (err) - BT_ERR("Restart LE scan request failed: err %d", err); -} - /* Copy the Identity Address of the controller. * * If the controller has a public BD_ADDR, then by default use that one. @@ -3151,8 +2985,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); - INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); - INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, hci_adv_timeout_expire); skb_queue_head_init(&hdev->rx_q); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 8aa06cc545c3..4588fe2bfc0e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -858,12 +858,191 @@ static void bg_scan_update(struct work_struct *work) hci_dev_unlock(hdev); } +static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + if (status) { + BT_ERR("Failed to start inquiry: status %d", status); + + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + return; + } +} + +static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +{ + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; + struct hci_cp_inquiry cp; + int err; + + if (status) { + BT_ERR("Failed to disable LE scanning: status %d", status); + return; + } + + hdev->discovery.scan_start = 0; + + switch (hdev->discovery.type) { + case DISCOV_TYPE_LE: + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + break; + + case DISCOV_TYPE_INTERLEAVED: + hci_dev_lock(hdev); + + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, + &hdev->quirks)) { + /* If we were running LE only scan, change discovery + * state. If we were running both LE and BR/EDR inquiry + * simultaneously, and BR/EDR inquiry is already + * finished, stop discovery, otherwise BR/EDR inquiry + * will stop discovery when finished. If we will resolve + * remote device name, do not change discovery state. + */ + if (!test_bit(HCI_INQUIRY, &hdev->flags) && + hdev->discovery.state != DISCOVERY_RESOLVING) + hci_discovery_set_state(hdev, + DISCOVERY_STOPPED); + } else { + struct hci_request req; + + hci_inquiry_cache_flush(hdev); + + hci_req_init(&req, hdev); + + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; + hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + err = hci_req_run(&req, inquiry_complete); + if (err) { + BT_ERR("Inquiry request failed: err %d", err); + hci_discovery_set_state(hdev, + DISCOVERY_STOPPED); + } + } + + hci_dev_unlock(hdev); + break; + } +} + +static void le_scan_disable(struct hci_request *req, unsigned long opt) +{ + hci_req_add_le_scan_disable(req); +} + +static void le_scan_disable_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_disable.work); + u8 status; + int err; + + BT_DBG("%s", hdev->name); + + cancel_delayed_work(&hdev->le_scan_restart); + + err = hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status); + if (err) + return; + + le_scan_disable_work_complete(hdev, status); +} + +static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status) +{ + unsigned long timeout, duration, scan_start, now; + + BT_DBG("%s", hdev->name); + + if (status) { + BT_ERR("Failed to restart LE scan: status %d", status); + return; + } + + hci_dev_lock(hdev); + + if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || + !hdev->discovery.scan_start) + goto unlock; + + /* When the scan was started, hdev->le_scan_disable has been queued + * after duration from scan_start. During scan restart this job + * has been canceled, and we need to queue it again after proper + * timeout, to make sure that scan does not run indefinitely. + */ + duration = hdev->discovery.scan_duration; + scan_start = hdev->discovery.scan_start; + now = jiffies; + if (now - scan_start <= duration) { + int elapsed; + + if (now >= scan_start) + elapsed = now - scan_start; + else + elapsed = ULONG_MAX - scan_start + now; + + timeout = duration - elapsed; + } else { + timeout = 0; + } + + queue_delayed_work(hdev->req_workqueue, + &hdev->le_scan_disable, timeout); + +unlock: + hci_dev_unlock(hdev); +} + +static void le_scan_restart(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_enable cp; + + /* If controller is not scanning we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + return; + + hci_req_add_le_scan_disable(req); + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_ENABLE; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); +} + +static void le_scan_restart_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_restart.work); + u8 status; + int err; + + BT_DBG("%s", hdev->name); + + err = hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); + if (err) + return; + + le_scan_restart_work_complete(hdev, status); +} + void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->bg_scan_update, bg_scan_update); + INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); + INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); } void hci_request_cancel_all(struct hci_dev *hdev) { cancel_work_sync(&hdev->bg_scan_update); + cancel_delayed_work_sync(&hdev->le_scan_disable); + cancel_delayed_work_sync(&hdev->le_scan_restart); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index bb870c3aadae..a229cfd0530e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4367,7 +4367,7 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, hdev->discovery.scan_duration = timeout; } - queue_delayed_work(hdev->workqueue, + queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable, timeout); } @@ -8389,7 +8389,7 @@ static void restart_le_scan(struct hci_dev *hdev) hdev->discovery.scan_duration)) return; - queue_delayed_work(hdev->workqueue, &hdev->le_scan_restart, + queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_restart, DISCOV_LE_RESTART_DELAY); } -- cgit v1.2.3-71-gd317 From e68f072b7396574df5324e1cf93e4b0c92460735 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:30:30 +0200 Subject: Bluetooth: Move Start Discovery to req_workqueue Since discovery also deals with LE scanning it makes sense to move it behind the same req_workqueue as other LE scanning changes. This also simplifies the logic since we do many of the actions in a synchronous manner. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 + net/bluetooth/hci_request.c | 202 +++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 238 ++------------------------------------- 3 files changed, 213 insertions(+), 229 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1f75aebbd8c4..72ea8a6d7d70 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -327,6 +327,7 @@ struct hci_dev { struct work_struct cmd_work; struct work_struct tx_work; + struct work_struct discov_update; struct work_struct bg_scan_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -1473,6 +1474,7 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); +void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index ecfa4105e00d..da1e30b85e77 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1042,8 +1042,209 @@ static void le_scan_restart_work(struct work_struct *work) le_scan_restart_work_complete(hdev, status); } +static int bredr_inquiry(struct hci_request *req, unsigned long opt) +{ + struct hci_cp_inquiry cp; + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; + + BT_DBG("%s", req->hdev->name); + + hci_dev_lock(req->hdev); + hci_inquiry_cache_flush(req->hdev); + hci_dev_unlock(req->hdev); + + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_BREDR_INQUIRY_LEN; + + hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); + + return 0; +} + +static void cancel_adv_timeout(struct hci_dev *hdev) +{ + if (hdev->adv_instance_timeout) { + hdev->adv_instance_timeout = 0; + cancel_delayed_work(&hdev->adv_instance_expire); + } +} + +static void disable_advertising(struct hci_request *req) +{ + u8 enable = 0x00; + + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); +} + +static int active_scan(struct hci_request *req, unsigned long opt) +{ + uint16_t interval = opt; + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; + u8 own_addr_type; + int err; + + BT_DBG("%s", hdev->name); + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { + hci_dev_lock(hdev); + + /* Don't let discovery abort an outgoing connection attempt + * that's using directed advertising. + */ + if (hci_lookup_le_connect(hdev)) { + hci_dev_unlock(hdev); + return -EBUSY; + } + + cancel_adv_timeout(hdev); + hci_dev_unlock(hdev); + + disable_advertising(req); + } + + /* If controller is scanning, it means the background scanning is + * running. Thus, we should temporarily stop it in order to set the + * discovery scanning parameters. + */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) + hci_req_add_le_scan_disable(req); + + /* All active scans will be done with either a resolvable private + * address (when privacy feature has been enabled) or non-resolvable + * private address. + */ + err = hci_update_random_address(req, true, &own_addr_type); + if (err < 0) + own_addr_type = ADDR_LE_DEV_PUBLIC; + + memset(¶m_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_ACTIVE; + param_cp.interval = cpu_to_le16(interval); + param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); + param_cp.own_address_type = own_addr_type; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + ¶m_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); + + return 0; +} + +static int interleaved_discov(struct hci_request *req, unsigned long opt) +{ + int err; + + BT_DBG("%s", req->hdev->name); + + err = active_scan(req, opt); + if (err) + return err; + + return bredr_inquiry(req, opt); +} + +static void start_discovery(struct hci_dev *hdev, u8 *status) +{ + unsigned long timeout; + + BT_DBG("%s type %u", hdev->name, hdev->discovery.type); + + switch (hdev->discovery.type) { + case DISCOV_TYPE_BREDR: + if (!hci_dev_test_flag(hdev, HCI_INQUIRY)) + hci_req_sync(hdev, bredr_inquiry, 0, HCI_CMD_TIMEOUT, + status); + return; + case DISCOV_TYPE_INTERLEAVED: + /* When running simultaneous discovery, the LE scanning time + * should occupy the whole discovery time sine BR/EDR inquiry + * and LE scanning are scheduled by the controller. + * + * For interleaving discovery in comparison, BR/EDR inquiry + * and LE scanning are done sequentially with separate + * timeouts. + */ + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, + &hdev->quirks)) { + timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); + /* During simultaneous discovery, we double LE scan + * interval. We must leave some time for the controller + * to do BR/EDR inquiry. + */ + hci_req_sync(hdev, interleaved_discov, + DISCOV_LE_SCAN_INT * 2, HCI_CMD_TIMEOUT, + status); + break; + } + + timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); + hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT, + HCI_CMD_TIMEOUT, status); + break; + case DISCOV_TYPE_LE: + timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); + hci_req_sync(hdev, active_scan, DISCOV_LE_SCAN_INT, + HCI_CMD_TIMEOUT, status); + break; + default: + *status = HCI_ERROR_UNSPECIFIED; + return; + } + + if (*status) + return; + + BT_DBG("%s timeout %u ms", hdev->name, jiffies_to_msecs(timeout)); + + /* When service discovery is used and the controller has a + * strict duplicate filter, it is important to remember the + * start and duration of the scan. This is required for + * restarting scanning during the discovery phase. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) && + hdev->discovery.result_filtering) { + hdev->discovery.scan_start = jiffies; + hdev->discovery.scan_duration = timeout; + } + + queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable, + timeout); +} + +static void discov_update(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + discov_update); + u8 status = 0; + + switch (hdev->discovery.state) { + case DISCOVERY_STARTING: + start_discovery(hdev, &status); + mgmt_start_discovery_complete(hdev, status); + if (status) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + else + hci_discovery_set_state(hdev, DISCOVERY_FINDING); + break; + case DISCOVERY_STOPPED: + default: + return; + } +} + void hci_request_setup(struct hci_dev *hdev) { + INIT_WORK(&hdev->discov_update, discov_update); INIT_WORK(&hdev->bg_scan_update, bg_scan_update); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); @@ -1051,6 +1252,7 @@ void hci_request_setup(struct hci_dev *hdev) void hci_request_cancel_all(struct hci_dev *hdev) { + cancel_work_sync(&hdev->discov_update); cancel_work_sync(&hdev->bg_scan_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e634b4d85249..63b099471c92 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4164,145 +4164,9 @@ done: return err; } -static bool trigger_bredr_inquiry(struct hci_request *req, u8 *status) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_inquiry cp; - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - - *status = mgmt_bredr_support(hdev); - if (*status) - return false; - - if (hci_dev_test_flag(hdev, HCI_INQUIRY)) { - *status = MGMT_STATUS_BUSY; - return false; - } - - hci_inquiry_cache_flush(hdev); - - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = DISCOV_BREDR_INQUIRY_LEN; - - hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); - - return true; -} - -static bool trigger_le_scan(struct hci_request *req, u16 interval, u8 *status) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - u8 own_addr_type; - int err; - - *status = mgmt_le_support(hdev); - if (*status) - return false; - - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) { - /* Don't let discovery abort an outgoing connection attempt - * that's using directed advertising. - */ - if (hci_lookup_le_connect(hdev)) { - *status = MGMT_STATUS_REJECTED; - return false; - } - - cancel_adv_timeout(hdev); - disable_advertising(req); - } - - /* If controller is scanning, it means the background scanning is - * running. Thus, we should temporarily stop it in order to set the - * discovery scanning parameters. - */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req); - - /* All active scans will be done with either a resolvable private - * address (when privacy feature has been enabled) or non-resolvable - * private address. - */ - err = hci_update_random_address(req, true, &own_addr_type); - if (err < 0) { - *status = MGMT_STATUS_FAILED; - return false; - } - - memset(¶m_cp, 0, sizeof(param_cp)); - param_cp.type = LE_SCAN_ACTIVE; - param_cp.interval = cpu_to_le16(interval); - param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); - param_cp.own_address_type = own_addr_type; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - ¶m_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); - - return true; -} - -static bool trigger_discovery(struct hci_request *req, u8 *status) -{ - struct hci_dev *hdev = req->hdev; - - switch (hdev->discovery.type) { - case DISCOV_TYPE_BREDR: - if (!trigger_bredr_inquiry(req, status)) - return false; - break; - - case DISCOV_TYPE_INTERLEAVED: - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, - &hdev->quirks)) { - /* During simultaneous discovery, we double LE scan - * interval. We must leave some time for the controller - * to do BR/EDR inquiry. - */ - if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT * 2, - status)) - return false; - - if (!trigger_bredr_inquiry(req, status)) - return false; - - return true; - } - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { - *status = MGMT_STATUS_NOT_SUPPORTED; - return false; - } - /* fall through */ - - case DISCOV_TYPE_LE: - if (!trigger_le_scan(req, DISCOV_LE_SCAN_INT, status)) - return false; - break; - - default: - *status = MGMT_STATUS_INVALID_PARAMS; - return false; - } - - return true; -} - -static void start_discovery_complete(struct hci_dev *hdev, u8 status, - u16 opcode) +void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - unsigned long timeout; BT_DBG("status %d", status); @@ -4317,61 +4181,6 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, mgmt_pending_remove(cmd); } - if (status) { - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - goto unlock; - } - - hci_discovery_set_state(hdev, DISCOVERY_FINDING); - - /* If the scan involves LE scan, pick proper timeout to schedule - * hdev->le_scan_disable that will stop it. - */ - switch (hdev->discovery.type) { - case DISCOV_TYPE_LE: - timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); - break; - case DISCOV_TYPE_INTERLEAVED: - /* When running simultaneous discovery, the LE scanning time - * should occupy the whole discovery time sine BR/EDR inquiry - * and LE scanning are scheduled by the controller. - * - * For interleaving discovery in comparison, BR/EDR inquiry - * and LE scanning are done sequentially with separate - * timeouts. - */ - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) - timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); - else - timeout = msecs_to_jiffies(hdev->discov_interleaved_timeout); - break; - case DISCOV_TYPE_BREDR: - timeout = 0; - break; - default: - BT_ERR("Invalid discovery type %d", hdev->discovery.type); - timeout = 0; - break; - } - - if (timeout) { - /* When service discovery is used and the controller has - * a strict duplicate filter, it is important to remember - * the start and duration of the scan. This is required - * for restarting scanning during the discovery phase. - */ - if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, - &hdev->quirks) && - hdev->discovery.result_filtering) { - hdev->discovery.scan_start = jiffies; - hdev->discovery.scan_duration = timeout; - } - - queue_delayed_work(hdev->req_workqueue, - &hdev->le_scan_disable, timeout); - } - -unlock: hci_dev_unlock(hdev); } @@ -4407,7 +4216,6 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_start_discovery *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; u8 status; int err; @@ -4436,14 +4244,6 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len); - if (!cmd) { - err = -ENOMEM; - goto failed; - } - - cmd->cmd_complete = generic_cmd_complete; - /* Clear the discovery filter first to free any previously * allocated memory for the UUID list. */ @@ -4452,22 +4252,17 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hdev->discovery.type = cp->type; hdev->discovery.report_invalid_rssi = false; - hci_req_init(&req, hdev); - - if (!trigger_discovery(&req, &status)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - status, &cp->type, sizeof(cp->type)); - mgmt_pending_remove(cmd); + cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; } - err = hci_req_run(&req, start_discovery_complete); - if (err < 0) { - mgmt_pending_remove(cmd); - goto failed; - } + cmd->cmd_complete = generic_cmd_complete; hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + err = 0; failed: hci_dev_unlock(hdev); @@ -4486,7 +4281,6 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, { struct mgmt_cp_start_service_discovery *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; const u16 max_uuid_count = ((U16_MAX - sizeof(*cp)) / 16); u16 uuid_count, expected_len; u8 status; @@ -4574,23 +4368,9 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, } } - hci_req_init(&req, hdev); - - if (!trigger_discovery(&req, &status)) { - err = mgmt_cmd_complete(sk, hdev->id, - MGMT_OP_START_SERVICE_DISCOVERY, - status, &cp->type, sizeof(cp->type)); - mgmt_pending_remove(cmd); - goto failed; - } - - err = hci_req_run(&req, start_discovery_complete); - if (err < 0) { - mgmt_pending_remove(cmd); - goto failed; - } - hci_discovery_set_state(hdev, DISCOVERY_STARTING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + err = 0; failed: hci_dev_unlock(hdev); -- cgit v1.2.3-71-gd317 From 2154d3f4fb83c812a161c4910948dd876997e111 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 08:30:45 +0200 Subject: Bluetooth: Move Stop Discovery to req_workqueue Since discovery also deals with LE scanning it makes sense to move it behind the same req_workqueue as other LE scanning changes. This also simplifies the logic since we do many of the actions in a synchronous manner. Part of this refactoring is moving hci_req_stop_discovery() to hci_request.c. At the same time the function receives support for properly handling the STOPPING state since that's the state we'll be in when stopping through the req_workqueue. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_request.c | 62 ++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 3 ++ net/bluetooth/mgmt.c | 72 +++------------------------------------- 4 files changed, 71 insertions(+), 67 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 72ea8a6d7d70..609f4a03899c 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1475,6 +1475,7 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status); +void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status); void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index da1e30b85e77..3219ee66faad 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1221,6 +1221,62 @@ static void start_discovery(struct hci_dev *hdev, u8 *status) timeout); } +bool hci_req_stop_discovery(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct discovery_state *d = &hdev->discovery; + struct hci_cp_remote_name_req_cancel cp; + struct inquiry_entry *e; + bool ret = false; + + BT_DBG("%s state %u", hdev->name, hdev->discovery.state); + + if (d->state == DISCOVERY_FINDING || d->state == DISCOVERY_STOPPING) { + if (test_bit(HCI_INQUIRY, &hdev->flags)) + hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL); + + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { + cancel_delayed_work(&hdev->le_scan_disable); + hci_req_add_le_scan_disable(req); + } + + ret = true; + } else { + /* Passive scanning */ + if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { + hci_req_add_le_scan_disable(req); + ret = true; + } + } + + /* No further actions needed for LE-only discovery */ + if (d->type == DISCOV_TYPE_LE) + return ret; + + if (d->state == DISCOVERY_RESOLVING || d->state == DISCOVERY_STOPPING) { + e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, + NAME_PENDING); + if (!e) + return ret; + + bacpy(&cp.bdaddr, &e->data.bdaddr); + hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), + &cp); + ret = true; + } + + return ret; +} + +static int stop_discovery(struct hci_request *req, unsigned long opt) +{ + hci_dev_lock(req->hdev); + hci_req_stop_discovery(req); + hci_dev_unlock(req->hdev); + + return 0; +} + static void discov_update(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1236,6 +1292,12 @@ static void discov_update(struct work_struct *work) else hci_discovery_set_state(hdev, DISCOVERY_FINDING); break; + case DISCOVERY_STOPPING: + hci_req_sync(hdev, stop_discovery, 0, HCI_CMD_TIMEOUT, &status); + mgmt_stop_discovery_complete(hdev, status); + if (!status) + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + break; case DISCOVERY_STOPPED: default: return; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 1927013f5e67..6b9e59f7f7a9 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -58,6 +58,9 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); +/* Returns true if HCI commands were queued */ +bool hci_req_stop_discovery(struct hci_request *req); + void hci_update_page_scan(struct hci_dev *hdev); void __hci_update_page_scan(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 63b099471c92..8cdacef6b108 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1416,49 +1416,6 @@ static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) } } -static bool hci_stop_discovery(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_remote_name_req_cancel cp; - struct inquiry_entry *e; - - switch (hdev->discovery.state) { - case DISCOVERY_FINDING: - if (test_bit(HCI_INQUIRY, &hdev->flags)) - hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL); - - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - cancel_delayed_work(&hdev->le_scan_disable); - hci_req_add_le_scan_disable(req); - } - - return true; - - case DISCOVERY_RESOLVING: - e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, - NAME_PENDING); - if (!e) - break; - - bacpy(&cp.bdaddr, &e->data.bdaddr); - hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), - &cp); - - return true; - - default: - /* Passive scanning */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - hci_req_add_le_scan_disable(req); - return true; - } - - break; - } - - return false; -} - static void advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance) { @@ -1636,7 +1593,7 @@ static int clean_up_hci_state(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_LE_ADV)) disable_advertising(&req); - discov_stopped = hci_stop_discovery(&req); + discov_stopped = hci_req_stop_discovery(&req); list_for_each_entry(conn, &hdev->conn_hash.list, list) { /* 0x15 == Terminated due to Power Off */ @@ -4377,7 +4334,7 @@ failed: return err; } -static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) +void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; @@ -4391,9 +4348,6 @@ static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) mgmt_pending_remove(cmd); } - if (!status) - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); } @@ -4402,7 +4356,6 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_stop_discovery *mgmt_cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; int err; BT_DBG("%s", hdev->name); @@ -4431,24 +4384,9 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, cmd->cmd_complete = generic_cmd_complete; - hci_req_init(&req, hdev); - - hci_stop_discovery(&req); - - err = hci_req_run(&req, stop_discovery_complete); - if (!err) { - hci_discovery_set_state(hdev, DISCOVERY_STOPPING); - goto unlock; - } - - mgmt_pending_remove(cmd); - - /* If no HCI commands were sent we're done */ - if (err == -ENODATA) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, 0, - &mgmt_cp->type, sizeof(mgmt_cp->type)); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - } + hci_discovery_set_state(hdev, DISCOVERY_STOPPING); + queue_work(hdev->req_workqueue, &hdev->discov_update); + err = 0; unlock: hci_dev_unlock(hdev); -- cgit v1.2.3-71-gd317 From 0ad06aa6a7682319bb1adcc187a1fa8db6b2da2c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 11 Nov 2015 14:44:57 +0200 Subject: Bluetooth: Fix specifying role for LE connections The hci_connect_le_scan() is (as the name implies) a master/central role API, so it makes no sense in passing a role parameter to it. At the same time this patch also fixes the direct advertising support for LE L2CAP sockets where we now call the more appropriate hci_le_connect() API if slave/peripheral role is desired. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 +- net/bluetooth/hci_conn.c | 4 ++-- net/bluetooth/l2cap_core.c | 15 +++++++-------- net/bluetooth/mgmt.c | 3 +-- 4 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 609f4a03899c..55ce209157b1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -877,7 +877,7 @@ struct hci_chan *hci_chan_lookup_handle(struct hci_dev *hdev, __u16 handle); struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, - u16 conn_timeout, u8 role); + u16 conn_timeout); struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, u16 conn_timeout, u8 role); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1ed1e153b3fa..673c2254935b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -988,7 +988,7 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev, /* This function requires the caller holds hdev->lock */ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, u8 sec_level, - u16 conn_timeout, u8 role) + u16 conn_timeout) { struct hci_conn *conn; @@ -1018,7 +1018,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, BT_DBG("requesting refresh of dst_addr"); - conn = hci_conn_add(hdev, LE_LINK, dst, role); + conn = hci_conn_add(hdev, LE_LINK, dst, HCI_ROLE_MASTER); if (!conn) return ERR_PTR(-ENOMEM); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 66e8b6ee19a5..139da8106b04 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7113,8 +7113,6 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, chan->dcid = cid; if (bdaddr_type_is_le(dst_type)) { - u8 role; - /* Convert from L2CAP channel address type to HCI address type */ if (dst_type == BDADDR_LE_PUBLIC) @@ -7123,14 +7121,15 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, dst_type = ADDR_LE_DEV_RANDOM; if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - role = HCI_ROLE_SLAVE; + hcon = hci_connect_le(hdev, dst, dst_type, + chan->sec_level, + HCI_LE_CONN_TIMEOUT, + HCI_ROLE_SLAVE); else - role = HCI_ROLE_MASTER; + hcon = hci_connect_le_scan(hdev, dst, dst_type, + chan->sec_level, + HCI_LE_CONN_TIMEOUT); - hcon = hci_connect_le_scan(hdev, dst, dst_type, - chan->sec_level, - HCI_LE_CONN_TIMEOUT, - role); } else { u8 auth_type = l2cap_get_auth_type(chan); hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e4ad0457547a..eca203e891d2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3518,8 +3518,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr, addr_type, sec_level, - HCI_LE_CONN_TIMEOUT, - HCI_ROLE_MASTER); + HCI_LE_CONN_TIMEOUT); } if (IS_ERR(conn)) { -- cgit v1.2.3-71-gd317 From 40b25fe5dc57a6557b96241b75ae63dce716a487 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 19 Nov 2015 16:16:43 +0100 Subject: Bluetooth: Add support for Get Advertising Size Information command The Get Advertising Size Information command allows to retrieve size information for advertising data and scan response data fields depending on the selected flags. This is useful if applications want to know the available size ahead of time. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/mgmt.h | 13 ++++++++++ net/bluetooth/mgmt.c | 58 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index b831242d48a4..af17774c9416 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -571,6 +571,19 @@ struct mgmt_rp_remove_advertising { __u8 instance; } __packed; +#define MGMT_OP_GET_ADV_SIZE_INFO 0x0040 +struct mgmt_cp_get_adv_size_info { + __u8 instance; + __le32 flags; +} __packed; +#define MGMT_GET_ADV_SIZE_INFO_SIZE 5 +struct mgmt_rp_get_adv_size_info { + __u8 instance; + __le32 flags; + __u8 max_adv_data_len; + __u8 max_scan_rsp_len; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 05370e76feb0..dc8e428050d9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -102,6 +102,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_READ_ADV_FEATURES, MGMT_OP_ADD_ADVERTISING, MGMT_OP_REMOVE_ADVERTISING, + MGMT_OP_GET_ADV_SIZE_INFO, }; static const u16 mgmt_events[] = { @@ -7059,6 +7060,62 @@ unlock: return err; } +static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) +{ + u8 max_len = HCI_MAX_AD_LENGTH; + + if (is_adv_data) { + if (adv_flags & (MGMT_ADV_FLAG_DISCOV | + MGMT_ADV_FLAG_LIMITED_DISCOV | + MGMT_ADV_FLAG_MANAGED_FLAGS)) + max_len -= 3; + + if (adv_flags & MGMT_ADV_FLAG_TX_POWER) + max_len -= 3; + } + + return max_len; +} + +static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + struct mgmt_cp_get_adv_size_info *cp = data; + struct mgmt_rp_get_adv_size_info rp; + u32 flags, supported_flags; + int err; + + BT_DBG("%s", hdev->name); + + if (!lmp_le_capable(hdev)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_REJECTED); + + if (cp->instance < 1 || cp->instance > HCI_MAX_ADV_INSTANCES) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_INVALID_PARAMS); + + flags = __le32_to_cpu(cp->flags); + + /* The current implementation only supports a subset of the specified + * flags. + */ + supported_flags = get_supported_adv_flags(hdev); + if (flags & ~supported_flags) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_INVALID_PARAMS); + + rp.instance = cp->instance; + rp.flags = cp->flags; + rp.max_adv_data_len = tlv_data_max_len(flags, true); + rp.max_scan_rsp_len = tlv_data_max_len(flags, false); + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); + + return err; +} + static const struct hci_mgmt_handler mgmt_handlers[] = { { NULL }, /* 0x0000 (no command) */ { read_version, MGMT_READ_VERSION_SIZE, @@ -7146,6 +7203,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { add_advertising, MGMT_ADD_ADVERTISING_SIZE, HCI_MGMT_VAR_LEN }, { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, + { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) -- cgit v1.2.3-71-gd317 From cc30c16344fc3a25153175c7eb9037b2136cd466 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 20 Nov 2015 03:56:23 +0100 Subject: net: dsa: Add support for a switch reset gpio Some boards have a gpio line tied to the switch reset pin. Allow this gpio to be retrieved from the device tree, and take the switch out of reset before performing the probe. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/dsa.txt | 3 +++ include/net/dsa.h | 8 ++++++++ net/dsa/dsa.c | 17 +++++++++++++++++ 3 files changed, 28 insertions(+) (limited to 'include/net') diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt index 04e6bef3ac3f..5fdbbcdf8c4b 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.txt +++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt @@ -31,6 +31,8 @@ A switch child node has the following optional property: switch. Must be set if the switch can not detect the presence and/or size of a connected EEPROM, otherwise optional. +- reset-gpios : phandle and specifier to a gpio line connected to + reset pin of the switch chip. A switch may have multiple "port" children nodes @@ -114,6 +116,7 @@ Example: #size-cells = <0>; reg = <17 1>; /* MDIO address 17, switch 1 in tree */ mii-bus = <&mii_bus1>; + reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; switch1port0: port@0 { reg = <0>; diff --git a/include/net/dsa.h b/include/net/dsa.h index 82a4c6011173..3f23dd9d6a69 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -64,6 +65,13 @@ struct dsa_chip_data { * NULL if there is only one switch chip. */ s8 *rtable; + + /* + * A switch may have a GPIO line tied to its reset pin. Parse + * this from the device tree, and use it before performing + * switch soft reset. + */ + struct gpio_desc *reset; }; struct dsa_platform_data { diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1eba07feb34a..0b5565f923cc 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include "dsa_priv.h" @@ -688,6 +689,9 @@ static int dsa_of_probe(struct device *dev) const char *port_name; int chip_index, port_index; const unsigned int *sw_addr, *port_reg; + int gpio; + enum of_gpio_flags of_flags; + unsigned long flags; u32 eeprom_len; int ret; @@ -766,6 +770,19 @@ static int dsa_of_probe(struct device *dev) put_device(cd->host_dev); cd->host_dev = &mdio_bus_switch->dev; } + gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, + &of_flags); + if (gpio_is_valid(gpio)) { + flags = (of_flags == OF_GPIO_ACTIVE_LOW ? + GPIOF_ACTIVE_LOW : 0); + ret = devm_gpio_request_one(dev, gpio, flags, + "switch_reset"); + if (ret) + goto out_free_chip; + + cd->reset = gpio_to_desc(gpio); + gpiod_direction_output(cd->reset, 0); + } for_each_available_child_of_node(child, port) { port_reg = of_get_property(port, "reg", NULL); -- cgit v1.2.3-71-gd317 From 029f7f3b8701cc7aca8bdb31f0c7edd6a479e357 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Nov 2015 23:32:39 +0100 Subject: netfilter: ipv6: nf_defrag: avoid/free clone operations commit 6aafeef03b9d9ecf ("netfilter: push reasm skb through instead of original frag skbs") changed ipv6 defrag to not use the original skbs anymore. So rather than keeping the original skbs around just to discard them afterwards just use the original skbs directly for the fraglist of the newly assembled skb and remove the extra clone/free operations. The skb that completes the fragment queue is morphed into a the reassembled one instead, just like ipv4 defrag. openvswitch doesn't need any additional skb_morph magic anymore to deal with this situation so just remove that. A followup patch can then also remove the NF_HOOK (re)invocation in the ipv6 netfilter defrag hook. Cc: Joe Stringer Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 1 - net/ipv6/netfilter/nf_conntrack_reasm.c | 105 +++++++++++----------------- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 6 -- net/openvswitch/conntrack.c | 14 ---- 4 files changed, 40 insertions(+), 86 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index fb7da5bb76cc..fcd20cf8f5d5 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -6,7 +6,6 @@ void nf_defrag_ipv6_enable(void); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); -void nf_ct_frag6_consume_orig(struct sk_buff *skb); struct inet_frags_ctl; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d5efeb87350e..1a86a08adbe5 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -56,7 +56,6 @@ struct nf_ct_frag6_skb_cb { struct inet6_skb_parm h; int offset; - struct sk_buff *orig; }; #define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb)) @@ -170,12 +169,6 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q) return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); } -static void nf_skb_free(struct sk_buff *skb) -{ - if (NFCT_FRAG6_CB(skb)->orig) - kfree_skb(NFCT_FRAG6_CB(skb)->orig); -} - static void nf_ct_frag6_expire(unsigned long data) { struct frag_queue *fq; @@ -376,9 +369,9 @@ err: * the last and the first frames arrived and all the bits are here. */ static struct sk_buff * -nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) +nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { - struct sk_buff *fp, *op, *head = fq->q.fragments; + struct sk_buff *fp, *head = fq->q.fragments; int payload_len; u8 ecn; @@ -429,10 +422,38 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->csum = 0; clone->ip_summed = head->ip_summed; - NFCT_FRAG6_CB(clone)->orig = NULL; add_frag_mem_limit(fq->q.net, clone->truesize); } + /* morph head into last received skb: prev. + * + * This allows callers of ipv6 conntrack defrag to continue + * to use the last skb(frag) passed into the reasm engine. + * The last skb frag 'silently' turns into the full reassembled skb. + * + * Since prev is also part of q->fragments we have to clone it first. + */ + if (head != prev) { + struct sk_buff *iter; + + fp = skb_clone(prev, GFP_ATOMIC); + if (!fp) + goto out_oom; + + fp->next = prev->next; + skb_queue_walk(head, iter) { + if (iter->next != prev) + continue; + iter->next = fp; + break; + } + + skb_morph(prev, head); + prev->next = head->next; + consume_skb(head); + head = prev; + } + /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; @@ -473,21 +494,6 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) fq->q.fragments = NULL; fq->q.fragments_tail = NULL; - /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ - fp = skb_shinfo(head)->frag_list; - if (fp && NFCT_FRAG6_CB(fp)->orig == NULL) - /* at above code, head skb is divided into two skbs. */ - fp = fp->next; - - op = NFCT_FRAG6_CB(head)->orig; - for (; fp; fp = fp->next) { - struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig; - - op->next = orig; - op = orig; - NFCT_FRAG6_CB(fp)->orig = NULL; - } - return head; out_oversize: @@ -565,7 +571,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { - struct sk_buff *clone; struct net_device *dev = skb->dev; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -583,42 +588,30 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) return skb; - clone = skb_clone(skb, GFP_ATOMIC); - if (clone == NULL) { - pr_debug("Can't clone skb\n"); + if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) return skb; - } - NFCT_FRAG6_CB(clone)->orig = skb; - - if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { - pr_debug("message is too short.\n"); - goto ret_orig; - } - - skb_set_transport_header(clone, fhoff); - hdr = ipv6_hdr(clone); - fhdr = (struct frag_hdr *)skb_transport_header(clone); + skb_set_transport_header(skb, fhoff); + hdr = ipv6_hdr(skb); + fhdr = (struct frag_hdr *)skb_transport_header(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); - if (fq == NULL) { - pr_debug("Can't find and can't create new queue\n"); - goto ret_orig; - } + if (fq == NULL) + return skb; spin_lock_bh(&fq->q.lock); - if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { + if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) { spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); inet_frag_put(&fq->q, &nf_frags); - goto ret_orig; + return skb; } if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { - ret_skb = nf_ct_frag6_reasm(fq, dev); + ret_skb = nf_ct_frag6_reasm(fq, skb, dev); if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } @@ -626,26 +619,9 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use inet_frag_put(&fq->q, &nf_frags); return ret_skb; - -ret_orig: - kfree_skb(clone); - return skb; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); -void nf_ct_frag6_consume_orig(struct sk_buff *skb) -{ - struct sk_buff *s, *s2; - - for (s = NFCT_FRAG6_CB(skb)->orig; s;) { - s2 = s->next; - s->next = NULL; - consume_skb(s); - s = s2; - } -} -EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); - static int nf_ct_net_init(struct net *net) { int res; @@ -680,7 +656,6 @@ int nf_ct_frag6_init(void) nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; - nf_frags.skb_free = nf_skb_free; nf_frags.qsize = sizeof(struct frag_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 4fdbed5ebfb6..fb96b1018884 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -69,12 +69,6 @@ static unsigned int ipv6_defrag(void *priv, if (reasm == NULL) return NF_STOLEN; - /* error occurred or not fragmented */ - if (reasm == skb) - return NF_ACCEPT; - - nf_ct_frag6_consume_orig(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm, state->in, state->out, state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c2cc11168fd5..cac2169f2909 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -321,21 +321,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, if (!reasm) return -EINPROGRESS; - if (skb == reasm) { - kfree_skb(skb); - return -EINVAL; - } - - /* Don't free 'skb' even though it is one of the original - * fragments, as we're going to morph it into the head. - */ - skb_get(skb); - nf_ct_frag6_consume_orig(reasm); - key->ip.proto = ipv6_hdr(reasm)->nexthdr; - skb_morph(skb, reasm); - skb->next = reasm->next; - consume_skb(reasm); ovs_cb.mru = IP6CB(skb)->frag_max_size; #endif } else { -- cgit v1.2.3-71-gd317 From daaa7d647f81f3f1494d9a9029d611b666d63181 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Nov 2015 23:32:40 +0100 Subject: netfilter: ipv6: avoid nf_iterate recursion The previous patch changed nf_ct_frag6_gather() to morph reassembled skb with the previous one. This means that the return value is always NULL or the skb argument. So change it to an err value. Instead of invoking NF_HOOK recursively with threshold to skip already-called hooks we can now just return NF_ACCEPT to move on to the next hook except for -EINPROGRESS (which means skb has been queued for reassembly), in which case we return NF_STOLEN. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 71 +++++++++++++---------------- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 14 ++---- net/openvswitch/conntrack.c | 11 ++--- 4 files changed, 42 insertions(+), 56 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index fcd20cf8f5d5..ddf162f7966f 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -5,7 +5,7 @@ void nf_defrag_ipv6_enable(void); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); -struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); +int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user); struct inet_frags_ctl; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1a86a08adbe5..912bc3afc183 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -361,14 +361,15 @@ err: /* * Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. + * + * returns true if *prev skb has been transformed into the reassembled + * skb, false otherwise. */ -static struct sk_buff * +static bool nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { struct sk_buff *fp, *head = fq->q.fragments; @@ -382,22 +383,21 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) - goto out_fail; + return false; /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { - pr_debug("payload len is too large.\n"); - goto out_oversize; + net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", + payload_len); + return false; } /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) { - pr_debug("skb is cloned but can't expand head"); - goto out_oom; - } + if (skb_unclone(head, GFP_ATOMIC)) + return false; /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part @@ -408,7 +408,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic clone = alloc_skb(0, GFP_ATOMIC); if (clone == NULL) - goto out_oom; + return false; clone->next = head->next; head->next = clone; @@ -438,7 +438,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic fp = skb_clone(prev, GFP_ATOMIC); if (!fp) - goto out_oom; + return false; fp->next = prev->next; skb_queue_walk(head, iter) { @@ -494,16 +494,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic fq->q.fragments = NULL; fq->q.fragments_tail = NULL; - return head; - -out_oversize: - net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", - payload_len); - goto out_fail; -out_oom: - net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n"); -out_fail: - return NULL; + return true; } /* @@ -569,27 +560,26 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return 0; } -struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) +int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { struct net_device *dev = skb->dev; + int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; - int fhoff, nhoff; u8 prevhdr; - struct sk_buff *ret_skb = NULL; /* Jumbo payload inhibits frag. header */ if (ipv6_hdr(skb)->payload_len == 0) { pr_debug("payload len = 0\n"); - return skb; + return -EINVAL; } if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) - return skb; + return -EINVAL; if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr))) - return skb; + return -ENOMEM; skb_set_transport_header(skb, fhoff); hdr = ipv6_hdr(skb); @@ -598,27 +588,28 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); if (fq == NULL) - return skb; + return -ENOMEM; spin_lock_bh(&fq->q.lock); if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) { - spin_unlock_bh(&fq->q.lock); - pr_debug("Can't insert skb to queue\n"); - inet_frag_put(&fq->q, &nf_frags); - return skb; + ret = -EINVAL; + goto out_unlock; } + /* after queue has assumed skb ownership, only 0 or -EINPROGRESS + * must be returned. + */ + ret = -EINPROGRESS; if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) { - ret_skb = nf_ct_frag6_reasm(fq, skb, dev); - if (ret_skb == NULL) - pr_debug("Can't reassemble fragmented packets\n"); - } - spin_unlock_bh(&fq->q.lock); + fq->q.meat == fq->q.len && + nf_ct_frag6_reasm(fq, skb, dev)) + ret = 0; +out_unlock: + spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q, &nf_frags); - return ret_skb; + return ret; } EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index fb96b1018884..f7aab5ab93a5 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -55,7 +55,7 @@ static unsigned int ipv6_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - struct sk_buff *reasm; + int err; #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? */ @@ -63,17 +63,13 @@ static unsigned int ipv6_defrag(void *priv, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(state->net, skb, - nf_ct6_defrag_user(state->hook, skb)); + err = nf_ct_frag6_gather(state->net, skb, + nf_ct6_defrag_user(state->hook, skb)); /* queued */ - if (reasm == NULL) + if (err == -EINPROGRESS) return NF_STOLEN; - NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm, - state->in, state->out, - state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); - - return NF_STOLEN; + return NF_ACCEPT; } static struct nf_hook_ops ipv6_defrag_ops[] = { diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index cac2169f2909..0c68c8e46d0b 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -300,10 +300,10 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, u16 zone, struct sk_buff *skb) { struct ovs_skb_cb ovs_cb = *OVS_CB(skb); + int err; if (key->eth.type == htons(ETH_P_IP)) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; - int err; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); err = ip_defrag(net, skb, user); @@ -314,14 +314,13 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) } else if (key->eth.type == htons(ETH_P_IPV6)) { enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; - struct sk_buff *reasm; memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - reasm = nf_ct_frag6_gather(net, skb, user); - if (!reasm) - return -EINPROGRESS; + err = nf_ct_frag6_gather(net, skb, user); + if (err) + return err; - key->ip.proto = ipv6_hdr(reasm)->nexthdr; + key->ip.proto = ipv6_hdr(skb)->nexthdr; ovs_cb.mru = IP6CB(skb)->frag_max_size; #endif } else { -- cgit v1.2.3-71-gd317 From a9ecfbe7fcf2f600718c3f995cbb46043f7a7a5d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 24 Nov 2015 00:03:28 +0100 Subject: netfilter: nf_tables: remove unused struct members Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4bd7508bedc9..101d7d7ec243 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -19,8 +19,6 @@ struct nft_pktinfo { const struct net_device *out; u8 pf; u8 hook; - u8 nhoff; - u8 thoff; u8 tprot; /* for x_tables compatibility */ struct xt_action_param xt; -- cgit v1.2.3-71-gd317 From 7ec3f7b47b8d9ad7ba425726f2c58f9ddce040df Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 24 Nov 2015 10:00:22 +0000 Subject: netfilter: nft_payload: add packet mangling support Add support for mangling packet payload. Checksum for the specified base header is updated automatically if requested, however no updates for any kind of pseudo headers are supported, meaning no stateless NAT is supported. For checksum updates different checksumming methods can be specified. The currently supported methods are NONE for no checksum updates, and INET for internet type checksums. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 9 +++ include/uapi/linux/netfilter/nf_tables.h | 17 ++++ net/netfilter/nft_payload.c | 135 +++++++++++++++++++++++++++++-- 3 files changed, 155 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index c6f400cfaac8..4ff5424909aa 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -47,6 +47,15 @@ struct nft_payload { enum nft_registers dreg:8; }; +struct nft_payload_set { + enum nft_payload_bases base:8; + u8 offset; + u8 len; + enum nft_registers sreg:8; + u8 csum_type; + u8 csum_offset; +}; + extern const struct nft_expr_ops nft_payload_fast_ops; int nft_payload_module_init(void); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index d8c8a7c9d88a..5f3ececf84b3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -597,6 +597,17 @@ enum nft_payload_bases { NFT_PAYLOAD_TRANSPORT_HEADER, }; +/** + * enum nft_payload_csum_types - nf_tables payload expression checksum types + * + * @NFT_PAYLOAD_CSUM_NONE: no checksumming + * @NFT_PAYLOAD_CSUM_INET: internet checksum (RFC 791) + */ +enum nft_payload_csum_types { + NFT_PAYLOAD_CSUM_NONE, + NFT_PAYLOAD_CSUM_INET, +}; + /** * enum nft_payload_attributes - nf_tables payload expression netlink attributes * @@ -604,6 +615,9 @@ enum nft_payload_bases { * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases) * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32) * @NFTA_PAYLOAD_LEN: payload length (NLA_U32) + * @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: nft_registers) + * @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32) + * @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32) */ enum nft_payload_attributes { NFTA_PAYLOAD_UNSPEC, @@ -611,6 +625,9 @@ enum nft_payload_attributes { NFTA_PAYLOAD_BASE, NFTA_PAYLOAD_OFFSET, NFTA_PAYLOAD_LEN, + NFTA_PAYLOAD_SREG, + NFTA_PAYLOAD_CSUM_TYPE, + NFTA_PAYLOAD_CSUM_OFFSET, __NFTA_PAYLOAD_MAX }; #define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 09b4b07eb676..12cd4bf16d17 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -107,10 +107,13 @@ err: } static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { - [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, @@ -160,6 +163,118 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; +static void nft_payload_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + const u32 *src = ®s->data[priv->sreg]; + int offset, csum_offset; + __wsum fsum, tsum; + __sum16 sum; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; + offset = skb_mac_header(skb) - skb->data; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + offset = pkt->xt.thoff; + break; + default: + BUG(); + } + + csum_offset = offset + priv->csum_offset; + offset += priv->offset; + + if (priv->csum_type == NFT_PAYLOAD_CSUM_INET && + (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER || + skb->ip_summed != CHECKSUM_PARTIAL)) { + if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + + fsum = skb_checksum(skb, offset, priv->len, 0); + tsum = csum_partial(src, priv->len, 0); + sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum), + tsum)); + if (sum == 0) + sum = CSUM_MANGLED_0; + + if (!skb_make_writable(skb, csum_offset + sizeof(sum)) || + skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + } + + if (!skb_make_writable(skb, max(offset + priv->len, 0)) || + skb_store_bits(skb, offset, src, priv->len) < 0) + goto err; + + return; +err: + regs->verdict.code = NFT_BREAK; +} + +static int nft_payload_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload_set *priv = nft_expr_priv(expr); + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]); + + if (tb[NFTA_PAYLOAD_CSUM_TYPE]) + priv->csum_type = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); + if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) + priv->csum_offset = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); + + switch (priv->csum_type) { + case NFT_PAYLOAD_CSUM_NONE: + case NFT_PAYLOAD_CSUM_INET: + break; + default: + return -EOPNOTSUPP; + } + + return nft_validate_register_load(priv->sreg, priv->len); +} + +static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_PAYLOAD_SREG, priv->sreg) || + nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || + nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET, + htonl(priv->csum_offset))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nft_expr_ops nft_payload_set_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)), + .eval = nft_payload_set_eval, + .init = nft_payload_set_init, + .dump = nft_payload_set_dump, +}; + static const struct nft_expr_ops * nft_payload_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -167,8 +282,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, enum nft_payload_bases base; unsigned int offset, len; - if (tb[NFTA_PAYLOAD_DREG] == NULL || - tb[NFTA_PAYLOAD_BASE] == NULL || + if (tb[NFTA_PAYLOAD_BASE] == NULL || tb[NFTA_PAYLOAD_OFFSET] == NULL || tb[NFTA_PAYLOAD_LEN] == NULL) return ERR_PTR(-EINVAL); @@ -183,6 +297,15 @@ nft_payload_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-EOPNOTSUPP); } + if (tb[NFTA_PAYLOAD_SREG] != NULL) { + if (tb[NFTA_PAYLOAD_DREG] != NULL) + return ERR_PTR(-EINVAL); + return &nft_payload_set_ops; + } + + if (tb[NFTA_PAYLOAD_DREG] == NULL) + return ERR_PTR(-EINVAL); + offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); -- cgit v1.2.3-71-gd317 From 1ce0bf50ae2233c7115a18c0c623662d177b434c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 26 Nov 2015 13:55:39 +0800 Subject: net: Generalise wq_has_sleeper helper The memory barrier in the helper wq_has_sleeper is needed by just about every user of waitqueue_active. This patch generalises it by making it take a wait_queue_head_t directly. The existing helper is renamed to skwq_has_sleeper. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/algif_aead.c | 4 ++-- crypto/algif_skcipher.c | 4 ++-- include/linux/wait.h | 21 +++++++++++++++++++++ include/net/sock.h | 15 +++++---------- net/atm/common.c | 4 ++-- net/core/sock.c | 8 ++++---- net/core/stream.c | 2 +- net/dccp/output.c | 2 +- net/iucv/af_iucv.c | 2 +- net/rxrpc/af_rxrpc.c | 2 +- net/sctp/socket.c | 2 +- net/tipc/socket.c | 4 ++-- net/unix/af_unix.c | 2 +- 13 files changed, 44 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 0aa6fdfb448a..fb99f30849d2 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -106,7 +106,7 @@ static void aead_wmem_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); @@ -157,7 +157,7 @@ static void aead_data_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLRDNORM | POLLRDBAND); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index af31a0ee4057..0e6702e41472 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -238,7 +238,7 @@ static void skcipher_wmem_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); @@ -288,7 +288,7 @@ static void skcipher_data_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLRDNORM | POLLRDBAND); diff --git a/include/linux/wait.h b/include/linux/wait.h index 1e1bf9f963a9..6aa09a875fbd 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -107,6 +107,27 @@ static inline int waitqueue_active(wait_queue_head_t *q) return !list_empty(&q->task_list); } +/** + * wq_has_sleeper - check if there are any waiting processes + * @wq: wait queue head + * + * Returns true if wq has waiting processes + * + * Please refer to the comment for waitqueue_active. + */ +static inline bool wq_has_sleeper(wait_queue_head_t *wq) +{ + /* + * We need to be sure we are in sync with the + * add_wait_queue modifications to the wait queue. + * + * This memory barrier should be paired with one on the + * waiting side. + */ + smp_mb(); + return waitqueue_active(wq); +} + extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); diff --git a/include/net/sock.h b/include/net/sock.h index 7f89e4ba18d1..62d35afcb3ac 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -1879,12 +1880,12 @@ static inline bool sk_has_allocations(const struct sock *sk) } /** - * wq_has_sleeper - check if there are any waiting processes + * skwq_has_sleeper - check if there are any waiting processes * @wq: struct socket_wq * * Returns true if socket_wq has waiting processes * - * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory + * The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory * barrier call. They were added due to the race found within the tcp code. * * Consider following tcp code paths: @@ -1910,15 +1911,9 @@ static inline bool sk_has_allocations(const struct sock *sk) * data on the socket. * */ -static inline bool wq_has_sleeper(struct socket_wq *wq) +static inline bool skwq_has_sleeper(struct socket_wq *wq) { - /* We need to be sure we are in sync with the - * add_wait_queue modifications to the wait queue. - * - * This memory barrier is paired in the sock_poll_wait. - */ - smp_mb(); - return wq && waitqueue_active(&wq->wait); + return wq && wq_has_sleeper(&wq->wait); } /** diff --git a/net/atm/common.c b/net/atm/common.c index 49a872db7e42..6dc12305799e 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -96,7 +96,7 @@ static void vcc_def_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up(&wq->wait); rcu_read_unlock(); } @@ -117,7 +117,7 @@ static void vcc_write_space(struct sock *sk) if (vcc_writable(sk)) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); diff --git a/net/core/sock.c b/net/core/sock.c index 1e4dd54bfb5a..2769bd3a4d7c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2283,7 +2283,7 @@ static void sock_def_wakeup(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); rcu_read_unlock(); } @@ -2294,7 +2294,7 @@ static void sock_def_error_report(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); rcu_read_unlock(); @@ -2306,7 +2306,7 @@ static void sock_def_readable(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); @@ -2324,7 +2324,7 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); diff --git a/net/core/stream.c b/net/core/stream.c index d70f77a0c889..8ff9d63b4265 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) diff --git a/net/dccp/output.c b/net/dccp/output.c index 4ce912e691d0..b66c84db0766 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -201,7 +201,7 @@ void dccp_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index fcb2752419c6..4f0aa91470c6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -303,7 +303,7 @@ static void iucv_sock_wake_msglim(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 1f8a144a5dc2..7e2d1057d8bc 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -67,7 +67,7 @@ static void rxrpc_write_space(struct sock *sk) if (rxrpc_writable(sk)) { struct socket_wq *wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible(&wq->wait); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 897c01c029ca..ec10b66354b8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6978,7 +6978,7 @@ void sctp_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 552dbaba9cf3..525acf6dd1c6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1492,7 +1492,7 @@ static void tipc_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); rcu_read_unlock(); @@ -1509,7 +1509,7 @@ static void tipc_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLRDNORM | POLLRDBAND); rcu_read_unlock(); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 955ec152cb71..efb706e1d1c0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -339,7 +339,7 @@ static void unix_write_space(struct sock *sk) rcu_read_lock(); if (unix_writable(sk)) { wq = rcu_dereference(sk->sk_wq); - if (wq_has_sleeper(wq)) + if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); -- cgit v1.2.3-71-gd317 From 7450aaf61f0ae2ee6cc6491138d11df2c25e7609 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Nov 2015 08:57:28 -0800 Subject: tcp: suppress too verbose messages in tcp_send_ack() If tcp_send_ack() can not allocate skb, we properly handle this and setup a timer to try later. Use __GFP_NOWARN to avoid polluting syslog in the case host is under memory pressure, so that pertinent messages are not lost under a flood of useless information. sk_gfp_atomic() can use its gfp_mask argument (all callers currently were using GFP_ATOMIC before this patch) We rename sk_gfp_atomic() to sk_gfp_mask() to clearly express this function now takes into account its second argument (gfp_mask) Note that when tcp_transmit_skb() is called with clone_it set to false, we do not attempt memory allocations, so can pass a 0 gfp_mask, which most compilers can emit faster than a non zero or constant value. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 ++-- net/ipv4/tcp_output.c | 14 ++++++++------ net/ipv6/tcp_ipv6.c | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 62d35afcb3ac..9065f8b7e646 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -775,9 +775,9 @@ static inline int sk_memalloc_socks(void) #endif -static inline gfp_t sk_gfp_atomic(const struct sock *sk, gfp_t gfp_mask) +static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask) { - return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC); + return gfp_mask | (sk->sk_allocation & __GFP_MEMALLOC); } static inline void sk_acceptq_removed(struct sock *sk) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index cb7ca569052c..a800cee88035 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2296,7 +2296,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, return; if (tcp_write_xmit(sk, cur_mss, nonagle, 0, - sk_gfp_atomic(sk, GFP_ATOMIC))) + sk_gfp_mask(sk, GFP_ATOMIC))) tcp_check_probe_timer(sk); } @@ -3352,8 +3352,9 @@ void tcp_send_ack(struct sock *sk) * tcp_transmit_skb() will set the ownership to this * sock. */ - buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); - if (!buff) { + buff = alloc_skb(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); + if (unlikely(!buff)) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, @@ -3375,7 +3376,7 @@ void tcp_send_ack(struct sock *sk) /* Send it off, this clears delayed acks for us. */ skb_mstamp_get(&buff->skb_mstamp); - tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); + tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0); } EXPORT_SYMBOL_GPL(tcp_send_ack); @@ -3396,7 +3397,8 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) struct sk_buff *skb; /* We don't queue it, tcp_transmit_skb() sets ownership. */ - skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = alloc_skb(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN)); if (!skb) return -1; @@ -3409,7 +3411,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); skb_mstamp_get(&skb->skb_mstamp); NET_INC_STATS(sock_net(sk), mib); - return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); + return tcp_transmit_skb(sk, skb, 0, (__force gfp_t)0); } void tcp_send_window_probe(struct sock *sk) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c5429a636f1a..41bcd59a2ac7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1130,7 +1130,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, AF_INET6, key->key, key->keylen, - sk_gfp_atomic(sk, GFP_ATOMIC)); + sk_gfp_mask(sk, GFP_ATOMIC)); } #endif @@ -1146,7 +1146,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_atomic(sk, GFP_ATOMIC)); + sk_gfp_mask(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); ireq->pktopts = NULL; if (newnp->pktoptions) @@ -1212,7 +1212,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); + opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst = sk->sk_rx_dst; -- cgit v1.2.3-71-gd317 From 5d397061ca2081d8a99e4bee5792122faa6aaf86 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:19 +0100 Subject: bonding: allow notifications for bond_set_slave_link_state Similar to state notifications. We allow caller to indicate if the notification should happen now or later, depending on if he holds rtnl mutex or not. Introduce bond_slave_link_notify function (similar to bond_slave_state_notify) which is later on called with rtnl mutex and goes over slaves and executes delayed notification. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 54 +++++++++++++++++++++++++++-------------- include/net/bonding.h | 32 +++++++++++++++++++++--- 2 files changed, 65 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e94e79ad757c..7695490061ec 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -830,7 +830,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } new_active->delay = 0; - bond_set_slave_link_state(new_active, BOND_LINK_UP); + bond_set_slave_link_state(new_active, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_handle_link_change(new_active, BOND_LINK_UP); @@ -1580,21 +1581,26 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { if (bond->params.updelay) { bond_set_slave_link_state(new_slave, - BOND_LINK_BACK); + BOND_LINK_BACK, + BOND_SLAVE_NOTIFY_NOW); new_slave->delay = bond->params.updelay; } else { bond_set_slave_link_state(new_slave, - BOND_LINK_UP); + BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); } } else { - bond_set_slave_link_state(new_slave, BOND_LINK_DOWN); + bond_set_slave_link_state(new_slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_NOW); } } else if (bond->params.arp_interval) { bond_set_slave_link_state(new_slave, (netif_carrier_ok(slave_dev) ? - BOND_LINK_UP : BOND_LINK_DOWN)); + BOND_LINK_UP : BOND_LINK_DOWN), + BOND_SLAVE_NOTIFY_NOW); } else { - bond_set_slave_link_state(new_slave, BOND_LINK_UP); + bond_set_slave_link_state(new_slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); } if (new_slave->link != BOND_LINK_DOWN) @@ -2013,7 +2019,8 @@ static int bond_miimon_inspect(struct bonding *bond) if (link_state) continue; - bond_set_slave_link_state(slave, BOND_LINK_FAIL); + bond_set_slave_link_state(slave, BOND_LINK_FAIL, + BOND_SLAVE_NOTIFY_LATER); slave->delay = bond->params.downdelay; if (slave->delay) { netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n", @@ -2028,7 +2035,8 @@ static int bond_miimon_inspect(struct bonding *bond) case BOND_LINK_FAIL: if (link_state) { /* recovered before downdelay expired */ - bond_set_slave_link_state(slave, BOND_LINK_UP); + bond_set_slave_link_state(slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_LATER); slave->last_link_up = jiffies; netdev_info(bond->dev, "link status up again after %d ms for interface %s\n", (bond->params.downdelay - slave->delay) * @@ -2050,7 +2058,8 @@ static int bond_miimon_inspect(struct bonding *bond) if (!link_state) continue; - bond_set_slave_link_state(slave, BOND_LINK_BACK); + bond_set_slave_link_state(slave, BOND_LINK_BACK, + BOND_SLAVE_NOTIFY_LATER); slave->delay = bond->params.updelay; if (slave->delay) { @@ -2064,7 +2073,8 @@ static int bond_miimon_inspect(struct bonding *bond) case BOND_LINK_BACK: if (!link_state) { bond_set_slave_link_state(slave, - BOND_LINK_DOWN); + BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_LATER); netdev_info(bond->dev, "link status down again after %d ms for interface %s\n", (bond->params.updelay - slave->delay) * bond->params.miimon, @@ -2102,7 +2112,8 @@ static void bond_miimon_commit(struct bonding *bond) continue; case BOND_LINK_UP: - bond_set_slave_link_state(slave, BOND_LINK_UP); + bond_set_slave_link_state(slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); slave->last_link_up = jiffies; primary = rtnl_dereference(bond->primary_slave); @@ -2142,7 +2153,8 @@ static void bond_miimon_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - bond_set_slave_link_state(slave, BOND_LINK_DOWN); + bond_set_slave_link_state(slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_NOW); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || BOND_MODE(bond) == BOND_MODE_8023AD) @@ -2725,7 +2737,8 @@ static void bond_ab_arp_commit(struct bonding *bond) struct slave *current_arp_slave; current_arp_slave = rtnl_dereference(bond->current_arp_slave); - bond_set_slave_link_state(slave, BOND_LINK_UP); + bond_set_slave_link_state(slave, BOND_LINK_UP, + BOND_SLAVE_NOTIFY_NOW); if (current_arp_slave) { bond_set_slave_inactive_flags( current_arp_slave, @@ -2748,7 +2761,8 @@ static void bond_ab_arp_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - bond_set_slave_link_state(slave, BOND_LINK_DOWN); + bond_set_slave_link_state(slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_NOW); bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); @@ -2827,7 +2841,8 @@ static bool bond_ab_arp_probe(struct bonding *bond) * up when it is actually down */ if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { - bond_set_slave_link_state(slave, BOND_LINK_DOWN); + bond_set_slave_link_state(slave, BOND_LINK_DOWN, + BOND_SLAVE_NOTIFY_LATER); if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; @@ -2847,7 +2862,8 @@ static bool bond_ab_arp_probe(struct bonding *bond) if (!new_slave) goto check_state; - bond_set_slave_link_state(new_slave, BOND_LINK_BACK); + bond_set_slave_link_state(new_slave, BOND_LINK_BACK, + BOND_SLAVE_NOTIFY_LATER); bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); bond_arp_send_all(bond, new_slave); new_slave->last_link_up = jiffies; @@ -2855,7 +2871,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) check_state: bond_for_each_slave_rcu(bond, slave, iter) { - if (slave->should_notify) { + if (slave->should_notify || slave->should_notify_link) { should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; break; } @@ -2910,8 +2926,10 @@ re_arm: if (should_notify_peers) call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); - if (should_notify_rtnl) + if (should_notify_rtnl) { bond_slave_state_notify(bond); + bond_slave_link_notify(bond); + } rtnl_unlock(); } diff --git a/include/net/bonding.h b/include/net/bonding.h index c1740a2794a3..1df437715e2f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -165,7 +165,8 @@ struct slave { u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ - should_notify:1; /* indicateds whether the state changed */ + should_notify:1, /* indicates whether the state changed */ + should_notify_link:1; /* indicates whether the link changed */ u8 duplex; u32 original_mtu; u32 link_failure_count; @@ -504,10 +505,35 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } -static inline void bond_set_slave_link_state(struct slave *slave, int state) +static inline void bond_set_slave_link_state(struct slave *slave, int state, + bool notify) { + if (slave->link == state) + return; + slave->link = state; - bond_queue_slave_event(slave); + if (notify) { + bond_queue_slave_event(slave); + slave->should_notify_link = 0; + } else { + if (slave->should_notify_link) + slave->should_notify_link = 0; + else + slave->should_notify_link = 1; + } +} + +static inline void bond_slave_link_notify(struct bonding *bond) +{ + struct list_head *iter; + struct slave *tmp; + + bond_for_each_slave(bond, tmp, iter) { + if (tmp->should_notify_link) { + bond_queue_slave_event(tmp); + tmp->should_notify_link = 0; + } + } } static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) -- cgit v1.2.3-71-gd317 From f7c7eb7f7af7f87e0fc150994785fd139576e43a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 3 Dec 2015 12:12:20 +0100 Subject: bonding: implement lower state change propagation Let netdev notifier listeners know about link and slave state change. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 10 ++++++++++ include/net/bonding.h | 7 +++++++ 2 files changed, 17 insertions(+) (limited to 'include/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 7695490061ec..2f1145063c60 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1317,6 +1317,16 @@ void bond_queue_slave_event(struct slave *slave) queue_delayed_work(slave->bond->wq, &nnw->work, 0); } +void bond_lower_state_changed(struct slave *slave) +{ + struct netdev_lag_lower_state_info info; + + info.link_up = slave->link == BOND_LINK_UP || + slave->link == BOND_LINK_FAIL; + info.tx_enabled = bond_is_active_slave(slave); + netdev_lower_state_changed(slave->dev, &info); +} + /* enslave device to bond device */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { diff --git a/include/net/bonding.h b/include/net/bonding.h index 1df437715e2f..ee6c52053aa3 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -247,6 +247,7 @@ struct bonding { ((struct slave *) rtnl_dereference(dev->rx_handler_data)) void bond_queue_slave_event(struct slave *slave); +void bond_lower_state_changed(struct slave *slave); struct bond_vlan_tag { __be16 vlan_proto; @@ -328,6 +329,7 @@ static inline void bond_set_active_slave(struct slave *slave) if (slave->backup) { slave->backup = 0; bond_queue_slave_event(slave); + bond_lower_state_changed(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -337,6 +339,7 @@ static inline void bond_set_backup_slave(struct slave *slave) if (!slave->backup) { slave->backup = 1; bond_queue_slave_event(slave); + bond_lower_state_changed(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -349,6 +352,7 @@ static inline void bond_set_slave_state(struct slave *slave, slave->backup = slave_state; if (notify) { + bond_lower_state_changed(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); bond_queue_slave_event(slave); slave->should_notify = 0; @@ -380,6 +384,7 @@ static inline void bond_slave_state_notify(struct bonding *bond) bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify) { + bond_lower_state_changed(tmp); rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_ATOMIC); tmp->should_notify = 0; } @@ -514,6 +519,7 @@ static inline void bond_set_slave_link_state(struct slave *slave, int state, slave->link = state; if (notify) { bond_queue_slave_event(slave); + bond_lower_state_changed(slave); slave->should_notify_link = 0; } else { if (slave->should_notify_link) @@ -531,6 +537,7 @@ static inline void bond_slave_link_notify(struct bonding *bond) bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify_link) { bond_queue_slave_event(tmp); + bond_lower_state_changed(tmp); tmp->should_notify_link = 0; } } -- cgit v1.2.3-71-gd317 From 357ab2234d57f6c74386f64ded42dff8e3c0500b Mon Sep 17 00:00:00 2001 From: Asias He Date: Wed, 2 Dec 2015 14:43:59 +0800 Subject: VSOCK: Introduce vsock_find_unbound_socket and vsock_bind_dgram_generic Signed-off-by: Asias He Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- include/net/af_vsock.h | 2 ++ net/vmw_vsock/af_vsock.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'include/net') diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index e9eb2d6791b3..a0c8fa2ababf 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -175,8 +175,10 @@ void vsock_insert_connected(struct vsock_sock *vsk); void vsock_remove_bound(struct vsock_sock *vsk); void vsock_remove_connected(struct vsock_sock *vsk); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); +struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); +int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr); #endif /* __AF_VSOCK_H__ */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7fd1220fbfa0..77247a2b670b 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -223,6 +223,17 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) return NULL; } +static struct sock *__vsock_find_unbound_socket(struct sockaddr_vm *addr) +{ + struct vsock_sock *vsk; + + list_for_each_entry(vsk, vsock_unbound_sockets, bound_table) + if (addr->svm_port == vsk->local_addr.svm_port) + return sk_vsock(vsk); + + return NULL; +} + static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -298,6 +309,21 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) } EXPORT_SYMBOL_GPL(vsock_find_bound_socket); +struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr) +{ + struct sock *sk; + + spin_lock_bh(&vsock_table_lock); + sk = __vsock_find_unbound_socket(addr); + if (sk) + sock_hold(sk); + + spin_unlock_bh(&vsock_table_lock); + + return sk; +} +EXPORT_SYMBOL_GPL(vsock_find_unbound_socket); + struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -532,6 +558,50 @@ static int __vsock_bind_stream(struct vsock_sock *vsk, return 0; } +int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr) +{ + static u32 port = LAST_RESERVED_PORT + 1; + struct sockaddr_vm new_addr; + + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); + + if (addr->svm_port == VMADDR_PORT_ANY) { + bool found = false; + unsigned int i; + + for (i = 0; i < MAX_PORT_RETRIES; i++) { + if (port <= LAST_RESERVED_PORT) + port = LAST_RESERVED_PORT + 1; + + new_addr.svm_port = port++; + + if (!__vsock_find_unbound_socket(&new_addr)) { + found = true; + break; + } + } + + if (!found) + return -EADDRNOTAVAIL; + } else { + /* If port is in reserved range, ensure caller + * has necessary privileges. + */ + if (addr->svm_port <= LAST_RESERVED_PORT && + !capable(CAP_NET_BIND_SERVICE)) { + return -EACCES; + } + + if (__vsock_find_unbound_socket(&new_addr)) + return -EADDRINUSE; + } + + vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); + + return 0; +} +EXPORT_SYMBOL_GPL(vsock_bind_dgram_generic); + static int __vsock_bind_dgram(struct vsock_sock *vsk, struct sockaddr_vm *addr) { -- cgit v1.2.3-71-gd317 From 3110489117581a980537b6d999a3724214ba772c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Oct 2015 17:35:19 +0200 Subject: mac80211: allow driver to prevent two stations w/ same address Some devices or drivers cannot deal with having the same station address for different virtual interfaces, say as a client to two virtual AP interfaces. Rather than requiring each driver with a limitation like that to enforce it, add a hardware flag for it. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/debugfs.c | 1 + net/mac80211/sta_info.c | 18 ++++++++++++++++-- 3 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 760bc4d5a2cf..8628118214cc 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1901,6 +1901,11 @@ struct ieee80211_txq { * @IEEE80211_HW_BEACON_TX_STATUS: The device/driver provides TX status * for sent beacons. * + * @IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR: Hardware (or driver) requires that each + * station has a unique address, i.e. each station entry can be identified + * by just its MAC address; this prevents, for example, the same station + * from connecting to two virtual AP interfaces at the same time. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -1936,6 +1941,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_TDLS_WIDER_BW, IEEE80211_HW_SUPPORTS_AMSDU_IN_AMPDU, IEEE80211_HW_BEACON_TX_STATUS, + IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 4d2aaebd4f97..abbdff03ce92 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -125,6 +125,7 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = { FLAG(TDLS_WIDER_BW), FLAG(SUPPORTS_AMSDU_IN_AMPDU), FLAG(BEACON_TX_STATUS), + FLAG(NEEDS_UNIQUE_STA_ADDR), /* keep last for the build bug below */ (void *)0x1 diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f91d1873218c..8f630f51d9bd 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -435,6 +435,19 @@ static int sta_info_insert_check(struct sta_info *sta) is_multicast_ether_addr(sta->sta.addr))) return -EINVAL; + /* Strictly speaking this isn't necessary as we hold the mutex, but + * the rhashtable code can't really deal with that distinction. We + * do require the mutex for correctness though. + */ + rcu_read_lock(); + lockdep_assert_held(&sdata->local->sta_mtx); + if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) && + ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) { + rcu_read_unlock(); + return -ENOTUNIQ; + } + rcu_read_unlock(); + return 0; } @@ -554,14 +567,15 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) might_sleep(); + mutex_lock(&local->sta_mtx); + err = sta_info_insert_check(sta); if (err) { + mutex_unlock(&local->sta_mtx); rcu_read_lock(); goto out_free; } - mutex_lock(&local->sta_mtx); - err = sta_info_insert_finish(sta); if (err) goto out_free; -- cgit v1.2.3-71-gd317 From 0483eeac59876ac37d4edbabd48727a468416d5b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 23 Oct 2015 09:50:03 +0200 Subject: cfg80211: replace ieee80211_ie_split() with an inline The function is a very simple wrapper around another one, just adds a few default parameters, so replace it with a static inline instead of using EXPORT_SYMBOL, reducing the module size slightly. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 +++++-- net/wireless/util.c | 7 ------- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2c7bdb81d30c..e568872203a5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5173,8 +5173,11 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, * buffer starts, which may be @ielen if the entire (remainder) * of the buffer should be used. */ -size_t ieee80211_ie_split(const u8 *ies, size_t ielen, - const u8 *ids, int n_ids, size_t offset); +static inline size_t ieee80211_ie_split(const u8 *ies, size_t ielen, + const u8 *ids, int n_ids, size_t offset) +{ + return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset); +} /** * cfg80211_report_wowlan_wakeup - report wakeup from WoWLAN diff --git a/net/wireless/util.c b/net/wireless/util.c index baf7218cec15..010a3c75a677 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1325,13 +1325,6 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, } EXPORT_SYMBOL(ieee80211_ie_split_ric); -size_t ieee80211_ie_split(const u8 *ies, size_t ielen, - const u8 *ids, int n_ids, size_t offset) -{ - return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset); -} -EXPORT_SYMBOL(ieee80211_ie_split); - bool ieee80211_operating_class_to_band(u8 operating_class, enum ieee80211_band *band) { -- cgit v1.2.3-71-gd317 From 0ead2510f8cec11ce96308d79a1b4ee272fb5238 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 17 Nov 2015 10:24:36 +0200 Subject: mac80211: allow the driver to send EOSP when needed This can happen when the driver needs to send less frames than expected and then needs to close the SP. Mac80211 still needs to set the more_data properly based on its buffer state (ps_tx_buffer and buffered frames on other TIDs). To that end, refactor the code that delivers frames upon uAPSD trigger frames to be able to get only the more_data bit without actually delivering those frames in case the driver is just asking to set a NDP with EOSP and MORE_DATA bit properly set. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 22 ++++++++ net/mac80211/sta_info.c | 144 +++++++++++++++++++++++++++++++++--------------- net/mac80211/trace.h | 25 +++++++++ 3 files changed, 148 insertions(+), 43 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8628118214cc..18ac733afc91 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4868,6 +4868,28 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, */ void ieee80211_sta_eosp(struct ieee80211_sta *pubsta); +/** + * ieee80211_send_eosp_nullfunc - ask mac80211 to send NDP with EOSP + * @pubsta: the station + * @tid: the tid of the NDP + * + * Sometimes the device understands that it needs to close + * the Service Period unexpectedly. This can happen when + * sending frames that are filling holes in the BA window. + * In this case, the device can ask mac80211 to send a + * Nullfunc frame with EOSP set. When that happens, the + * driver must have called ieee80211_sta_set_buffered() to + * let mac80211 know that there are no buffered frames any + * more, otherwise mac80211 will get the more_data bit wrong. + * The low level driver must have made sure that the frame + * will be sent despite the station being in power-save. + * Mac80211 won't call allow_buffered_frames(). + * Note that calling this function, doesn't exempt the driver + * from closing the EOSP properly, it will still have to call + * ieee80211_sta_eosp when the NDP is sent. + */ +void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid); + /** * ieee80211_iter_keys - iterate keys programmed into the device * @hw: pointer obtained from ieee80211_alloc_hw() diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8f630f51d9bd..723fa30aafc5 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2,6 +2,7 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright (C) 2015 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1244,11 +1245,11 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) ieee80211_check_fast_xmit(sta); } -static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, int tid, +static void ieee80211_send_null_response(struct sta_info *sta, int tid, enum ieee80211_frame_release_type reason, - bool call_driver) + bool call_driver, bool more_data) { + struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; @@ -1288,9 +1289,13 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, if (qos) { nullfunc->qos_ctrl = cpu_to_le16(tid); - if (reason == IEEE80211_FRAME_RELEASE_UAPSD) + if (reason == IEEE80211_FRAME_RELEASE_UAPSD) { nullfunc->qos_ctrl |= cpu_to_le16(IEEE80211_QOS_CTL_EOSP); + if (more_data) + nullfunc->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + } } info = IEEE80211_SKB_CB(skb); @@ -1337,22 +1342,48 @@ static int find_highest_prio_tid(unsigned long tids) return fls(tids) - 1; } +/* Indicates if the MORE_DATA bit should be set in the last + * frame obtained by ieee80211_sta_ps_get_frames. + * Note that driver_release_tids is relevant only if + * reason = IEEE80211_FRAME_RELEASE_PSPOLL + */ +static bool +ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs, + enum ieee80211_frame_release_type reason, + unsigned long driver_release_tids) +{ + int ac; + + /* If the driver has data on more than one TID then + * certainly there's more data if we release just a + * single frame now (from a single TID). This will + * only happen for PS-Poll. + */ + if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && + hweight16(driver_release_tids) > 1) + return true; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + if (ignored_acs & BIT(ac)) + continue; + + if (!skb_queue_empty(&sta->tx_filtered[ac]) || + !skb_queue_empty(&sta->ps_tx_buf[ac])) + return true; + } + + return false; +} + static void -ieee80211_sta_ps_deliver_response(struct sta_info *sta, - int n_frames, u8 ignored_acs, - enum ieee80211_frame_release_type reason) +ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs, + enum ieee80211_frame_release_type reason, + struct sk_buff_head *frames, + unsigned long *driver_release_tids) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - bool more_data = false; int ac; - unsigned long driver_release_tids = 0; - struct sk_buff_head frames; - - /* Service or PS-Poll period starts */ - set_sta_flag(sta, WLAN_STA_SP); - - __skb_queue_head_init(&frames); /* Get response frame(s) and more data bit for the last one. */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { @@ -1366,26 +1397,13 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, /* if we already have frames from software, then we can't also * release from hardware queues */ - if (skb_queue_empty(&frames)) { - driver_release_tids |= sta->driver_buffered_tids & tids; - driver_release_tids |= sta->txq_buffered_tids & tids; + if (skb_queue_empty(frames)) { + *driver_release_tids |= + sta->driver_buffered_tids & tids; + *driver_release_tids |= sta->txq_buffered_tids & tids; } - if (driver_release_tids) { - /* If the driver has data on more than one TID then - * certainly there's more data if we release just a - * single frame now (from a single TID). This will - * only happen for PS-Poll. - */ - if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && - hweight16(driver_release_tids) > 1) { - more_data = true; - driver_release_tids = - BIT(find_highest_prio_tid( - driver_release_tids)); - break; - } - } else { + if (!*driver_release_tids) { struct sk_buff *skb; while (n_frames > 0) { @@ -1399,20 +1417,44 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, if (!skb) break; n_frames--; - __skb_queue_tail(&frames, skb); + __skb_queue_tail(frames, skb); } } - /* If we have more frames buffered on this AC, then set the - * more-data bit and abort the loop since we can't send more - * data from other ACs before the buffered frames from this. + /* If we have more frames buffered on this AC, then abort the + * loop since we can't send more data from other ACs before + * the buffered frames from this. */ if (!skb_queue_empty(&sta->tx_filtered[ac]) || - !skb_queue_empty(&sta->ps_tx_buf[ac])) { - more_data = true; + !skb_queue_empty(&sta->ps_tx_buf[ac])) break; - } } +} + +static void +ieee80211_sta_ps_deliver_response(struct sta_info *sta, + int n_frames, u8 ignored_acs, + enum ieee80211_frame_release_type reason) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + unsigned long driver_release_tids = 0; + struct sk_buff_head frames; + bool more_data; + + /* Service or PS-Poll period starts */ + set_sta_flag(sta, WLAN_STA_SP); + + __skb_queue_head_init(&frames); + + ieee80211_sta_ps_get_frames(sta, n_frames, ignored_acs, reason, + &frames, &driver_release_tids); + + more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids); + + if (reason == IEEE80211_FRAME_RELEASE_PSPOLL) + driver_release_tids = + BIT(find_highest_prio_tid(driver_release_tids)); if (skb_queue_empty(&frames) && !driver_release_tids) { int tid; @@ -1435,7 +1477,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, /* This will evaluate to 1, 3, 5 or 7. */ tid = 7 - ((ffs(~ignored_acs) - 1) << 1); - ieee80211_send_null_response(sdata, sta, tid, reason, true); + ieee80211_send_null_response(sta, tid, reason, true, false); } else if (!driver_release_tids) { struct sk_buff_head pending; struct sk_buff *skb; @@ -1535,8 +1577,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, if (need_null) ieee80211_send_null_response( - sdata, sta, find_highest_prio_tid(tids), - reason, false); + sta, find_highest_prio_tid(tids), + reason, false, false); sta_info_recalc_tim(sta); } else { @@ -1674,6 +1716,22 @@ void ieee80211_sta_eosp(struct ieee80211_sta *pubsta) } EXPORT_SYMBOL(ieee80211_sta_eosp); +void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + enum ieee80211_frame_release_type reason; + bool more_data; + + trace_api_send_eosp_nullfunc(sta->local, pubsta, tid); + + reason = IEEE80211_FRAME_RELEASE_UAPSD; + more_data = ieee80211_sta_ps_more_data(sta, ~sta->sta.uapsd_queues, + reason, 0); + + ieee80211_send_null_response(sta, tid, reason, false, more_data); +} +EXPORT_SYMBOL(ieee80211_send_eosp_nullfunc); + void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, u8 tid, bool buffered) { diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 56c6d6cfa5a1..a6b4442776a0 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2027,6 +2027,31 @@ TRACE_EVENT(api_eosp, ) ); +TRACE_EVENT(api_send_eosp_nullfunc, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + u8 tid), + + TP_ARGS(local, sta, tid), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(u8, tid) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->tid = tid; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT " tid:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->tid + ) +); + TRACE_EVENT(api_sta_set_buffered, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, -- cgit v1.2.3-71-gd317 From ef044763a3ca6b9e0bb65a9ce0cb38c0eca62756 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Tue, 17 Nov 2015 10:24:37 +0200 Subject: mac80211: add atomic uploaded keys iterator add ieee80211_iter_keys_rcu() to iterate over uploaded keys in atomic context (when rcu is locked) The station removal code removes the keys only after calling synchronize_net(), so it's not safe to iterate the keys at this point (and postponing the actual key deletion with call_rcu() might result in some badly-ordered ops calls). Add a flag to indicate a station is being removed, and skip the configured keys if it's set. Signed-off-by: Eliad Peller Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 24 +++++++++++++++++++++ net/mac80211/key.c | 56 +++++++++++++++++++++++++++++++++++++++++++++---- net/mac80211/sta_info.c | 1 + net/mac80211/sta_info.h | 2 ++ 4 files changed, 79 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 18ac733afc91..a68051c41ac3 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4917,6 +4917,30 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, void *data), void *iter_data); +/** + * ieee80211_iter_keys_rcu - iterate keys programmed into the device + * @hw: pointer obtained from ieee80211_alloc_hw() + * @vif: virtual interface to iterate, may be %NULL for all + * @iter: iterator function that will be called for each key + * @iter_data: custom data to pass to the iterator function + * + * This function can be used to iterate all the keys known to + * mac80211, even those that weren't previously programmed into + * the device. Note that due to locking reasons, keys of station + * in removal process will be skipped. + * + * This function requires being called in an RCU critical section, + * and thus iter must be atomic. + */ +void ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + void (*iter)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key, + void *data), + void *iter_data); + /** * ieee80211_iter_chan_contexts_atomic - iterate channel contexts * @hw: pointre obtained from ieee80211_alloc_hw(). diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 44388d6a1d8e..5e5bc599da4c 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -4,6 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007-2008 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright 2015 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -320,7 +321,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, return; if (new) - list_add_tail(&new->list, &sdata->key_list); + list_add_tail_rcu(&new->list, &sdata->key_list); WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); @@ -368,7 +369,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, } if (old) - list_del(&old->list); + list_del_rcu(&old->list); } struct ieee80211_key * @@ -592,8 +593,8 @@ static void ieee80211_key_destroy(struct ieee80211_key *key, return; /* - * Synchronize so the TX path can no longer be using - * this key before we free/remove it. + * Synchronize so the TX path and rcu key iterators + * can no longer be using this key before we free/remove it. */ synchronize_net(); @@ -744,6 +745,53 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_iter_keys); +static void +_ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, + struct ieee80211_sub_if_data *sdata, + void (*iter)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key, + void *data), + void *iter_data) +{ + struct ieee80211_key *key; + + list_for_each_entry_rcu(key, &sdata->key_list, list) { + /* skip keys of station in removal process */ + if (key->sta && key->sta->removed) + continue; + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + continue; + + iter(hw, &sdata->vif, + key->sta ? &key->sta->sta : NULL, + &key->conf, iter_data); + } +} + +void ieee80211_iter_keys_rcu(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + void (*iter)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key, + void *data), + void *iter_data) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + + if (vif) { + sdata = vif_to_sdata(vif); + _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data); + } else { + list_for_each_entry_rcu(sdata, &local->interfaces, list) + _ieee80211_iter_keys_rcu(hw, sdata, iter, iter_data); + } +} +EXPORT_SYMBOL(ieee80211_iter_keys_rcu); + static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata, struct list_head *keys) { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 723fa30aafc5..4402ad5b27d1 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -883,6 +883,7 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) } list_del_rcu(&sta->list); + sta->removed = true; drv_sta_pre_rcu_remove(local, sta->sdata, sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2cafb21b422f..d6051629ed15 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -367,6 +367,7 @@ DECLARE_EWMA(signal, 1024, 8) * @mesh: mesh STA information * @debugfs: debug filesystem info * @dead: set to true when sta is unlinked + * @removed: set to true when sta is being removed from sta_list * @uploaded: set to true when sta is uploaded to the driver * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) @@ -412,6 +413,7 @@ struct sta_info { u16 listen_interval; bool dead; + bool removed; bool uploaded; -- cgit v1.2.3-71-gd317 From b115b972997428b9134aba377721fea6486adbd0 Mon Sep 17 00:00:00 2001 From: "Janusz.Dziedzic@tieto.com" Date: Tue, 27 Oct 2015 08:38:40 +0100 Subject: mac80211: add new IEEE80211_VIF_GET_NOA_UPDATE flag Add new VIF flag, that will allow get NOA update notification when driver will request this, even this is not pure P2P vif (eg. STA vif). Signed-off-by: Janusz Dziedzic Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/mlme.c | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a68051c41ac3..7c30faff245f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1321,11 +1321,15 @@ struct ieee80211_channel_switch { * interface. This flag should be set during interface addition, * but may be set/cleared as late as authentication to an AP. It is * only valid for managed/station mode interfaces. + * @IEEE80211_VIF_GET_NOA_UPDATE: request to handle NOA attributes + * and send P2P_PS notification to the driver if NOA changed, even + * this is not pure P2P vif. */ enum ieee80211_vif_flags { IEEE80211_VIF_BEACON_FILTER = BIT(0), IEEE80211_VIF_SUPPORTS_CQM_RSSI = BIT(1), IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2), + IEEE80211_VIF_GET_NOA_UPDATE = BIT(3), }; /** diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b140cc6651f4..123b26d177e8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1930,7 +1930,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE; - if (sdata->vif.p2p) { + if (sdata->vif.p2p || + sdata->vif.driver_flags & IEEE80211_VIF_GET_NOA_UPDATE) { const struct cfg80211_bss_ies *ies; rcu_read_lock(); @@ -3458,7 +3459,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } } - if (sdata->vif.p2p) { + if (sdata->vif.p2p || + sdata->vif.driver_flags & IEEE80211_VIF_GET_NOA_UPDATE) { struct ieee80211_p2p_noa_attr noa = {}; int ret; -- cgit v1.2.3-71-gd317 From 91d3ab46730379e89e1e908c6f62fbcadb3d8f08 Mon Sep 17 00:00:00 2001 From: Vidyullatha Kanchanapally Date: Fri, 30 Oct 2015 19:14:49 +0530 Subject: cfg80211: Add support for aborting an ongoing scan Implement new functionality for aborting an ongoing scan. Add NL80211_CMD_ABORT_SCAN to the nl80211 interface. After aborting the scan, driver shall provide the scan status by calling cfg80211_scan_done(). Reviewed-by: Jouni Malinen Signed-off-by: Vidyullatha Kanchanapally Signed-off-by: Sunil Dutt [change command to take wdev instead of netdev so that it can be used on p2p-device scans] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/nl80211.c | 26 ++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 8 ++++++++ net/wireless/trace.h | 4 ++++ 5 files changed, 47 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e568872203a5..9bcaaf7cd15a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2321,6 +2321,8 @@ struct cfg80211_qos_map { * the driver, and will be valid until passed to cfg80211_scan_done(). * For scan results, call cfg80211_inform_bss(); you can call this outside * the scan/scan_done bracket too. + * @abort_scan: Tell the driver to abort an ongoing scan. The driver shall + * indicate the status of the scan through cfg80211_scan_done(). * * @auth: Request to authenticate with the specified peer * (invoked with the wireless_dev mutex held) @@ -2593,6 +2595,7 @@ struct cfg80211_ops { int (*scan)(struct wiphy *wiphy, struct cfg80211_scan_request *request); + void (*abort_scan)(struct wiphy *wiphy, struct wireless_dev *wdev); int (*auth)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_auth_request *req); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 07099cb14778..5b7b5ebe7ca8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -820,6 +820,10 @@ * as an event to indicate changes for devices with wiphy-specific regdom * management. * + * @NL80211_CMD_ABORT_SCAN: Stop an ongoing scan. Returns -ENOENT if a scan is + * not running. The driver indicates the status of the scan through + * cfg80211_scan_done(). + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1006,6 +1010,8 @@ enum nl80211_commands { NL80211_CMD_WIPHY_REG_CHANGE, + NL80211_CMD_ABORT_SCAN, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 41e57d0c4d43..67e7b531db79 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5997,6 +5997,24 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_abort_scan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (!rdev->ops->abort_scan) + return -EOPNOTSUPP; + + if (rdev->scan_msg) + return 0; + + if (!rdev->scan_req) + return -ENOENT; + + rdev_abort_scan(rdev, wdev); + return 0; +} + static int nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, struct cfg80211_sched_scan_request *request, @@ -10944,6 +10962,14 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_ABORT_SCAN, + .doit = nl80211_abort_scan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_GET_SCAN, .policy = nl80211_policy, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index b8cc594d409d..8ae0c04f9fc7 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -427,6 +427,14 @@ static inline int rdev_scan(struct cfg80211_registered_device *rdev, return ret; } +static inline void rdev_abort_scan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + trace_rdev_abort_scan(&rdev->wiphy, wdev); + rdev->ops->abort_scan(&rdev->wiphy, wdev); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_auth_request *req) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5b9139e53199..09b242b09bed 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2917,6 +2917,10 @@ TRACE_EVENT(rdev_set_coalesce, WIPHY_PR_ARG, __entry->n_rules) ); +DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3-71-gd317 From 4baee937b8d551c89f61542a575378e407b63415 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 7 Dec 2015 13:57:32 +0100 Subject: net: dsa: remove DSA link polling Since no more DSA driver uses the polling callback, and since the phylib handles the link detection, remove the link polling work and timer code. Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- include/net/dsa.h | 12 ------------ net/dsa/dsa.c | 43 ------------------------------------------- 2 files changed, 55 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 3f23dd9d6a69..26a0e86e611e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -116,13 +116,6 @@ struct dsa_switch_tree { s8 cpu_switch; s8 cpu_port; - /* - * Link state polling. - */ - int link_poll_needed; - struct work_struct link_poll_work; - struct timer_list link_poll_timer; - /* * Data for the individual switch chips. */ @@ -231,11 +224,6 @@ struct dsa_switch_driver { int (*phy_write)(struct dsa_switch *ds, int port, int regnum, u16 val); - /* - * Link state polling and IRQ handling. - */ - void (*poll_link)(struct dsa_switch *ds); - /* * Link state adjustment (called from libphy) */ diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index b7448c8490ac..0f41f71efac1 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -508,33 +508,6 @@ static int dsa_switch_resume(struct dsa_switch *ds) } #endif - -/* link polling *************************************************************/ -static void dsa_link_poll_work(struct work_struct *ugly) -{ - struct dsa_switch_tree *dst; - int i; - - dst = container_of(ugly, struct dsa_switch_tree, link_poll_work); - - for (i = 0; i < dst->pd->nr_chips; i++) { - struct dsa_switch *ds = dst->ds[i]; - - if (ds != NULL && ds->drv->poll_link != NULL) - ds->drv->poll_link(ds); - } - - mod_timer(&dst->link_poll_timer, round_jiffies(jiffies + HZ)); -} - -static void dsa_link_poll_timer(unsigned long _dst) -{ - struct dsa_switch_tree *dst = (void *)_dst; - - schedule_work(&dst->link_poll_work); -} - - /* platform driver init and cleanup *****************************************/ static int dev_is_class(struct device *dev, void *class) { @@ -877,8 +850,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, } dst->ds[i] = ds; - if (ds->drv->poll_link != NULL) - dst->link_poll_needed = 1; ++configured; } @@ -897,15 +868,6 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, wmb(); dev->dsa_ptr = (void *)dst; - if (dst->link_poll_needed) { - INIT_WORK(&dst->link_poll_work, dsa_link_poll_work); - init_timer(&dst->link_poll_timer); - dst->link_poll_timer.data = (unsigned long)dst; - dst->link_poll_timer.function = dsa_link_poll_timer; - dst->link_poll_timer.expires = round_jiffies(jiffies + HZ); - add_timer(&dst->link_poll_timer); - } - return 0; } @@ -972,11 +934,6 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - if (dst->link_poll_needed) - del_timer_sync(&dst->link_poll_timer); - - flush_work(&dst->link_poll_work); - for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; -- cgit v1.2.3-71-gd317 From 8ac2837c89c8c0fcad557e4380aeef80580390f9 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Dec 2015 10:51:12 +0800 Subject: Revert "Merge branch 'vsock-virtio'" This reverts commit 0d76d6e8b2507983a2cae4c09880798079007421 and merge commit c402293bd76fbc93e52ef8c0947ab81eea3ae019, reversing changes made to c89359a42e2a49656451569c382eed63e781153c. The virtio-vsock device specification is not finalized yet. Michael Tsirkin voiced concerned about merging this code when the hardware interface (and possibly the userspace interface) could still change. Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- drivers/vhost/Kconfig | 4 - drivers/vhost/Kconfig.vsock | 7 - drivers/vhost/Makefile | 4 - drivers/vhost/vsock.c | 630 --------------- drivers/vhost/vsock.h | 4 - include/linux/virtio_vsock.h | 209 ----- include/net/af_vsock.h | 2 - include/uapi/linux/virtio_ids.h | 1 - include/uapi/linux/virtio_vsock.h | 89 --- net/vmw_vsock/Kconfig | 18 - net/vmw_vsock/Makefile | 2 - net/vmw_vsock/af_vsock.c | 70 -- net/vmw_vsock/virtio_transport.c | 466 ----------- net/vmw_vsock/virtio_transport_common.c | 1272 ------------------------------- 14 files changed, 2778 deletions(-) delete mode 100644 drivers/vhost/Kconfig.vsock delete mode 100644 drivers/vhost/vsock.c delete mode 100644 drivers/vhost/vsock.h delete mode 100644 include/linux/virtio_vsock.h delete mode 100644 include/uapi/linux/virtio_vsock.h delete mode 100644 net/vmw_vsock/virtio_transport.c delete mode 100644 net/vmw_vsock/virtio_transport_common.c (limited to 'include/net') diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 81449bfc8d3b..533eaf04f12f 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -47,7 +47,3 @@ config VHOST_CROSS_ENDIAN_LEGACY adds some overhead, it is disabled by default. If unsure, say "N". - -if STAGING -source "drivers/vhost/Kconfig.vsock" -endif diff --git a/drivers/vhost/Kconfig.vsock b/drivers/vhost/Kconfig.vsock deleted file mode 100644 index 3491865d3eb9..000000000000 --- a/drivers/vhost/Kconfig.vsock +++ /dev/null @@ -1,7 +0,0 @@ -config VHOST_VSOCK - tristate "vhost virtio-vsock driver" - depends on VSOCKETS && EVENTFD - select VIRTIO_VSOCKETS_COMMON - default n - ---help--- - Say M here to enable the vhost-vsock for virtio-vsock guests diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile index 6b012b986b57..e0441c34db1c 100644 --- a/drivers/vhost/Makefile +++ b/drivers/vhost/Makefile @@ -4,9 +4,5 @@ vhost_net-y := net.o obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o vhost_scsi-y := scsi.o -obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o -vhost_vsock-y := vsock.o - obj-$(CONFIG_VHOST_RING) += vringh.o - obj-$(CONFIG_VHOST) += vhost.o diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c deleted file mode 100644 index 64bcb10bb901..000000000000 --- a/drivers/vhost/vsock.c +++ /dev/null @@ -1,630 +0,0 @@ -/* - * vhost transport for vsock - * - * Copyright (C) 2013-2015 Red Hat, Inc. - * Author: Asias He - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ -#include -#include -#include -#include -#include -#include - -#include -#include "vhost.h" -#include "vsock.h" - -#define VHOST_VSOCK_DEFAULT_HOST_CID 2 - -static int vhost_transport_socket_init(struct vsock_sock *vsk, - struct vsock_sock *psk); - -enum { - VHOST_VSOCK_FEATURES = VHOST_FEATURES, -}; - -/* Used to track all the vhost_vsock instances on the system. */ -static LIST_HEAD(vhost_vsock_list); -static DEFINE_MUTEX(vhost_vsock_mutex); - -struct vhost_vsock_virtqueue { - struct vhost_virtqueue vq; -}; - -struct vhost_vsock { - /* Vhost device */ - struct vhost_dev dev; - /* Vhost vsock virtqueue*/ - struct vhost_vsock_virtqueue vqs[VSOCK_VQ_MAX]; - /* Link to global vhost_vsock_list*/ - struct list_head list; - /* Head for pkt from host to guest */ - struct list_head send_pkt_list; - /* Work item to send pkt */ - struct vhost_work send_pkt_work; - /* Wait queue for send pkt */ - wait_queue_head_t queue_wait; - /* Used for global tx buf limitation */ - u32 total_tx_buf; - /* Guest contex id this vhost_vsock instance handles */ - u32 guest_cid; -}; - -static u32 vhost_transport_get_local_cid(void) -{ - return VHOST_VSOCK_DEFAULT_HOST_CID; -} - -static struct vhost_vsock *vhost_vsock_get(u32 guest_cid) -{ - struct vhost_vsock *vsock; - - mutex_lock(&vhost_vsock_mutex); - list_for_each_entry(vsock, &vhost_vsock_list, list) { - if (vsock->guest_cid == guest_cid) { - mutex_unlock(&vhost_vsock_mutex); - return vsock; - } - } - mutex_unlock(&vhost_vsock_mutex); - - return NULL; -} - -static void -vhost_transport_do_send_pkt(struct vhost_vsock *vsock, - struct vhost_virtqueue *vq) -{ - bool added = false; - - mutex_lock(&vq->mutex); - vhost_disable_notify(&vsock->dev, vq); - for (;;) { - struct virtio_vsock_pkt *pkt; - struct iov_iter iov_iter; - unsigned out, in; - struct sock *sk; - size_t nbytes; - size_t len; - int head; - - if (list_empty(&vsock->send_pkt_list)) { - vhost_enable_notify(&vsock->dev, vq); - break; - } - - head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - &out, &in, NULL, NULL); - pr_debug("%s: head = %d\n", __func__, head); - if (head < 0) - break; - - if (head == vq->num) { - if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { - vhost_disable_notify(&vsock->dev, vq); - continue; - } - break; - } - - pkt = list_first_entry(&vsock->send_pkt_list, - struct virtio_vsock_pkt, list); - list_del_init(&pkt->list); - - if (out) { - virtio_transport_free_pkt(pkt); - vq_err(vq, "Expected 0 output buffers, got %u\n", out); - break; - } - - len = iov_length(&vq->iov[out], in); - iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); - - nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); - if (nbytes != sizeof(pkt->hdr)) { - virtio_transport_free_pkt(pkt); - vq_err(vq, "Faulted on copying pkt hdr\n"); - break; - } - - nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); - if (nbytes != pkt->len) { - virtio_transport_free_pkt(pkt); - vq_err(vq, "Faulted on copying pkt buf\n"); - break; - } - - vhost_add_used(vq, head, pkt->len); /* TODO should this be sizeof(pkt->hdr) + pkt->len? */ - added = true; - - virtio_transport_dec_tx_pkt(pkt); - vsock->total_tx_buf -= pkt->len; - - sk = sk_vsock(pkt->trans->vsk); - /* Release refcnt taken in vhost_transport_send_pkt */ - sock_put(sk); - - virtio_transport_free_pkt(pkt); - } - if (added) - vhost_signal(&vsock->dev, vq); - mutex_unlock(&vq->mutex); - - if (added) - wake_up(&vsock->queue_wait); -} - -static void vhost_transport_send_pkt_work(struct vhost_work *work) -{ - struct vhost_virtqueue *vq; - struct vhost_vsock *vsock; - - vsock = container_of(work, struct vhost_vsock, send_pkt_work); - vq = &vsock->vqs[VSOCK_VQ_RX].vq; - - vhost_transport_do_send_pkt(vsock, vq); -} - -static int -vhost_transport_send_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info) -{ - u32 src_cid, src_port, dst_cid, dst_port; - struct virtio_transport *trans; - struct virtio_vsock_pkt *pkt; - struct vhost_virtqueue *vq; - struct vhost_vsock *vsock; - u32 pkt_len = info->pkt_len; - DEFINE_WAIT(wait); - - src_cid = vhost_transport_get_local_cid(); - src_port = vsk->local_addr.svm_port; - if (!info->remote_cid) { - dst_cid = vsk->remote_addr.svm_cid; - dst_port = vsk->remote_addr.svm_port; - } else { - dst_cid = info->remote_cid; - dst_port = info->remote_port; - } - - /* Find the vhost_vsock according to guest context id */ - vsock = vhost_vsock_get(dst_cid); - if (!vsock) - return -ENODEV; - - trans = vsk->trans; - vq = &vsock->vqs[VSOCK_VQ_RX].vq; - - /* we can send less than pkt_len bytes */ - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; - - /* virtio_transport_get_credit might return less than pkt_len credit */ - pkt_len = virtio_transport_get_credit(trans, pkt_len); - - /* Do not send zero length OP_RW pkt*/ - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) - return pkt_len; - - /* Respect global tx buf limitation */ - mutex_lock(&vq->mutex); - while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) { - prepare_to_wait_exclusive(&vsock->queue_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&vq->mutex); - schedule(); - mutex_lock(&vq->mutex); - finish_wait(&vsock->queue_wait, &wait); - } - vsock->total_tx_buf += pkt_len; - mutex_unlock(&vq->mutex); - - pkt = virtio_transport_alloc_pkt(vsk, info, pkt_len, - src_cid, src_port, - dst_cid, dst_port); - if (!pkt) { - mutex_lock(&vq->mutex); - vsock->total_tx_buf -= pkt_len; - mutex_unlock(&vq->mutex); - virtio_transport_put_credit(trans, pkt_len); - return -ENOMEM; - } - - pr_debug("%s:info->pkt_len= %d\n", __func__, pkt_len); - /* Released in vhost_transport_do_send_pkt */ - sock_hold(&trans->vsk->sk); - virtio_transport_inc_tx_pkt(pkt); - - /* Queue it up in vhost work */ - mutex_lock(&vq->mutex); - list_add_tail(&pkt->list, &vsock->send_pkt_list); - vhost_work_queue(&vsock->dev, &vsock->send_pkt_work); - mutex_unlock(&vq->mutex); - - return pkt_len; -} - -static struct virtio_transport_pkt_ops vhost_ops = { - .send_pkt = vhost_transport_send_pkt, -}; - -static struct virtio_vsock_pkt * -vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, - unsigned int out, unsigned int in) -{ - struct virtio_vsock_pkt *pkt; - struct iov_iter iov_iter; - size_t nbytes; - size_t len; - - if (in != 0) { - vq_err(vq, "Expected 0 input buffers, got %u\n", in); - return NULL; - } - - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) - return NULL; - - len = iov_length(vq->iov, out); - iov_iter_init(&iov_iter, WRITE, vq->iov, out, len); - - nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); - if (nbytes != sizeof(pkt->hdr)) { - vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n", - sizeof(pkt->hdr), nbytes); - kfree(pkt); - return NULL; - } - - if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_DGRAM) - pkt->len = le32_to_cpu(pkt->hdr.len) & 0XFFFF; - else if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) - pkt->len = le32_to_cpu(pkt->hdr.len); - - /* No payload */ - if (!pkt->len) - return pkt; - - /* The pkt is too big */ - if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { - kfree(pkt); - return NULL; - } - - pkt->buf = kmalloc(pkt->len, GFP_KERNEL); - if (!pkt->buf) { - kfree(pkt); - return NULL; - } - - nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter); - if (nbytes != pkt->len) { - vq_err(vq, "Expected %u byte payload, got %zu bytes\n", - pkt->len, nbytes); - virtio_transport_free_pkt(pkt); - return NULL; - } - - return pkt; -} - -static void vhost_vsock_handle_ctl_kick(struct vhost_work *work) -{ - struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, - poll.work); - struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, - dev); - - pr_debug("%s vq=%p, vsock=%p\n", __func__, vq, vsock); -} - -static void vhost_vsock_handle_tx_kick(struct vhost_work *work) -{ - struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, - poll.work); - struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, - dev); - struct virtio_vsock_pkt *pkt; - int head; - unsigned int out, in; - bool added = false; - u32 len; - - mutex_lock(&vq->mutex); - vhost_disable_notify(&vsock->dev, vq); - for (;;) { - head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), - &out, &in, NULL, NULL); - if (head < 0) - break; - - if (head == vq->num) { - if (unlikely(vhost_enable_notify(&vsock->dev, vq))) { - vhost_disable_notify(&vsock->dev, vq); - continue; - } - break; - } - - pkt = vhost_vsock_alloc_pkt(vq, out, in); - if (!pkt) { - vq_err(vq, "Faulted on pkt\n"); - continue; - } - - len = pkt->len; - - /* Only accept correctly addressed packets */ - if (le32_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid && - le32_to_cpu(pkt->hdr.dst_cid) == vhost_transport_get_local_cid()) - virtio_transport_recv_pkt(pkt); - else - virtio_transport_free_pkt(pkt); - - vhost_add_used(vq, head, len); - added = true; - } - if (added) - vhost_signal(&vsock->dev, vq); - mutex_unlock(&vq->mutex); -} - -static void vhost_vsock_handle_rx_kick(struct vhost_work *work) -{ - struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, - poll.work); - struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock, - dev); - - vhost_transport_do_send_pkt(vsock, vq); -} - -static int vhost_vsock_dev_open(struct inode *inode, struct file *file) -{ - struct vhost_virtqueue **vqs; - struct vhost_vsock *vsock; - int ret; - - vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); - if (!vsock) - return -ENOMEM; - - pr_debug("%s:vsock=%p\n", __func__, vsock); - - vqs = kmalloc(VSOCK_VQ_MAX * sizeof(*vqs), GFP_KERNEL); - if (!vqs) { - ret = -ENOMEM; - goto out; - } - - vqs[VSOCK_VQ_CTRL] = &vsock->vqs[VSOCK_VQ_CTRL].vq; - vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX].vq; - vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX].vq; - vsock->vqs[VSOCK_VQ_CTRL].vq.handle_kick = vhost_vsock_handle_ctl_kick; - vsock->vqs[VSOCK_VQ_TX].vq.handle_kick = vhost_vsock_handle_tx_kick; - vsock->vqs[VSOCK_VQ_RX].vq.handle_kick = vhost_vsock_handle_rx_kick; - - vhost_dev_init(&vsock->dev, vqs, VSOCK_VQ_MAX); - - file->private_data = vsock; - init_waitqueue_head(&vsock->queue_wait); - INIT_LIST_HEAD(&vsock->send_pkt_list); - vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work); - - mutex_lock(&vhost_vsock_mutex); - list_add_tail(&vsock->list, &vhost_vsock_list); - mutex_unlock(&vhost_vsock_mutex); - return 0; - -out: - kfree(vsock); - return ret; -} - -static void vhost_vsock_flush(struct vhost_vsock *vsock) -{ - int i; - - for (i = 0; i < VSOCK_VQ_MAX; i++) - vhost_poll_flush(&vsock->vqs[i].vq.poll); - vhost_work_flush(&vsock->dev, &vsock->send_pkt_work); -} - -static int vhost_vsock_dev_release(struct inode *inode, struct file *file) -{ - struct vhost_vsock *vsock = file->private_data; - - mutex_lock(&vhost_vsock_mutex); - list_del(&vsock->list); - mutex_unlock(&vhost_vsock_mutex); - - vhost_dev_stop(&vsock->dev); - vhost_vsock_flush(vsock); - vhost_dev_cleanup(&vsock->dev, false); - kfree(vsock->dev.vqs); - kfree(vsock); - return 0; -} - -static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u32 guest_cid) -{ - struct vhost_vsock *other; - - /* Refuse reserved CIDs */ - if (guest_cid <= VMADDR_CID_HOST) { - return -EINVAL; - } - - /* Refuse if CID is already in use */ - other = vhost_vsock_get(guest_cid); - if (other && other != vsock) { - return -EADDRINUSE; - } - - mutex_lock(&vhost_vsock_mutex); - vsock->guest_cid = guest_cid; - pr_debug("%s:guest_cid=%d\n", __func__, guest_cid); - mutex_unlock(&vhost_vsock_mutex); - - return 0; -} - -static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) -{ - struct vhost_virtqueue *vq; - int i; - - if (features & ~VHOST_VSOCK_FEATURES) - return -EOPNOTSUPP; - - mutex_lock(&vsock->dev.mutex); - if ((features & (1 << VHOST_F_LOG_ALL)) && - !vhost_log_access_ok(&vsock->dev)) { - mutex_unlock(&vsock->dev.mutex); - return -EFAULT; - } - - for (i = 0; i < VSOCK_VQ_MAX; i++) { - vq = &vsock->vqs[i].vq; - mutex_lock(&vq->mutex); - vq->acked_features = features; - mutex_unlock(&vq->mutex); - } - mutex_unlock(&vsock->dev.mutex); - return 0; -} - -static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, - unsigned long arg) -{ - struct vhost_vsock *vsock = f->private_data; - void __user *argp = (void __user *)arg; - u64 __user *featurep = argp; - u32 __user *cidp = argp; - u32 guest_cid; - u64 features; - int r; - - switch (ioctl) { - case VHOST_VSOCK_SET_GUEST_CID: - if (get_user(guest_cid, cidp)) - return -EFAULT; - return vhost_vsock_set_cid(vsock, guest_cid); - case VHOST_GET_FEATURES: - features = VHOST_VSOCK_FEATURES; - if (copy_to_user(featurep, &features, sizeof(features))) - return -EFAULT; - return 0; - case VHOST_SET_FEATURES: - if (copy_from_user(&features, featurep, sizeof(features))) - return -EFAULT; - return vhost_vsock_set_features(vsock, features); - default: - mutex_lock(&vsock->dev.mutex); - r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); - if (r == -ENOIOCTLCMD) - r = vhost_vring_ioctl(&vsock->dev, ioctl, argp); - else - vhost_vsock_flush(vsock); - mutex_unlock(&vsock->dev.mutex); - return r; - } -} - -static const struct file_operations vhost_vsock_fops = { - .owner = THIS_MODULE, - .open = vhost_vsock_dev_open, - .release = vhost_vsock_dev_release, - .llseek = noop_llseek, - .unlocked_ioctl = vhost_vsock_dev_ioctl, -}; - -static struct miscdevice vhost_vsock_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = "vhost-vsock", - .fops = &vhost_vsock_fops, -}; - -static int -vhost_transport_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk) -{ - struct virtio_transport *trans; - int ret; - - ret = virtio_transport_do_socket_init(vsk, psk); - if (ret) - return ret; - - trans = vsk->trans; - trans->ops = &vhost_ops; - - return ret; -} - -static struct vsock_transport vhost_transport = { - .get_local_cid = vhost_transport_get_local_cid, - - .init = vhost_transport_socket_init, - .destruct = virtio_transport_destruct, - .release = virtio_transport_release, - .connect = virtio_transport_connect, - .shutdown = virtio_transport_shutdown, - - .dgram_enqueue = virtio_transport_dgram_enqueue, - .dgram_dequeue = virtio_transport_dgram_dequeue, - .dgram_bind = virtio_transport_dgram_bind, - .dgram_allow = virtio_transport_dgram_allow, - - .stream_enqueue = virtio_transport_stream_enqueue, - .stream_dequeue = virtio_transport_stream_dequeue, - .stream_has_data = virtio_transport_stream_has_data, - .stream_has_space = virtio_transport_stream_has_space, - .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, - .stream_is_active = virtio_transport_stream_is_active, - .stream_allow = virtio_transport_stream_allow, - - .notify_poll_in = virtio_transport_notify_poll_in, - .notify_poll_out = virtio_transport_notify_poll_out, - .notify_recv_init = virtio_transport_notify_recv_init, - .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, - .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, - .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, - .notify_send_init = virtio_transport_notify_send_init, - .notify_send_pre_block = virtio_transport_notify_send_pre_block, - .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, - .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, - - .set_buffer_size = virtio_transport_set_buffer_size, - .set_min_buffer_size = virtio_transport_set_min_buffer_size, - .set_max_buffer_size = virtio_transport_set_max_buffer_size, - .get_buffer_size = virtio_transport_get_buffer_size, - .get_min_buffer_size = virtio_transport_get_min_buffer_size, - .get_max_buffer_size = virtio_transport_get_max_buffer_size, -}; - -static int __init vhost_vsock_init(void) -{ - int ret; - - ret = vsock_core_init(&vhost_transport); - if (ret < 0) - return ret; - return misc_register(&vhost_vsock_misc); -}; - -static void __exit vhost_vsock_exit(void) -{ - misc_deregister(&vhost_vsock_misc); - vsock_core_exit(); -}; - -module_init(vhost_vsock_init); -module_exit(vhost_vsock_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Asias He"); -MODULE_DESCRIPTION("vhost transport for vsock "); diff --git a/drivers/vhost/vsock.h b/drivers/vhost/vsock.h deleted file mode 100644 index 0ddb107b86ca..000000000000 --- a/drivers/vhost/vsock.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef VHOST_VSOCK_H -#define VHOST_VSOCK_H -#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u32) -#endif diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h deleted file mode 100644 index a5f3ecc038f7..000000000000 --- a/include/linux/virtio_vsock.h +++ /dev/null @@ -1,209 +0,0 @@ -/* - * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers: - * - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Copyright (C) Red Hat, Inc., 2013-2015 - * Copyright (C) Asias He , 2013 - * Copyright (C) Stefan Hajnoczi , 2015 - */ - -#ifndef _LINUX_VIRTIO_VSOCK_H -#define _LINUX_VIRTIO_VSOCK_H - -#include -#include -#include - -#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128 -#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256) -#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256) -#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) -#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL -#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) -#define VIRTIO_VSOCK_MAX_TX_BUF_SIZE (1024 * 1024 * 16) -#define VIRTIO_VSOCK_MAX_DGRAM_SIZE (1024 * 64) - -struct vsock_transport_recv_notify_data; -struct vsock_transport_send_notify_data; -struct sockaddr_vm; -struct vsock_sock; - -enum { - VSOCK_VQ_CTRL = 0, - VSOCK_VQ_RX = 1, /* for host to guest data */ - VSOCK_VQ_TX = 2, /* for guest to host data */ - VSOCK_VQ_MAX = 3, -}; - -/* virtio transport socket state */ -struct virtio_transport { - struct virtio_transport_pkt_ops *ops; - struct vsock_sock *vsk; - - u32 buf_size; - u32 buf_size_min; - u32 buf_size_max; - - struct mutex tx_lock; - struct mutex rx_lock; - - struct list_head rx_queue; - u32 rx_bytes; - - /* Protected by trans->tx_lock */ - u32 tx_cnt; - u32 buf_alloc; - u32 peer_fwd_cnt; - u32 peer_buf_alloc; - /* Protected by trans->rx_lock */ - u32 fwd_cnt; - - /* Protected by sk_lock */ - u16 dgram_id; - struct list_head incomplete_dgrams; /* dgram fragments */ -}; - -struct virtio_vsock_pkt { - struct virtio_vsock_hdr hdr; - struct virtio_transport *trans; - struct work_struct work; - struct list_head list; - void *buf; - u32 len; - u32 off; -}; - -struct virtio_vsock_pkt_info { - u32 remote_cid, remote_port; - struct msghdr *msg; - u32 pkt_len; - u16 type; - u16 op; - u32 flags; - u16 dgram_id; - u16 dgram_len; -}; - -struct virtio_transport_pkt_ops { - int (*send_pkt)(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info); -}; - -void virtio_vsock_dumppkt(const char *func, - const struct virtio_vsock_pkt *pkt); - -struct sock * -virtio_transport_get_pending(struct sock *listener, - struct virtio_vsock_pkt *pkt); -struct virtio_vsock_pkt * -virtio_transport_alloc_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info, - size_t len, - u32 src_cid, - u32 src_port, - u32 dst_cid, - u32 dst_port); -ssize_t -virtio_transport_stream_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, - int type); -int -virtio_transport_dgram_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, int flags); - -s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); -s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); - -int virtio_transport_do_socket_init(struct vsock_sock *vsk, - struct vsock_sock *psk); -u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk); -u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk); -u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk); -void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val); -void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val); -void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val); -int -virtio_transport_notify_poll_in(struct vsock_sock *vsk, - size_t target, - bool *data_ready_now); -int -virtio_transport_notify_poll_out(struct vsock_sock *vsk, - size_t target, - bool *space_available_now); - -int virtio_transport_notify_recv_init(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data); -int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data); -int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data); -int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, - size_t target, ssize_t copied, bool data_read, - struct vsock_transport_recv_notify_data *data); -int virtio_transport_notify_send_init(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data); -int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data); -int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data); -int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, - ssize_t written, struct vsock_transport_send_notify_data *data); - -u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk); -bool virtio_transport_stream_is_active(struct vsock_sock *vsk); -bool virtio_transport_stream_allow(u32 cid, u32 port); -int virtio_transport_dgram_bind(struct vsock_sock *vsk, - struct sockaddr_vm *addr); -bool virtio_transport_dgram_allow(u32 cid, u32 port); - -int virtio_transport_connect(struct vsock_sock *vsk); - -int virtio_transport_shutdown(struct vsock_sock *vsk, int mode); - -void virtio_transport_release(struct vsock_sock *vsk); - -ssize_t -virtio_transport_stream_enqueue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len); -int -virtio_transport_dgram_enqueue(struct vsock_sock *vsk, - struct sockaddr_vm *remote_addr, - struct msghdr *msg, - size_t len); - -void virtio_transport_destruct(struct vsock_sock *vsk); - -void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); -void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); -void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt); -void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt); -u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 wanted); -void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit); -#endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index a0c8fa2ababf..e9eb2d6791b3 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -175,10 +175,8 @@ void vsock_insert_connected(struct vsock_sock *vsk); void vsock_remove_bound(struct vsock_sock *vsk); void vsock_remove_connected(struct vsock_sock *vsk); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); -struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); -int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr); #endif /* __AF_VSOCK_H__ */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 16dcf5d06cd7..77925f587b15 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -39,7 +39,6 @@ #define VIRTIO_ID_9P 9 /* 9p virtio console */ #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #define VIRTIO_ID_CAIF 12 /* Virtio caif */ -#define VIRTIO_ID_VSOCK 13 /* virtio vsock transport */ #define VIRTIO_ID_GPU 16 /* virtio GPU */ #define VIRTIO_ID_INPUT 18 /* virtio input */ diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h deleted file mode 100644 index 8cf9b5682628..000000000000 --- a/include/uapi/linux/virtio_vsock.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers: - * - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * Copyright (C) Red Hat, Inc., 2013-2015 - * Copyright (C) Asias He , 2013 - * Copyright (C) Stefan Hajnoczi , 2015 - */ - -#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H -#define _UAPI_LINUX_VIRTIO_VOSCK_H - -#include -#include -#include - -struct virtio_vsock_config { - __le32 guest_cid; - __le32 max_virtqueue_pairs; -}; - -struct virtio_vsock_hdr { - __le32 src_cid; - __le32 src_port; - __le32 dst_cid; - __le32 dst_port; - __le32 len; - __le16 type; /* enum virtio_vsock_type */ - __le16 op; /* enum virtio_vsock_op */ - __le32 flags; - __le32 buf_alloc; - __le32 fwd_cnt; -}; - -enum virtio_vsock_type { - VIRTIO_VSOCK_TYPE_STREAM = 1, - VIRTIO_VSOCK_TYPE_DGRAM = 2, -}; - -enum virtio_vsock_op { - VIRTIO_VSOCK_OP_INVALID = 0, - - /* Connect operations */ - VIRTIO_VSOCK_OP_REQUEST = 1, - VIRTIO_VSOCK_OP_RESPONSE = 2, - VIRTIO_VSOCK_OP_ACK = 3, - VIRTIO_VSOCK_OP_RST = 4, - VIRTIO_VSOCK_OP_SHUTDOWN = 5, - - /* To send payload */ - VIRTIO_VSOCK_OP_RW = 6, - - /* Tell the peer our credit info */ - VIRTIO_VSOCK_OP_CREDIT_UPDATE = 7, - /* Request the peer to send the credit info to us */ - VIRTIO_VSOCK_OP_CREDIT_REQUEST = 8, -}; - -/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ -enum virtio_vsock_shutdown { - VIRTIO_VSOCK_SHUTDOWN_RCV = 1, - VIRTIO_VSOCK_SHUTDOWN_SEND = 2, -}; - -#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index 74e0bc887a33..14810abedc2e 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -26,21 +26,3 @@ config VMWARE_VMCI_VSOCKETS To compile this driver as a module, choose M here: the module will be called vmw_vsock_vmci_transport. If unsure, say N. - -config VIRTIO_VSOCKETS - tristate "virtio transport for Virtual Sockets" - depends on VSOCKETS && VIRTIO - select VIRTIO_VSOCKETS_COMMON - help - This module implements a virtio transport for Virtual Sockets. - - Enable this transport if your Virtual Machine runs on Qemu/KVM. - - To compile this driver as a module, choose M here: the module - will be called virtio_vsock_transport. If unsure, say N. - -config VIRTIO_VSOCKETS_COMMON - tristate - ---help--- - This option is selected by any driver which needs to access - the virtio_vsock. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index cf4c29439081..2ce52d70f224 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -1,7 +1,5 @@ obj-$(CONFIG_VSOCKETS) += vsock.o obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o -obj-$(CONFIG_VIRTIO_VSOCKETS) += virtio_transport.o -obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += virtio_transport_common.o vsock-y += af_vsock.o vsock_addr.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 77247a2b670b..7fd1220fbfa0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -223,17 +223,6 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr) return NULL; } -static struct sock *__vsock_find_unbound_socket(struct sockaddr_vm *addr) -{ - struct vsock_sock *vsk; - - list_for_each_entry(vsk, vsock_unbound_sockets, bound_table) - if (addr->svm_port == vsk->local_addr.svm_port) - return sk_vsock(vsk); - - return NULL; -} - static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -309,21 +298,6 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr) } EXPORT_SYMBOL_GPL(vsock_find_bound_socket); -struct sock *vsock_find_unbound_socket(struct sockaddr_vm *addr) -{ - struct sock *sk; - - spin_lock_bh(&vsock_table_lock); - sk = __vsock_find_unbound_socket(addr); - if (sk) - sock_hold(sk); - - spin_unlock_bh(&vsock_table_lock); - - return sk; -} -EXPORT_SYMBOL_GPL(vsock_find_unbound_socket); - struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sockaddr_vm *dst) { @@ -558,50 +532,6 @@ static int __vsock_bind_stream(struct vsock_sock *vsk, return 0; } -int vsock_bind_dgram_generic(struct vsock_sock *vsk, struct sockaddr_vm *addr) -{ - static u32 port = LAST_RESERVED_PORT + 1; - struct sockaddr_vm new_addr; - - vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); - - if (addr->svm_port == VMADDR_PORT_ANY) { - bool found = false; - unsigned int i; - - for (i = 0; i < MAX_PORT_RETRIES; i++) { - if (port <= LAST_RESERVED_PORT) - port = LAST_RESERVED_PORT + 1; - - new_addr.svm_port = port++; - - if (!__vsock_find_unbound_socket(&new_addr)) { - found = true; - break; - } - } - - if (!found) - return -EADDRNOTAVAIL; - } else { - /* If port is in reserved range, ensure caller - * has necessary privileges. - */ - if (addr->svm_port <= LAST_RESERVED_PORT && - !capable(CAP_NET_BIND_SERVICE)) { - return -EACCES; - } - - if (__vsock_find_unbound_socket(&new_addr)) - return -EADDRINUSE; - } - - vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port); - - return 0; -} -EXPORT_SYMBOL_GPL(vsock_bind_dgram_generic); - static int __vsock_bind_dgram(struct vsock_sock *vsk, struct sockaddr_vm *addr) { diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c deleted file mode 100644 index df65dca55fa1..000000000000 --- a/net/vmw_vsock/virtio_transport.c +++ /dev/null @@ -1,466 +0,0 @@ -/* - * virtio transport for vsock - * - * Copyright (C) 2013-2015 Red Hat, Inc. - * Author: Asias He - * Stefan Hajnoczi - * - * Some of the code is take from Gerd Hoffmann 's - * early virtio-vsock proof-of-concept bits. - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct workqueue_struct *virtio_vsock_workqueue; -static struct virtio_vsock *the_virtio_vsock; -static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ -static void virtio_vsock_rx_fill(struct virtio_vsock *vsock); - -struct virtio_vsock { - /* Virtio device */ - struct virtio_device *vdev; - /* Virtio virtqueue */ - struct virtqueue *vqs[VSOCK_VQ_MAX]; - /* Wait queue for send pkt */ - wait_queue_head_t queue_wait; - /* Work item to send pkt */ - struct work_struct tx_work; - /* Work item to recv pkt */ - struct work_struct rx_work; - /* Mutex to protect send pkt*/ - struct mutex tx_lock; - /* Mutex to protect recv pkt*/ - struct mutex rx_lock; - /* Number of recv buffers */ - int rx_buf_nr; - /* Number of max recv buffers */ - int rx_buf_max_nr; - /* Used for global tx buf limitation */ - u32 total_tx_buf; - /* Guest context id, just like guest ip address */ - u32 guest_cid; -}; - -static struct virtio_vsock *virtio_vsock_get(void) -{ - return the_virtio_vsock; -} - -static u32 virtio_transport_get_local_cid(void) -{ - struct virtio_vsock *vsock = virtio_vsock_get(); - - return vsock->guest_cid; -} - -static int -virtio_transport_send_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info) -{ - u32 src_cid, src_port, dst_cid, dst_port; - int ret, in_sg = 0, out_sg = 0; - struct virtio_transport *trans; - struct virtio_vsock_pkt *pkt; - struct virtio_vsock *vsock; - struct scatterlist hdr, buf, *sgs[2]; - struct virtqueue *vq; - u32 pkt_len = info->pkt_len; - DEFINE_WAIT(wait); - - vsock = virtio_vsock_get(); - if (!vsock) - return -ENODEV; - - src_cid = virtio_transport_get_local_cid(); - src_port = vsk->local_addr.svm_port; - if (!info->remote_cid) { - dst_cid = vsk->remote_addr.svm_cid; - dst_port = vsk->remote_addr.svm_port; - } else { - dst_cid = info->remote_cid; - dst_port = info->remote_port; - } - - trans = vsk->trans; - vq = vsock->vqs[VSOCK_VQ_TX]; - - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; - pkt_len = virtio_transport_get_credit(trans, pkt_len); - /* Do not send zero length OP_RW pkt*/ - if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) - return pkt_len; - - /* Respect global tx buf limitation */ - mutex_lock(&vsock->tx_lock); - while (pkt_len + vsock->total_tx_buf > VIRTIO_VSOCK_MAX_TX_BUF_SIZE) { - prepare_to_wait_exclusive(&vsock->queue_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&vsock->tx_lock); - schedule(); - mutex_lock(&vsock->tx_lock); - finish_wait(&vsock->queue_wait, &wait); - } - vsock->total_tx_buf += pkt_len; - mutex_unlock(&vsock->tx_lock); - - pkt = virtio_transport_alloc_pkt(vsk, info, pkt_len, - src_cid, src_port, - dst_cid, dst_port); - if (!pkt) { - mutex_lock(&vsock->tx_lock); - vsock->total_tx_buf -= pkt_len; - mutex_unlock(&vsock->tx_lock); - virtio_transport_put_credit(trans, pkt_len); - return -ENOMEM; - } - - pr_debug("%s:info->pkt_len= %d\n", __func__, info->pkt_len); - - /* Will be released in virtio_transport_send_pkt_work */ - sock_hold(&trans->vsk->sk); - virtio_transport_inc_tx_pkt(pkt); - - /* Put pkt in the virtqueue */ - sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); - sgs[out_sg++] = &hdr; - if (info->msg && info->pkt_len > 0) { - sg_init_one(&buf, pkt->buf, pkt->len); - sgs[out_sg++] = &buf; - } - - mutex_lock(&vsock->tx_lock); - while ((ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, - GFP_KERNEL)) < 0) { - prepare_to_wait_exclusive(&vsock->queue_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&vsock->tx_lock); - schedule(); - mutex_lock(&vsock->tx_lock); - finish_wait(&vsock->queue_wait, &wait); - } - virtqueue_kick(vq); - mutex_unlock(&vsock->tx_lock); - - return pkt_len; -} - -static struct virtio_transport_pkt_ops virtio_ops = { - .send_pkt = virtio_transport_send_pkt, -}; - -static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) -{ - int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; - struct virtio_vsock_pkt *pkt; - struct scatterlist hdr, buf, *sgs[2]; - struct virtqueue *vq; - int ret; - - vq = vsock->vqs[VSOCK_VQ_RX]; - - do { - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) { - pr_debug("%s: fail to allocate pkt\n", __func__); - goto out; - } - - /* TODO: use mergeable rx buffer */ - pkt->buf = kmalloc(buf_len, GFP_KERNEL); - if (!pkt->buf) { - pr_debug("%s: fail to allocate pkt->buf\n", __func__); - goto err; - } - - sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); - sgs[0] = &hdr; - - sg_init_one(&buf, pkt->buf, buf_len); - sgs[1] = &buf; - ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL); - if (ret) - goto err; - vsock->rx_buf_nr++; - } while (vq->num_free); - if (vsock->rx_buf_nr > vsock->rx_buf_max_nr) - vsock->rx_buf_max_nr = vsock->rx_buf_nr; -out: - virtqueue_kick(vq); - return; -err: - virtqueue_kick(vq); - virtio_transport_free_pkt(pkt); - return; -} - -static void virtio_transport_send_pkt_work(struct work_struct *work) -{ - struct virtio_vsock *vsock = - container_of(work, struct virtio_vsock, tx_work); - struct virtio_vsock_pkt *pkt; - bool added = false; - struct virtqueue *vq; - unsigned int len; - struct sock *sk; - - vq = vsock->vqs[VSOCK_VQ_TX]; - mutex_lock(&vsock->tx_lock); - do { - virtqueue_disable_cb(vq); - while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { - sk = &pkt->trans->vsk->sk; - virtio_transport_dec_tx_pkt(pkt); - /* Release refcnt taken in virtio_transport_send_pkt */ - sock_put(sk); - vsock->total_tx_buf -= pkt->len; - virtio_transport_free_pkt(pkt); - added = true; - } - } while (!virtqueue_enable_cb(vq)); - mutex_unlock(&vsock->tx_lock); - - if (added) - wake_up(&vsock->queue_wait); -} - -static void virtio_transport_recv_pkt_work(struct work_struct *work) -{ - struct virtio_vsock *vsock = - container_of(work, struct virtio_vsock, rx_work); - struct virtio_vsock_pkt *pkt; - struct virtqueue *vq; - unsigned int len; - - vq = vsock->vqs[VSOCK_VQ_RX]; - mutex_lock(&vsock->rx_lock); - do { - virtqueue_disable_cb(vq); - while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) { - pkt->len = len; - virtio_transport_recv_pkt(pkt); - vsock->rx_buf_nr--; - } - } while (!virtqueue_enable_cb(vq)); - - if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) - virtio_vsock_rx_fill(vsock); - mutex_unlock(&vsock->rx_lock); -} - -static void virtio_vsock_ctrl_done(struct virtqueue *vq) -{ -} - -static void virtio_vsock_tx_done(struct virtqueue *vq) -{ - struct virtio_vsock *vsock = vq->vdev->priv; - - if (!vsock) - return; - queue_work(virtio_vsock_workqueue, &vsock->tx_work); -} - -static void virtio_vsock_rx_done(struct virtqueue *vq) -{ - struct virtio_vsock *vsock = vq->vdev->priv; - - if (!vsock) - return; - queue_work(virtio_vsock_workqueue, &vsock->rx_work); -} - -static int -virtio_transport_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk) -{ - struct virtio_transport *trans; - int ret; - - ret = virtio_transport_do_socket_init(vsk, psk); - if (ret) - return ret; - - trans = vsk->trans; - trans->ops = &virtio_ops; - return ret; -} - -static struct vsock_transport virtio_transport = { - .get_local_cid = virtio_transport_get_local_cid, - - .init = virtio_transport_socket_init, - .destruct = virtio_transport_destruct, - .release = virtio_transport_release, - .connect = virtio_transport_connect, - .shutdown = virtio_transport_shutdown, - - .dgram_bind = virtio_transport_dgram_bind, - .dgram_dequeue = virtio_transport_dgram_dequeue, - .dgram_enqueue = virtio_transport_dgram_enqueue, - .dgram_allow = virtio_transport_dgram_allow, - - .stream_dequeue = virtio_transport_stream_dequeue, - .stream_enqueue = virtio_transport_stream_enqueue, - .stream_has_data = virtio_transport_stream_has_data, - .stream_has_space = virtio_transport_stream_has_space, - .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, - .stream_is_active = virtio_transport_stream_is_active, - .stream_allow = virtio_transport_stream_allow, - - .notify_poll_in = virtio_transport_notify_poll_in, - .notify_poll_out = virtio_transport_notify_poll_out, - .notify_recv_init = virtio_transport_notify_recv_init, - .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, - .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, - .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, - .notify_send_init = virtio_transport_notify_send_init, - .notify_send_pre_block = virtio_transport_notify_send_pre_block, - .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, - .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, - - .set_buffer_size = virtio_transport_set_buffer_size, - .set_min_buffer_size = virtio_transport_set_min_buffer_size, - .set_max_buffer_size = virtio_transport_set_max_buffer_size, - .get_buffer_size = virtio_transport_get_buffer_size, - .get_min_buffer_size = virtio_transport_get_min_buffer_size, - .get_max_buffer_size = virtio_transport_get_max_buffer_size, -}; - -static int virtio_vsock_probe(struct virtio_device *vdev) -{ - vq_callback_t *callbacks[] = { - virtio_vsock_ctrl_done, - virtio_vsock_rx_done, - virtio_vsock_tx_done, - }; - const char *names[] = { - "ctrl", - "rx", - "tx", - }; - struct virtio_vsock *vsock = NULL; - u32 guest_cid; - int ret; - - ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); - if (ret) - return ret; - - /* Only one virtio-vsock device per guest is supported */ - if (the_virtio_vsock) { - ret = -EBUSY; - goto out; - } - - vsock = kzalloc(sizeof(*vsock), GFP_KERNEL); - if (!vsock) { - ret = -ENOMEM; - goto out; - } - - vsock->vdev = vdev; - - ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX, - vsock->vqs, callbacks, names); - if (ret < 0) - goto out; - - vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid), - &guest_cid, sizeof(guest_cid)); - vsock->guest_cid = le32_to_cpu(guest_cid); - pr_debug("%s:guest_cid=%d\n", __func__, vsock->guest_cid); - - ret = vsock_core_init(&virtio_transport); - if (ret < 0) - goto out_vqs; - - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - - vdev->priv = the_virtio_vsock = vsock; - init_waitqueue_head(&vsock->queue_wait); - mutex_init(&vsock->tx_lock); - mutex_init(&vsock->rx_lock); - INIT_WORK(&vsock->rx_work, virtio_transport_recv_pkt_work); - INIT_WORK(&vsock->tx_work, virtio_transport_send_pkt_work); - - mutex_lock(&vsock->rx_lock); - virtio_vsock_rx_fill(vsock); - mutex_unlock(&vsock->rx_lock); - - mutex_unlock(&the_virtio_vsock_mutex); - return 0; - -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); -out: - kfree(vsock); - mutex_unlock(&the_virtio_vsock_mutex); - return ret; -} - -static void virtio_vsock_remove(struct virtio_device *vdev) -{ - struct virtio_vsock *vsock = vdev->priv; - - mutex_lock(&the_virtio_vsock_mutex); - the_virtio_vsock = NULL; - vsock_core_exit(); - mutex_unlock(&the_virtio_vsock_mutex); - - kfree(vsock); -} - -static struct virtio_device_id id_table[] = { - { VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID }, - { 0 }, -}; - -static unsigned int features[] = { -}; - -static struct virtio_driver virtio_vsock_driver = { - .feature_table = features, - .feature_table_size = ARRAY_SIZE(features), - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = virtio_vsock_probe, - .remove = virtio_vsock_remove, -}; - -static int __init virtio_vsock_init(void) -{ - int ret; - - virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); - if (!virtio_vsock_workqueue) - return -ENOMEM; - ret = register_virtio_driver(&virtio_vsock_driver); - if (ret) - destroy_workqueue(virtio_vsock_workqueue); - return ret; -} - -static void __exit virtio_vsock_exit(void) -{ - unregister_virtio_driver(&virtio_vsock_driver); - destroy_workqueue(virtio_vsock_workqueue); -} - -module_init(virtio_vsock_init); -module_exit(virtio_vsock_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Asias He"); -MODULE_DESCRIPTION("virtio transport for vsock"); -MODULE_DEVICE_TABLE(virtio, id_table); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c deleted file mode 100644 index 28f790da6f15..000000000000 --- a/net/vmw_vsock/virtio_transport_common.c +++ /dev/null @@ -1,1272 +0,0 @@ -/* - * common code for virtio vsock - * - * Copyright (C) 2013-2015 Red Hat, Inc. - * Author: Asias He - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define COOKIEBITS 24 -#define COOKIEMASK (((u32)1 << COOKIEBITS) - 1) -#define VSOCK_TIMEOUT_INIT 4 - -#define SHA_MESSAGE_WORDS 16 -#define SHA_VSOCK_WORDS 5 - -static u32 vsockcookie_secret[2][SHA_MESSAGE_WORDS - SHA_VSOCK_WORDS + - SHA_DIGEST_WORDS]; - -static DEFINE_PER_CPU(__u32[SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS + - SHA_WORKSPACE_WORDS], vsock_cookie_scratch); - -static u32 cookie_hash(u32 saddr, u32 daddr, u16 sport, u16 dport, - u32 count, int c) -{ - __u32 *tmp = this_cpu_ptr(vsock_cookie_scratch); - - memcpy(tmp + SHA_VSOCK_WORDS, vsockcookie_secret[c], - sizeof(vsockcookie_secret[c])); - tmp[0] = saddr; - tmp[1] = daddr; - tmp[2] = sport; - tmp[3] = dport; - tmp[4] = count; - sha_transform(tmp + SHA_MESSAGE_WORDS, (__u8 *)tmp, - tmp + SHA_MESSAGE_WORDS + SHA_DIGEST_WORDS); - - return tmp[17]; -} - -static u32 -virtio_vsock_secure_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, - u32 count) -{ - u32 h1, h2; - - h1 = cookie_hash(saddr, daddr, sport, dport, 0, 0); - h2 = cookie_hash(saddr, daddr, sport, dport, count, 1); - - return h1 + (count << COOKIEBITS) + (h2 & COOKIEMASK); -} - -static u32 -virtio_vsock_check_cookie(u32 saddr, u32 daddr, u32 sport, u32 dport, - u32 count, u32 cookie, u32 maxdiff) -{ - u32 diff; - u32 ret; - - cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0); - - diff = (count - (cookie >> COOKIEBITS)) & ((u32)-1 >> COOKIEBITS); - pr_debug("%s: diff=%x\n", __func__, diff); - if (diff >= maxdiff) - return (u32)-1; - - ret = (cookie - - cookie_hash(saddr, daddr, sport, dport, count - diff, 1)) - & COOKIEMASK; - pr_debug("%s: ret=%x\n", __func__, diff); - - return ret; -} - -void virtio_vsock_dumppkt(const char *func, const struct virtio_vsock_pkt *pkt) -{ - pr_debug("%s: pkt=%p, op=%d, len=%d, %d:%d---%d:%d, len=%d\n", - func, pkt, - le16_to_cpu(pkt->hdr.op), - le32_to_cpu(pkt->hdr.len), - le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.dst_port), - pkt->len); -} -EXPORT_SYMBOL_GPL(virtio_vsock_dumppkt); - -struct virtio_vsock_pkt * -virtio_transport_alloc_pkt(struct vsock_sock *vsk, - struct virtio_vsock_pkt_info *info, - size_t len, - u32 src_cid, - u32 src_port, - u32 dst_cid, - u32 dst_port) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt *pkt; - int err; - - BUG_ON(!trans); - - pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) - return NULL; - - pkt->hdr.type = cpu_to_le16(info->type); - pkt->hdr.op = cpu_to_le16(info->op); - pkt->hdr.src_cid = cpu_to_le32(src_cid); - pkt->hdr.src_port = cpu_to_le32(src_port); - pkt->hdr.dst_cid = cpu_to_le32(dst_cid); - pkt->hdr.dst_port = cpu_to_le32(dst_port); - pkt->hdr.flags = cpu_to_le32(info->flags); - pkt->len = len; - pkt->trans = trans; - if (info->type == VIRTIO_VSOCK_TYPE_DGRAM) - pkt->hdr.len = cpu_to_le32(len + (info->dgram_len << 16)); - else if (info->type == VIRTIO_VSOCK_TYPE_STREAM) - pkt->hdr.len = cpu_to_le32(len); - - if (info->msg && len > 0) { - pkt->buf = kmalloc(len, GFP_KERNEL); - if (!pkt->buf) - goto out_pkt; - err = memcpy_from_msg(pkt->buf, info->msg, len); - if (err) - goto out; - } - - return pkt; - -out: - kfree(pkt->buf); -out_pkt: - kfree(pkt); - return NULL; -} -EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt); - -struct sock * -virtio_transport_get_pending(struct sock *listener, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vlistener; - struct vsock_sock *vpending; - struct sockaddr_vm src; - struct sockaddr_vm dst; - struct sock *pending; - - vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); - vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); - - vlistener = vsock_sk(listener); - list_for_each_entry(vpending, &vlistener->pending_links, - pending_links) { - if (vsock_addr_equals_addr(&src, &vpending->remote_addr) && - vsock_addr_equals_addr(&dst, &vpending->local_addr)) { - pending = sk_vsock(vpending); - sock_hold(pending); - return pending; - } - } - - return NULL; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_pending); - -static void virtio_transport_inc_rx_pkt(struct virtio_vsock_pkt *pkt) -{ - pkt->trans->rx_bytes += pkt->len; -} - -static void virtio_transport_dec_rx_pkt(struct virtio_vsock_pkt *pkt) -{ - pkt->trans->rx_bytes -= pkt->len; - pkt->trans->fwd_cnt += pkt->len; -} - -void virtio_transport_inc_tx_pkt(struct virtio_vsock_pkt *pkt) -{ - mutex_lock(&pkt->trans->tx_lock); - pkt->hdr.fwd_cnt = cpu_to_le32(pkt->trans->fwd_cnt); - pkt->hdr.buf_alloc = cpu_to_le32(pkt->trans->buf_alloc); - mutex_unlock(&pkt->trans->tx_lock); -} -EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); - -void virtio_transport_dec_tx_pkt(struct virtio_vsock_pkt *pkt) -{ -} -EXPORT_SYMBOL_GPL(virtio_transport_dec_tx_pkt); - -u32 virtio_transport_get_credit(struct virtio_transport *trans, u32 credit) -{ - u32 ret; - - mutex_lock(&trans->tx_lock); - ret = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); - if (ret > credit) - ret = credit; - trans->tx_cnt += ret; - mutex_unlock(&trans->tx_lock); - - pr_debug("%s: ret=%d, buf_alloc=%d, peer_buf_alloc=%d," - "tx_cnt=%d, fwd_cnt=%d, peer_fwd_cnt=%d\n", __func__, - ret, trans->buf_alloc, trans->peer_buf_alloc, - trans->tx_cnt, trans->fwd_cnt, trans->peer_fwd_cnt); - - return ret; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_credit); - -void virtio_transport_put_credit(struct virtio_transport *trans, u32 credit) -{ - mutex_lock(&trans->tx_lock); - trans->tx_cnt -= credit; - mutex_unlock(&trans->tx_lock); -} -EXPORT_SYMBOL_GPL(virtio_transport_put_credit); - -static int virtio_transport_send_credit_update(struct vsock_sock *vsk, int type, struct virtio_vsock_hdr *hdr) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, - .type = type, - }; - - if (hdr && type == VIRTIO_VSOCK_TYPE_DGRAM) { - info.remote_cid = le32_to_cpu(hdr->src_cid); - info.remote_port = le32_to_cpu(hdr->src_port); - } - - pr_debug("%s: sk=%p send_credit_update\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static int virtio_transport_send_credit_request(struct vsock_sock *vsk, int type) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_CREDIT_REQUEST, - .type = type, - }; - - pr_debug("%s: sk=%p send_credit_request\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static ssize_t -virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt *pkt; - size_t bytes, total = 0; - int err = -EFAULT; - - mutex_lock(&trans->rx_lock); - while (total < len && trans->rx_bytes > 0 && - !list_empty(&trans->rx_queue)) { - pkt = list_first_entry(&trans->rx_queue, - struct virtio_vsock_pkt, list); - - bytes = len - total; - if (bytes > pkt->len - pkt->off) - bytes = pkt->len - pkt->off; - - err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes); - if (err) - goto out; - total += bytes; - pkt->off += bytes; - if (pkt->off == pkt->len) { - virtio_transport_dec_rx_pkt(pkt); - list_del(&pkt->list); - virtio_transport_free_pkt(pkt); - } - } - mutex_unlock(&trans->rx_lock); - - /* Send a credit pkt to peer */ - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, - NULL); - - return total; - -out: - mutex_unlock(&trans->rx_lock); - if (total) - err = total; - return err; -} - -ssize_t -virtio_transport_stream_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, int flags) -{ - if (flags & MSG_PEEK) - return -EOPNOTSUPP; - - return virtio_transport_stream_do_dequeue(vsk, msg, len); -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue); - -struct dgram_skb { - struct list_head list; - struct sk_buff *skb; - u16 id; -}; - -static struct dgram_skb *dgram_id_to_skb(struct virtio_transport *trans, - u16 id) -{ - struct dgram_skb *dgram_skb; - - list_for_each_entry(dgram_skb, &trans->incomplete_dgrams, list) { - if (dgram_skb->id == id) - return dgram_skb; - } - - return NULL; -} - -static void -virtio_transport_recv_dgram(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct sk_buff *skb = NULL; - struct vsock_sock *vsk; - struct virtio_transport *trans; - size_t size; - u16 dgram_id, pkt_off, dgram_len, pkt_len; - u32 flags, len; - struct dgram_skb *dgram_skb; - - vsk = vsock_sk(sk); - trans = vsk->trans; - - /* len: dgram_len | pkt_len */ - len = le32_to_cpu(pkt->hdr.len); - dgram_len = len >> 16; - pkt_len = len & 0xFFFF; - - /* flags: dgram_id | pkt_off */ - flags = le32_to_cpu(pkt->hdr.flags); - dgram_id = flags >> 16; - pkt_off = flags & 0xFFFF; - - pr_debug("%s: dgram_len=%d, pkt_len=%d, id=%d, off=%d\n", __func__, - dgram_len, pkt_len, dgram_id, pkt_off); - - dgram_skb = dgram_id_to_skb(trans, dgram_id); - if (dgram_skb) { - /* This pkt is for a existing dgram */ - skb = dgram_skb->skb; - pr_debug("%s:found skb\n", __func__); - } - - /* Packet payload must be within datagram bounds */ - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - goto drop; - if (pkt_len > dgram_len) - goto drop; - if (pkt_off > dgram_len) - goto drop; - if (dgram_len - pkt_off < pkt_len) - goto drop; - - if (!skb) { - /* This pkt is for a new dgram */ - pr_debug("%s:create skb\n", __func__); - - size = sizeof(pkt->hdr) + dgram_len; - /* Attach the packet to the socket's receive queue as an sk_buff. */ - dgram_skb = kzalloc(sizeof(struct dgram_skb), GFP_ATOMIC); - if (!dgram_skb) - goto drop; - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) { - kfree(dgram_skb); - dgram_skb = NULL; - goto drop; - } - dgram_skb->id = dgram_id; - dgram_skb->skb = skb; - list_add_tail(&dgram_skb->list, &trans->incomplete_dgrams); - - /* sk_receive_skb() will do a sock_put(), so hold here. */ - sock_hold(sk); - skb_put(skb, size); - memcpy(skb->data, &pkt->hdr, sizeof(pkt->hdr)); - } - - memcpy(skb->data + sizeof(pkt->hdr) + pkt_off, pkt->buf, pkt_len); - - pr_debug("%s:C, off=%d, pkt_len=%d, dgram_len=%d\n", __func__, - pkt_off, pkt_len, dgram_len); - - /* We are done with this dgram */ - if (pkt_off + pkt_len == dgram_len) { - pr_debug("%s:dgram_id=%d is done\n", __func__, dgram_id); - list_del(&dgram_skb->list); - kfree(dgram_skb); - sk_receive_skb(sk, skb, 0); - } - virtio_transport_free_pkt(pkt); - return; - -drop: - if (dgram_skb) { - list_del(&dgram_skb->list); - kfree(dgram_skb); - kfree_skb(skb); - sock_put(sk); - } - virtio_transport_free_pkt(pkt); -} - -int -virtio_transport_dgram_dequeue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len, int flags) -{ - struct virtio_vsock_hdr *hdr; - struct sk_buff *skb; - int noblock; - int err; - int dgram_len; - - noblock = flags & MSG_DONTWAIT; - - if (flags & MSG_OOB || flags & MSG_ERRQUEUE) - return -EOPNOTSUPP; - - /* Retrieve the head sk_buff from the socket's receive queue. */ - err = 0; - skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); - if (err) - return err; - if (!skb) - return -EAGAIN; - - hdr = (struct virtio_vsock_hdr *)skb->data; - if (!hdr) - goto out; - - dgram_len = le32_to_cpu(hdr->len) >> 16; - /* Place the datagram payload in the user's iovec. */ - err = skb_copy_datagram_msg(skb, sizeof(*hdr), msg, dgram_len); - if (err) - goto out; - - if (msg->msg_name) { - /* Provide the address of the sender. */ - DECLARE_SOCKADDR(struct sockaddr_vm *, vm_addr, msg->msg_name); - vsock_addr_init(vm_addr, le32_to_cpu(hdr->src_cid), le32_to_cpu(hdr->src_port)); - msg->msg_namelen = sizeof(*vm_addr); - } - err = dgram_len; - - /* Send a credit pkt to peer */ - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, hdr); - - pr_debug("%s:done, recved =%d\n", __func__, dgram_len); -out: - skb_free_datagram(&vsk->sk, skb); - return err; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue); - -s64 virtio_transport_stream_has_data(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - mutex_lock(&trans->rx_lock); - bytes = trans->rx_bytes; - mutex_unlock(&trans->rx_lock); - - return bytes; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data); - -static s64 virtio_transport_has_space(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - bytes = trans->peer_buf_alloc - (trans->tx_cnt - trans->peer_fwd_cnt); - if (bytes < 0) - bytes = 0; - - return bytes; -} - -s64 virtio_transport_stream_has_space(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - s64 bytes; - - mutex_lock(&trans->tx_lock); - bytes = virtio_transport_has_space(vsk); - mutex_unlock(&trans->tx_lock); - - pr_debug("%s: bytes=%lld\n", __func__, bytes); - - return bytes; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space); - -int virtio_transport_do_socket_init(struct vsock_sock *vsk, - struct vsock_sock *psk) -{ - struct virtio_transport *trans; - - trans = kzalloc(sizeof(*trans), GFP_KERNEL); - if (!trans) - return -ENOMEM; - - vsk->trans = trans; - trans->vsk = vsk; - if (psk) { - struct virtio_transport *ptrans = psk->trans; - trans->buf_size = ptrans->buf_size; - trans->buf_size_min = ptrans->buf_size_min; - trans->buf_size_max = ptrans->buf_size_max; - trans->peer_buf_alloc = ptrans->peer_buf_alloc; - } else { - trans->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE; - trans->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE; - trans->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE; - } - - trans->buf_alloc = trans->buf_size; - - pr_debug("%s: trans->buf_alloc=%d\n", __func__, trans->buf_alloc); - - mutex_init(&trans->rx_lock); - mutex_init(&trans->tx_lock); - INIT_LIST_HEAD(&trans->rx_queue); - INIT_LIST_HEAD(&trans->incomplete_dgrams); - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init); - -u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size); - -u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size_min; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size); - -u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size_max; -} -EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size); - -void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val < trans->buf_size_min) - trans->buf_size_min = val; - if (val > trans->buf_size_max) - trans->buf_size_max = val; - trans->buf_size = val; - trans->buf_alloc = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size); - -void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val > trans->buf_size) - trans->buf_size = val; - trans->buf_size_min = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size); - -void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val) -{ - struct virtio_transport *trans = vsk->trans; - - if (val > VIRTIO_VSOCK_MAX_BUF_SIZE) - val = VIRTIO_VSOCK_MAX_BUF_SIZE; - if (val < trans->buf_size) - trans->buf_size = val; - trans->buf_size_max = val; -} -EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size); - -int -virtio_transport_notify_poll_in(struct vsock_sock *vsk, - size_t target, - bool *data_ready_now) -{ - if (vsock_stream_has_data(vsk)) - *data_ready_now = true; - else - *data_ready_now = false; - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in); - -int -virtio_transport_notify_poll_out(struct vsock_sock *vsk, - size_t target, - bool *space_avail_now) -{ - s64 free_space; - - free_space = vsock_stream_has_space(vsk); - if (free_space > 0) - *space_avail_now = true; - else if (free_space == 0) - *space_avail_now = false; - - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out); - -int virtio_transport_notify_recv_init(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init); - -int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block); - -int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk, - size_t target, struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue); - -int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk, - size_t target, ssize_t copied, bool data_read, - struct vsock_transport_recv_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue); - -int virtio_transport_notify_send_init(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init); - -int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block); - -int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk, - struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue); - -int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk, - ssize_t written, struct vsock_transport_send_notify_data *data) -{ - return 0; -} -EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue); - -u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - return trans->buf_size; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat); - -bool virtio_transport_stream_is_active(struct vsock_sock *vsk) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active); - -bool virtio_transport_stream_allow(u32 cid, u32 port) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_allow); - -int virtio_transport_dgram_bind(struct vsock_sock *vsk, - struct sockaddr_vm *addr) -{ - return vsock_bind_dgram_generic(vsk, addr); -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind); - -bool virtio_transport_dgram_allow(u32 cid, u32 port) -{ - return true; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow); - -int virtio_transport_connect(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_REQUEST, - .type = VIRTIO_VSOCK_TYPE_STREAM, - }; - - pr_debug("%s: vsk=%p send_request\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_connect); - -int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_SHUTDOWN, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .flags = (mode & RCV_SHUTDOWN ? - VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | - (mode & SEND_SHUTDOWN ? - VIRTIO_VSOCK_SHUTDOWN_SEND : 0), - }; - - pr_debug("%s: vsk=%p: send_shutdown\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_shutdown); - -void virtio_transport_release(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - struct sock *sk = &vsk->sk; - struct dgram_skb *dgram_skb; - struct dgram_skb *dgram_skb_tmp; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - - /* Tell other side to terminate connection */ - if (sk->sk_type == SOCK_STREAM && sk->sk_state == SS_CONNECTED) { - virtio_transport_shutdown(vsk, SHUTDOWN_MASK); - } - - /* Free incomplete dgrams */ - lock_sock(sk); - list_for_each_entry_safe(dgram_skb, dgram_skb_tmp, - &trans->incomplete_dgrams, list) { - list_del(&dgram_skb->list); - kfree_skb(dgram_skb->skb); - kfree(dgram_skb); - sock_put(sk); /* held in virtio_transport_recv_dgram() */ - } - release_sock(sk); -} -EXPORT_SYMBOL_GPL(virtio_transport_release); - -int -virtio_transport_dgram_enqueue(struct vsock_sock *vsk, - struct sockaddr_vm *remote_addr, - struct msghdr *msg, - size_t dgram_len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RW, - .type = VIRTIO_VSOCK_TYPE_DGRAM, - .msg = msg, - }; - size_t total_written = 0, pkt_off = 0, written; - u16 dgram_id; - - /* The max size of a single dgram we support is 64KB */ - if (dgram_len > VIRTIO_VSOCK_MAX_DGRAM_SIZE) - return -EMSGSIZE; - - info.dgram_len = dgram_len; - vsk->remote_addr = *remote_addr; - - dgram_id = trans->dgram_id++; - - /* TODO: To optimize, if we have enough credit to send the pkt already, - * do not ask the peer to send credit to use */ - virtio_transport_send_credit_request(vsk, VIRTIO_VSOCK_TYPE_DGRAM); - - while (total_written < dgram_len) { - info.pkt_len = dgram_len - total_written; - info.flags = dgram_id << 16 | pkt_off; - written = trans->ops->send_pkt(vsk, &info); - if (written < 0) - return -ENOMEM; - if (written == 0) { - /* TODO: if written = 0, we need a sleep & wakeup - * instead of sleep */ - pr_debug("%s: SHOULD WAIT written==0", __func__); - msleep(10); - } - total_written += written; - pkt_off += written; - pr_debug("%s:id=%d, dgram_len=%zu, off=%zu, total_written=%zu, written=%zu\n", - __func__, dgram_id, dgram_len, pkt_off, total_written, written); - } - - return dgram_len; -} -EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue); - -ssize_t -virtio_transport_stream_enqueue(struct vsock_sock *vsk, - struct msghdr *msg, - size_t len) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RW, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .msg = msg, - .pkt_len = len, - }; - - return trans->ops->send_pkt(vsk, &info); -} -EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue); - -void virtio_transport_destruct(struct vsock_sock *vsk) -{ - struct virtio_transport *trans = vsk->trans; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - kfree(trans); -} -EXPORT_SYMBOL_GPL(virtio_transport_destruct); - -static int virtio_transport_send_ack(struct vsock_sock *vsk, u32 cookie) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_ACK, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .flags = cpu_to_le32(cookie), - }; - - pr_debug("%s: sk=%p send_offer\n", __func__, vsk); - return trans->ops->send_pkt(vsk, &info); -} - -static int virtio_transport_send_reset(struct vsock_sock *vsk, - struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RST, - .type = VIRTIO_VSOCK_TYPE_STREAM, - }; - - pr_debug("%s\n", __func__); - - /* Send RST only if the original pkt is not a RST pkt */ - if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) - return 0; - - return trans->ops->send_pkt(vsk, &info); -} - -static int -virtio_transport_recv_connecting(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - int err; - int skerr; - u32 cookie; - - pr_debug("%s: vsk=%p\n", __func__, vsk); - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_RESPONSE: - cookie = le32_to_cpu(pkt->hdr.flags); - pr_debug("%s: got RESPONSE and send ACK, cookie=%x\n", __func__, cookie); - err = virtio_transport_send_ack(vsk, cookie); - if (err < 0) { - skerr = -err; - goto destroy; - } - sk->sk_state = SS_CONNECTED; - sk->sk_socket->state = SS_CONNECTED; - vsock_insert_connected(vsk); - sk->sk_state_change(sk); - break; - case VIRTIO_VSOCK_OP_INVALID: - pr_debug("%s: got invalid\n", __func__); - break; - case VIRTIO_VSOCK_OP_RST: - pr_debug("%s: got rst\n", __func__); - skerr = ECONNRESET; - err = 0; - goto destroy; - default: - pr_debug("%s: got def\n", __func__); - skerr = EPROTO; - err = -EINVAL; - goto destroy; - } - return 0; - -destroy: - virtio_transport_send_reset(vsk, pkt); - sk->sk_state = SS_UNCONNECTED; - sk->sk_err = skerr; - sk->sk_error_report(sk); - return err; -} - -static int -virtio_transport_recv_connected(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_transport *trans = vsk->trans; - int err = 0; - - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_RW: - pkt->len = le32_to_cpu(pkt->hdr.len); - pkt->off = 0; - pkt->trans = trans; - - mutex_lock(&trans->rx_lock); - virtio_transport_inc_rx_pkt(pkt); - list_add_tail(&pkt->list, &trans->rx_queue); - mutex_unlock(&trans->rx_lock); - - sk->sk_data_ready(sk); - return err; - case VIRTIO_VSOCK_OP_CREDIT_UPDATE: - sk->sk_write_space(sk); - break; - case VIRTIO_VSOCK_OP_SHUTDOWN: - pr_debug("%s: got shutdown\n", __func__); - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) - vsk->peer_shutdown |= RCV_SHUTDOWN; - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) - vsk->peer_shutdown |= SEND_SHUTDOWN; - if (le32_to_cpu(pkt->hdr.flags)) - sk->sk_state_change(sk); - break; - case VIRTIO_VSOCK_OP_RST: - pr_debug("%s: got rst\n", __func__); - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; - if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; - sk->sk_state_change(sk); - break; - default: - err = -EINVAL; - break; - } - - virtio_transport_free_pkt(pkt); - return err; -} - -static int -virtio_transport_send_response(struct vsock_sock *vsk, - struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans = vsk->trans; - struct virtio_vsock_pkt_info info = { - .op = VIRTIO_VSOCK_OP_RESPONSE, - .type = VIRTIO_VSOCK_TYPE_STREAM, - .remote_cid = le32_to_cpu(pkt->hdr.src_cid), - .remote_port = le32_to_cpu(pkt->hdr.src_port), - }; - u32 cookie; - - cookie = virtio_vsock_secure_cookie(le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_port), - jiffies / (HZ * 60)); - info.flags = cpu_to_le32(cookie); - - pr_debug("%s: send_response, cookie=%x\n", __func__, le32_to_cpu(cookie)); - - return trans->ops->send_pkt(vsk, &info); -} - -/* Handle server socket */ -static int -virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct vsock_sock *vpending; - struct sock *pending; - int err; - u32 cookie; - - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_REQUEST: - err = virtio_transport_send_response(vsk, pkt); - if (err < 0) { - // FIXME vsk should be vpending - virtio_transport_send_reset(vsk, pkt); - return err; - } - break; - case VIRTIO_VSOCK_OP_ACK: - cookie = le32_to_cpu(pkt->hdr.flags); - err = virtio_vsock_check_cookie(le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.src_port), - le32_to_cpu(pkt->hdr.dst_port), - jiffies / (HZ * 60), - le32_to_cpu(pkt->hdr.flags), - VSOCK_TIMEOUT_INIT); - pr_debug("%s: cookie=%x, err=%d\n", __func__, cookie, err); - if (err) - return err; - - /* So no pending socket are responsible for this pkt, create one */ - pr_debug("%s: create pending\n", __func__); - pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, - sk->sk_type, 0); - if (!pending) { - virtio_transport_send_reset(vsk, pkt); - return -ENOMEM; - } - sk->sk_ack_backlog++; - pending->sk_state = SS_CONNECTING; - - vpending = vsock_sk(pending); - vsock_addr_init(&vpending->local_addr, le32_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.dst_port)); - vsock_addr_init(&vpending->remote_addr, le32_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.src_port)); - vsock_add_pending(sk, pending); - - pr_debug("%s: get pending\n", __func__); - pending = virtio_transport_get_pending(sk, pkt); - vpending = vsock_sk(pending); - lock_sock(pending); - switch (pending->sk_state) { - case SS_CONNECTING: - if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_ACK) { - pr_debug("%s: op=%d != OP_ACK\n", __func__, - le16_to_cpu(pkt->hdr.op)); - virtio_transport_send_reset(vpending, pkt); - pending->sk_err = EPROTO; - pending->sk_state = SS_UNCONNECTED; - sock_put(pending); - } else { - pending->sk_state = SS_CONNECTED; - vsock_insert_connected(vpending); - - vsock_remove_pending(sk, pending); - vsock_enqueue_accept(sk, pending); - - sk->sk_data_ready(sk); - } - err = 0; - break; - default: - pr_debug("%s: sk->sk_ack_backlog=%d\n", __func__, - sk->sk_ack_backlog); - virtio_transport_send_reset(vpending, pkt); - err = -EINVAL; - break; - } - if (err < 0) - vsock_remove_pending(sk, pending); - release_sock(pending); - - /* Release refcnt obtained in virtio_transport_get_pending */ - sock_put(pending); - break; - default: - break; - } - - return 0; -} - -static void virtio_transport_space_update(struct sock *sk, - struct virtio_vsock_pkt *pkt) -{ - struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_transport *trans = vsk->trans; - bool space_available; - - /* buf_alloc and fwd_cnt is always included in the hdr */ - mutex_lock(&trans->tx_lock); - trans->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); - trans->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); - space_available = virtio_transport_has_space(vsk); - mutex_unlock(&trans->tx_lock); - - if (space_available) - sk->sk_write_space(sk); -} - -/* We are under the virtio-vsock's vsock->rx_lock or - * vhost-vsock's vq->mutex lock */ -void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) -{ - struct virtio_transport *trans; - struct sockaddr_vm src, dst; - struct vsock_sock *vsk; - struct sock *sk; - - vsock_addr_init(&src, le32_to_cpu(pkt->hdr.src_cid), le32_to_cpu(pkt->hdr.src_port)); - vsock_addr_init(&dst, le32_to_cpu(pkt->hdr.dst_cid), le32_to_cpu(pkt->hdr.dst_port)); - - virtio_vsock_dumppkt(__func__, pkt); - - if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_DGRAM) { - sk = vsock_find_unbound_socket(&dst); - if (!sk) - goto free_pkt; - - vsk = vsock_sk(sk); - trans = vsk->trans; - BUG_ON(!trans); - - virtio_transport_space_update(sk, pkt); - - lock_sock(sk); - switch (le16_to_cpu(pkt->hdr.op)) { - case VIRTIO_VSOCK_OP_CREDIT_UPDATE: - virtio_transport_free_pkt(pkt); - break; - case VIRTIO_VSOCK_OP_CREDIT_REQUEST: - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_DGRAM, - &pkt->hdr); - virtio_transport_free_pkt(pkt); - break; - case VIRTIO_VSOCK_OP_RW: - virtio_transport_recv_dgram(sk, pkt); - break; - default: - virtio_transport_free_pkt(pkt); - break; - } - release_sock(sk); - - /* Release refcnt obtained when we fetched this socket out of - * the unbound list. - */ - sock_put(sk); - return; - } else if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) { - /* The socket must be in connected or bound table - * otherwise send reset back - */ - sk = vsock_find_connected_socket(&src, &dst); - if (!sk) { - sk = vsock_find_bound_socket(&dst); - if (!sk) { - pr_debug("%s: can not find bound_socket\n", __func__); - virtio_vsock_dumppkt(__func__, pkt); - /* Ignore this pkt instead of sending reset back */ - /* TODO send a RST unless this packet is a RST (to avoid infinite loops) */ - goto free_pkt; - } - } - - vsk = vsock_sk(sk); - trans = vsk->trans; - BUG_ON(!trans); - - virtio_transport_space_update(sk, pkt); - - lock_sock(sk); - switch (sk->sk_state) { - case VSOCK_SS_LISTEN: - virtio_transport_recv_listen(sk, pkt); - virtio_transport_free_pkt(pkt); - break; - case SS_CONNECTING: - virtio_transport_recv_connecting(sk, pkt); - virtio_transport_free_pkt(pkt); - break; - case SS_CONNECTED: - virtio_transport_recv_connected(sk, pkt); - break; - default: - virtio_transport_free_pkt(pkt); - break; - } - release_sock(sk); - - /* Release refcnt obtained when we fetched this socket out of the - * bound or connected list. - */ - sock_put(sk); - } - return; - -free_pkt: - virtio_transport_free_pkt(pkt); -} -EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt); - -void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt) -{ - kfree(pkt->buf); - kfree(pkt); -} -EXPORT_SYMBOL_GPL(virtio_transport_free_pkt); - -static int __init virtio_vsock_common_init(void) -{ - get_random_bytes(vsockcookie_secret, sizeof(vsockcookie_secret)); - return 0; -} - -static void __exit virtio_vsock_common_exit(void) -{ -} - -module_init(virtio_vsock_common_init); -module_exit(virtio_vsock_common_exit); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Asias He"); -MODULE_DESCRIPTION("common code for virtio vsock"); -- cgit v1.2.3-71-gd317 From 297dbde19cf6a0ccb6fd4396c6220a5912ed61e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:51 -0500 Subject: netprio_cgroup: limit the maximum css->id to USHRT_MAX netprio builds per-netdev contiguous priomap array which is indexed by css->id. The array is allocated using kzalloc() effectively limiting the maximum ID supported to some thousand range. This patch caps the maximum supported css->id to USHRT_MAX which should be way above what is actually useable. This allows reducing sock->sk_cgrp_prioidx to u16 from u32. The freed up part will be used to overload the cgroup related fields. sock->sk_cgrp_prioidx's position is swapped with sk_mark so that the two cgroup related fields are adjacent. Signed-off-by: Tejun Heo Acked-by: Daniel Wagner Cc: Daniel Borkmann CC: Neil Horman Signed-off-by: David S. Miller --- include/net/sock.h | 10 +++++----- net/core/netprio_cgroup.c | 9 +++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 6f58b84fc742..a95bcf7d6efa 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -288,7 +288,6 @@ struct cg_proto; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_priority: %SO_PRIORITY setting - * @sk_cgrp_prioidx: socket group's priority map index * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family * @sk_peer_pid: &struct pid for this socket's peer @@ -309,6 +308,7 @@ struct cg_proto; * @sk_send_head: front of stuff to transmit * @sk_security: used by security modules * @sk_mark: generic packet mark + * @sk_cgrp_prioidx: socket group's priority map index * @sk_classid: this socket's cgroup classid * @sk_cgrp: this socket's cgroup-specific proto data * @sk_write_pending: a write to stream socket waits to start @@ -425,9 +425,7 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; __u32 sk_priority; -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) - __u32 sk_cgrp_prioidx; -#endif + __u32 sk_mark; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; long sk_rcvtimeo; @@ -445,7 +443,9 @@ struct sock { #ifdef CONFIG_SECURITY void *sk_security; #endif - __u32 sk_mark; +#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) + u16 sk_cgrp_prioidx; +#endif #ifdef CONFIG_CGROUP_NET_CLASSID u32 sk_classid; #endif diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index cbd0a199bf52..2b9159b7a28a 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -27,6 +27,12 @@ #include +/* + * netprio allocates per-net_device priomap array which is indexed by + * css->id. Limiting css ID to 16bits doesn't lose anything. + */ +#define NETPRIO_ID_MAX USHRT_MAX + #define PRIOMAP_MIN_SZ 128 /* @@ -144,6 +150,9 @@ static int cgrp_css_online(struct cgroup_subsys_state *css) struct net_device *dev; int ret = 0; + if (css->id > NETPRIO_ID_MAX) + return -ENOSPC; + if (!parent_css) return 0; -- cgit v1.2.3-71-gd317 From 2a56a1fec290bf0bc4676bbf4efdb3744953a3e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:52 -0500 Subject: net: wrap sock->sk_cgrp_prioidx and ->sk_classid inside a struct Introduce sock->sk_cgrp_data which is a struct sock_cgroup_data. ->sk_cgroup_prioidx and ->sk_classid are moved into it. The struct and its accessors are defined in cgroup-defs.h. This is to prepare for overloading the fields with a cgroup pointer. This patch mostly performs equivalent conversions but the followings are noteworthy. * Equality test before updating classid is removed from sock_update_classid(). This shouldn't make any noticeable difference and a similar test will be implemented on the helper side later. * sock_update_netprioidx() now takes struct sock_cgroup_data and can be moved to netprio_cgroup.h without causing include dependency loop. Moved. * The dummy version of sock_update_netprioidx() converted to a static inline function while at it. Signed-off-by: Tejun Heo Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 36 ++++++++++++++++++++++++++++++++++++ include/net/cls_cgroup.h | 11 +++++------ include/net/netprio_cgroup.h | 16 +++++++++++++--- include/net/sock.h | 11 +++-------- net/Kconfig | 6 ++++++ net/core/dev.c | 3 ++- net/core/netclassid_cgroup.c | 4 ++-- net/core/netprio_cgroup.c | 3 ++- net/core/scm.c | 4 ++-- net/core/sock.c | 15 ++------------- net/netfilter/nft_meta.c | 2 +- net/netfilter/xt_cgroup.c | 3 ++- 12 files changed, 76 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 504d8591b6d3..ed128fed0335 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -542,4 +542,40 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {} #endif /* CONFIG_CGROUPS */ +#ifdef CONFIG_SOCK_CGROUP_DATA + +struct sock_cgroup_data { + u16 prioidx; + u32 classid; +}; + +static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd) +{ + return skcd->prioidx; +} + +static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd) +{ + return skcd->classid; +} + +static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd, + u16 prioidx) +{ + skcd->prioidx = prioidx; +} + +static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd, + u32 classid) +{ + skcd->classid = classid; +} + +#else /* CONFIG_SOCK_CGROUP_DATA */ + +struct sock_cgroup_data { +}; + +#endif /* CONFIG_SOCK_CGROUP_DATA */ + #endif /* _LINUX_CGROUP_DEFS_H */ diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index ccd6d8bffa4d..c0a92e2c286d 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -41,13 +41,12 @@ static inline u32 task_cls_classid(struct task_struct *p) return classid; } -static inline void sock_update_classid(struct sock *sk) +static inline void sock_update_classid(struct sock_cgroup_data *skcd) { u32 classid; classid = task_cls_classid(current); - if (classid != sk->sk_classid) - sk->sk_classid = classid; + sock_cgroup_set_classid(skcd, classid); } static inline u32 task_get_classid(const struct sk_buff *skb) @@ -64,17 +63,17 @@ static inline u32 task_get_classid(const struct sk_buff *skb) * softirqs always disables bh. */ if (in_serving_softirq()) { - /* If there is an sk_classid we'll use that. */ + /* If there is an sock_cgroup_classid we'll use that. */ if (!skb->sk) return 0; - classid = skb->sk->sk_classid; + classid = sock_cgroup_classid(&skb->sk->sk_cgrp_data); } return classid; } #else /* !CONFIG_CGROUP_NET_CLASSID */ -static inline void sock_update_classid(struct sock *sk) +static inline void sock_update_classid(struct sock_cgroup_data *skcd) { } diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index f2a9597ff53c..604190596cde 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -25,8 +25,6 @@ struct netprio_map { u32 priomap[]; }; -void sock_update_netprioidx(struct sock *sk); - static inline u32 task_netprioidx(struct task_struct *p) { struct cgroup_subsys_state *css; @@ -38,13 +36,25 @@ static inline u32 task_netprioidx(struct task_struct *p) rcu_read_unlock(); return idx; } + +static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd) +{ + if (in_interrupt()) + return; + + sock_cgroup_set_prioidx(skcd, task_netprioidx(current)); +} + #else /* !CONFIG_CGROUP_NET_PRIO */ + static inline u32 task_netprioidx(struct task_struct *p) { return 0; } -#define sock_update_netprioidx(sk) +static inline void sock_update_netprioidx(struct sock_cgroup_data *skcd) +{ +} #endif /* CONFIG_CGROUP_NET_PRIO */ #endif /* _NET_CLS_CGROUP_H */ diff --git a/include/net/sock.h b/include/net/sock.h index a95bcf7d6efa..0ca22b014de1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -308,8 +309,7 @@ struct cg_proto; * @sk_send_head: front of stuff to transmit * @sk_security: used by security modules * @sk_mark: generic packet mark - * @sk_cgrp_prioidx: socket group's priority map index - * @sk_classid: this socket's cgroup classid + * @sk_cgrp_data: cgroup data for this cgroup * @sk_cgrp: this socket's cgroup-specific proto data * @sk_write_pending: a write to stream socket waits to start * @sk_state_change: callback to indicate change in the state of the sock @@ -443,12 +443,7 @@ struct sock { #ifdef CONFIG_SECURITY void *sk_security; #endif -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) - u16 sk_cgrp_prioidx; -#endif -#ifdef CONFIG_CGROUP_NET_CLASSID - u32 sk_classid; -#endif + struct sock_cgroup_data sk_cgrp_data; struct cg_proto *sk_cgrp; void (*sk_state_change)(struct sock *sk); void (*sk_data_ready)(struct sock *sk); diff --git a/net/Kconfig b/net/Kconfig index 127da94ae25e..11f8c22af34d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -250,9 +250,14 @@ config XPS depends on SMP default y +config SOCK_CGROUP_DATA + bool + default n + config CGROUP_NET_PRIO bool "Network priority cgroup" depends on CGROUPS + select SOCK_CGROUP_DATA ---help--- Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis. @@ -260,6 +265,7 @@ config CGROUP_NET_PRIO config CGROUP_NET_CLASSID bool "Network classid cgroup" depends on CGROUPS + select SOCK_CGROUP_DATA ---help--- Cgroup subsystem for use as general purpose socket classid marker that is being used in cls_cgroup and for netfilter matching. diff --git a/net/core/dev.c b/net/core/dev.c index e5c395473eba..8f705fcedb94 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2929,7 +2929,8 @@ static void skb_update_prio(struct sk_buff *skb) struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap); if (!skb->priority && skb->sk && map) { - unsigned int prioidx = skb->sk->sk_cgrp_prioidx; + unsigned int prioidx = + sock_cgroup_prioidx(&skb->sk->sk_cgrp_data); if (prioidx < map->priomap_len) skb->priority = map->priomap[prioidx]; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 2e4df84c34a1..e60ded46b3ac 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -62,8 +62,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) struct socket *sock = sock_from_file(file, &err); if (sock) - sock->sk->sk_classid = (u32)(unsigned long)v; - + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, + (unsigned long)v); return 0; } diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 2b9159b7a28a..de42aa7f6c77 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -223,7 +223,8 @@ static int update_netprio(const void *v, struct file *file, unsigned n) int err; struct socket *sock = sock_from_file(file, &err); if (sock) - sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; + sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, + (unsigned long)v); return 0; } diff --git a/net/core/scm.c b/net/core/scm.c index 8a1741b14302..14596fb37172 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -289,8 +289,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) /* Bump the usage count and install the file. */ sock = sock_from_file(fp[i], &err); if (sock) { - sock_update_netprioidx(sock->sk); - sock_update_classid(sock->sk); + sock_update_netprioidx(&sock->sk->sk_cgrp_data); + sock_update_classid(&sock->sk->sk_cgrp_data); } fd_install(new_fd, get_file(fp[i])); } diff --git a/net/core/sock.c b/net/core/sock.c index 7965ef487375..947741dc43fa 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1393,17 +1393,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) -void sock_update_netprioidx(struct sock *sk) -{ - if (in_interrupt()) - return; - - sk->sk_cgrp_prioidx = task_netprioidx(current); -} -EXPORT_SYMBOL_GPL(sock_update_netprioidx); -#endif - /** * sk_alloc - All socket objects are allocated here * @net: the applicable net namespace @@ -1432,8 +1421,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk); - sock_update_netprioidx(sk); + sock_update_classid(&sk->sk_cgrp_data); + sock_update_netprioidx(&sk->sk_cgrp_data); } return sk; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9dfaf4d55ee0..1915cab7f32d 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -174,7 +174,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, sk = skb_to_full_sk(skb); if (!sk || !sk_fullsock(sk)) goto err; - *dest = sk->sk_classid; + *dest = sock_cgroup_classid(&sk->sk_cgrp_data); break; #endif default: diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index a1d126f29463..54eaeb45ce99 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -42,7 +42,8 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) if (skb->sk == NULL || !sk_fullsock(skb->sk)) return false; - return (info->id == skb->sk->sk_classid) ^ info->invert; + return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^ + info->invert; } static struct xt_match cgroup_mt_reg __read_mostly = { -- cgit v1.2.3-71-gd317 From 33d5a7b14bfd02e60af9d223db8dfff0cbcabe6b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Nov 2015 21:53:04 +0100 Subject: netfilter: nf_tables: extend tracing infrastructure nft monitor mode can then decode and display this trace data. Parts of LL/Network/Transport headers are provided as separate attributes. Otherwise, printing IP address data becomes virtually impossible for userspace since in the case of the netdev family we really don't want userspace to have to know all the possible link layer types and/or sizes just to display/print an ip address. We also don't want userspace to have to follow ipv6 header chains to get the s/dport info, the kernel already did this work for us. To avoid bloating nft_do_chain all data required for tracing is encapsulated in nft_traceinfo. The structure is initialized unconditionally(!) for each nft_do_chain invocation. This unconditionall call will be moved under a static key in a followup patch. With lots of help from Patrick McHardy and Pablo Neira. Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 32 ++++ include/uapi/linux/netfilter/nf_tables.h | 52 ++++++ include/uapi/linux/netfilter/nfnetlink.h | 2 + net/netfilter/Makefile | 2 +- net/netfilter/nf_tables_api.c | 12 +- net/netfilter/nf_tables_core.c | 45 +++-- net/netfilter/nf_tables_trace.c | 271 +++++++++++++++++++++++++++++++ net/netfilter/nfnetlink.c | 1 + 8 files changed, 398 insertions(+), 19 deletions(-) create mode 100644 net/netfilter/nf_tables_trace.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 101d7d7ec243..b313cda49194 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -888,6 +888,38 @@ void nft_unregister_chain_type(const struct nf_chain_type *); int nft_register_expr(struct nft_expr_type *); void nft_unregister_expr(struct nft_expr_type *); +int nft_verdict_dump(struct sk_buff *skb, int type, + const struct nft_verdict *v); + +/** + * struct nft_traceinfo - nft tracing information and state + * + * @pkt: pktinfo currently processed + * @basechain: base chain currently processed + * @chain: chain currently processed + * @rule: rule that was evaluated + * @verdict: verdict given by rule + * @type: event type (enum nft_trace_types) + * @packet_dumped: packet headers sent in a previous traceinfo message + * @trace: other struct members are initialised + */ +struct nft_traceinfo { + const struct nft_pktinfo *pkt; + const struct nft_base_chain *basechain; + const struct nft_chain *chain; + const struct nft_rule *rule; + const struct nft_verdict *verdict; + enum nft_trace_types type; + bool packet_dumped; + bool trace; +}; + +void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_chain *basechain); + +void nft_trace_notify(struct nft_traceinfo *info); + #define nft_dereference(p) \ nfnl_dereference(p, NFNL_SUBSYS_NFTABLES) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 5f3ececf84b3..b48a3ab761f8 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -83,6 +83,7 @@ enum nft_verdicts { * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes) * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes) * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes) + * @NFT_MSG_TRACE: trace event (enum nft_trace_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -102,6 +103,7 @@ enum nf_tables_msg_types { NFT_MSG_DELSETELEM, NFT_MSG_NEWGEN, NFT_MSG_GETGEN, + NFT_MSG_TRACE, NFT_MSG_MAX, }; @@ -987,4 +989,54 @@ enum nft_gen_attributes { }; #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) +/** + * enum nft_trace_attributes - nf_tables trace netlink attributes + * + * @NFTA_TRACE_TABLE: name of the table (NLA_STRING) + * @NFTA_TRACE_CHAIN: name of the chain (NLA_STRING) + * @NFTA_TRACE_RULE_HANDLE: numeric handle of the rule (NLA_U64) + * @NFTA_TRACE_TYPE: type of the event (NLA_U32: nft_trace_types) + * @NFTA_TRACE_VERDICT: verdict returned by hook (NLA_NESTED: nft_verdicts) + * @NFTA_TRACE_ID: pseudo-id, same for each skb traced (NLA_U32) + * @NFTA_TRACE_LL_HEADER: linklayer header (NLA_BINARY) + * @NFTA_TRACE_NETWORK_HEADER: network header (NLA_BINARY) + * @NFTA_TRACE_TRANSPORT_HEADER: transport header (NLA_BINARY) + * @NFTA_TRACE_IIF: indev ifindex (NLA_U32) + * @NFTA_TRACE_IIFTYPE: netdev->type of indev (NLA_U16) + * @NFTA_TRACE_OIF: outdev ifindex (NLA_U32) + * @NFTA_TRACE_OIFTYPE: netdev->type of outdev (NLA_U16) + * @NFTA_TRACE_MARK: nfmark (NLA_U32) + * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32) + * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32) + */ +enum nft_trace_attibutes { + NFTA_TRACE_UNSPEC, + NFTA_TRACE_TABLE, + NFTA_TRACE_CHAIN, + NFTA_TRACE_RULE_HANDLE, + NFTA_TRACE_TYPE, + NFTA_TRACE_VERDICT, + NFTA_TRACE_ID, + NFTA_TRACE_LL_HEADER, + NFTA_TRACE_NETWORK_HEADER, + NFTA_TRACE_TRANSPORT_HEADER, + NFTA_TRACE_IIF, + NFTA_TRACE_IIFTYPE, + NFTA_TRACE_OIF, + NFTA_TRACE_OIFTYPE, + NFTA_TRACE_MARK, + NFTA_TRACE_NFPROTO, + NFTA_TRACE_POLICY, + __NFTA_TRACE_MAX +}; +#define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1) + +enum nft_trace_types { + NFT_TRACETYPE_UNSPEC, + NFT_TRACETYPE_POLICY, + NFT_TRACETYPE_RETURN, + NFT_TRACETYPE_RULE, + __NFT_TRACETYPE_MAX +}; +#define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1) #endif /* _LINUX_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h index 354a7e5e50f2..4bb8cb7730e7 100644 --- a/include/uapi/linux/netfilter/nfnetlink.h +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -22,6 +22,8 @@ enum nfnetlink_groups { #define NFNLGRP_NFTABLES NFNLGRP_NFTABLES NFNLGRP_ACCT_QUOTA, #define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA + NFNLGRP_NFTRACE, +#define NFNLGRP_NFTRACE NFNLGRP_NFTRACE __NFNLGRP_MAX, }; #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 7638c36b498c..22934846b5d1 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -67,7 +67,7 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o # nf_tables -nf_tables-objs += nf_tables_core.o nf_tables_api.o +nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 93cc4737018f..c4969a0d54ba 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4446,22 +4446,22 @@ static void nft_verdict_uninit(const struct nft_data *data) } } -static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) +int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v) { struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_DATA_VERDICT); + nest = nla_nest_start(skb, type); if (!nest) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code))) + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code))) goto nla_put_failure; - switch (data->verdict.code) { + switch (v->code) { case NFT_JUMP: case NFT_GOTO: if (nla_put_string(skb, NFTA_VERDICT_CHAIN, - data->verdict.chain->name)) + v->chain->name)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -4572,7 +4572,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, err = nft_value_dump(skb, data, len); break; case NFT_DATA_VERDICT: - err = nft_verdict_dump(skb, data); + err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict); break; default: err = -EINVAL; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index f3695a497408..2395de7c8ab2 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -44,22 +44,36 @@ static struct nf_loginfo trace_loginfo = { }, }; -static void __nft_trace_packet(const struct nft_pktinfo *pkt, - const struct nft_chain *chain, - int rulenum, enum nft_trace type) +static noinline void __nft_trace_packet(struct nft_traceinfo *info, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) { + const struct nft_pktinfo *pkt = info->pkt; + + if (!pkt->skb->nf_trace) + return; + + info->chain = chain; + info->type = type; + + nft_trace_notify(info); + nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); } -static inline void nft_trace_packet(const struct nft_pktinfo *pkt, +static inline void nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - int rulenum, enum nft_trace type) + const struct nft_rule *rule, + int rulenum, + enum nft_trace_types type) { - if (unlikely(pkt->skb->nf_trace)) - __nft_trace_packet(pkt, chain, rulenum, type); + if (unlikely(info->trace)) { + info->rule = rule; + __nft_trace_packet(info, chain, rulenum, type); + } } static void nft_cmp_fast_eval(const struct nft_expr *expr, @@ -121,7 +135,9 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_stats *stats; int rulenum; unsigned int gencursor = nft_genmask_cur(net); + struct nft_traceinfo info; + nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); @@ -151,7 +167,8 @@ next_rule: regs.verdict.code = NFT_CONTINUE; continue; case NFT_CONTINUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); continue; } break; @@ -161,7 +178,8 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); return regs.verdict.code; } @@ -174,7 +192,8 @@ next_rule: stackptr++; /* fall through */ case NFT_GOTO: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); chain = regs.verdict.chain; goto do_chain; @@ -182,7 +201,8 @@ next_rule: rulenum++; /* fall through */ case NFT_RETURN: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RETURN); break; default: WARN_ON(1); @@ -196,7 +216,8 @@ next_rule: goto next_rule; } - nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); + nft_trace_packet(&info, basechain, NULL, -1, + NFT_TRACETYPE_POLICY); rcu_read_lock_bh(); stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c new file mode 100644 index 000000000000..36fd7ad6729a --- /dev/null +++ b/net/netfilter/nf_tables_trace.c @@ -0,0 +1,271 @@ +/* + * (C) 2015 Red Hat GmbH + * Author: Florian Westphal + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFT_TRACETYPE_LL_HSIZE 20 +#define NFT_TRACETYPE_NETWORK_HSIZE 40 +#define NFT_TRACETYPE_TRANSPORT_HSIZE 20 + +static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) +{ + __be32 id; + + /* using skb address as ID results in a limited number of + * values (and quick reuse). + * + * So we attempt to use as many skb members that will not + * change while skb is with netfilter. + */ + id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), + skb->skb_iif); + + return nla_put_be32(nlskb, NFTA_TRACE_ID, id); +} + +static int trace_fill_header(struct sk_buff *nlskb, u16 type, + const struct sk_buff *skb, + int off, unsigned int len) +{ + struct nlattr *nla; + + if (len == 0) + return 0; + + nla = nla_reserve(nlskb, type, len); + if (!nla || skb_copy_bits(skb, off, nla_data(nla), len)) + return -1; + + return 0; +} + +static int nf_trace_fill_ll_header(struct sk_buff *nlskb, + const struct sk_buff *skb) +{ + struct vlan_ethhdr veth; + int off; + + BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE); + + off = skb_mac_header(skb) - skb->data; + if (off != -ETH_HLEN) + return -1; + + if (skb_copy_bits(skb, off, &veth, ETH_HLEN)) + return -1; + + veth.h_vlan_proto = skb->vlan_proto; + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + veth.h_vlan_encapsulated_proto = skb->protocol; + + return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth); +} + +static int nf_trace_fill_dev_info(struct sk_buff *nlskb, + const struct net_device *indev, + const struct net_device *outdev) +{ + if (indev) { + if (nla_put_be32(nlskb, NFTA_TRACE_IIF, + htonl(indev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE, + htons(indev->type))) + return -1; + } + + if (outdev) { + if (nla_put_be32(nlskb, NFTA_TRACE_OIF, + htonl(outdev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE, + htons(outdev->type))) + return -1; + } + + return 0; +} + +static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, + const struct nft_pktinfo *pkt) +{ + const struct sk_buff *skb = pkt->skb; + unsigned int len = min_t(unsigned int, + pkt->xt.thoff - skb_network_offset(skb), + NFT_TRACETYPE_NETWORK_HSIZE); + int off = skb_network_offset(skb); + + if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) + return -1; + + len = min_t(unsigned int, skb->len - pkt->xt.thoff, + NFT_TRACETYPE_TRANSPORT_HSIZE); + + if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, + pkt->xt.thoff, len)) + return -1; + + if (!skb_mac_header_was_set(skb)) + return 0; + + if (skb_vlan_tag_get(skb)) + return nf_trace_fill_ll_header(nlskb, skb); + + off = skb_mac_header(skb) - skb->data; + len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE); + return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER, + skb, off, len); +} + +static int nf_trace_fill_rule_info(struct sk_buff *nlskb, + const struct nft_traceinfo *info) +{ + if (!info->rule) + return 0; + + /* a continue verdict with ->type == RETURN means that this is + * an implicit return (end of chain reached). + * + * Since no rule matched, the ->rule pointer is invalid. + */ + if (info->type == NFT_TRACETYPE_RETURN && + info->verdict->code == NFT_CONTINUE) + return 0; + + return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE, + cpu_to_be64(info->rule->handle)); +} + +void nft_trace_notify(struct nft_traceinfo *info) +{ + const struct nft_pktinfo *pkt = info->pkt; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct sk_buff *skb; + unsigned int size; + int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; + + if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE)) + return; + + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + + nla_total_size(NFT_TABLE_MAXNAMELEN) + + nla_total_size(NFT_CHAIN_MAXNAMELEN) + + nla_total_size(sizeof(__be64)) + /* rule handle */ + nla_total_size(sizeof(__be32)) + /* trace type */ + nla_total_size(0) + /* VERDICT, nested */ + nla_total_size(sizeof(u32)) + /* verdict code */ + nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */ + nla_total_size(sizeof(u32)) + /* id */ + nla_total_size(NFT_TRACETYPE_LL_HSIZE) + + nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) + + nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) + + nla_total_size(sizeof(u32)) + /* iif */ + nla_total_size(sizeof(__be16)) + /* iiftype */ + nla_total_size(sizeof(u32)) + /* oif */ + nla_total_size(sizeof(__be16)) + /* oiftype */ + nla_total_size(sizeof(u32)) + /* mark */ + nla_total_size(sizeof(u32)) + /* nfproto */ + nla_total_size(sizeof(u32)); /* policy */ + + skb = nlmsg_new(size, GFP_ATOMIC); + if (!skb) + return; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0); + if (!nlh) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = info->basechain->type->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) + goto nla_put_failure; + + if (trace_fill_id(skb, pkt->skb)) + goto nla_put_failure; + + if (info->chain) { + if (nla_put_string(skb, NFTA_TRACE_CHAIN, + info->chain->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_TRACE_TABLE, + info->chain->table->name)) + goto nla_put_failure; + } + + if (nf_trace_fill_rule_info(skb, info)) + goto nla_put_failure; + + switch (info->type) { + case NFT_TRACETYPE_UNSPEC: + case __NFT_TRACETYPE_MAX: + break; + case NFT_TRACETYPE_RETURN: + case NFT_TRACETYPE_RULE: + if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) + goto nla_put_failure; + break; + case NFT_TRACETYPE_POLICY: + if (nla_put_be32(skb, NFTA_TRACE_POLICY, + info->basechain->policy)) + goto nla_put_failure; + break; + } + + if (pkt->skb->mark && + nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) + goto nla_put_failure; + + if (!info->packet_dumped) { + if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out)) + goto nla_put_failure; + + if (nf_trace_fill_pkt_info(skb, pkt)) + goto nla_put_failure; + info->packet_dumped = true; + } + + nlmsg_end(skb, nlh); + nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); + return; + + nla_put_failure: + WARN_ON_ONCE(1); + kfree_skb(skb); +} + +void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_chain *chain) +{ + info->basechain = nft_base_chain(chain); + info->trace = true; + info->packet_dumped = false; + info->pkt = pkt; + info->verdict = verdict; +} diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 46453ab318db..28591fa94ba5 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -49,6 +49,7 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, + [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, }; void nfnl_lock(__u8 subsys_id) -- cgit v1.2.3-71-gd317 From e639f7ab079b5256660018511d87aa34b54f1a9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Nov 2015 21:53:05 +0100 Subject: netfilter: nf_tables: wrap tracing with a static key Only needed when meta nftrace rule(s) were added. The assumption is that no such rules are active, so the call to nft_trace_init is "never" needed. When nftrace rules are active, we always call the nft_trace_* functions, but will only send netlink messages when all of the following are true: - traceinfo structure was initialised - skb->nf_trace == 1 - at least one subscriber to trace group. Adding an extra conditional (static_branch ... && skb->nf_trace) nft_trace_init( ..) Is possible but results in a larger nft_do_chain footprint. Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 1 + include/net/netfilter/nft_meta.h | 3 +++ net/bridge/netfilter/nft_meta_bridge.c | 1 + net/netfilter/nf_tables_core.c | 9 ++++++--- net/netfilter/nf_tables_trace.c | 4 ++++ net/netfilter/nft_meta.c | 16 ++++++++++++++++ 6 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 4ff5424909aa..a9060dd99db7 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -57,6 +57,7 @@ struct nft_payload_set { }; extern const struct nft_expr_ops nft_payload_fast_ops; +extern struct static_key_false nft_trace_enabled; int nft_payload_module_init(void); void nft_payload_module_exit(void); diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 711887a09e91..d27588c8dbd9 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -33,4 +33,7 @@ void nft_meta_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr); + #endif diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index a21269b83f16..4b901d9f2e7c 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -84,6 +84,7 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, }; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 2395de7c8ab2..67fa41d317f6 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, { const struct nft_pktinfo *pkt = info->pkt; - if (!pkt->skb->nf_trace) + if (!info->trace || !pkt->skb->nf_trace) return; info->chain = chain; @@ -70,7 +71,7 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, int rulenum, enum nft_trace_types type) { - if (unlikely(info->trace)) { + if (static_branch_unlikely(&nft_trace_enabled)) { info->rule = rule; __nft_trace_packet(info, chain, rulenum, type); } @@ -137,7 +138,9 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) unsigned int gencursor = nft_genmask_cur(net); struct nft_traceinfo info; - nft_trace_init(&info, pkt, ®s.verdict, basechain); + info.trace = false; + if (static_branch_unlikely(&nft_trace_enabled)) + nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 36fd7ad6729a..e9e959f65d91 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -24,6 +25,9 @@ #define NFT_TRACETYPE_NETWORK_HSIZE 40 #define NFT_TRACETYPE_TRANSPORT_HSIZE 20 +DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); +EXPORT_SYMBOL_GPL(nft_trace_enabled); + static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) { __be32 id; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9dfaf4d55ee0..85a465b773e5 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -18,10 +18,12 @@ #include #include #include +#include #include #include #include /* for TCP_TIME_WAIT */ #include +#include #include void nft_meta_get_eval(const struct nft_expr *expr, @@ -297,6 +299,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (priv->key == NFT_META_NFTRACE) + static_branch_inc(&nft_trace_enabled); + return 0; } EXPORT_SYMBOL_GPL(nft_meta_set_init); @@ -334,6 +339,16 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_meta_set_dump); +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + if (priv->key == NFT_META_NFTRACE) + static_branch_dec(&nft_trace_enabled); +} +EXPORT_SYMBOL_GPL(nft_meta_set_destroy); + static struct nft_expr_type nft_meta_type; static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, @@ -348,6 +363,7 @@ static const struct nft_expr_ops nft_meta_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, }; -- cgit v1.2.3-71-gd317 From 01b1cb87d37fb19cdaa5e7002416fdde156873d0 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 16 Nov 2015 12:52:21 +0200 Subject: Bluetooth: Run page scan updates through hdev->req_workqueue Since Add/Remove Device perform the page scan updates independently from the HCI command completion we've introduced a potential race when multiple mgmt commands are queued. Doing the page scan updates through the req_workqueue ensures that the state changes are performed in a race-free manner. At the same time, to make the request helper more widely usable, extend it to also cover Inquiry Scan changes since those are behind the same HCI command. This is also reflected in the new name of the API as well as the work struct name. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_event.c | 4 ++-- net/bluetooth/hci_request.c | 27 ++++++++++++++++++--------- net/bluetooth/hci_request.h | 8 ++++++-- net/bluetooth/mgmt.c | 16 ++++++++-------- 5 files changed, 35 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 55ce209157b1..eda809a5c3df 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -329,6 +329,7 @@ struct hci_dev { struct work_struct discov_update; struct work_struct bg_scan_update; + struct work_struct scan_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d57c11c1c6b5..703e37f1a955 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2176,7 +2176,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); } /* Set packet type for incoming connection */ @@ -2362,7 +2362,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); } params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e639671f54bd..78c026b4ffa1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -637,7 +637,7 @@ static bool disconnected_whitelist_entries(struct hci_dev *hdev) return false; } -void __hci_update_page_scan(struct hci_request *req) +void __hci_req_update_scan(struct hci_request *req) { struct hci_dev *hdev = req->hdev; u8 scan; @@ -657,22 +657,29 @@ void __hci_update_page_scan(struct hci_request *req) else scan = SCAN_DISABLED; - if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE)) - return; - if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) scan |= SCAN_INQUIRY; + if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE) && + test_bit(HCI_ISCAN, &hdev->flags) == !!(scan & SCAN_INQUIRY)) + return; + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -void hci_update_page_scan(struct hci_dev *hdev) +static int update_scan(struct hci_request *req, unsigned long opt) { - struct hci_request req; + hci_dev_lock(req->hdev); + __hci_req_update_scan(req); + hci_dev_unlock(req->hdev); + return 0; +} - hci_req_init(&req, hdev); - __hci_update_page_scan(&req); - hci_req_run(&req, NULL); +static void scan_update_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, scan_update); + + hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL); } /* This function controls the background scanning based on hdev->pend_le_conns @@ -1270,6 +1277,7 @@ void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->discov_update, discov_update); INIT_WORK(&hdev->bg_scan_update, bg_scan_update); + INIT_WORK(&hdev->scan_update, scan_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); } @@ -1280,6 +1288,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->discov_update); cancel_work_sync(&hdev->bg_scan_update); + cancel_work_sync(&hdev->scan_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 6b9e59f7f7a9..cc8275520fb2 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -61,8 +61,12 @@ void hci_req_add_le_passive_scan(struct hci_request *req); /* Returns true if HCI commands were queued */ bool hci_req_stop_discovery(struct hci_request *req); -void hci_update_page_scan(struct hci_dev *hdev); -void __hci_update_page_scan(struct hci_request *req); +static inline void hci_req_update_scan(struct hci_dev *hdev) +{ + queue_work(hdev->req_workqueue, &hdev->scan_update); +} + +void __hci_req_update_scan(struct hci_request *req); int hci_update_random_address(struct hci_request *req, bool require_privacy, u8 *own_addr_type); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3d9d2e4839c5..0d20e1328528 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1810,7 +1810,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, * entries. */ hci_req_init(&req, hdev); - __hci_update_page_scan(&req); + __hci_req_update_scan(&req); update_class(&req); hci_req_run(&req, NULL); @@ -2058,7 +2058,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status, if (conn_changed || discov_changed) { new_settings(hdev, cmd->sk); - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); if (discov_changed) mgmt_update_adv_data(hdev); hci_update_background_scan(hdev); @@ -2092,7 +2092,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev, return err; if (changed) { - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); hci_update_background_scan(hdev); return new_settings(hdev, sk); } @@ -5041,7 +5041,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); write_fast_connectable(&req, false); - __hci_update_page_scan(&req); + __hci_req_update_scan(&req); /* Since only the advertising data flags will change, there * is no need to update the scan response data. @@ -5927,7 +5927,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); goto added; } @@ -6024,7 +6024,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, goto unlock; } - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -6089,7 +6089,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - hci_update_page_scan(hdev); + hci_req_update_scan(hdev); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -7397,7 +7397,7 @@ static int powered_update_hci(struct hci_dev *hdev) write_fast_connectable(&req, true); else write_fast_connectable(&req, false); - __hci_update_page_scan(&req); + __hci_req_update_scan(&req); update_class(&req); update_name(&req); update_eir(&req); -- cgit v1.2.3-71-gd317 From f22525700b2ae34eb97a29a91e2eee902062b484 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 18 Nov 2015 12:49:20 +0200 Subject: Bluetooth: Move advertising instance management to hci_request.c This paves the way for eventually performing advertising changes through the hdev->req_workqueue. Some new APIs need to be exposed from mgmt.c to hci_request.c and vice-versa, but many of them will go away once hdev->req_workqueue gets used. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 9 +- net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_core.c | 19 +- net/bluetooth/hci_event.c | 4 +- net/bluetooth/hci_request.c | 533 +++++++++++++++++++++++++++++++++++- net/bluetooth/hci_request.h | 14 + net/bluetooth/mgmt.c | 574 +++------------------------------------ 7 files changed, 589 insertions(+), 566 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index eda809a5c3df..b56085b6ecce 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1434,9 +1434,7 @@ void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); void mgmt_set_powered_failed(struct hci_dev *hdev, int err); int mgmt_powered(struct hci_dev *hdev, u8 powered); -int mgmt_update_adv_data(struct hci_dev *hdev); void mgmt_discoverable_timeout(struct hci_dev *hdev); -void mgmt_adv_timeout_expired(struct hci_dev *hdev); void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent); void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, @@ -1491,8 +1489,13 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u8 store_hint, u16 min_interval, u16 max_interval, u16 latency, u16 timeout); -void mgmt_reenable_advertising(struct hci_dev *hdev); void mgmt_smp_complete(struct hci_conn *conn, bool complete); +bool mgmt_get_connectable(struct hci_dev *hdev); +u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); +void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, + u8 instance); +void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, + u8 instance); u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, u16 to_multiplier); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2d334e07fd77..e2600213cd50 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -683,7 +683,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) /* Re-enable advertising in case this was a failed connection * attempt as a peripheral. */ - mgmt_reenable_advertising(hdev); + hci_req_reenable_advertising(hdev); } static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 89af7e4fac02..bab8958bf46e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1549,11 +1549,6 @@ int hci_dev_do_close(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_MGMT)) cancel_delayed_work_sync(&hdev->rpa_expired); - if (hdev->adv_instance_timeout) { - cancel_delayed_work_sync(&hdev->adv_instance_expire); - hdev->adv_instance_timeout = 0; - } - /* Avoid potential lockdep warnings from the *_flush() calls by * ensuring the workqueue is empty up front. */ @@ -1774,7 +1769,7 @@ static void hci_update_scan_state(struct hci_dev *hdev, u8 scan) hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - mgmt_update_adv_data(hdev); + hci_req_update_adv_data(hdev, HCI_ADV_CURRENT); mgmt_new_settings(hdev); } @@ -2112,17 +2107,6 @@ static void hci_discov_off(struct work_struct *work) mgmt_discoverable_timeout(hdev); } -static void hci_adv_timeout_expire(struct work_struct *work) -{ - struct hci_dev *hdev; - - hdev = container_of(work, struct hci_dev, adv_instance_expire.work); - - BT_DBG("%s", hdev->name); - - mgmt_adv_timeout_expired(hdev); -} - void hci_uuids_clear(struct hci_dev *hdev) { struct bt_uuid *uuid, *tmp; @@ -3003,7 +2987,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); - INIT_DELAYED_WORK(&hdev->adv_instance_expire, hci_adv_timeout_expire); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 703e37f1a955..7554da5b7a8f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1183,7 +1183,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, hci_discovery_set_state(hdev, DISCOVERY_STOPPED); else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) && hdev->discovery.state == DISCOVERY_FINDING) - mgmt_reenable_advertising(hdev); + hci_req_reenable_advertising(hdev); break; @@ -2401,7 +2401,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) * is timed out due to Directed Advertising." */ if (type == LE_LINK) - mgmt_reenable_advertising(hdev); + hci_req_reenable_advertising(hdev); unlock: hci_dev_unlock(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 7c85435b8982..e6622bd1926d 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -23,6 +23,7 @@ #include #include +#include #include "smp.h" #include "hci_request.h" @@ -580,6 +581,524 @@ void hci_req_add_le_passive_scan(struct hci_request *req) &enable_cp); } +static u8 get_current_adv_instance(struct hci_dev *hdev) +{ + /* The "Set Advertising" setting supersedes the "Add Advertising" + * setting. Here we set the advertising data based on which + * setting was set. When neither apply, default to the global settings, + * represented by instance "0". + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return hdev->cur_adv_instance; + + return 0x00; +} + +static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) +{ + u8 instance = get_current_adv_instance(hdev); + struct adv_info *adv_instance; + + /* Ignore instance 0 */ + if (instance == 0x00) + return 0; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return 0; + + /* TODO: Take into account the "appearance" and "local-name" flags here. + * These are currently being ignored as they are not supported. + */ + return adv_instance->scan_rsp_len; +} + +void __hci_req_disable_advertising(struct hci_request *req) +{ + u8 enable = 0x00; + + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); +} + +static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance) +{ + u32 flags; + struct adv_info *adv_instance; + + if (instance == 0x00) { + /* Instance 0 always manages the "Tx Power" and "Flags" + * fields + */ + flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; + + /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting + * corresponds to the "connectable" instance flag. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) + flags |= MGMT_ADV_FLAG_CONNECTABLE; + + return flags; + } + + adv_instance = hci_find_adv_instance(hdev, instance); + + /* Return 0 when we got an invalid instance identifier. */ + if (!adv_instance) + return 0; + + return adv_instance->flags; +} + +void __hci_req_enable_advertising(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_adv_param cp; + u8 own_addr_type, enable = 0x01; + bool connectable; + u8 instance; + u32 flags; + + if (hci_conn_num(hdev, LE_LINK) > 0) + return; + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + __hci_req_disable_advertising(req); + + /* Clear the HCI_LE_ADV bit temporarily so that the + * hci_update_random_address knows that it's safe to go ahead + * and write a new random address. The flag will be set back on + * as soon as the SET_ADV_ENABLE HCI command completes. + */ + hci_dev_clear_flag(hdev, HCI_LE_ADV); + + instance = get_current_adv_instance(hdev); + flags = get_adv_instance_flags(hdev, instance); + + /* If the "connectable" instance flag was not set, then choose between + * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. + */ + connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || + mgmt_get_connectable(hdev); + + /* Set require_privacy to true only when non-connectable + * advertising is used. In that case it is fine to use a + * non-resolvable private address. + */ + if (hci_update_random_address(req, !connectable, &own_addr_type) < 0) + return; + + memset(&cp, 0, sizeof(cp)); + cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval); + cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval); + + if (connectable) + cp.type = LE_ADV_IND; + else if (get_cur_adv_instance_scan_rsp_len(hdev)) + cp.type = LE_ADV_SCAN_IND; + else + cp.type = LE_ADV_NONCONN_IND; + + cp.own_address_type = own_addr_type; + cp.channel_map = hdev->le_adv_channel_map; + + hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp); + + hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); +} + +static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +{ + u8 ad_len = 0; + size_t name_len; + + name_len = strlen(hdev->dev_name); + if (name_len > 0) { + size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2; + + if (name_len > max_len) { + name_len = max_len; + ptr[1] = EIR_NAME_SHORT; + } else + ptr[1] = EIR_NAME_COMPLETE; + + ptr[0] = name_len + 1; + + memcpy(ptr + 2, hdev->dev_name, name_len); + + ad_len += (name_len + 2); + ptr += (name_len + 2); + } + + return ad_len; +} + +static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, + u8 *ptr) +{ + struct adv_info *adv_instance; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return 0; + + /* TODO: Set the appropriate entries based on advertising instance flags + * here once flags other than 0 are supported. + */ + memcpy(ptr, adv_instance->scan_rsp_data, + adv_instance->scan_rsp_len); + + return adv_instance->scan_rsp_len; +} + +static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_scan_rsp_data cp; + u8 len; + + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return; + + memset(&cp, 0, sizeof(cp)); + + if (instance) + len = create_instance_scan_rsp_data(hdev, instance, cp.data); + else + len = create_default_scan_rsp_data(hdev, cp.data); + + if (hdev->scan_rsp_data_len == len && + !memcmp(cp.data, hdev->scan_rsp_data, len)) + return; + + memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); + hdev->scan_rsp_data_len = len; + + cp.length = len; + + hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp); +} + +void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance) +{ + if (instance == HCI_ADV_CURRENT) + instance = get_current_adv_instance(req->hdev); + + update_inst_scan_rsp_data(req, get_current_adv_instance(req->hdev)); +} + +static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) +{ + struct adv_info *adv_instance = NULL; + u8 ad_len = 0, flags = 0; + u32 instance_flags; + + /* Return 0 when the current instance identifier is invalid. */ + if (instance) { + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return 0; + } + + instance_flags = get_adv_instance_flags(hdev, instance); + + /* The Add Advertising command allows userspace to set both the general + * and limited discoverable flags. + */ + if (instance_flags & MGMT_ADV_FLAG_DISCOV) + flags |= LE_AD_GENERAL; + + if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) + flags |= LE_AD_LIMITED; + + if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { + /* If a discovery flag wasn't provided, simply use the global + * settings. + */ + if (!flags) + flags |= mgmt_get_adv_discov_flags(hdev); + + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + flags |= LE_AD_NO_BREDR; + + /* If flags would still be empty, then there is no need to + * include the "Flags" AD field". + */ + if (flags) { + ptr[0] = 0x02; + ptr[1] = EIR_FLAGS; + ptr[2] = flags; + + ad_len += 3; + ptr += 3; + } + } + + if (adv_instance) { + memcpy(ptr, adv_instance->adv_data, + adv_instance->adv_data_len); + ad_len += adv_instance->adv_data_len; + ptr += adv_instance->adv_data_len; + } + + /* Provide Tx Power only if we can provide a valid value for it */ + if (hdev->adv_tx_power != HCI_TX_POWER_INVALID && + (instance_flags & MGMT_ADV_FLAG_TX_POWER)) { + ptr[0] = 0x02; + ptr[1] = EIR_TX_POWER; + ptr[2] = (u8)hdev->adv_tx_power; + + ad_len += 3; + ptr += 3; + } + + return ad_len; +} + +static void update_inst_adv_data(struct hci_request *req, u8 instance) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_le_set_adv_data cp; + u8 len; + + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return; + + memset(&cp, 0, sizeof(cp)); + + len = create_instance_adv_data(hdev, instance, cp.data); + + /* There's nothing to do if the data hasn't changed */ + if (hdev->adv_data_len == len && + memcmp(cp.data, hdev->adv_data, len) == 0) + return; + + memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); + hdev->adv_data_len = len; + + cp.length = len; + + hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); +} + +void __hci_req_update_adv_data(struct hci_request *req, int instance) +{ + if (instance == HCI_ADV_CURRENT) + instance = get_current_adv_instance(req->hdev); + + update_inst_adv_data(req, instance); +} + +int hci_req_update_adv_data(struct hci_dev *hdev, int instance) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + __hci_req_update_adv_data(&req, instance); + + return hci_req_run(&req, NULL); +} + +static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + BT_DBG("%s status %u", hdev->name, status); +} + +void hci_req_reenable_advertising(struct hci_dev *hdev) +{ + struct hci_request req; + u8 instance; + + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + return; + + instance = get_current_adv_instance(hdev); + + hci_req_init(&req, hdev); + + if (instance) { + __hci_req_schedule_adv_instance(&req, instance, true); + } else { + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); + __hci_req_enable_advertising(&req); + } + + hci_req_run(&req, adv_enable_complete); +} + +static void adv_timeout_expire(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + adv_instance_expire.work); + + struct hci_request req; + u8 instance; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + hdev->adv_instance_timeout = 0; + + instance = get_current_adv_instance(hdev); + if (instance == 0x00) + goto unlock; + + hci_req_init(&req, hdev); + + hci_req_clear_adv_instance(hdev, &req, instance, false); + + if (list_empty(&hdev->adv_instances)) + __hci_req_disable_advertising(&req); + + if (!skb_queue_empty(&req.cmd_q)) + hci_req_run(&req, NULL); + +unlock: + hci_dev_unlock(hdev); +} + +int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, + bool force) +{ + struct hci_dev *hdev = req->hdev; + struct adv_info *adv_instance = NULL; + u16 timeout; + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + return -EPERM; + + if (hdev->adv_instance_timeout) + return -EBUSY; + + adv_instance = hci_find_adv_instance(hdev, instance); + if (!adv_instance) + return -ENOENT; + + /* A zero timeout means unlimited advertising. As long as there is + * only one instance, duration should be ignored. We still set a timeout + * in case further instances are being added later on. + * + * If the remaining lifetime of the instance is more than the duration + * then the timeout corresponds to the duration, otherwise it will be + * reduced to the remaining instance lifetime. + */ + if (adv_instance->timeout == 0 || + adv_instance->duration <= adv_instance->remaining_time) + timeout = adv_instance->duration; + else + timeout = adv_instance->remaining_time; + + /* The remaining time is being reduced unless the instance is being + * advertised without time limit. + */ + if (adv_instance->timeout) + adv_instance->remaining_time = + adv_instance->remaining_time - timeout; + + hdev->adv_instance_timeout = timeout; + queue_delayed_work(hdev->req_workqueue, + &hdev->adv_instance_expire, + msecs_to_jiffies(timeout * 1000)); + + /* If we're just re-scheduling the same instance again then do not + * execute any HCI commands. This happens when a single instance is + * being advertised. + */ + if (!force && hdev->cur_adv_instance == instance && + hci_dev_test_flag(hdev, HCI_LE_ADV)) + return 0; + + hdev->cur_adv_instance = instance; + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); + __hci_req_enable_advertising(req); + + return 0; +} + +static void cancel_adv_timeout(struct hci_dev *hdev) +{ + if (hdev->adv_instance_timeout) { + hdev->adv_instance_timeout = 0; + cancel_delayed_work(&hdev->adv_instance_expire); + } +} + +/* For a single instance: + * - force == true: The instance will be removed even when its remaining + * lifetime is not zero. + * - force == false: the instance will be deactivated but kept stored unless + * the remaining lifetime is zero. + * + * For instance == 0x00: + * - force == true: All instances will be removed regardless of their timeout + * setting. + * - force == false: Only instances that have a timeout will be removed. + */ +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, + u8 instance, bool force) +{ + struct adv_info *adv_instance, *n, *next_instance = NULL; + int err; + u8 rem_inst; + + /* Cancel any timeout concerning the removed instance(s). */ + if (!instance || hdev->cur_adv_instance == instance) + cancel_adv_timeout(hdev); + + /* Get the next instance to advertise BEFORE we remove + * the current one. This can be the same instance again + * if there is only one instance. + */ + if (instance && hdev->cur_adv_instance == instance) + next_instance = hci_get_next_instance(hdev, instance); + + if (instance == 0x00) { + list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, + list) { + if (!(force || adv_instance->timeout)) + continue; + + rem_inst = adv_instance->instance; + err = hci_remove_adv_instance(hdev, rem_inst); + if (!err) + mgmt_advertising_removed(NULL, hdev, rem_inst); + } + hdev->cur_adv_instance = 0x00; + } else { + adv_instance = hci_find_adv_instance(hdev, instance); + + if (force || (adv_instance && adv_instance->timeout && + !adv_instance->remaining_time)) { + /* Don't advertise a removed instance. */ + if (next_instance && + next_instance->instance == instance) + next_instance = NULL; + + err = hci_remove_adv_instance(hdev, instance); + if (!err) + mgmt_advertising_removed(NULL, hdev, instance); + } + } + + if (list_empty(&hdev->adv_instances)) { + hdev->cur_adv_instance = 0x00; + hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); + } + + if (!req || !hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return; + + if (next_instance) + __hci_req_schedule_adv_instance(req, next_instance->instance, + false); +} + static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) { struct hci_dev *hdev = req->hdev; @@ -1031,14 +1550,6 @@ unlock: hci_dev_unlock(hdev); } -static void cancel_adv_timeout(struct hci_dev *hdev) -{ - if (hdev->adv_instance_timeout) { - hdev->adv_instance_timeout = 0; - cancel_delayed_work(&hdev->adv_instance_expire); - } -} - static void disable_advertising(struct hci_request *req) { u8 enable = 0x00; @@ -1280,6 +1791,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->scan_update, scan_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); + INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); } void hci_request_cancel_all(struct hci_dev *hdev) @@ -1291,4 +1803,9 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->scan_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); + + if (hdev->adv_instance_timeout) { + cancel_delayed_work_sync(&hdev->adv_instance_expire); + hdev->adv_instance_timeout = 0; + } } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index cc8275520fb2..5358b1b12ca0 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -58,6 +58,20 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); +#define HCI_ADV_CURRENT (-1) + +void hci_req_reenable_advertising(struct hci_dev *hdev); +void __hci_req_enable_advertising(struct hci_request *req); +void __hci_req_disable_advertising(struct hci_request *req); +void __hci_req_update_adv_data(struct hci_request *req, int instance); +int hci_req_update_adv_data(struct hci_dev *hdev, int instance); +void __hci_req_update_scan_rsp_data(struct hci_request *req, int instance); + +int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, + bool force); +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, + u8 instance, bool force); + /* Returns true if HCI commands were queued */ bool hci_req_stop_discovery(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0d20e1328528..6d0f0025052f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -841,98 +841,7 @@ static struct mgmt_pending_cmd *pending_find_data(u16 opcode, return mgmt_pending_find_data(HCI_CHANNEL_CONTROL, opcode, hdev, data); } -static u8 get_current_adv_instance(struct hci_dev *hdev) -{ - /* The "Set Advertising" setting supersedes the "Add Advertising" - * setting. Here we set the advertising data based on which - * setting was set. When neither apply, default to the global settings, - * represented by instance "0". - */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && - !hci_dev_test_flag(hdev, HCI_ADVERTISING)) - return hdev->cur_adv_instance; - - return 0x00; -} - -static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) -{ - u8 ad_len = 0; - size_t name_len; - - name_len = strlen(hdev->dev_name); - if (name_len > 0) { - size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2; - - if (name_len > max_len) { - name_len = max_len; - ptr[1] = EIR_NAME_SHORT; - } else - ptr[1] = EIR_NAME_COMPLETE; - - ptr[0] = name_len + 1; - - memcpy(ptr + 2, hdev->dev_name, name_len); - - ad_len += (name_len + 2); - ptr += (name_len + 2); - } - - return ad_len; -} - -static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, - u8 *ptr) -{ - struct adv_info *adv_instance; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - - /* TODO: Set the appropriate entries based on advertising instance flags - * here once flags other than 0 are supported. - */ - memcpy(ptr, adv_instance->scan_rsp_data, - adv_instance->scan_rsp_len); - - return adv_instance->scan_rsp_len; -} - -static void update_inst_scan_rsp_data(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_scan_rsp_data cp; - u8 len; - - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return; - - memset(&cp, 0, sizeof(cp)); - - if (instance) - len = create_instance_scan_rsp_data(hdev, instance, cp.data); - else - len = create_default_scan_rsp_data(hdev, cp.data); - - if (hdev->scan_rsp_data_len == len && - !memcmp(cp.data, hdev->scan_rsp_data, len)) - return; - - memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); - hdev->scan_rsp_data_len = len; - - cp.length = len; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp); -} - -static void update_scan_rsp_data(struct hci_request *req) -{ - update_inst_scan_rsp_data(req, get_current_adv_instance(req->hdev)); -} - -static u8 get_adv_discov_flags(struct hci_dev *hdev) +u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; @@ -956,7 +865,7 @@ static u8 get_adv_discov_flags(struct hci_dev *hdev) return 0; } -static bool get_connectable(struct hci_dev *hdev) +bool mgmt_get_connectable(struct hci_dev *hdev) { struct mgmt_pending_cmd *cmd; @@ -973,163 +882,6 @@ static bool get_connectable(struct hci_dev *hdev) return hci_dev_test_flag(hdev, HCI_CONNECTABLE); } -static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance) -{ - u32 flags; - struct adv_info *adv_instance; - - if (instance == 0x00) { - /* Instance 0 always manages the "Tx Power" and "Flags" - * fields - */ - flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS; - - /* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting - * corresponds to the "connectable" instance flag. - */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) - flags |= MGMT_ADV_FLAG_CONNECTABLE; - - return flags; - } - - adv_instance = hci_find_adv_instance(hdev, instance); - - /* Return 0 when we got an invalid instance identifier. */ - if (!adv_instance) - return 0; - - return adv_instance->flags; -} - -static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev) -{ - u8 instance = get_current_adv_instance(hdev); - struct adv_info *adv_instance; - - /* Ignore instance 0 */ - if (instance == 0x00) - return 0; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - - /* TODO: Take into account the "appearance" and "local-name" flags here. - * These are currently being ignored as they are not supported. - */ - return adv_instance->scan_rsp_len; -} - -static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) -{ - struct adv_info *adv_instance = NULL; - u8 ad_len = 0, flags = 0; - u32 instance_flags; - - /* Return 0 when the current instance identifier is invalid. */ - if (instance) { - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return 0; - } - - instance_flags = get_adv_instance_flags(hdev, instance); - - /* The Add Advertising command allows userspace to set both the general - * and limited discoverable flags. - */ - if (instance_flags & MGMT_ADV_FLAG_DISCOV) - flags |= LE_AD_GENERAL; - - if (instance_flags & MGMT_ADV_FLAG_LIMITED_DISCOV) - flags |= LE_AD_LIMITED; - - if (flags || (instance_flags & MGMT_ADV_FLAG_MANAGED_FLAGS)) { - /* If a discovery flag wasn't provided, simply use the global - * settings. - */ - if (!flags) - flags |= get_adv_discov_flags(hdev); - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - flags |= LE_AD_NO_BREDR; - - /* If flags would still be empty, then there is no need to - * include the "Flags" AD field". - */ - if (flags) { - ptr[0] = 0x02; - ptr[1] = EIR_FLAGS; - ptr[2] = flags; - - ad_len += 3; - ptr += 3; - } - } - - if (adv_instance) { - memcpy(ptr, adv_instance->adv_data, - adv_instance->adv_data_len); - ad_len += adv_instance->adv_data_len; - ptr += adv_instance->adv_data_len; - } - - /* Provide Tx Power only if we can provide a valid value for it */ - if (hdev->adv_tx_power != HCI_TX_POWER_INVALID && - (instance_flags & MGMT_ADV_FLAG_TX_POWER)) { - ptr[0] = 0x02; - ptr[1] = EIR_TX_POWER; - ptr[2] = (u8)hdev->adv_tx_power; - - ad_len += 3; - ptr += 3; - } - - return ad_len; -} - -static void update_inst_adv_data(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_adv_data cp; - u8 len; - - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return; - - memset(&cp, 0, sizeof(cp)); - - len = create_instance_adv_data(hdev, instance, cp.data); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) - return; - - memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); - hdev->adv_data_len = len; - - cp.length = len; - - hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); -} - -static void update_adv_data(struct hci_request *req) -{ - update_inst_adv_data(req, get_current_adv_instance(req->hdev)); -} - -int mgmt_update_adv_data(struct hci_dev *hdev) -{ - struct hci_request req; - - hci_req_init(&req, hdev); - update_adv_data(&req); - - return hci_req_run(&req, NULL); -} - static void create_eir(struct hci_dev *hdev, u8 *data) { u8 *ptr = data; @@ -1247,70 +999,6 @@ static void update_class(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } -static void disable_advertising(struct hci_request *req) -{ - u8 enable = 0x00; - - hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); -} - -static void enable_advertising(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_adv_param cp; - u8 own_addr_type, enable = 0x01; - bool connectable; - u8 instance; - u32 flags; - - if (hci_conn_num(hdev, LE_LINK) > 0) - return; - - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - disable_advertising(req); - - /* Clear the HCI_LE_ADV bit temporarily so that the - * hci_update_random_address knows that it's safe to go ahead - * and write a new random address. The flag will be set back on - * as soon as the SET_ADV_ENABLE HCI command completes. - */ - hci_dev_clear_flag(hdev, HCI_LE_ADV); - - instance = get_current_adv_instance(hdev); - flags = get_adv_instance_flags(hdev, instance); - - /* If the "connectable" instance flag was not set, then choose between - * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. - */ - connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || - get_connectable(hdev); - - /* Set require_privacy to true only when non-connectable - * advertising is used. In that case it is fine to use a - * non-resolvable private address. - */ - if (hci_update_random_address(req, !connectable, &own_addr_type) < 0) - return; - - memset(&cp, 0, sizeof(cp)); - cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval); - cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval); - - if (connectable) - cp.type = LE_ADV_IND; - else if (get_cur_adv_instance_scan_rsp_len(hdev)) - cp.type = LE_ADV_SCAN_IND; - else - cp.type = LE_ADV_NONCONN_IND; - - cp.own_address_type = own_addr_type; - cp.channel_map = hdev->le_adv_channel_map; - - hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp); - - hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); -} - static void service_cache_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, @@ -1346,10 +1034,11 @@ static void rpa_expired(struct work_struct *work) return; /* The generation of a new RPA and programming it into the - * controller happens in the enable_advertising() function. + * controller happens in the hci_req_enable_advertising() + * function. */ hci_req_init(&req, hdev); - enable_advertising(&req); + __hci_req_enable_advertising(&req); hci_req_run(&req, NULL); } @@ -1417,8 +1106,7 @@ static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) } } -static void advertising_added(struct sock *sk, struct hci_dev *hdev, - u8 instance) +void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance) { struct mgmt_ev_advertising_added ev; @@ -1427,8 +1115,8 @@ static void advertising_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADVERTISING_ADDED, hdev, &ev, sizeof(ev), sk); } -static void advertising_removed(struct sock *sk, struct hci_dev *hdev, - u8 instance) +void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, + u8 instance) { struct mgmt_ev_advertising_removed ev; @@ -1437,65 +1125,6 @@ static void advertising_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADVERTISING_REMOVED, hdev, &ev, sizeof(ev), sk); } -static int schedule_adv_instance(struct hci_request *req, u8 instance, - bool force) { - struct hci_dev *hdev = req->hdev; - struct adv_info *adv_instance = NULL; - u16 timeout; - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - return -EPERM; - - if (hdev->adv_instance_timeout) - return -EBUSY; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return -ENOENT; - - /* A zero timeout means unlimited advertising. As long as there is - * only one instance, duration should be ignored. We still set a timeout - * in case further instances are being added later on. - * - * If the remaining lifetime of the instance is more than the duration - * then the timeout corresponds to the duration, otherwise it will be - * reduced to the remaining instance lifetime. - */ - if (adv_instance->timeout == 0 || - adv_instance->duration <= adv_instance->remaining_time) - timeout = adv_instance->duration; - else - timeout = adv_instance->remaining_time; - - /* The remaining time is being reduced unless the instance is being - * advertised without time limit. - */ - if (adv_instance->timeout) - adv_instance->remaining_time = - adv_instance->remaining_time - timeout; - - hdev->adv_instance_timeout = timeout; - queue_delayed_work(hdev->workqueue, - &hdev->adv_instance_expire, - msecs_to_jiffies(timeout * 1000)); - - /* If we're just re-scheduling the same instance again then do not - * execute any HCI commands. This happens when a single instance is - * being advertised. - */ - if (!force && hdev->cur_adv_instance == instance && - hci_dev_test_flag(hdev, HCI_LE_ADV)) - return 0; - - hdev->cur_adv_instance = instance; - update_adv_data(req); - update_scan_rsp_data(req); - enable_advertising(req); - - return 0; -} - static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { @@ -1504,76 +1133,6 @@ static void cancel_adv_timeout(struct hci_dev *hdev) } } -/* For a single instance: - * - force == true: The instance will be removed even when its remaining - * lifetime is not zero. - * - force == false: the instance will be deactivated but kept stored unless - * the remaining lifetime is zero. - * - * For instance == 0x00: - * - force == true: All instances will be removed regardless of their timeout - * setting. - * - force == false: Only instances that have a timeout will be removed. - */ -static void clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, - u8 instance, bool force) -{ - struct adv_info *adv_instance, *n, *next_instance = NULL; - int err; - u8 rem_inst; - - /* Cancel any timeout concerning the removed instance(s). */ - if (!instance || hdev->cur_adv_instance == instance) - cancel_adv_timeout(hdev); - - /* Get the next instance to advertise BEFORE we remove - * the current one. This can be the same instance again - * if there is only one instance. - */ - if (instance && hdev->cur_adv_instance == instance) - next_instance = hci_get_next_instance(hdev, instance); - - if (instance == 0x00) { - list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, - list) { - if (!(force || adv_instance->timeout)) - continue; - - rem_inst = adv_instance->instance; - err = hci_remove_adv_instance(hdev, rem_inst); - if (!err) - advertising_removed(NULL, hdev, rem_inst); - } - hdev->cur_adv_instance = 0x00; - } else { - adv_instance = hci_find_adv_instance(hdev, instance); - - if (force || (adv_instance && adv_instance->timeout && - !adv_instance->remaining_time)) { - /* Don't advertise a removed instance. */ - if (next_instance && - next_instance->instance == instance) - next_instance = NULL; - - err = hci_remove_adv_instance(hdev, instance); - if (!err) - advertising_removed(NULL, hdev, instance); - } - } - - if (list_empty(&hdev->adv_instances)) { - hdev->cur_adv_instance = 0x00; - hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); - } - - if (!req || !hdev_is_powered(hdev) || - hci_dev_test_flag(hdev, HCI_ADVERTISING)) - return; - - if (next_instance) - schedule_adv_instance(req, next_instance->instance, false); -} - static int clean_up_hci_state(struct hci_dev *hdev) { struct hci_request req; @@ -1589,10 +1148,10 @@ static int clean_up_hci_state(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - clear_adv_instance(hdev, NULL, 0x00, false); + hci_req_clear_adv_instance(hdev, NULL, 0x00, false); if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - disable_advertising(&req); + __hci_req_disable_advertising(&req); discov_stopped = hci_req_stop_discovery(&req); @@ -1975,7 +1534,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); update_ad: - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); err = hci_req_run(&req, set_discoverable_complete); if (err < 0) @@ -2060,7 +1619,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status, new_settings(hdev, cmd->sk); hci_req_update_scan(hdev); if (discov_changed) - mgmt_update_adv_data(hdev); + hci_req_update_adv_data(hdev, HCI_ADV_CURRENT); hci_update_background_scan(hdev); } @@ -2151,7 +1710,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); } - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); } else if (cp->val != test_bit(HCI_PSCAN, &hdev->flags)) { if (cp->val) { scan = SCAN_PAGE; @@ -2181,7 +1740,7 @@ no_scan_update: /* Update the advertising parameters if necessary */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - enable_advertising(&req); + __hci_req_enable_advertising(&req); err = hci_req_run(&req, set_connectable_complete); if (err < 0) { @@ -2466,8 +2025,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) struct hci_request req; hci_req_init(&req, hdev); - update_adv_data(&req); - update_scan_rsp_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); hci_req_run(&req, NULL); hci_update_background_scan(hdev); } @@ -2518,7 +2077,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) enabled = lmp_host_le_capable(hdev); if (!val) - clear_adv_instance(hdev, NULL, 0x00, true); + hci_req_clear_adv_instance(hdev, NULL, 0x00, true); if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; @@ -2565,7 +2124,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_cp.simul = 0x00; } else { if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - disable_advertising(&req); + __hci_req_disable_advertising(&req); } hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(hci_cp), @@ -3856,7 +3415,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, * no need to udpate the advertising data here. */ if (lmp_le_capable(hdev)) - update_scan_rsp_data(&req); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); err = hci_req_run(&req, set_name_complete); if (err < 0) @@ -4600,7 +4159,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, hci_req_init(&req, hdev); - err = schedule_adv_instance(&req, instance, true); + err = __hci_req_schedule_adv_instance(&req, instance, true); if (!err) err = hci_req_run(&req, enable_advertising_instance); @@ -4697,11 +4256,11 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, * We cannot use update_[adv|scan_rsp]_data() here as the * HCI_ADVERTISING flag is not yet set. */ - update_inst_adv_data(&req, 0x00); - update_inst_scan_rsp_data(&req, 0x00); - enable_advertising(&req); + __hci_req_update_adv_data(&req, 0x00); + __hci_req_update_scan_rsp_data(&req, 0x00); + __hci_req_enable_advertising(&req); } else { - disable_advertising(&req); + __hci_req_disable_advertising(&req); } err = hci_req_run(&req, set_advertising_complete); @@ -5033,8 +4592,8 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto unlock; } - /* We need to flip the bit already here so that update_adv_data - * generates the correct flags. + /* We need to flip the bit already here so that + * hci_req_update_adv_data generates the correct flags. */ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); @@ -5046,7 +4605,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) /* Since only the advertising data flags will change, there * is no need to update the scan response data. */ - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); err = hci_req_run(&req, set_bredr_complete); if (err < 0) @@ -6583,7 +6142,7 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev, rand, sizeof(rand)); } - flags = get_adv_discov_flags(hdev); + flags = mgmt_get_adv_discov_flags(hdev); if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) flags |= LE_AD_NO_BREDR; @@ -6772,7 +6331,7 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status, cancel_adv_timeout(hdev); hci_remove_adv_instance(hdev, instance); - advertising_removed(cmd ? cmd->sk : NULL, hdev, instance); + mgmt_advertising_removed(cmd ? cmd->sk : NULL, hdev, instance); } if (!cmd) @@ -6794,31 +6353,6 @@ unlock: hci_dev_unlock(hdev); } -void mgmt_adv_timeout_expired(struct hci_dev *hdev) -{ - u8 instance; - struct hci_request req; - - hdev->adv_instance_timeout = 0; - - instance = get_current_adv_instance(hdev); - if (instance == 0x00) - return; - - hci_dev_lock(hdev); - hci_req_init(&req, hdev); - - clear_adv_instance(hdev, &req, instance, false); - - if (list_empty(&hdev->adv_instances)) - disable_advertising(&req); - - if (!skb_queue_empty(&req.cmd_q)) - hci_req_run(&req, NULL); - - hci_dev_unlock(hdev); -} - static int add_advertising(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -6897,7 +6431,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, * actually added. */ if (hdev->adv_instance_cnt > prev_instance_cnt) - advertising_added(sk, hdev, cp->instance); + mgmt_advertising_added(sk, hdev, cp->instance); hci_dev_set_flag(hdev, HCI_ADVERTISING_INSTANCE); @@ -6944,7 +6478,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - err = schedule_adv_instance(&req, schedule_instance, true); + err = __hci_req_schedule_adv_instance(&req, schedule_instance, true); if (!err) err = hci_req_run(&req, add_advertising_complete); @@ -7024,10 +6558,10 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - clear_adv_instance(hdev, &req, cp->instance, true); + hci_req_clear_adv_instance(hdev, &req, cp->instance, true); if (list_empty(&hdev->adv_instances)) - disable_advertising(&req); + __hci_req_disable_advertising(&req); /* If no HCI commands have been collected so far or the HCI_ADVERTISING * flag is set or the device isn't powered then we have no HCI @@ -7367,8 +6901,8 @@ static int powered_update_hci(struct hci_dev *hdev) if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && (hci_dev_test_flag(hdev, HCI_ADVERTISING) || !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { - update_adv_data(&req); - update_scan_rsp_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); } if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && @@ -7380,11 +6914,12 @@ static int powered_update_hci(struct hci_dev *hdev) } if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - enable_advertising(&req); + __hci_req_enable_advertising(&req); else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && hdev->cur_adv_instance) - schedule_adv_instance(&req, hdev->cur_adv_instance, - true); + __hci_req_schedule_adv_instance(&req, + hdev->cur_adv_instance, + true); } link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); @@ -7505,7 +7040,7 @@ void mgmt_discoverable_timeout(struct hci_dev *hdev) * only update AD if advertising was enabled using Set Advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - update_adv_data(&req); + __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); hci_req_run(&req, NULL); @@ -8352,35 +7887,6 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering) mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL); } -static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - BT_DBG("%s status %u", hdev->name, status); -} - -void mgmt_reenable_advertising(struct hci_dev *hdev) -{ - struct hci_request req; - u8 instance; - - if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - return; - - instance = get_current_adv_instance(hdev); - - hci_req_init(&req, hdev); - - if (instance) { - schedule_adv_instance(&req, instance, true); - } else { - update_adv_data(&req); - update_scan_rsp_data(&req); - enable_advertising(&req); - } - - hci_req_run(&req, adv_enable_complete); -} - static struct hci_mgmt_chan chan = { .channel = HCI_CHANNEL_CONTROL, .handler_count = ARRAY_SIZE(mgmt_handlers), -- cgit v1.2.3-71-gd317 From 53c0ba74510c1182786dcd1e3710215467777601 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 22 Nov 2015 16:43:43 +0300 Subject: Bluetooth: Move connectable changes to hdev->req_workqueue This way the connectable changes are synchronized against each other, which helps avoid potential races. The connectable mode is also linked together with LE advertising which makes is more convenient to have it behind the same workqueue. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 + net/bluetooth/hci_request.c | 39 ++++++++++++++++++ net/bluetooth/mgmt.c | 86 ++++++---------------------------------- 3 files changed, 53 insertions(+), 74 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b56085b6ecce..a855e41df68c 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -330,6 +330,7 @@ struct hci_dev { struct work_struct discov_update; struct work_struct bg_scan_update; struct work_struct scan_update; + struct work_struct connectable_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -1491,6 +1492,7 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, u16 max_interval, u16 latency, u16 timeout); void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); +void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e6622bd1926d..167c90644b4b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1274,6 +1274,43 @@ static void scan_update_work(struct work_struct *work) hci_req_sync(hdev, update_scan, 0, HCI_CMD_TIMEOUT, NULL); } +static int connectable_update(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + hci_dev_lock(hdev); + + __hci_req_update_scan(req); + + /* If BR/EDR is not enabled and we disable advertising as a + * by-product of disabling connectable, we need to update the + * advertising flags. + */ + if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + + /* Update the advertising parameters if necessary */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + __hci_req_enable_advertising(req); + + __hci_update_background_scan(req); + + hci_dev_unlock(hdev); + + return 0; +} + +static void connectable_update_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + connectable_update); + u8 status; + + hci_req_sync(hdev, connectable_update, 0, HCI_CMD_TIMEOUT, &status); + mgmt_set_connectable_complete(hdev, status); +} + void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { @@ -1789,6 +1826,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->discov_update, discov_update); INIT_WORK(&hdev->bg_scan_update, bg_scan_update); INIT_WORK(&hdev->scan_update, scan_update_work); + INIT_WORK(&hdev->connectable_update, connectable_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); @@ -1801,6 +1839,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->discov_update); cancel_work_sync(&hdev->bg_scan_update); cancel_work_sync(&hdev->scan_update); + cancel_work_sync(&hdev->connectable_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6d0f0025052f..d8b76ca5c820 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1580,12 +1580,9 @@ static void write_fast_connectable(struct hci_request *req, bool enable) hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); } -static void set_connectable_complete(struct hci_dev *hdev, u8 status, - u16 opcode) +void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - struct mgmt_mode *cp; - bool conn_changed, discov_changed; BT_DBG("status 0x%02x", status); @@ -1601,27 +1598,8 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status, goto remove_cmd; } - cp = cmd->param; - if (cp->val) { - conn_changed = !hci_dev_test_and_set_flag(hdev, - HCI_CONNECTABLE); - discov_changed = false; - } else { - conn_changed = hci_dev_test_and_clear_flag(hdev, - HCI_CONNECTABLE); - discov_changed = hci_dev_test_and_clear_flag(hdev, - HCI_DISCOVERABLE); - } - send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev); - - if (conn_changed || discov_changed) { - new_settings(hdev, cmd->sk); - hci_req_update_scan(hdev); - if (discov_changed) - hci_req_update_adv_data(hdev, HCI_ADV_CURRENT); - hci_update_background_scan(hdev); - } + new_settings(hdev, cmd->sk); remove_cmd: mgmt_pending_remove(cmd); @@ -1664,8 +1642,6 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_mode *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; - u8 scan; int err; BT_DBG("request for %s", hdev->name); @@ -1699,57 +1675,19 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - hci_req_init(&req, hdev); - - /* If BR/EDR is not enabled and we disable advertising as a - * by-product of disabling connectable, we need to update the - * advertising flags. - */ - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { - if (!cp->val) { - hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); - } - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - } else if (cp->val != test_bit(HCI_PSCAN, &hdev->flags)) { - if (cp->val) { - scan = SCAN_PAGE; - } else { - /* If we don't have any whitelist entries just - * disable all scanning. If there are entries - * and we had both page and inquiry scanning - * enabled then fall back to only page scanning. - * Otherwise no changes are needed. - */ - if (list_empty(&hdev->whitelist)) - scan = SCAN_DISABLED; - else if (test_bit(HCI_ISCAN, &hdev->flags)) - scan = SCAN_PAGE; - else - goto no_scan_update; - - if (test_bit(HCI_ISCAN, &hdev->flags) && - hdev->discov_timeout > 0) - cancel_delayed_work(&hdev->discov_off); - } + if (cp->val) { + hci_dev_set_flag(hdev, HCI_CONNECTABLE); + } else { + if (hdev->discov_timeout > 0) + cancel_delayed_work(&hdev->discov_off); - hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_CONNECTABLE); } -no_scan_update: - /* Update the advertising parameters if necessary */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) - __hci_req_enable_advertising(&req); - - err = hci_req_run(&req, set_connectable_complete); - if (err < 0) { - mgmt_pending_remove(cmd); - if (err == -ENODATA) - err = set_connectable_update_settings(hdev, sk, - cp->val); - goto failed; - } + queue_work(hdev->req_workqueue, &hdev->connectable_update); + err = 0; failed: hci_dev_unlock(hdev); -- cgit v1.2.3-71-gd317 From aed1a8851db022c3bd22af41a343068b8c6e40c1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sun, 22 Nov 2015 17:24:44 +0300 Subject: Bluetooth: Move discoverable changes to hdev->req_workqueue The discoverable mode is intrinsically linked with the connectable mode e.g. through sharing the same HCI command (Write Scan Enable) for BR/EDR. It makes therefore sense to move it to hci_request.c and run the changes through the same hdev->req_workqueue. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 + net/bluetooth/hci_request.c | 64 ++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 90 ++++++---------------------------------- 3 files changed, 79 insertions(+), 77 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a855e41df68c..0a6966ed7ee1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -331,6 +331,7 @@ struct hci_dev { struct work_struct bg_scan_update; struct work_struct scan_update; struct work_struct connectable_update; + struct work_struct discoverable_update; struct delayed_work le_scan_disable; struct delayed_work le_scan_restart; @@ -1493,6 +1494,7 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_smp_complete(struct hci_conn *conn, bool complete); bool mgmt_get_connectable(struct hci_dev *hdev); void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status); +void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status); u8 mgmt_get_adv_discov_flags(struct hci_dev *hdev); void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e5e827b762b9..8f72218ed805 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1351,6 +1351,68 @@ void __hci_req_update_class(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } +static void write_iac(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_cp_write_current_iac_lap cp; + + if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) + return; + + if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { + /* Limited discoverable mode */ + cp.num_iac = min_t(u8, hdev->num_iac, 2); + cp.iac_lap[0] = 0x00; /* LIAC */ + cp.iac_lap[1] = 0x8b; + cp.iac_lap[2] = 0x9e; + cp.iac_lap[3] = 0x33; /* GIAC */ + cp.iac_lap[4] = 0x8b; + cp.iac_lap[5] = 0x9e; + } else { + /* General discoverable mode */ + cp.num_iac = 1; + cp.iac_lap[0] = 0x33; /* GIAC */ + cp.iac_lap[1] = 0x8b; + cp.iac_lap[2] = 0x9e; + } + + hci_req_add(req, HCI_OP_WRITE_CURRENT_IAC_LAP, + (cp.num_iac * 3) + 1, &cp); +} + +static int discoverable_update(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + hci_dev_lock(hdev); + + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { + write_iac(req); + __hci_req_update_scan(req); + __hci_req_update_class(req); + } + + /* Advertising instances don't use the global discoverable setting, so + * only update AD if advertising was enabled using Set Advertising. + */ + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + + hci_dev_unlock(hdev); + + return 0; +} + +static void discoverable_update_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + discoverable_update); + u8 status; + + hci_req_sync(hdev, discoverable_update, 0, HCI_CMD_TIMEOUT, &status); + mgmt_set_discoverable_complete(hdev, status); +} + void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, u8 reason) { @@ -1867,6 +1929,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->bg_scan_update, bg_scan_update); INIT_WORK(&hdev->scan_update, scan_update_work); INIT_WORK(&hdev->connectable_update, connectable_update_work); + INIT_WORK(&hdev->discoverable_update, discoverable_update_work); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); @@ -1880,6 +1943,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->bg_scan_update); cancel_work_sync(&hdev->scan_update); cancel_work_sync(&hdev->connectable_update); + cancel_work_sync(&hdev->discoverable_update); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f5a4ee92f2bf..8846cb3b0aaa 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1282,13 +1282,9 @@ static u8 mgmt_le_support(struct hci_dev *hdev) return MGMT_STATUS_SUCCESS; } -static void set_discoverable_complete(struct hci_dev *hdev, u8 status, - u16 opcode) +void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status) { struct mgmt_pending_cmd *cmd; - struct mgmt_mode *cp; - struct hci_request req; - bool changed; BT_DBG("status 0x%02x", status); @@ -1305,33 +1301,14 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status, goto remove_cmd; } - cp = cmd->param; - if (cp->val) { - changed = !hci_dev_test_and_set_flag(hdev, HCI_DISCOVERABLE); - - if (hdev->discov_timeout > 0) { - int to = msecs_to_jiffies(hdev->discov_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->discov_off, - to); - } - } else { - changed = hci_dev_test_and_clear_flag(hdev, HCI_DISCOVERABLE); + if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE) && + hdev->discov_timeout > 0) { + int to = msecs_to_jiffies(hdev->discov_timeout * 1000); + queue_delayed_work(hdev->req_workqueue, &hdev->discov_off, to); } send_settings_rsp(cmd->sk, MGMT_OP_SET_DISCOVERABLE, hdev); - - if (changed) - new_settings(hdev, cmd->sk); - - /* When the discoverable mode gets changed, make sure - * that class of device has the limited discoverable - * bit correctly set. Also update page scan based on whitelist - * entries. - */ - hci_req_init(&req, hdev); - __hci_req_update_scan(&req); - __hci_req_update_class(&req); - hci_req_run(&req, NULL); + new_settings(hdev, cmd->sk); remove_cmd: mgmt_pending_remove(cmd); @@ -1345,9 +1322,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_set_discoverable *cp = data; struct mgmt_pending_cmd *cmd; - struct hci_request req; u16 timeout; - u8 scan; int err; BT_DBG("request for %s", hdev->name); @@ -1447,58 +1422,19 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = timeout; + if (cp->val) + hci_dev_set_flag(hdev, HCI_DISCOVERABLE); + else + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + /* Limited discoverable mode */ if (cp->val == 0x02) hci_dev_set_flag(hdev, HCI_LIMITED_DISCOVERABLE); else hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - hci_req_init(&req, hdev); - - /* The procedure for LE-only controllers is much simpler - just - * update the advertising data. - */ - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - goto update_ad; - - scan = SCAN_PAGE; - - if (cp->val) { - struct hci_cp_write_current_iac_lap hci_cp; - - if (cp->val == 0x02) { - /* Limited discoverable mode */ - hci_cp.num_iac = min_t(u8, hdev->num_iac, 2); - hci_cp.iac_lap[0] = 0x00; /* LIAC */ - hci_cp.iac_lap[1] = 0x8b; - hci_cp.iac_lap[2] = 0x9e; - hci_cp.iac_lap[3] = 0x33; /* GIAC */ - hci_cp.iac_lap[4] = 0x8b; - hci_cp.iac_lap[5] = 0x9e; - } else { - /* General discoverable mode */ - hci_cp.num_iac = 1; - hci_cp.iac_lap[0] = 0x33; /* GIAC */ - hci_cp.iac_lap[1] = 0x8b; - hci_cp.iac_lap[2] = 0x9e; - } - - hci_req_add(&req, HCI_OP_WRITE_CURRENT_IAC_LAP, - (hci_cp.num_iac * 3) + 1, &hci_cp); - - scan |= SCAN_INQUIRY; - } else { - hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - } - - hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, sizeof(scan), &scan); - -update_ad: - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - - err = hci_req_run(&req, set_discoverable_complete); - if (err < 0) - mgmt_pending_remove(cmd); + queue_work(hdev->req_workqueue, &hdev->discoverable_update); + err = 0; failed: hci_dev_unlock(hdev); -- cgit v1.2.3-71-gd317 From c366f555b8df67633b849a5088bb897d6c63aaa5 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 23 Nov 2015 15:43:06 +0200 Subject: Bluetooth: Move discoverable timeout behind hdev->req_workqueue Since the other discoverable changes are behind req_workqueue now it only makes sense to move the discoverable timeout there as well. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_core.c | 13 ------------- net/bluetooth/hci_request.c | 26 +++++++++++++++++++++++++ net/bluetooth/mgmt.c | 41 ++-------------------------------------- 4 files changed, 28 insertions(+), 53 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0a6966ed7ee1..319bf020cea6 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1436,7 +1436,6 @@ void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); void mgmt_set_powered_failed(struct hci_dev *hdev, int err); int mgmt_powered(struct hci_dev *hdev, u8 powered); -void mgmt_discoverable_timeout(struct hci_dev *hdev); void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent); void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bab8958bf46e..484c75f3332c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1537,7 +1537,6 @@ int hci_dev_do_close(struct hci_dev *hdev) flush_work(&hdev->rx_work); if (hdev->discov_timeout > 0) { - cancel_delayed_work(&hdev->discov_off); hdev->discov_timeout = 0; hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); @@ -2096,17 +2095,6 @@ static void hci_error_reset(struct work_struct *work) hci_dev_do_open(hdev); } -static void hci_discov_off(struct work_struct *work) -{ - struct hci_dev *hdev; - - hdev = container_of(work, struct hci_dev, discov_off.work); - - BT_DBG("%s", hdev->name); - - mgmt_discoverable_timeout(hdev); -} - void hci_uuids_clear(struct hci_dev *hdev) { struct bt_uuid *uuid, *tmp; @@ -2986,7 +2974,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->error_reset, hci_error_reset); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); - INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 8f72218ed805..fe14fd121d36 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1923,6 +1923,30 @@ static void discov_update(struct work_struct *work) } } +static void discov_off(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + discov_off.work); + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + /* When discoverable timeout triggers, then just make sure + * the limited discoverable flag is cleared. Even in the case + * of a timeout triggered from general discoverable, it is + * safe to unconditionally clear the flag. + */ + hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); + hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); + hdev->discov_timeout = 0; + + hci_dev_unlock(hdev); + + hci_req_sync(hdev, discoverable_update, 0, HCI_CMD_TIMEOUT, NULL); + mgmt_new_settings(hdev); +} + void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->discov_update, discov_update); @@ -1930,6 +1954,7 @@ void hci_request_setup(struct hci_dev *hdev) INIT_WORK(&hdev->scan_update, scan_update_work); INIT_WORK(&hdev->connectable_update, connectable_update_work); INIT_WORK(&hdev->discoverable_update, discoverable_update_work); + INIT_DELAYED_WORK(&hdev->discov_off, discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); @@ -1944,6 +1969,7 @@ void hci_request_cancel_all(struct hci_dev *hdev) cancel_work_sync(&hdev->scan_update); cancel_work_sync(&hdev->connectable_update); cancel_work_sync(&hdev->discoverable_update); + cancel_delayed_work_sync(&hdev->discov_off); cancel_delayed_work_sync(&hdev->le_scan_disable); cancel_delayed_work_sync(&hdev->le_scan_restart); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8846cb3b0aaa..29b3bb70ae9f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1401,8 +1401,8 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, if (cp->val && hdev->discov_timeout > 0) { int to = msecs_to_jiffies(hdev->discov_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->discov_off, - to); + queue_delayed_work(hdev->req_workqueue, + &hdev->discov_off, to); } err = send_settings_rsp(sk, MGMT_OP_SET_DISCOVERABLE, hdev); @@ -6848,43 +6848,6 @@ void mgmt_set_powered_failed(struct hci_dev *hdev, int err) mgmt_pending_remove(cmd); } -void mgmt_discoverable_timeout(struct hci_dev *hdev) -{ - struct hci_request req; - - hci_dev_lock(hdev); - - /* When discoverable timeout triggers, then just make sure - * the limited discoverable flag is cleared. Even in the case - * of a timeout triggered from general discoverable, it is - * safe to unconditionally clear the flag. - */ - hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); - hci_dev_clear_flag(hdev, HCI_DISCOVERABLE); - - hci_req_init(&req, hdev); - if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) { - u8 scan = SCAN_PAGE; - hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, - sizeof(scan), &scan); - } - __hci_req_update_class(&req); - - /* Advertising instances don't use the global discoverable setting, so - * only update AD if advertising was enabled using Set Advertising. - */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - - hci_req_run(&req, NULL); - - hdev->discov_timeout = 0; - - new_settings(hdev, NULL); - - hci_dev_unlock(hdev); -} - void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent) { -- cgit v1.2.3-71-gd317 From 2ff13894cfb877cb3d02d96a8402202f0a6f3efd Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 25 Nov 2015 16:15:44 +0200 Subject: Bluetooth: Perform HCI update for power on synchronously The request to update HCI during power on is always coming either from hdev->req_workqueue or through an ioctl, so it's safe to use hci_req_sync for it. This way we also eliminate potential races with incoming mgmt commands or other actions while powering on. Part of this refactoring is the splitting of mgmt_powered() into mgmt_power_on() and __mgmt_power_off() functions. The main reason is the different requirements as far as hdev locking is concerned, as highlighted with the __ prefix of the power off API. Since the power on in the case of clearing the AUTO_OFF flag cannot be done synchronously in the set_powered mgmt handler, the hci_power_on work callback is extended to cover this (which also simplifies the set_powered helper a lot). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 +- net/bluetooth/hci_core.c | 21 ++++-- net/bluetooth/hci_request.c | 100 ++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 2 + net/bluetooth/mgmt.c | 136 +++------------------------------------ 5 files changed, 128 insertions(+), 134 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 319bf020cea6..c95e0326c41a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1435,7 +1435,8 @@ int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); void mgmt_set_powered_failed(struct hci_dev *hdev, int err); -int mgmt_powered(struct hci_dev *hdev, u8 powered); +void mgmt_power_on(struct hci_dev *hdev, int err); +void __mgmt_power_off(struct hci_dev *hdev); void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, bool persistent); void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 484c75f3332c..eac3f6fa1272 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1399,10 +1399,10 @@ static int hci_dev_do_open(struct hci_dev *hdev) !hci_dev_test_flag(hdev, HCI_CONFIG) && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hci_dev_test_flag(hdev, HCI_MGMT) && hdev->dev_type == HCI_BREDR) { - hci_dev_lock(hdev); - mgmt_powered(hdev, 1); - hci_dev_unlock(hdev); + ret = __hci_req_hci_power_on(hdev); + mgmt_power_on(hdev, ret); } } else { /* Init failed, cleanup */ @@ -1559,8 +1559,9 @@ int hci_dev_do_close(struct hci_dev *hdev) auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); - if (!auto_off && hdev->dev_type == HCI_BREDR) - mgmt_powered(hdev, 0); + if (!auto_off && hdev->dev_type == HCI_BREDR && + hci_dev_test_flag(hdev, HCI_MGMT)) + __mgmt_power_off(hdev); hci_inquiry_cache_flush(hdev); hci_pend_le_actions_clear(hdev); @@ -2013,6 +2014,16 @@ static void hci_power_on(struct work_struct *work) BT_DBG("%s", hdev->name); + if (test_bit(HCI_UP, &hdev->flags) && + hci_dev_test_flag(hdev, HCI_MGMT) && + hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { + hci_req_sync_lock(hdev); + err = __hci_req_hci_power_on(hdev); + hci_req_sync_unlock(hdev); + mgmt_power_on(hdev, err); + return; + } + err = hci_dev_do_open(hdev); if (err < 0) { hci_dev_lock(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 0abd83ddd4fb..7cc24f1448bd 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2181,6 +2181,106 @@ static void discov_off(struct work_struct *work) mgmt_new_settings(hdev); } +static int powered_update_hci(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + struct adv_info *adv_instance; + u8 link_sec; + + hci_dev_lock(hdev); + + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && + !lmp_host_ssp_capable(hdev)) { + u8 mode = 0x01; + + hci_req_add(req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); + + if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { + u8 support = 0x01; + + hci_req_add(req, HCI_OP_WRITE_SC_SUPPORT, + sizeof(support), &support); + } + } + + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && + lmp_bredr_capable(hdev)) { + struct hci_cp_write_le_host_supported cp; + + cp.le = 0x01; + cp.simul = 0x00; + + /* Check first if we already have the right + * host state (host features set) + */ + if (cp.le != lmp_host_le_capable(hdev) || + cp.simul != lmp_host_le_br_capable(hdev)) + hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, + sizeof(cp), &cp); + } + + if (lmp_le_capable(hdev)) { + /* Make sure the controller has a good default for + * advertising data. This also applies to the case + * where BR/EDR was toggled during the AUTO_OFF phase. + */ + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && + (hci_dev_test_flag(hdev, HCI_ADVERTISING) || + !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { + __hci_req_update_adv_data(req, HCI_ADV_CURRENT); + __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); + } + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + hdev->cur_adv_instance == 0x00 && + !list_empty(&hdev->adv_instances)) { + adv_instance = list_first_entry(&hdev->adv_instances, + struct adv_info, list); + hdev->cur_adv_instance = adv_instance->instance; + } + + if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) + __hci_req_enable_advertising(req); + else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + hdev->cur_adv_instance) + __hci_req_schedule_adv_instance(req, + hdev->cur_adv_instance, + true); + } + + link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); + if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) + hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, + sizeof(link_sec), &link_sec); + + if (lmp_bredr_capable(hdev)) { + if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) + __hci_req_write_fast_connectable(req, true); + else + __hci_req_write_fast_connectable(req, false); + __hci_req_update_scan(req); + __hci_req_update_class(req); + __hci_req_update_name(req); + __hci_req_update_eir(req); + } + + hci_dev_unlock(hdev); + return 0; +} + +int __hci_req_hci_power_on(struct hci_dev *hdev) +{ + /* Register the available SMP channels (BR/EDR and LE) only when + * successfully powering on the controller. This late + * registration is required so that LE SMP can clearly decide if + * the public address or static address is used. + */ + smp_register(hdev); + + return __hci_req_sync(hdev, powered_update_hci, 0, HCI_CMD_TIMEOUT, + NULL); +} + void hci_request_setup(struct hci_dev *hdev) { INIT_WORK(&hdev->discov_update, discov_update); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index d3dd24deca74..a24d3b55094c 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -55,6 +55,8 @@ void hci_req_sync_cancel(struct hci_dev *hdev, int err); struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); +int __hci_req_hci_power_on(struct hci_dev *hdev); + void __hci_req_write_fast_connectable(struct hci_request *req, bool enable); void __hci_req_update_name(struct hci_request *req); void __hci_req_update_eir(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0a7e6f4de383..468402ad933c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -961,17 +961,6 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { - cancel_delayed_work(&hdev->power_off); - - if (cp->val) { - mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, - data, len); - err = mgmt_powered(hdev, 1); - goto failed; - } - } - if (!!cp->val == hdev_is_powered(hdev)) { err = send_settings_rsp(sk, MGMT_OP_SET_POWERED, hdev); goto failed; @@ -6434,139 +6423,33 @@ static void restart_le_actions(struct hci_dev *hdev) } } -static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) +void mgmt_power_on(struct hci_dev *hdev, int err) { struct cmd_lookup match = { NULL, hdev }; - BT_DBG("status 0x%02x", status); + BT_DBG("err %d", err); - if (!status) { + hci_dev_lock(hdev); + + if (!err) { restart_le_actions(hdev); hci_update_background_scan(hdev); } - hci_dev_lock(hdev); - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); new_settings(hdev, match.sk); - hci_dev_unlock(hdev); - if (match.sk) sock_put(match.sk); -} -static int powered_update_hci(struct hci_dev *hdev) -{ - struct hci_request req; - struct adv_info *adv_instance; - u8 link_sec; - - hci_req_init(&req, hdev); - - if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED) && - !lmp_host_ssp_capable(hdev)) { - u8 mode = 0x01; - - hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); - - if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) { - u8 support = 0x01; - - hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, - sizeof(support), &support); - } - } - - if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && - lmp_bredr_capable(hdev)) { - struct hci_cp_write_le_host_supported cp; - - cp.le = 0x01; - cp.simul = 0x00; - - /* Check first if we already have the right - * host state (host features set) - */ - if (cp.le != lmp_host_le_capable(hdev) || - cp.simul != lmp_host_le_br_capable(hdev)) - hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, - sizeof(cp), &cp); - } - - if (lmp_le_capable(hdev)) { - /* Make sure the controller has a good default for - * advertising data. This also applies to the case - * where BR/EDR was toggled during the AUTO_OFF phase. - */ - if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && - (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { - __hci_req_update_adv_data(&req, HCI_ADV_CURRENT); - __hci_req_update_scan_rsp_data(&req, HCI_ADV_CURRENT); - } - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && - hdev->cur_adv_instance == 0x00 && - !list_empty(&hdev->adv_instances)) { - adv_instance = list_first_entry(&hdev->adv_instances, - struct adv_info, list); - hdev->cur_adv_instance = adv_instance->instance; - } - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) - __hci_req_enable_advertising(&req); - else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && - hdev->cur_adv_instance) - __hci_req_schedule_adv_instance(&req, - hdev->cur_adv_instance, - true); - } - - link_sec = hci_dev_test_flag(hdev, HCI_LINK_SECURITY); - if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) - hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE, - sizeof(link_sec), &link_sec); - - if (lmp_bredr_capable(hdev)) { - if (hci_dev_test_flag(hdev, HCI_FAST_CONNECTABLE)) - __hci_req_write_fast_connectable(&req, true); - else - __hci_req_write_fast_connectable(&req, false); - __hci_req_update_scan(&req); - __hci_req_update_class(&req); - __hci_req_update_name(&req); - __hci_req_update_eir(&req); - } - - return hci_req_run(&req, powered_complete); + hci_dev_unlock(hdev); } -int mgmt_powered(struct hci_dev *hdev, u8 powered) +void __mgmt_power_off(struct hci_dev *hdev) { struct cmd_lookup match = { NULL, hdev }; u8 status, zero_cod[] = { 0, 0, 0 }; - int err; - - if (!hci_dev_test_flag(hdev, HCI_MGMT)) - return 0; - - if (powered) { - /* Register the available SMP channels (BR/EDR and LE) only - * when successfully powering on the controller. This late - * registration is required so that LE SMP can clearly - * decide if the public address or static address is used. - */ - smp_register(hdev); - - if (powered_update_hci(hdev) == 0) - return 0; - - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, - &match); - goto new_settings; - } mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); @@ -6588,13 +6471,10 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered) mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, zero_cod, sizeof(zero_cod), NULL); -new_settings: - err = new_settings(hdev, match.sk); + new_settings(hdev, match.sk); if (match.sk) sock_put(match.sk); - - return err; } void mgmt_set_powered_failed(struct hci_dev *hdev, int err) -- cgit v1.2.3-71-gd317 From 17fd08ffb5981cff2c921eb479f46b872b02b2b9 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 26 Nov 2015 12:15:59 +0200 Subject: Bluetooth: Remove unnecessary HCI_ADVERTISING_INSTANCE flag This flag just tells us whether hdev->adv_instances is empty or not. We can equally well use the list_empty() function to get this information. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 - net/bluetooth/hci_request.c | 19 ++++++++----------- net/bluetooth/mgmt.c | 8 +------- 3 files changed, 9 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index cc2216727655..339ea57be423 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -239,7 +239,6 @@ enum { HCI_LE_ENABLED, HCI_ADVERTISING, HCI_ADVERTISING_CONNECTABLE, - HCI_ADVERTISING_INSTANCE, HCI_CONNECTABLE, HCI_DISCOVERABLE, HCI_LIMITED_DISCOVERABLE, diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 7cc24f1448bd..adfcd6f1d0de 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -822,7 +822,7 @@ static u8 get_current_adv_instance(struct hci_dev *hdev) * setting was set. When neither apply, default to the global settings, * represented by instance "0". */ - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + if (!list_empty(&hdev->adv_instances) && !hci_dev_test_flag(hdev, HCI_ADVERTISING)) return hdev->cur_adv_instance; @@ -1144,7 +1144,7 @@ void hci_req_reenable_advertising(struct hci_dev *hdev) u8 instance; if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + list_empty(&hdev->adv_instances)) return; instance = get_current_adv_instance(hdev); @@ -1202,7 +1202,7 @@ int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, u16 timeout; if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + list_empty(&hdev->adv_instances)) return -EPERM; if (hdev->adv_instance_timeout) @@ -1319,10 +1319,8 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, } } - if (list_empty(&hdev->adv_instances)) { + if (list_empty(&hdev->adv_instances)) hdev->cur_adv_instance = 0x00; - hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); - } if (!req || !hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) @@ -1525,7 +1523,7 @@ static int connectable_update(struct hci_request *req, unsigned long opt) /* Update the advertising parameters if necessary */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) + !list_empty(&hdev->adv_instances)) __hci_req_enable_advertising(req); __hci_update_background_scan(req); @@ -2226,13 +2224,12 @@ static int powered_update_hci(struct hci_request *req, unsigned long opt) */ if (hci_dev_test_flag(hdev, HCI_LE_ENABLED) && (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE))) { + list_empty(&hdev->adv_instances))) { __hci_req_update_adv_data(req, HCI_ADV_CURRENT); __hci_req_update_scan_rsp_data(req, HCI_ADV_CURRENT); } - if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && - hdev->cur_adv_instance == 0x00 && + if (hdev->cur_adv_instance == 0x00 && !list_empty(&hdev->adv_instances)) { adv_instance = list_first_entry(&hdev->adv_instances, struct adv_info, list); @@ -2241,7 +2238,7 @@ static int powered_update_hci(struct hci_request *req, unsigned long opt) if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) __hci_req_enable_advertising(req); - else if (hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) && + else if (!list_empty(&hdev->adv_instances) && hdev->cur_adv_instance) __hci_req_schedule_adv_instance(req, hdev->cur_adv_instance, diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9ce2bb2fc977..03a65e89a7d7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3734,7 +3734,6 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, * set up earlier, then re-enable multi-instance advertising. */ if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - !hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE) || list_empty(&hdev->adv_instances)) goto unlock; @@ -5892,9 +5891,6 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status, cmd = pending_find(MGMT_OP_ADD_ADVERTISING, hdev); - if (status) - hci_dev_clear_flag(hdev, HCI_ADVERTISING_INSTANCE); - list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { if (!adv_instance->pending) continue; @@ -6012,8 +6008,6 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, if (hdev->adv_instance_cnt > prev_instance_cnt) mgmt_advertising_added(sk, hdev, cp->instance); - hci_dev_set_flag(hdev, HCI_ADVERTISING_INSTANCE); - if (hdev->cur_adv_instance == cp->instance) { /* If the currently advertised instance is being changed then * cancel the current advertising and schedule the next @@ -6129,7 +6123,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!hci_dev_test_flag(hdev, HCI_ADVERTISING_INSTANCE)) { + if (list_empty(&hdev->adv_instances)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); goto unlock; -- cgit v1.2.3-71-gd317 From 00f59314111a6b18ee65b238b38c470dbdbf3be5 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 9 Dec 2015 22:46:29 +0100 Subject: 6lowpan: add lowpan dev register helpers This patch introduces register and unregister functionality for lowpan interfaces. While register a lowpan interface there are several things which need to be initialize by the 6lowpan subsystem. Upcoming functionality need to register/unregister per interface components e.g. debugfs entry. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/6lowpan.h | 7 ++++++- net/6lowpan/core.c | 33 +++++++++++++++++++++++++++++++-- net/bluetooth/6lowpan.c | 8 +++----- net/ieee802154/6lowpan/core.c | 6 ++---- 4 files changed, 42 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index cf3bc564ac03..730211fd8ed7 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -185,7 +185,12 @@ static inline void lowpan_push_hc_data(u8 **hc_ptr, const void *data, *hc_ptr += len; } -void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype); +int lowpan_register_netdevice(struct net_device *dev, + enum lowpan_lltypes lltype); +int lowpan_register_netdev(struct net_device *dev, + enum lowpan_lltypes lltype); +void lowpan_unregister_netdevice(struct net_device *dev); +void lowpan_unregister_netdev(struct net_device *dev); /** * lowpan_header_decompress - replace 6LoWPAN header with IPv6 header diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index 83b19e072224..80fc50987cf3 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -15,7 +15,8 @@ #include -void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype) +int lowpan_register_netdevice(struct net_device *dev, + enum lowpan_lltypes lltype) { dev->addr_len = EUI64_ADDR_LEN; dev->type = ARPHRD_6LOWPAN; @@ -23,8 +24,36 @@ void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype) dev->priv_flags |= IFF_NO_QUEUE; lowpan_priv(dev)->lltype = lltype; + + return register_netdevice(dev); +} +EXPORT_SYMBOL(lowpan_register_netdevice); + +int lowpan_register_netdev(struct net_device *dev, + enum lowpan_lltypes lltype) +{ + int ret; + + rtnl_lock(); + ret = lowpan_register_netdevice(dev, lltype); + rtnl_unlock(); + return ret; +} +EXPORT_SYMBOL(lowpan_register_netdev); + +void lowpan_unregister_netdevice(struct net_device *dev) +{ + unregister_netdevice(dev); +} +EXPORT_SYMBOL(lowpan_unregister_netdevice); + +void lowpan_unregister_netdev(struct net_device *dev) +{ + rtnl_lock(); + lowpan_unregister_netdevice(dev); + rtnl_unlock(); } -EXPORT_SYMBOL(lowpan_netdev_setup); +EXPORT_SYMBOL(lowpan_unregister_netdev); static int __init lowpan_module_init(void) { diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 9e9cca3689a0..d040365ba98e 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -825,9 +825,7 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev) list_add_rcu(&(*dev)->list, &bt_6lowpan_devices); spin_unlock(&devices_lock); - lowpan_netdev_setup(netdev, LOWPAN_LLTYPE_BTLE); - - err = register_netdev(netdev); + err = lowpan_register_netdev(netdev, LOWPAN_LLTYPE_BTLE); if (err < 0) { BT_INFO("register_netdev failed %d", err); spin_lock(&devices_lock); @@ -890,7 +888,7 @@ static void delete_netdev(struct work_struct *work) struct lowpan_dev *entry = container_of(work, struct lowpan_dev, delete_netdev); - unregister_netdev(entry->netdev); + lowpan_unregister_netdev(entry->netdev); /* The entry pointer is deleted by the netdev destructor. */ } @@ -1348,7 +1346,7 @@ static void disconnect_devices(void) ifdown(entry->netdev); BT_DBG("Unregistering netdev %s %p", entry->netdev->name, entry->netdev); - unregister_netdev(entry->netdev); + lowpan_unregister_netdev(entry->netdev); kfree(entry); } } diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 20c49c724ba0..737c87a2a41e 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -161,9 +161,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *ldev, wdev->needed_headroom; ldev->needed_tailroom = wdev->needed_tailroom; - lowpan_netdev_setup(ldev, LOWPAN_LLTYPE_IEEE802154); - - ret = register_netdevice(ldev); + ret = lowpan_register_netdevice(ldev, LOWPAN_LLTYPE_IEEE802154); if (ret < 0) { dev_put(wdev); return ret; @@ -180,7 +178,7 @@ static void lowpan_dellink(struct net_device *ldev, struct list_head *head) ASSERT_RTNL(); wdev->ieee802154_ptr->lowpan_dev = NULL; - unregister_netdevice(ldev); + lowpan_unregister_netdevice(ldev); dev_put(wdev); } -- cgit v1.2.3-71-gd317 From b1815fd949e5bd06d118019acf68f87c9414f705 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 9 Dec 2015 22:46:30 +0100 Subject: 6lowpan: add debugfs support This patch will introduce a 6lowpan entry into the debugfs if enabled. Inside this 6lowpan directory we create a subdirectories of all 6lowpan interfaces to offer a per interface debugfs support. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/6lowpan.h | 3 +++ net/6lowpan/6lowpan_i.h | 28 ++++++++++++++++++++++++++ net/6lowpan/Kconfig | 8 ++++++++ net/6lowpan/Makefile | 1 + net/6lowpan/core.c | 28 +++++++++++++++++++++++++- net/6lowpan/debugfs.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 net/6lowpan/6lowpan_i.h create mode 100644 net/6lowpan/debugfs.c (limited to 'include/net') diff --git a/include/net/6lowpan.h b/include/net/6lowpan.h index 730211fd8ed7..2f6a3f2233ed 100644 --- a/include/net/6lowpan.h +++ b/include/net/6lowpan.h @@ -53,6 +53,8 @@ #ifndef __6LOWPAN_H__ #define __6LOWPAN_H__ +#include + #include #include @@ -98,6 +100,7 @@ enum lowpan_lltypes { struct lowpan_priv { enum lowpan_lltypes lltype; + struct dentry *iface_debugfs; /* must be last */ u8 priv[0] __aligned(sizeof(void *)); diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h new file mode 100644 index 000000000000..d16bb4b14aa1 --- /dev/null +++ b/net/6lowpan/6lowpan_i.h @@ -0,0 +1,28 @@ +#ifndef __6LOWPAN_I_H +#define __6LOWPAN_I_H + +#include + +#ifdef CONFIG_6LOWPAN_DEBUGFS +int lowpan_dev_debugfs_init(struct net_device *dev); +void lowpan_dev_debugfs_exit(struct net_device *dev); + +int __init lowpan_debugfs_init(void); +void lowpan_debugfs_exit(void); +#else +static inline int lowpan_dev_debugfs_init(struct net_device *dev) +{ + return 0; +} + +static inline void lowpan_dev_debugfs_exit(struct net_device *dev) { } + +static inline int __init lowpan_debugfs_init(void) +{ + return 0; +} + +static inline void lowpan_debugfs_exit(void) { } +#endif /* CONFIG_6LOWPAN_DEBUGFS */ + +#endif /* __6LOWPAN_I_H */ diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig index bcb9d8a21fc0..9c051512d14f 100644 --- a/net/6lowpan/Kconfig +++ b/net/6lowpan/Kconfig @@ -5,6 +5,14 @@ menuconfig 6LOWPAN This enables IPv6 over Low power Wireless Personal Area Network - "6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks. +config 6LOWPAN_DEBUGFS + bool "6LoWPAN debugfs support" + depends on 6LOWPAN + depends on DEBUG_FS + ---help--- + This enables 6LoWPAN debugfs support. For example to manipulate + IPHC context information at runtime. + menuconfig 6LOWPAN_NHC tristate "Next Header and Generic Header Compression Support" depends on 6LOWPAN diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index 9e35a5d7b92d..e44f3bf2dd42 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_6LOWPAN) += 6lowpan.o 6lowpan-y := core.o iphc.o nhc.o +6lowpan-$(CONFIG_6LOWPAN_DEBUGFS) += debugfs.o #rfc6282 nhcs obj-$(CONFIG_6LOWPAN_NHC_DEST) += nhc_dest.o diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index 80fc50987cf3..c7f06f5c0121 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -15,9 +15,13 @@ #include +#include "6lowpan_i.h" + int lowpan_register_netdevice(struct net_device *dev, enum lowpan_lltypes lltype) { + int ret; + dev->addr_len = EUI64_ADDR_LEN; dev->type = ARPHRD_6LOWPAN; dev->mtu = IPV6_MIN_MTU; @@ -25,7 +29,15 @@ int lowpan_register_netdevice(struct net_device *dev, lowpan_priv(dev)->lltype = lltype; - return register_netdevice(dev); + ret = lowpan_dev_debugfs_init(dev); + if (ret < 0) + return ret; + + ret = register_netdevice(dev); + if (ret < 0) + lowpan_dev_debugfs_exit(dev); + + return ret; } EXPORT_SYMBOL(lowpan_register_netdevice); @@ -44,6 +56,7 @@ EXPORT_SYMBOL(lowpan_register_netdev); void lowpan_unregister_netdevice(struct net_device *dev) { unregister_netdevice(dev); + lowpan_dev_debugfs_exit(dev); } EXPORT_SYMBOL(lowpan_unregister_netdevice); @@ -57,6 +70,12 @@ EXPORT_SYMBOL(lowpan_unregister_netdev); static int __init lowpan_module_init(void) { + int ret; + + ret = lowpan_debugfs_init(); + if (ret < 0) + return ret; + request_module_nowait("ipv6"); request_module_nowait("nhc_dest"); @@ -69,6 +88,13 @@ static int __init lowpan_module_init(void) return 0; } + +static void __exit lowpan_module_exit(void) +{ + lowpan_debugfs_exit(); +} + module_init(lowpan_module_init); +module_exit(lowpan_module_exit); MODULE_LICENSE("GPL"); diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c new file mode 100644 index 000000000000..88eef84df0fc --- /dev/null +++ b/net/6lowpan/debugfs.c @@ -0,0 +1,53 @@ +/* This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: + * (C) 2015 Pengutronix, Alexander Aring + * Copyright (c) 2015 Nordic Semiconductor. All Rights Reserved. + */ + +#include + +#include "6lowpan_i.h" + +static struct dentry *lowpan_debugfs; + +int lowpan_dev_debugfs_init(struct net_device *dev) +{ + struct lowpan_priv *lpriv = lowpan_priv(dev); + + /* creating the root */ + lpriv->iface_debugfs = debugfs_create_dir(dev->name, lowpan_debugfs); + if (!lpriv->iface_debugfs) + goto fail; + + return 0; + +fail: + return -EINVAL; +} + +void lowpan_dev_debugfs_exit(struct net_device *dev) +{ + debugfs_remove_recursive(lowpan_priv(dev)->iface_debugfs); +} + +int __init lowpan_debugfs_init(void) +{ + lowpan_debugfs = debugfs_create_dir("6lowpan", NULL); + if (!lowpan_debugfs) + return -EINVAL; + + return 0; +} + +void lowpan_debugfs_exit(void) +{ + debugfs_remove_recursive(lowpan_debugfs); +} -- cgit v1.2.3-71-gd317 From 818f1f3e70dd4c8e6f8d59c617857be0fa0fce7c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 9 Dec 2015 22:46:31 +0100 Subject: ipv6: add ipv6_addr_prefix_copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a static inline function ipv6_addr_prefix_copy which copies a ipv6 address prefix(argument pfx) into the ipv6 address prefix. The prefix len is given by plen as bits. This function mainly based on ipv6_addr_prefix which copies one address prefix from address into a new ipv6 address destination and zero all other address bits. The difference is that ipv6_addr_prefix_copy don't get a prefix from an ipv6 address, it sets a prefix to an ipv6 address with keeping other address bits. The use case is for context based address compression inside 6LoWPAN IPHC header which keeping ipv6 prefixes inside a context table to lookup address-bits without sending them. Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Acked-by: Łukasz Duda Acked-by: Hannes Frederic Sowa Acked-by: YOSHIFUJI Hideaki Acked-by: David S. Miller Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/ipv6.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 9a5c9f013784..6570f379aba2 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -401,6 +401,21 @@ static inline void ipv6_addr_prefix(struct in6_addr *pfx, pfx->s6_addr[o] = addr->s6_addr[o] & (0xff00 >> b); } +static inline void ipv6_addr_prefix_copy(struct in6_addr *addr, + const struct in6_addr *pfx, + int plen) +{ + /* caller must guarantee 0 <= plen <= 128 */ + int o = plen >> 3, + b = plen & 0x7; + + memcpy(addr->s6_addr, pfx, o); + if (b != 0) { + addr->s6_addr[o] &= ~(0xff00 >> b); + addr->s6_addr[o] |= (pfx->s6_addr[o] & (0xff00 >> b)); + } +} + static inline void __ipv6_addr_set_half(__be32 *addr, __be32 wh, __be32 wl) { -- cgit v1.2.3-71-gd317 From 369620a09bc5ab867342d51f1820c66b00d78a2c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 10 Dec 2015 12:37:44 -0800 Subject: rco: Clean up casting errors Fixe a couple of cast errors found by sparse. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/checksum.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/checksum.h b/include/net/checksum.h index 9fcaedf994ee..10a16b5bd1c7 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -165,7 +165,8 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, csum = csum_sub(csum, csum_partial(ptr, start, 0)); /* Set derived checksum in packet */ - delta = csum_sub(csum_fold(csum), *psum); + delta = csum_sub((__force __wsum)csum_fold(csum), + (__force __wsum)*psum); *psum = csum_fold(csum); return delta; -- cgit v1.2.3-71-gd317 From 19576c9478682a398276c994ea0d2696474df32b Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 9 Dec 2015 14:07:40 +0100 Subject: netfilter: cttimeout: add netns support Add a per-netns list of timeout objects and adjust code to use it. Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 3 + include/net/netfilter/nf_conntrack_timeout.h | 2 +- net/netfilter/nf_conntrack_timeout.c | 2 +- net/netfilter/nfnetlink_cttimeout.c | 82 +++++++++++++++++----------- net/netfilter/xt_CT.c | 2 +- 5 files changed, 57 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2dcea635ecce..4089abc6e9c0 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -121,6 +121,9 @@ struct net { #if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT) struct list_head nfnl_acct_list; #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + struct list_head nfct_timeout_list; +#endif #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index f72be38860a7..5cc5e9e6171a 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -104,7 +104,7 @@ static inline void nf_conntrack_timeout_fini(void) #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(const char *name); +extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name); extern void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout); #endif diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 93da609d9d29..26e742006c48 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -25,7 +25,7 @@ #include struct ctnl_timeout * -(*nf_ct_timeout_find_get_hook)(const char *name) __read_mostly; +(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook); void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index c7a2d0e1c462..3921d544f5ba 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -38,8 +38,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); -static LIST_HEAD(cttimeout_list); - static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING, .len = CTNL_TIMEOUT_NAME_MAX - 1}, @@ -90,7 +88,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); - list_for_each_entry(timeout, &cttimeout_list, head) { + list_for_each_entry(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -145,7 +143,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, timeout->l3num = l3num; timeout->l4proto = l4proto; atomic_set(&timeout->refcnt, 1); - list_add_tail_rcu(&timeout->head, &cttimeout_list); + list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list); return 0; err: @@ -209,6 +207,7 @@ nla_put_failure: static int ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct ctnl_timeout *cur, *last; if (cb->args[2]) @@ -219,7 +218,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = 0; rcu_read_lock(); - list_for_each_entry_rcu(cur, &cttimeout_list, head) { + list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) { if (last) { if (cur != last) continue; @@ -245,6 +244,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; @@ -260,7 +260,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, return -EINVAL; name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) @@ -301,17 +301,17 @@ static void untimeout(struct nf_conntrack_tuple_hash *i, RCU_INIT_POINTER(timeout_ext->timeout, NULL); } -static void ctnl_untimeout(struct ctnl_timeout *timeout) +static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) { struct nf_conntrack_tuple_hash *h; const struct hlist_nulls_node *nn; int i; local_bh_disable(); - for (i = 0; i < init_net.ct.htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < init_net.ct.htable_size) { - hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode) + if (i < net->ct.htable_size) { + hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) untimeout(h, timeout); } spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); @@ -320,7 +320,7 @@ static void ctnl_untimeout(struct ctnl_timeout *timeout) } /* try to delete object, fail if it is still in use. */ -static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) +static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) { int ret = 0; @@ -329,7 +329,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_l4proto_put(timeout->l4proto); - ctnl_untimeout(timeout); + ctnl_untimeout(net, timeout); kfree_rcu(timeout, rcu_head); } else { /* still in use, restore reference counter. */ @@ -344,23 +344,24 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); char *name; struct ctnl_timeout *cur; int ret = -ENOENT; if (!cda[CTA_TIMEOUT_NAME]) { - list_for_each_entry(cur, &cttimeout_list, head) - ctnl_timeout_try_del(cur); + list_for_each_entry(cur, &net->nfct_timeout_list, head) + ctnl_timeout_try_del(net, cur); return 0; } name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; - ret = ctnl_timeout_try_del(cur); + ret = ctnl_timeout_try_del(net, cur); if (ret < 0) return ret; @@ -511,12 +512,13 @@ err: } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -static struct ctnl_timeout *ctnl_timeout_find_get(const char *name) +static struct ctnl_timeout * +ctnl_timeout_find_get(struct net *net, const char *name) { struct ctnl_timeout *timeout, *matching = NULL; rcu_read_lock(); - list_for_each_entry_rcu(timeout, &cttimeout_list, head) { + list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -569,10 +571,39 @@ static const struct nfnetlink_subsystem cttimeout_subsys = { MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); +static int __net_init cttimeout_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->nfct_timeout_list); + + return 0; +} + +static void __net_exit cttimeout_net_exit(struct net *net) +{ + struct ctnl_timeout *cur, *tmp; + + ctnl_untimeout(net, NULL); + + list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { + list_del_rcu(&cur->head); + nf_ct_l4proto_put(cur->l4proto); + kfree_rcu(cur, rcu_head); + } +} + +static struct pernet_operations cttimeout_ops = { + .init = cttimeout_net_init, + .exit = cttimeout_net_exit, +}; + static int __init cttimeout_init(void) { int ret; + ret = register_pernet_subsys(&cttimeout_ops); + if (ret < 0) + return ret; + ret = nfnetlink_subsys_register(&cttimeout_subsys); if (ret < 0) { pr_err("cttimeout_init: cannot register cttimeout with " @@ -586,28 +617,17 @@ static int __init cttimeout_init(void) return 0; err_out: + unregister_pernet_subsys(&cttimeout_ops); return ret; } static void __exit cttimeout_exit(void) { - struct ctnl_timeout *cur, *tmp; - pr_info("cttimeout: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&cttimeout_subsys); - /* Make sure no conntrack objects refer to custom timeouts anymore. */ - ctnl_untimeout(NULL); - - list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) { - list_del_rcu(&cur->head); - /* We are sure that our objects have no clients at this point, - * it's safe to release them all without checking refcnt. - */ - nf_ct_l4proto_put(cur->l4proto); - kfree_rcu(cur, rcu_head); - } + unregister_pernet_subsys(&cttimeout_ops); #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index e7ac07e53b59..6669e68d589e 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -143,7 +143,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, goto out; } - timeout = timeout_find_get(timeout_name); + timeout = timeout_find_get(par->net, timeout_name); if (timeout == NULL) { ret = -ENOENT; pr_info("No such timeout policy \"%s\"\n", timeout_name); -- cgit v1.2.3-71-gd317 From 6ff64f6f9242d7e50f3e99cb280f69d1927a5fa6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 15 Dec 2015 16:03:35 +0100 Subject: switchdev: Pass original device to port netdev driver switchdev drivers need to know the netdev on which the switchdev op was invoked. For example, the STP state of a VLAN interface configured on top of a port can change while being member in a bridge. In this case, the underlying driver should only change the STP state of that particular VLAN and not of all the VLANs configured on the port. However, current switchdev infrastructure only passes the port netdev down to the driver. Solve that by passing the original device down to the driver as part of the required switchdev object / attribute. This doesn't entail any change in current switchdev drivers. It simply enables those supporting stacked devices to know the originating device and act accordingly. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 2 ++ net/bridge/br_fdb.c | 1 + net/bridge/br_stp.c | 2 ++ net/bridge/br_stp_if.c | 1 + net/bridge/br_vlan.c | 2 ++ net/core/net-sysfs.c | 1 + net/core/rtnetlink.c | 1 + net/switchdev/switchdev.c | 12 ++++++++++++ 8 files changed, 22 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 1d22ce9f352e..6612946167fe 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -50,6 +50,7 @@ enum switchdev_attr_id { }; struct switchdev_attr { + struct net_device *orig_dev; enum switchdev_attr_id id; u32 flags; union { @@ -68,6 +69,7 @@ enum switchdev_obj_id { }; struct switchdev_obj { + struct net_device *orig_dev; enum switchdev_obj_id id; u32 flags; }; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a642bb829d09..82e3e9705017 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -135,6 +135,7 @@ static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) { struct switchdev_obj_port_fdb fdb = { .obj = { + .orig_dev = f->dst->dev, .id = SWITCHDEV_OBJ_ID_PORT_FDB, .flags = SWITCHDEV_F_DEFER, }, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 5f3f64553179..b3cca126b103 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -40,6 +40,7 @@ void br_log_state(const struct net_bridge_port *p) void br_set_state(struct net_bridge_port *p, unsigned int state) { struct switchdev_attr attr = { + .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE, .flags = SWITCHDEV_F_DEFER, .u.stp_state = state, @@ -570,6 +571,7 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) { struct switchdev_attr attr = { + .orig_dev = br->dev, .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, .u.ageing_time = ageing_time, diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 5396ff08af32..775e00fbeb1e 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -37,6 +37,7 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no) void br_init_port(struct net_bridge_port *p) { struct switchdev_attr attr = { + .orig_dev = p->dev, .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER, .u.ageing_time = p->br->ageing_time, diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 1394da63614a..66c4549efbbb 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -73,6 +73,7 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, u16 vid, u16 flags) { struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .flags = flags, .vid_begin = vid, @@ -120,6 +121,7 @@ static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, u16 vid) { struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .vid_begin = vid, .vid_end = vid, diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f88a62ab019d..bca8c350e7f3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -471,6 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev, if (dev_isalive(netdev)) { struct switchdev_attr attr = { + .orig_dev = netdev, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 34ba7a08876d..d8b0113d3eec 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1027,6 +1027,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index f34e535e93bd..df790d3385a2 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -723,6 +723,7 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, u32 filter_mask) { struct switchdev_vlan_dump dump = { + .vlan.obj.orig_dev = dev, .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .skb = skb, .filter_mask = filter_mask, @@ -757,6 +758,7 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, int nlflags) { struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u16 mode = BRIDGE_MODE_UNDEF; @@ -778,6 +780,7 @@ static int switchdev_port_br_setflag(struct net_device *dev, unsigned long brport_flag) { struct switchdev_attr attr = { + .orig_dev = dev, .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u8 flag = nla_get_u8(nlattr); @@ -853,6 +856,7 @@ static int switchdev_port_br_afspec(struct net_device *dev, struct nlattr *attr; struct bridge_vlan_info *vinfo; struct switchdev_obj_port_vlan vlan = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, }; int rem; @@ -975,6 +979,7 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], u16 vid, u16 nlm_flags) { struct switchdev_obj_port_fdb fdb = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .vid = vid, }; @@ -1000,6 +1005,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], u16 vid) { struct switchdev_obj_port_fdb fdb = { + .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .vid = vid, }; @@ -1077,6 +1083,7 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *filter_dev, int idx) { struct switchdev_fdb_dump dump = { + .fdb.obj.orig_dev = dev, .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, .dev = dev, .skb = skb, @@ -1135,6 +1142,7 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) if (!dev) return NULL; + attr.orig_dev = dev; if (switchdev_port_attr_get(dev, &attr)) return NULL; @@ -1194,6 +1202,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; + ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_add(dev, &ipv4_fib.obj); if (!err) fi->fib_flags |= RTNH_F_OFFLOAD; @@ -1238,6 +1247,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; + ipv4_fib.obj.orig_dev = dev; err = switchdev_port_obj_del(dev, &ipv4_fib.obj); if (!err) fi->fib_flags &= ~RTNH_F_OFFLOAD; @@ -1270,10 +1280,12 @@ static bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { struct switchdev_attr a_attr = { + .orig_dev = a, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; struct switchdev_attr b_attr = { + .orig_dev = b, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; -- cgit v1.2.3-71-gd317 From a188222b6ed29404ac2d4232d35d1fe0e77af370 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:43 -0800 Subject: net: Rename NETIF_F_ALL_CSUM to NETIF_F_CSUM_MASK The name NETIF_F_ALL_CSUM is a misnomer. This does not correspond to the set of features for offloading all checksums. This is a mask of the checksum offload related features bits. It is incorrect to set both NETIF_F_HW_CSUM and NETIF_F_IP_CSUM or NETIF_F_IPV6 at the same time for features of a device. This patch: - Changes instances of NETIF_F_ALL_CSUM to NETIF_F_CSUM_MASK (where NETIF_F_ALL_CSUM is being used as a mask). - Changes bonding, sfc/efx, ipvlan, macvlan, vlan, and team drivers to use NEITF_F_HW_CSUM in features list instead of NETIF_F_ALL_CSUM. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 7 +++---- drivers/net/ethernet/emulex/benet/be_main.c | 2 +- drivers/net/ethernet/ibm/ibmveth.c | 5 +++-- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/jme.c | 2 +- drivers/net/ethernet/marvell/sky2.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ++-- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c | 2 +- drivers/net/ethernet/sfc/efx.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++-- drivers/net/ipvlan/ipvlan_main.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/macvtap.c | 2 +- drivers/net/team/team.c | 3 +-- drivers/net/usb/r8152.c | 2 +- drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c | 2 +- include/linux/netdev_features.h | 7 ++++++- include/linux/netdevice.h | 6 +++--- include/net/vxlan.h | 2 +- net/8021q/vlan_dev.c | 2 +- net/core/dev.c | 10 +++++----- net/core/ethtool.c | 2 +- net/ipv4/tcp.c | 4 ++-- 25 files changed, 43 insertions(+), 39 deletions(-) (limited to 'include/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fe0e7a6f4d72..cab99fd44c8e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1067,12 +1067,12 @@ static netdev_features_t bond_fix_features(struct net_device *dev, return features; } -#define BOND_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ +#define BOND_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | NETIF_F_LRO) -#define BOND_ENC_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | NETIF_F_RXCSUM |\ - NETIF_F_ALL_TSO) +#define BOND_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_RXCSUM | NETIF_F_ALL_TSO) static void bond_compute_features(struct bonding *bond) { @@ -4182,7 +4182,6 @@ void bond_setup(struct net_device *bond_dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; bond_dev->features |= bond_dev->hw_features; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 4cab8879f5ae..34e324f20d80 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5289,7 +5289,7 @@ static netdev_features_t be_features_check(struct sk_buff *skb, skb->inner_protocol != htons(ETH_P_TEB) || skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 7af870a3c549..6691b5a45b9d 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -763,7 +763,7 @@ static netdev_features_t ibmveth_fix_features(struct net_device *dev, */ if (!(features & NETIF_F_RXCSUM)) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; return features; } @@ -928,7 +928,8 @@ static int ibmveth_set_features(struct net_device *dev, rc1 = ibmveth_set_csum_offload(dev, rx_csum); if (rc1 && !adapter->rx_csum) dev->features = - features & ~(NETIF_F_ALL_CSUM | NETIF_F_RXCSUM); + features & ~(NETIF_F_CSUM_MASK | + NETIF_F_RXCSUM); } if (large_send != adapter->large_send) { diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index d9854d39576d..83ddf362ea77 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1357,7 +1357,7 @@ static netdev_features_t fm10k_features_check(struct sk_buff *skb, if (!skb->encapsulation || fm10k_tx_encap_offload(skb)) return features; - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } static const struct net_device_ops fm10k_netdev_ops = { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a63d980f478e..c284e4341c7c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8766,7 +8766,7 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb, if (skb->encapsulation && (skb_inner_mac_header(skb) - skb_transport_header(skb) > I40E_MAX_TUNNEL_HDR_LEN)) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 9f27001cac1f..fca35aa90d0f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8598,7 +8598,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, if (unlikely(skb_inner_mac_header(skb) - skb_transport_header(skb) > IXGBE_MAX_TUNNEL_HDR_LEN)) - return features & ~NETIF_F_ALL_CSUM; + return features & ~NETIF_F_CSUM_MASK; return features; } diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 060dd3922974..b1de7afd4116 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2753,7 +2753,7 @@ static netdev_features_t jme_fix_features(struct net_device *netdev, netdev_features_t features) { if (netdev->mtu > 1900) - features &= ~(NETIF_F_ALL_TSO | NETIF_F_ALL_CSUM); + features &= ~(NETIF_F_ALL_TSO | NETIF_F_CSUM_MASK); return features; } diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 5606a043063e..ec0a22119e09 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4380,7 +4380,7 @@ static netdev_features_t sky2_fix_features(struct net_device *dev, */ if (dev->mtu > ETH_DATA_LEN && hw->chip_id == CHIP_ID_YUKON_EC_U) { netdev_info(dev, "checksum offload not possible with jumbo frames\n"); - features &= ~(NETIF_F_TSO|NETIF_F_SG|NETIF_F_ALL_CSUM); + features &= ~(NETIF_F_TSO | NETIF_F_SG | NETIF_F_CSUM_MASK); } /* Some hardware requires receive checksum for RSS to work. */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 038ac6b14a60..7060539d276a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2071,7 +2071,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, l4_hdr = ipv6_hdr(skb)->nexthdr; break; default: - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || @@ -2080,7 +2080,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, (l4_hdr == IPPROTO_UDP && (skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c index 08d4be616064..e097e6baaac4 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_param.c @@ -500,7 +500,7 @@ void pch_gbe_check_options(struct pch_gbe_adapter *adapter) val = XsumTX; pch_gbe_validate_option(&val, &opt, adapter); if (!val) - dev->features &= ~NETIF_F_ALL_CSUM; + dev->features &= ~NETIF_F_CSUM_MASK; } { /* Flow Control */ static const struct pch_gbe_option opt = { diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b405349a570c..1fe13c733c1e 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3131,7 +3131,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev, if (efx->type->offload_features & NETIF_F_V6_CSUM) net_dev->features |= NETIF_F_TSO6; /* Mask for features that also apply to VLAN devices */ - net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG | + net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | NETIF_F_RXCSUM); /* All offloads can be toggled */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3c6549aee11d..0b0fea73a7a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2402,7 +2402,7 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev, features &= ~NETIF_F_RXCSUM; if (!priv->plat->tx_coe) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; /* Some GMAC devices have a bugged Jumbo frame support that * needs to have the Tx COE disabled for oversized frames @@ -2410,7 +2410,7 @@ static netdev_features_t stmmac_fix_features(struct net_device *dev, * the TX csum insertionin the TDES and not use SF. */ if (priv->plat->bugged_jumbo && (dev->mtu > ETH_DATA_LEN)) - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; return features; } diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index a9268db4e349..f94392d07126 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -88,7 +88,7 @@ static struct lock_class_key ipvlan_netdev_xmit_lock_key; static struct lock_class_key ipvlan_netdev_addr_lock_key; #define IPVLAN_FEATURES \ - (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 06c8bfeaccd6..ae3b486fb663 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -762,7 +762,7 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; NETIF_F_GSO_ROBUST) #define MACVLAN_FEATURES \ - (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ + (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_LRO | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0fc521941c71..d636d051fac8 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -388,7 +388,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) * check, we either support them all or none. */ if (skb->ip_summed == CHECKSUM_PARTIAL && - !(features & NETIF_F_ALL_CSUM) && + !(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto drop; skb_queue_tail(&q->sk.sk_receive_queue, skb); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 059c0f60a2b2..915f60fce186 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -981,7 +981,7 @@ static void team_port_disable(struct team *team, team_lower_state_changed(port); } -#define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \ +#define TEAM_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | NETIF_F_LRO) @@ -2091,7 +2091,6 @@ static void team_setup(struct net_device *dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM); dev->features |= dev->hw_features; } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d9427ca3dba7..34642a9583e0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1986,7 +1986,7 @@ rtl8152_features_check(struct sk_buff *skb, struct net_device *dev, int offset = skb_transport_offset(skb); if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && offset > max_offset) - features &= ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz) features &= ~NETIF_F_GSO_MASK; diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index 679785b0209c..9de4f23910d8 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -69,7 +69,7 @@ ksocknal_lib_zc_capable(ksock_conn_t *conn) /* ZC if the socket supports scatter/gather and doesn't need software * checksums */ - return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_ALL_CSUM) != 0); + return ((caps & NETIF_F_SG) != 0 && (caps & NETIF_F_CSUM_MASK) != 0); } int diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 6395f8309393..2c4e94ab88da 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -149,7 +149,12 @@ enum { #define NETIF_F_GEN_CSUM NETIF_F_HW_CSUM #define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) #define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) -#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) + +/* List of IP checksum features. Note that NETIF_HW_CSUM should not be + * set in features when NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM are set-- + * this would be contradictory + */ +#define NETIF_F_CSUM_MASK (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1bb21ff0fa64..a54223a113b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3763,12 +3763,12 @@ static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { if (f1 & NETIF_F_GEN_CSUM) - f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f1 |= (NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); if (f2 & NETIF_F_GEN_CSUM) - f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f2 |= (NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); f1 &= f2; if (f1 & NETIF_F_GEN_CSUM) - f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + f1 &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); return f1; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index c1c899c3a51b..b5a1aec1a167 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -232,7 +232,7 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, skb->inner_protocol != htons(ETH_P_TEB) || (skb_inner_mac_header(skb) - skb_transport_header(skb) != sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) - return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 45b74e875381..ad5e2fd1012c 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -543,7 +543,7 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); - dev->hw_features = NETIF_F_ALL_CSUM | NETIF_F_SG | + dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC | NETIF_F_ALL_FCOE; diff --git a/net/core/dev.c b/net/core/dev.c index 8f705fcedb94..5a3b5a404642 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2645,7 +2645,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, type)) { - features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_CSUM_MASK; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } @@ -2792,7 +2792,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device else skb_set_transport_header(skb, skb_checksum_start_offset(skb)); - if (!(features & NETIF_F_ALL_CSUM) && + if (!(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto out_kfree_skb; } @@ -7572,15 +7572,15 @@ netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, netdev_features_t mask) { if (mask & NETIF_F_GEN_CSUM) - mask |= NETIF_F_ALL_CSUM; + mask |= NETIF_F_CSUM_MASK; mask |= NETIF_F_VLAN_CHALLENGED; - all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; + all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask; all &= one | ~NETIF_F_ALL_FOR_ALL; /* If one device supports hw checksumming, set for all. */ if (all & NETIF_F_GEN_CSUM) - all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); + all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_GEN_CSUM); return all; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4a0cab85d67d..09948a726347 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -235,7 +235,7 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) switch (eth_cmd) { case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: - return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CRC; + return NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC; case ETHTOOL_GRXCSUM: case ETHTOOL_SRXCSUM: return NETIF_F_RXCSUM; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c82cca18c90f..cf7ef7be79f0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1018,7 +1018,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, ssize_t res; if (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) + !(sk->sk_route_caps & NETIF_F_CSUM_MASK)) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); @@ -1175,7 +1175,7 @@ new_segment: /* * Check whether we can use HW checksum. */ - if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + if (sk->sk_route_caps & NETIF_F_CSUM_MASK) skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); -- cgit v1.2.3-71-gd317 From 9a49850d0af7b9fd14d091dfe61ef6cb369f86b9 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 14 Dec 2015 11:19:45 -0800 Subject: tcp: Fix conditions to determine checksum offload In tcp_send_sendpage and tcp_sendmsg we check the route capabilities to determine if checksum offload can be performed. This check currently does not take the IP protocol into account for devices that advertise only one of NETIF_F_IPV6_CSUM or NETIF_F_IP_CSUM. This patch adds a function to check capabilities for checksum offload with a socket called sk_check_csum_caps. This function checks for specific IPv4 or IPv6 offload support based on the family of the socket. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/sock.h | 9 +++++++++ net/ipv4/tcp.c | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 0ca22b014de1..ab0269f4b2cc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1791,6 +1791,15 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) sk->sk_route_caps &= ~flags; } +static inline bool sk_check_csum_caps(struct sock *sk) +{ + return (sk->sk_route_caps & NETIF_F_HW_CSUM) || + (sk->sk_family == PF_INET && + (sk->sk_route_caps & NETIF_F_IP_CSUM)) || + (sk->sk_family == PF_INET6 && + (sk->sk_route_caps & NETIF_F_IPV6_CSUM)); +} + static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, struct iov_iter *from, char *to, int copy, int offset) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cf7ef7be79f0..92b3e61b847d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1018,7 +1018,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, ssize_t res; if (!(sk->sk_route_caps & NETIF_F_SG) || - !(sk->sk_route_caps & NETIF_F_CSUM_MASK)) + !sk_check_csum_caps(sk)) return sock_no_sendpage(sk->sk_socket, page, offset, size, flags); @@ -1175,7 +1175,7 @@ new_segment: /* * Check whether we can use HW checksum. */ - if (sk->sk_route_caps & NETIF_F_CSUM_MASK) + if (sk_check_csum_caps(sk)) skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); -- cgit v1.2.3-71-gd317 From fc9e50f5a5a4e1fa9ba2756f745a13e693cf6a06 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 15 Dec 2015 15:41:37 -0800 Subject: netlink: add a start callback for starting a netlink dump The start callback allows the caller to set up a context for the dump callbacks. Presumably, the context can then be destroyed in the done callback. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 ++ include/net/genetlink.h | 2 ++ net/netlink/af_netlink.c | 4 ++++ net/netlink/genetlink.c | 16 ++++++++++++++++ 4 files changed, 24 insertions(+) (limited to 'include/net') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 639e9b8b0e4d..0b41959aab9f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -131,6 +131,7 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask) struct netlink_callback { struct sk_buff *skb; const struct nlmsghdr *nlh; + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); @@ -153,6 +154,7 @@ struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *skb, struct netlink_callback *); int (*done)(struct netlink_callback *); void *data; diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b6b6dcb018d..43c0e771f417 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -114,6 +114,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) * @flags: flags * @policy: attribute validation policy * @doit: standard command callback + * @start: start callback for dumps * @dumpit: callback for dumpers * @done: completion callback for dumps * @ops_list: operations list @@ -122,6 +123,7 @@ struct genl_ops { const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*start)(struct netlink_callback *cb); int (*dumpit)(struct sk_buff *skb, struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 59651af8cc27..81dc1bb6e016 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2915,6 +2915,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb = &nlk->cb; memset(cb, 0, sizeof(*cb)); + cb->start = control->start; cb->dump = control->dump; cb->done = control->done; cb->nlh = nlh; @@ -2927,6 +2928,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, mutex_unlock(nlk->cb_mutex); + if (cb->start) + cb->start(cb); + ret = netlink_dump(sk); sock_put(sk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index bc0e504f33a6..8e63662c6fb0 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -513,6 +513,20 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_start(struct netlink_callback *cb) +{ + /* our ops are always const - netlink API doesn't propagate that */ + const struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->start) { + genl_lock(); + rc = ops->start(cb); + genl_unlock(); + } + return rc; +} + static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { /* our ops are always const - netlink API doesn't propagate that */ @@ -577,6 +591,7 @@ static int genl_family_rcv_msg(struct genl_family *family, .module = family->module, /* we have const, but the netlink API doesn't */ .data = (void *)ops, + .start = genl_lock_start, .dump = genl_lock_dumpit, .done = genl_lock_done, }; @@ -588,6 +603,7 @@ static int genl_family_rcv_msg(struct genl_family *family, } else { struct netlink_dump_control c = { .module = family->module, + .start = ops->start, .dump = ops->dumpit, .done = ops->done, }; -- cgit v1.2.3-71-gd317 From 7f00feaf107645d95a6d87e99b4d141ac0a08efd Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 15 Dec 2015 15:41:38 -0800 Subject: ila: Add generic ILA translation facility This patch implements an ILA tanslation table. This table can be configured with identifier to locator mappings, and can be be queried to resolve a mapping. Queries can be parameterized based on interface, direction (incoming or outoing), and matching locator. The table is implemented using rhashtable and is configured via netlink (through "ip ila .." in iproute). The table may be used as alternative means to do do ILA tanslations other than the lw tunnels Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/ila.h | 18 ++ include/uapi/linux/ila.h | 22 ++ net/ipv6/ila/Makefile | 2 +- net/ipv6/ila/ila.h | 2 + net/ipv6/ila/ila_common.c | 8 + net/ipv6/ila/ila_xlat.c | 680 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 731 insertions(+), 1 deletion(-) create mode 100644 include/net/ila.h create mode 100644 net/ipv6/ila/ila_xlat.c (limited to 'include/net') diff --git a/include/net/ila.h b/include/net/ila.h new file mode 100644 index 000000000000..9f4f43e94ae4 --- /dev/null +++ b/include/net/ila.h @@ -0,0 +1,18 @@ +/* + * ILA kernel interface + * + * Copyright (c) 2015 Tom Herbert + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +#ifndef _NET_ILA_H +#define _NET_ILA_H + +int ila_xlat_outgoing(struct sk_buff *skb); +int ila_xlat_incoming(struct sk_buff *skb); + +#endif /* _NET_ILA_H */ diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h index 7ed9e670814e..abde7bbd6f3b 100644 --- a/include/uapi/linux/ila.h +++ b/include/uapi/linux/ila.h @@ -3,13 +3,35 @@ #ifndef _UAPI_LINUX_ILA_H #define _UAPI_LINUX_ILA_H +/* NETLINK_GENERIC related info */ +#define ILA_GENL_NAME "ila" +#define ILA_GENL_VERSION 0x1 + enum { ILA_ATTR_UNSPEC, ILA_ATTR_LOCATOR, /* u64 */ + ILA_ATTR_IDENTIFIER, /* u64 */ + ILA_ATTR_LOCATOR_MATCH, /* u64 */ + ILA_ATTR_IFINDEX, /* s32 */ + ILA_ATTR_DIR, /* u32 */ __ILA_ATTR_MAX, }; #define ILA_ATTR_MAX (__ILA_ATTR_MAX - 1) +enum { + ILA_CMD_UNSPEC, + ILA_CMD_ADD, + ILA_CMD_DEL, + ILA_CMD_GET, + + __ILA_CMD_MAX, +}; + +#define ILA_CMD_MAX (__ILA_CMD_MAX - 1) + +#define ILA_DIR_IN (1 << 0) +#define ILA_DIR_OUT (1 << 1) + #endif /* _UAPI_LINUX_ILA_H */ diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile index 31d136be2f21..4b32e5921e5c 100644 --- a/net/ipv6/ila/Makefile +++ b/net/ipv6/ila/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_IPV6_ILA) += ila.o -ila-objs := ila_common.o ila_lwt.o +ila-objs := ila_common.o ila_lwt.o ila_xlat.o diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index b94081ff2f8a..28542cb2b387 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -42,5 +42,7 @@ void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p); int ila_lwt_init(void); void ila_lwt_fini(void); +int ila_xlat_init(void); +void ila_xlat_fini(void); #endif /* __ILA_H */ diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index 64e1904991ac..32dc9aab7297 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -80,12 +80,20 @@ static int __init ila_init(void) if (ret) goto fail_lwt; + ret = ila_xlat_init(); + if (ret) + goto fail_xlat; + + return 0; +fail_xlat: + ila_lwt_fini(); fail_lwt: return ret; } static void __exit ila_fini(void) { + ila_xlat_fini(); ila_lwt_fini(); } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c new file mode 100644 index 000000000000..295ca29a23c3 --- /dev/null +++ b/net/ipv6/ila/ila_xlat.c @@ -0,0 +1,680 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ila.h" + +struct ila_xlat_params { + struct ila_params ip; + __be64 identifier; + int ifindex; + unsigned int dir; +}; + +struct ila_map { + struct ila_xlat_params p; + struct rhash_head node; + struct ila_map __rcu *next; + struct rcu_head rcu; +}; + +static unsigned int ila_net_id; + +struct ila_net { + struct rhashtable rhash_table; + spinlock_t *locks; /* Bucket locks for entry manipulation */ + unsigned int locks_mask; + bool hooks_registered; +}; + +#define LOCKS_PER_CPU 10 + +static int alloc_ila_locks(struct ila_net *ilan) +{ + unsigned int i, size; + unsigned int nr_pcpus = num_possible_cpus(); + + nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); + size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU); + + if (sizeof(spinlock_t) != 0) { +#ifdef CONFIG_NUMA + if (size * sizeof(spinlock_t) > PAGE_SIZE) + ilan->locks = vmalloc(size * sizeof(spinlock_t)); + else +#endif + ilan->locks = kmalloc_array(size, sizeof(spinlock_t), + GFP_KERNEL); + if (!ilan->locks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&ilan->locks[i]); + } + ilan->locks_mask = size - 1; + + return 0; +} + +static u32 hashrnd __read_mostly; +static __always_inline void __ila_hash_secret_init(void) +{ + net_get_random_once(&hashrnd, sizeof(hashrnd)); +} + +static inline u32 ila_identifier_hash(__be64 identifier) +{ + u32 *v = (u32 *)&identifier; + + return jhash_2words(v[0], v[1], hashrnd); +} + +static inline spinlock_t *ila_get_lock(struct ila_net *ilan, __be64 identifier) +{ + return &ilan->locks[ila_identifier_hash(identifier) & ilan->locks_mask]; +} + +static inline int ila_cmp_wildcards(struct ila_map *ila, __be64 loc, + int ifindex, unsigned int dir) +{ + return (ila->p.ip.locator_match && ila->p.ip.locator_match != loc) || + (ila->p.ifindex && ila->p.ifindex != ifindex) || + !(ila->p.dir & dir); +} + +static inline int ila_cmp_params(struct ila_map *ila, struct ila_xlat_params *p) +{ + return (ila->p.ip.locator_match != p->ip.locator_match) || + (ila->p.ifindex != p->ifindex) || + (ila->p.dir != p->dir); +} + +static int ila_cmpfn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct ila_map *ila = obj; + + return (ila->p.identifier != *(__be64 *)arg->key); +} + +static inline int ila_order(struct ila_map *ila) +{ + int score = 0; + + if (ila->p.ip.locator_match) + score += 1 << 0; + + if (ila->p.ifindex) + score += 1 << 1; + + return score; +} + +static const struct rhashtable_params rht_params = { + .nelem_hint = 1024, + .head_offset = offsetof(struct ila_map, node), + .key_offset = offsetof(struct ila_map, p.identifier), + .key_len = sizeof(u64), /* identifier */ + .max_size = 1048576, + .min_size = 256, + .automatic_shrinking = true, + .obj_cmpfn = ila_cmpfn, +}; + +static struct genl_family ila_nl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = ILA_GENL_NAME, + .version = ILA_GENL_VERSION, + .maxattr = ILA_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, +}; + +static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { + [ILA_ATTR_IDENTIFIER] = { .type = NLA_U64, }, + [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, + [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, + [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, + [ILA_ATTR_DIR] = { .type = NLA_U32, }, +}; + +static int parse_nl_config(struct genl_info *info, + struct ila_xlat_params *p) +{ + memset(p, 0, sizeof(*p)); + + if (info->attrs[ILA_ATTR_IDENTIFIER]) + p->identifier = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_IDENTIFIER]); + + if (info->attrs[ILA_ATTR_LOCATOR]) + p->ip.locator = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_LOCATOR]); + + if (info->attrs[ILA_ATTR_LOCATOR_MATCH]) + p->ip.locator_match = (__force __be64)nla_get_u64( + info->attrs[ILA_ATTR_LOCATOR_MATCH]); + + if (info->attrs[ILA_ATTR_IFINDEX]) + p->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); + + if (info->attrs[ILA_ATTR_DIR]) + p->dir = nla_get_u32(info->attrs[ILA_ATTR_DIR]); + + return 0; +} + +/* Must be called with rcu readlock */ +static inline struct ila_map *ila_lookup_wildcards(__be64 id, __be64 loc, + int ifindex, + unsigned int dir, + struct ila_net *ilan) +{ + struct ila_map *ila; + + ila = rhashtable_lookup_fast(&ilan->rhash_table, &id, rht_params); + while (ila) { + if (!ila_cmp_wildcards(ila, loc, ifindex, dir)) + return ila; + ila = rcu_access_pointer(ila->next); + } + + return NULL; +} + +/* Must be called with rcu readlock */ +static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *p, + struct ila_net *ilan) +{ + struct ila_map *ila; + + ila = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + rht_params); + while (ila) { + if (!ila_cmp_params(ila, p)) + return ila; + ila = rcu_access_pointer(ila->next); + } + + return NULL; +} + +static inline void ila_release(struct ila_map *ila) +{ + kfree_rcu(ila, rcu); +} + +static void ila_free_cb(void *ptr, void *arg) +{ + struct ila_map *ila = (struct ila_map *)ptr, *next; + + /* Assume rcu_readlock held */ + while (ila) { + next = rcu_access_pointer(ila->next); + ila_release(ila); + ila = next; + } +} + +static int ila_xlat_addr(struct sk_buff *skb, int dir); + +static unsigned int +ila_nf_input(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + ila_xlat_addr(skb, ILA_DIR_IN); + return NF_ACCEPT; +} + +static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = { + { + .hook = ila_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = -1, + }, +}; + +static int ila_add_mapping(struct net *net, struct ila_xlat_params *p) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_map *ila, *head; + spinlock_t *lock = ila_get_lock(ilan, p->identifier); + int err = 0, order; + + if (!ilan->hooks_registered) { + /* We defer registering net hooks in the namespace until the + * first mapping is added. + */ + err = nf_register_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); + if (err) + return err; + + ilan->hooks_registered = true; + } + + ila = kzalloc(sizeof(*ila), GFP_KERNEL); + if (!ila) + return -ENOMEM; + + ila->p = *p; + + if (p->ip.locator_match) { + /* Precompute checksum difference for translation since we + * know both the old identifier and the new one. + */ + ila->p.ip.csum_diff = compute_csum_diff8( + (__be32 *)&p->ip.locator_match, + (__be32 *)&p->ip.locator); + } + + order = ila_order(ila); + + spin_lock(lock); + + head = rhashtable_lookup_fast(&ilan->rhash_table, &p->identifier, + rht_params); + if (!head) { + /* New entry for the rhash_table */ + err = rhashtable_lookup_insert_fast(&ilan->rhash_table, + &ila->node, rht_params); + } else { + struct ila_map *tila = head, *prev = NULL; + + do { + if (!ila_cmp_params(tila, p)) { + err = -EEXIST; + goto out; + } + + if (order > ila_order(tila)) + break; + + prev = tila; + tila = rcu_dereference_protected(tila->next, + lockdep_is_held(lock)); + } while (tila); + + if (prev) { + /* Insert in sub list of head */ + RCU_INIT_POINTER(ila->next, tila); + rcu_assign_pointer(prev->next, ila); + } else { + /* Make this ila new head */ + RCU_INIT_POINTER(ila->next, head); + err = rhashtable_replace_fast(&ilan->rhash_table, + &head->node, + &ila->node, rht_params); + if (err) + goto out; + } + } + +out: + spin_unlock(lock); + + if (err) + kfree(ila); + + return err; +} + +static int ila_del_mapping(struct net *net, struct ila_xlat_params *p) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_map *ila, *head, *prev; + spinlock_t *lock = ila_get_lock(ilan, p->identifier); + int err = -ENOENT; + + spin_lock(lock); + + head = rhashtable_lookup_fast(&ilan->rhash_table, + &p->identifier, rht_params); + ila = head; + + prev = NULL; + + while (ila) { + if (ila_cmp_params(ila, p)) { + prev = ila; + ila = rcu_dereference_protected(ila->next, + lockdep_is_held(lock)); + continue; + } + + err = 0; + + if (prev) { + /* Not head, just delete from list */ + rcu_assign_pointer(prev->next, ila->next); + } else { + /* It is the head. If there is something in the + * sublist we need to make a new head. + */ + head = rcu_dereference_protected(ila->next, + lockdep_is_held(lock)); + if (head) { + /* Put first entry in the sublist into the + * table + */ + err = rhashtable_replace_fast( + &ilan->rhash_table, &ila->node, + &head->node, rht_params); + if (err) + goto out; + } else { + /* Entry no longer used */ + err = rhashtable_remove_fast(&ilan->rhash_table, + &ila->node, + rht_params); + } + } + + ila_release(ila); + + break; + } + +out: + spin_unlock(lock); + + return err; +} + +static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_xlat_params p; + int err; + + err = parse_nl_config(info, &p); + if (err) + return err; + + return ila_add_mapping(net, &p); +} + +static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_xlat_params p; + int err; + + err = parse_nl_config(info, &p); + if (err) + return err; + + ila_del_mapping(net, &p); + + return 0; +} + +static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) +{ + if (nla_put_u64(msg, ILA_ATTR_IDENTIFIER, + (__force u64)ila->p.identifier) || + nla_put_u64(msg, ILA_ATTR_LOCATOR, + (__force u64)ila->p.ip.locator) || + nla_put_u64(msg, ILA_ATTR_LOCATOR_MATCH, + (__force u64)ila->p.ip.locator_match) || + nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->p.ifindex) || + nla_put_u32(msg, ILA_ATTR_DIR, ila->p.dir)) + return -1; + + return 0; +} + +static int ila_dump_info(struct ila_map *ila, + u32 portid, u32 seq, u32 flags, + struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &ila_nl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (ila_fill_info(ila, skb) < 0) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct ila_net *ilan = net_generic(net, ila_net_id); + struct sk_buff *msg; + struct ila_xlat_params p; + struct ila_map *ila; + int ret; + + ret = parse_nl_config(info, &p); + if (ret) + return ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rcu_read_lock(); + + ila = ila_lookup_by_params(&p, ilan); + if (ila) { + ret = ila_dump_info(ila, + info->snd_portid, + info->snd_seq, 0, msg, + info->genlhdr->cmd); + } + + rcu_read_unlock(); + + if (ret < 0) + goto out_free; + + return genlmsg_reply(msg, info); + +out_free: + nlmsg_free(msg); + return ret; +} + +struct ila_dump_iter { + struct rhashtable_iter rhiter; +}; + +static int ila_nl_dump_start(struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct ila_net *ilan = net_generic(net, ila_net_id); + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + + return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter); +} + +static int ila_nl_dump_done(struct netlink_callback *cb) +{ + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + + rhashtable_walk_exit(&iter->rhiter); + + return 0; +} + +static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args; + struct rhashtable_iter *rhiter = &iter->rhiter; + struct ila_map *ila; + int ret; + + ret = rhashtable_walk_start(rhiter); + if (ret && ret != -EAGAIN) + goto done; + + for (;;) { + ila = rhashtable_walk_next(rhiter); + + if (IS_ERR(ila)) { + if (PTR_ERR(ila) == -EAGAIN) + continue; + ret = PTR_ERR(ila); + goto done; + } else if (!ila) { + break; + } + + while (ila) { + ret = ila_dump_info(ila, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, ILA_CMD_GET); + if (ret) + goto done; + + ila = rcu_access_pointer(ila->next); + } + } + + ret = skb->len; + +done: + rhashtable_walk_stop(rhiter); + return ret; +} + +static const struct genl_ops ila_nl_ops[] = { + { + .cmd = ILA_CMD_ADD, + .doit = ila_nl_cmd_add_mapping, + .policy = ila_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = ILA_CMD_DEL, + .doit = ila_nl_cmd_del_mapping, + .policy = ila_nl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = ILA_CMD_GET, + .doit = ila_nl_cmd_get_mapping, + .start = ila_nl_dump_start, + .dumpit = ila_nl_dump, + .done = ila_nl_dump_done, + .policy = ila_nl_policy, + }, +}; + +#define ILA_HASH_TABLE_SIZE 1024 + +static __net_init int ila_init_net(struct net *net) +{ + int err; + struct ila_net *ilan = net_generic(net, ila_net_id); + + err = alloc_ila_locks(ilan); + if (err) + return err; + + rhashtable_init(&ilan->rhash_table, &rht_params); + + return 0; +} + +static __net_exit void ila_exit_net(struct net *net) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + + rhashtable_free_and_destroy(&ilan->rhash_table, ila_free_cb, NULL); + + kvfree(ilan->locks); + + if (ilan->hooks_registered) + nf_unregister_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); +} + +static struct pernet_operations ila_net_ops = { + .init = ila_init_net, + .exit = ila_exit_net, + .id = &ila_net_id, + .size = sizeof(struct ila_net), +}; + +static int ila_xlat_addr(struct sk_buff *skb, int dir) +{ + struct ila_map *ila; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ila_net *ilan = net_generic(net, ila_net_id); + __be64 identifier, locator_match; + size_t nhoff; + + /* Assumes skb contains a valid IPv6 header that is pulled */ + + identifier = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[8]; + locator_match = *(__be64 *)&ip6h->daddr.in6_u.u6_addr8[0]; + nhoff = sizeof(struct ipv6hdr); + + rcu_read_lock(); + + ila = ila_lookup_wildcards(identifier, locator_match, + skb->dev->ifindex, dir, ilan); + if (ila) + update_ipv6_locator(skb, &ila->p.ip); + + rcu_read_unlock(); + + return 0; +} + +int ila_xlat_incoming(struct sk_buff *skb) +{ + return ila_xlat_addr(skb, ILA_DIR_IN); +} +EXPORT_SYMBOL(ila_xlat_incoming); + +int ila_xlat_outgoing(struct sk_buff *skb) +{ + return ila_xlat_addr(skb, ILA_DIR_OUT); +} +EXPORT_SYMBOL(ila_xlat_outgoing); + +int ila_xlat_init(void) +{ + int ret; + + ret = register_pernet_device(&ila_net_ops); + if (ret) + goto exit; + + ret = genl_register_family_with_ops(&ila_nl_family, + ila_nl_ops); + if (ret < 0) + goto unregister; + + return 0; + +unregister: + unregister_pernet_device(&ila_net_ops); +exit: + return ret; +} + +void ila_xlat_fini(void) +{ + genl_unregister_family(&ila_nl_family); + unregister_pernet_device(&ila_net_ops); +} -- cgit v1.2.3-71-gd317 From 64be0aed59ad519d6f2160868734f7e278290ac1 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:03 +0900 Subject: net: diag: Add the ability to destroy a socket. This patch adds a SOCK_DESTROY operation, a destroy function pointer to sock_diag_handler, and a diag_destroy function pointer. It does not include any implementation code. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 2 ++ include/net/sock.h | 1 + include/uapi/linux/sock_diag.h | 1 + net/core/sock_diag.c | 23 ++++++++++++++++++++--- 4 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index fddebc617469..4018b48f2b3b 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -15,6 +15,7 @@ struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); + int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk) } void sock_diag_broadcast_destroy(struct sock *sk); +int sock_diag_destroy(struct sock *sk, int err); #endif diff --git a/include/net/sock.h b/include/net/sock.h index ab0269f4b2cc..6e6e8a25d997 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1060,6 +1060,7 @@ struct proto { void (*destroy_cgroup)(struct mem_cgroup *memcg); struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); #endif + int (*diag_destroy)(struct sock *sk, int err); }; int proto_register(struct proto *prot, int alloc_slab); diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index 49230d36f9ce..bae2d80034d4 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -4,6 +4,7 @@ #include #define SOCK_DIAG_BY_FAMILY 20 +#define SOCK_DESTROY 21 struct sock_diag_req { __u8 sdiag_family; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 0c1d58d43f67..a996ce8c8fb2 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld) } EXPORT_SYMBOL_GPL(sock_diag_unregister); -static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh) { int err; struct sock_diag_req *req = nlmsg_data(nlh); @@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) hndl = sock_diag_handlers[req->sdiag_family]; if (hndl == NULL) err = -ENOENT; - else + else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY) err = hndl->dump(skb, nlh); + else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy) + err = hndl->destroy(skb, nlh); + else + err = -EOPNOTSUPP; mutex_unlock(&sock_diag_table_mutex); return err; @@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return ret; case SOCK_DIAG_BY_FAMILY: - return __sock_diag_rcv_msg(skb, nlh); + case SOCK_DESTROY: + return __sock_diag_cmd(skb, nlh); default: return -EINVAL; } @@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group) return 0; } +int sock_diag_destroy(struct sock *sk, int err) +{ + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (!sk->sk_prot->diag_destroy) + return -EOPNOTSUPP; + + return sk->sk_prot->diag_destroy(sk, err); +} +EXPORT_SYMBOL_GPL(sock_diag_destroy); + static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { -- cgit v1.2.3-71-gd317 From c1e64e298b8cad309091b95d8436a0255c84f54a Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:05 +0900 Subject: net: diag: Support destroying TCP sockets. This implements SOCK_DESTROY for TCP sockets. It causes all blocking calls on the socket to fail fast with ECONNABORTED and causes a protocol close of the socket. It informs the other end of the connection by sending a RST, i.e., initiating a TCP ABORT as per RFC 793. ECONNABORTED was chosen for consistency with FreeBSD. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/Kconfig | 13 +++++++++++++ net/ipv4/tcp.c | 32 ++++++++++++++++++++++++++++++++ net/ipv4/tcp_diag.c | 19 +++++++++++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 6 files changed, 68 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index f80e74c5ad18..3077735b348d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1170,6 +1170,8 @@ void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); +int tcp_abort(struct sock *sk, int err); + static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 416dfa004cfb..c22920525e5d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -436,6 +436,19 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_DIAG_DESTROY + bool "INET: allow privileged process to administratively close sockets" + depends on INET_DIAG + default n + ---help--- + Provides a SOCK_DESTROY operation that allows privileged processes + (e.g., a connection manager or a network administration tool such as + ss) to close sockets opened by other processes. Closing a socket in + this way interrupts any blocking read/write/connect operations on + the socket and causes future socket calls to behave as if the socket + had been disconnected. + If unsure, say N. + menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 92b3e61b847d..2c0e340518d2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3080,6 +3080,38 @@ void tcp_done(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_done); +int tcp_abort(struct sock *sk, int err) +{ + if (!sk_fullsock(sk)) { + sock_gen_put(sk); + return -EOPNOTSUPP; + } + + /* Don't race with userspace socket closes such as tcp_close. */ + lock_sock(sk); + + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ + local_bh_disable(); + bh_lock_sock(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_err = err; + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); + } + + bh_unlock_sock(sk); + local_bh_enable(); + release_sock(sk); + sock_put(sk); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_abort); + extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index b31604086edd..4d610934fb39 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -10,6 +10,8 @@ */ #include +#include +#include #include #include @@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int tcp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req); + + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return sock_diag_destroy(sk, ECONNABORTED); +} +#endif + static const struct inet_diag_handler tcp_diag_handler = { .dump = tcp_diag_dump, .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, .idiag_info_size = sizeof(struct tcp_info), +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = tcp_diag_destroy, +#endif }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index db003438aaf5..7aa13bd3de29 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2342,6 +2342,7 @@ struct proto tcp_prot = { .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, #endif + .diag_destroy = tcp_abort, }; EXPORT_SYMBOL(tcp_prot); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c16e3fbf6854..5382c2662fa2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1890,6 +1890,7 @@ struct proto tcpv6_prot = { .proto_cgroup = tcp_proto_cgroup, #endif .clear_sk = tcp_v6_clear_sk, + .diag_destroy = tcp_abort, }; static const struct inet6_protocol tcpv6_protocol = { -- cgit v1.2.3-71-gd317 From 566178f853c1aa57be9c16007c7cca07df5d51b6 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Wed, 16 Dec 2015 13:55:04 +0800 Subject: net: sctp: dynamically enable or disable pf state As we all know, the value of pf_retrans >= max_retrans_path can disable pf state. The variables of pf_retrans and max_retrans_path can be changed by the userspace application. Sometimes the user expects to disable pf state while the 2 variables are changed to enable pf state. So it is necessary to introduce a new variable to disable pf state. According to the suggestions from Vlad Yasevich, extra1 and extra2 are removed. The initialization of pf_enable is added. Acked-by: Vlad Yasevich Signed-off-by: Zhu Yanjun Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 23 ++++++++++++++++++++++- include/net/netns/sctp.h | 7 +++++++ net/sctp/protocol.c | 3 +++ net/sctp/sm_sideeffect.c | 5 ++++- net/sctp/sysctl.c | 7 +++++++ 5 files changed, 43 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 2ea4c45cf1c8..5de632ed0ec0 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1723,6 +1723,25 @@ addip_enable - BOOLEAN Default: 0 +pf_enable - INTEGER + Enable or disable pf (pf is short for potentially failed) state. A value + of pf_retrans > path_max_retrans also disables pf state. That is, one of + both pf_enable and pf_retrans > path_max_retrans can disable pf state. + Since pf_retrans and path_max_retrans can be changed by userspace + application, sometimes user expects to disable pf state by the value of + pf_retrans > path_max_retrans, but occasionally the value of pf_retrans + or path_max_retrans is changed by the user application, this pf state is + enabled. As such, it is necessary to add this to dynamically enable + and disable pf state. See: + https://datatracker.ietf.org/doc/draft-ietf-tsvwg-sctp-failover for + details. + + 1: Enable pf. + + 0: Disable pf. + + Default: 1 + addip_noauth_enable - BOOLEAN Dynamic Address Reconfiguration (ADD-IP) requires the use of authentication to protect the operations of adding or removing new @@ -1799,7 +1818,9 @@ pf_retrans - INTEGER having to reduce path_max_retrans to a very low value. See: http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt for details. Note also that a value of pf_retrans > path_max_retrans - disables this feature + disables this feature. Since both pf_retrans and path_max_retrans can + be changed by userspace application, a variable pf_enable is used to + disable pf state. Default: 0 diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 8ba379f9e467..c501d67172b1 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -88,6 +88,13 @@ struct netns_sctp { */ int pf_retrans; + /* + * Disable Potentially-Failed feature, the feature is enabled by default + * pf_enable - 0 : disable pf + * - >0 : enable pf + */ + int pf_enable; + /* * Policy for preforming sctp/socket accounting * 0 - do socket level accounting, all assocs share sk_sndbuf diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6c2c0accc6a0..010aced44b6b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1223,6 +1223,9 @@ static int __net_init sctp_defaults_init(struct net *net) /* Max.Burst - 4 */ net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; + /* Enable pf state by default */ + net->sctp.pf_enable = 1; + /* Association.Max.Retrans - 10 attempts * Path.Max.Retrans - 5 attempts (per destination address) * Max.Init.Retransmits - 8 attempts diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 6098d4c42fa9..05cd16400e0b 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -477,6 +477,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, struct sctp_transport *transport, int is_hb) { + struct net *net = sock_net(asoc->base.sk); + /* The check for association's overall error counter exceeding the * threshold is done in the state function. */ @@ -503,7 +505,8 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, * is SCTP_ACTIVE, then mark this transport as Partially Failed, * see SCTP Quick Failover Draft, section 5.1 */ - if ((transport->state == SCTP_ACTIVE) && + if (net->sctp.pf_enable && + (transport->state == SCTP_ACTIVE) && (asoc->pf_retrans < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 26d50c565f54..ccbfc93fb8fe 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -308,6 +308,13 @@ static struct ctl_table sctp_net_table[] = { .extra1 = &max_autoclose_min, .extra2 = &max_autoclose_max, }, + { + .procname = "pf_enable", + .data = &init_net.sctp.pf_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } }; -- cgit v1.2.3-71-gd317 From 05ca4029b25c65c860be5baa9288ec8f7e0a97e6 Mon Sep 17 00:00:00 2001 From: "Singhai, Anjali" Date: Mon, 14 Dec 2015 12:21:20 -0800 Subject: geneve: Add geneve_get_rx_port support This patch adds an op that the drivers can call into to get existing geneve ports. Signed-off-by: Anjali Singhai Jain Signed-off-by: David S. Miller --- drivers/net/geneve.c | 24 ++++++++++++++++++++++++ include/net/geneve.h | 8 ++++++++ 2 files changed, 32 insertions(+) (limited to 'include/net') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 89325e483ecf..31b19fdf659d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1109,6 +1109,30 @@ static struct device_type geneve_type = { .name = "geneve", }; +/* Calls the ndo_add_geneve_port of the caller in order to + * supply the listening GENEVE udp ports. Callers are expected + * to implement the ndo_add_geneve_port. + */ +void geneve_get_rx_port(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_sock *gs; + sa_family_t sa_family; + struct sock *sk; + __be16 port; + + rcu_read_lock(); + list_for_each_entry_rcu(gs, &gn->sock_list, list) { + sk = gs->sock->sk; + sa_family = sk->sk_family; + port = inet_sk(sk)->inet_sport; + dev->netdev_ops->ndo_add_geneve_port(dev, sa_family, port); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(geneve_get_rx_port); + /* Initialize the device structure. */ static void geneve_setup(struct net_device *dev) { diff --git a/include/net/geneve.h b/include/net/geneve.h index 3106ed6eae0d..e6c23dc765f7 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -62,6 +62,14 @@ struct genevehdr { struct geneve_opt options[]; }; +#if IS_ENABLED(CONFIG_GENEVE) +void geneve_get_rx_port(struct net_device *netdev); +#else +static inline void geneve_get_rx_port(struct net_device *netdev) +{ +} +#endif + #ifdef CONFIG_INET struct net_device *geneve_dev_create_fb(struct net *net, const char *name, u8 name_assign_type, u16 dst_port); -- cgit v1.2.3-71-gd317 From 1a8524794fc7c70f44ac28e3a6e8fd637bc41f14 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 16 Dec 2015 13:20:43 -0800 Subject: net: l3mdev: Add master device lookup by index Add helper to lookup l3mdev master index given a device index. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/l3mdev.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/net') diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 774d85b2d5d9..786226f8e77b 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -51,6 +51,24 @@ static inline int l3mdev_master_ifindex(struct net_device *dev) return ifindex; } +static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) +{ + struct net_device *dev; + int rc = 0; + + if (likely(ifindex)) { + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + rc = l3mdev_master_ifindex_rcu(dev); + + rcu_read_unlock(); + } + + return rc; +} + /* get index of an interface to use for FIB lookups. For devices * enslaved to an L3 master device FIB lookups are based on the * master index @@ -167,6 +185,11 @@ static inline int l3mdev_master_ifindex(struct net_device *dev) return 0; } +static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) +{ + return 0; +} + static inline int l3mdev_fib_oif_rcu(struct net_device *dev) { return dev ? dev->ifindex : 0; -- cgit v1.2.3-71-gd317 From 6dd9a14e92e54895e143f10fef4d0b9abe109aa9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 16 Dec 2015 13:20:44 -0800 Subject: net: Allow accepted sockets to be bound to l3mdev domain Allow accepted sockets to derive their sk_bound_dev_if setting from the l3mdev domain in which the packets originated. A sysctl setting is added to control the behavior which is similar to sk_mark and sysctl_tcp_fwmark_accept. This effectively allow a process to have a "VRF-global" listen socket, with child sockets bound to the VRF device in which the packet originated. A similar behavior can be achieved using sk_mark, but a solution using marks is incomplete as it does not handle duplicate addresses in different L3 domains/VRFs. Allowing sockets to inherit the sk_bound_dev_if from l3mdev domain provides a complete solution. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++++++++ include/net/inet_sock.h | 14 ++++++++++++++ include/net/netns/ipv4.h | 3 +++ net/ipv4/syncookies.c | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 11 +++++++++++ net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 1 + net/ipv6/syncookies.c | 4 ++-- 8 files changed, 42 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 5de632ed0ec0..ceb44a095a27 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -335,6 +335,14 @@ tcp_keepalive_intvl - INTEGER after probes started. Default value: 75sec i.e. connection will be aborted after ~11 minutes of retries. +tcp_l3mdev_accept - BOOLEAN + Enables child sockets to inherit the L3 master device index. + Enabling this option allows a "global" listen socket to work + across L3 master domains (e.g., VRFs) with connected sockets + derived from the listen socket to be bound to the L3 domain in + which the packets originated. Only valid when the kernel was + compiled with CONFIG_NET_L3_MASTER_DEV. + tcp_low_latency - BOOLEAN If set, the TCP stack makes decisions that prefer lower latency as opposed to higher throughput. By default, this diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 625bdf95d673..012b1f91f3ec 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -28,6 +28,7 @@ #include #include #include +#include /** struct ip_options - IP Options * @@ -113,6 +114,19 @@ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb) return sk->sk_mark; } +static inline int inet_request_bound_dev_if(const struct sock *sk, + struct sk_buff *skb) +{ +#ifdef CONFIG_NET_L3_MASTER_DEV + struct net *net = sock_net(sk); + + if (!sk->sk_bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept) + return l3mdev_master_ifindex_by_index(net, skb->skb_iif); +#endif + + return sk->sk_bound_dev_if; +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index c68926b4899c..d75be32650ba 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -86,6 +86,9 @@ struct netns_ipv4 { int sysctl_fwmark_reflect; int sysctl_tcp_fwmark_accept; +#ifdef CONFIG_NET_L3_MASTER_DEV + int sysctl_tcp_l3mdev_accept; +#endif int sysctl_tcp_mtu_probing; int sysctl_tcp_base_mss; int sysctl_tcp_probe_threshold; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 4cbe9f0a4281..643a86c49020 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -351,7 +351,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack.v64 = 0; treq->tfo_listener = false; - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -371,7 +371,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) * hasn't changed since we received the original syn, but I see * no easy way to do this. */ - flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, + flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a0bd7a55193e..41ff1f87dfd7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -915,6 +915,17 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_NET_L3_MASTER_DEV + { + .procname = "tcp_l3mdev_accept", + .data = &init_net.ipv4.sysctl_tcp_l3mdev_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif { .procname = "tcp_mtu_probing", .data = &init_net.ipv4.sysctl_tcp_mtu_probing, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2d656eef7f8e..7b1fddc47019 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6204,7 +6204,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_openreq_init(req, &tmp_opt, skb, sk); /* Note: tcp_v6_init_req() might override ir_iif for link locals */ - inet_rsk(req)->ir_iif = sk->sk_bound_dev_if; + inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); af_ops->init_req(req, sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 205e6745393f..46e92fbd26a8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1276,6 +1276,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, ireq = inet_rsk(req); sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); + newsk->sk_bound_dev_if = ireq->ir_iif; newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = ireq->opt; rcu_assign_pointer(newinet->inet_opt, inet_opt); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index eaf7ac496d50..2906ef20795e 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -193,7 +193,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->pktopts = skb; } - ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) @@ -224,7 +224,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); fl6.saddr = ireq->ir_v6_loc_addr; - fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_oif = ireq->ir_iif; fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; -- cgit v1.2.3-71-gd317 From b1f0a0e99c58fbd7ea053ca36ba623718272b618 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Dec 2015 21:29:24 +0100 Subject: net: add inet_sk_transparent() helper Avoids cluttering tcp_v4_send_reset when followup patch extends it to deal with timewait sockets. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/request_sock.h | 2 +- include/net/tcp.h | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index a0dde04eb178..f49759decb28 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -68,7 +68,7 @@ struct request_sock { u32 peer_secid; }; -static inline struct request_sock *inet_reqsk(struct sock *sk) +static inline struct request_sock *inet_reqsk(const struct sock *sk) { return (struct request_sock *)sk; } diff --git a/include/net/tcp.h b/include/net/tcp.h index 3077735b348d..f33fecf4e282 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1620,6 +1620,18 @@ static inline void tcp_highest_sack_combine(struct sock *sk, tcp_sk(sk)->highest_sack = new; } +/* This helper checks if socket has IP_TRANSPARENT set */ +static inline bool inet_sk_transparent(const struct sock *sk) +{ + switch (sk->sk_state) { + case TCP_TIME_WAIT: + return inet_twsk(sk)->tw_transparent; + case TCP_NEW_SYN_RECV: + return inet_rsk(inet_reqsk(sk))->no_srccheck; + } + return inet_sk(sk)->transparent; +} + /* Determines whether this is a thin stream (which may suffer from * increased latency). Used to trigger latency-reducing mechanisms. */ -- cgit v1.2.3-71-gd317 From 039f50629b7f860f36644ed1f34b27da9aa62f43 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 24 Dec 2015 14:34:54 -0800 Subject: ip_tunnel: Move stats update to iptunnel_xmit() By moving stats update into iptunnel_xmit(), we can simplify iptunnel_xmit() usage. With this change there is no need to call another function (iptunnel_xmit_stats()) to update stats in tunnel xmit code path. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- drivers/net/geneve.c | 17 ++++++++--------- drivers/net/vxlan.c | 9 ++++----- include/net/ip6_tunnel.h | 17 ++++------------- include/net/ip_tunnels.h | 28 +++++++++++++++------------- include/net/udp_tunnel.h | 8 ++++---- net/ipv4/ip_gre.c | 7 +++---- net/ipv4/ip_tunnel.c | 7 ++----- net/ipv4/ip_tunnel_core.c | 9 +++++---- net/ipv4/ip_vti.c | 2 +- net/ipv4/udp_tunnel.c | 11 +++++------ net/ipv6/sit.c | 7 ++----- net/tipc/udp_media.c | 12 +++--------- 12 files changed, 56 insertions(+), 78 deletions(-) (limited to 'include/net') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index e6e00924f8ef..20dd66423ec8 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -918,12 +918,11 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); df = 0; } - err = udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, - tos, ttl, df, sport, geneve->dst_port, - !net_eq(geneve->net, dev_net(geneve->dev)), - !(flags & GENEVE_F_UDP_CSUM)); + udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, + tos, ttl, df, sport, geneve->dst_port, + !net_eq(geneve->net, dev_net(geneve->dev)), + !(flags & GENEVE_F_UDP_CSUM)); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; tx_error: @@ -1005,10 +1004,10 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = 1; ttl = ttl ? : ip6_dst_hoplimit(dst); } - err = udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, - &fl6.saddr, &fl6.daddr, prio, ttl, - sport, geneve->dst_port, - !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX)); + udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, + &fl6.saddr, &fl6.daddr, prio, ttl, + sport, geneve->dst_port, + !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX)); return NETDEV_TX_OK; tx_error: diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ba363cedef80..fecf7b6c732e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1841,9 +1841,10 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, - ttl, df, src_port, dst_port, xnet, - !(vxflags & VXLAN_F_UDP_CSUM)); + udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, ttl, df, + src_port, dst_port, xnet, + !(vxflags & VXLAN_F_UDP_CSUM)); + return 0; } #if IS_ENABLED(CONFIG_IPV6) @@ -2056,8 +2057,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, skb = NULL; goto rt_tx_error; } - - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index ff788b665277..ae07e94778d8 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -5,6 +5,7 @@ #include #include #include +#include #define IP6TUNNEL_ERR_TIMEO (30*HZ) @@ -83,22 +84,12 @@ int ip6_tnl_get_iflink(const struct net_device *dev); static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, struct net_device *dev) { - struct net_device_stats *stats = &dev->stats; int pkt_len, err; pkt_len = skb->len - skb_inner_network_offset(skb); err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - put_cpu_ptr(tstats); - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } + if (unlikely(net_xmit_eval(err))) + pkt_len = -1; + iptunnel_xmit_stats(dev, pkt_len); } #endif diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 62a750a6a8f8..6db96ea0144f 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -273,32 +273,34 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, } int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); -int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, - __be32 src, __be32 dst, u8 proto, - u8 tos, u8 ttl, __be16 df, bool xnet); +void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, u8 proto, + u8 tos, u8 ttl, __be16 df, bool xnet); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags); struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum, int gso_type_mask); -static inline void iptunnel_xmit_stats(int err, - struct net_device_stats *err_stats, - struct pcpu_sw_netstats __percpu *stats) +static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { - if (err > 0) { - struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats); + if (pkt_len > 0) { + struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += err; + tstats->tx_bytes += pkt_len; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); put_cpu_ptr(tstats); - } else if (err < 0) { - err_stats->tx_errors++; - err_stats->tx_aborted_errors++; } else { - err_stats->tx_dropped++; + struct net_device_stats *err_stats = &dev->stats; + + if (pkt_len < 0) { + err_stats->tx_errors++; + err_stats->tx_aborted_errors++; + } else { + err_stats->tx_dropped++; + } } } diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index cb2f89f20f5c..cca2ad3082c3 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -78,10 +78,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); /* Transmit the skb using UDP encapsulation. */ -int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 tos, __u8 ttl, - __be16 df, __be16 src_port, __be16 dst_port, - bool xnet, bool nocheck); +void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck); #if IS_ENABLED(CONFIG_IPV6) int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 04a48c0159cc..7c51c4e1661f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -561,10 +561,9 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) tunnel_id_to_key(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; - err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr, - key->u.ipv4.dst, IPPROTO_GRE, - key->tos, key->ttl, df, false); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + + iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, + key->tos, key->ttl, df, false); return; err_free_rt: diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0f6e9ee031c4..c7bd72e9b544 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -656,7 +656,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct rtable *rt; /* Route to the other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; - int err; bool connected; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -794,10 +793,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, return; } - err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, - tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); - + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); return; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 1db8418aa62e..eb52ce950c27 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -47,12 +47,13 @@ #include #include -int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 proto, - __u8 tos, __u8 ttl, __be16 df, bool xnet) +void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, + __u8 tos, __u8 ttl, __be16 df, bool xnet) { int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); + struct net_device *dev = skb->dev; struct iphdr *iph; int err; @@ -81,7 +82,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, err = ip_local_out(net, sk, skb); if (unlikely(net_xmit_eval(err))) pkt_len = 0; - return pkt_len; + iptunnel_xmit_stats(dev, pkt_len); } EXPORT_SYMBOL_GPL(iptunnel_xmit); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 02d9c21e2953..5cf10b777b7e 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -199,7 +199,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, err = dst_output(tunnel->net, skb->sk, skb); if (net_xmit_eval(err) == 0) err = skb->len; - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + iptunnel_xmit_stats(dev, err); return NETDEV_TX_OK; tx_error_icmp: diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index aba428626b52..0ec08814f37d 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -74,10 +74,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 tos, __u8 ttl, - __be16 df, __be16 src_port, __be16 dst_port, - bool xnet, bool nocheck) +void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck) { struct udphdr *uh; @@ -91,8 +91,7 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, - tos, ttl, df, xnet); + iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index dcccae86190f..e794ef66a401 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -820,7 +820,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, const struct in6_addr *addr6; int addr_type; u8 ttl; - int err; u8 protocol = IPPROTO_IPV6; int t_hlen = tunnel->hlen + sizeof(struct iphdr); @@ -983,10 +982,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, IPPROTO_IPV6); - err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, - protocol, tos, ttl, df, - !net_eq(tunnel->net, dev_net(dev))); - iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); return NETDEV_TX_OK; tx_error_icmp: diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 6af78c6276b4..d63a911e7fe2 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -182,15 +182,9 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, goto tx_error; } ttl = ip4_dst_hoplimit(&rt->dst); - err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, - src->ipv4.s_addr, - dst->ipv4.s_addr, 0, ttl, 0, - src->udp_port, dst->udp_port, - false, true); - if (err < 0) { - ip_rt_put(rt); - goto tx_error; - } + udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, + dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, + dst->udp_port, false, true); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; -- cgit v1.2.3-71-gd317 From df05ef874b284d833c2d9795a6350c6a373ab6c9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 19:39:32 +0100 Subject: netfilter: nf_tables: release objects on netns destruction We have to release the existing objects on netns removal otherwise we leak them. Chains are unregistered in first place to make sure no packets are walking on our rules and sets anymore. The object release happens by when we unregister the family via nft_release_afinfo() which is called from nft_unregister_afinfo() from the corresponding __net_exit path in every family. Reported-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/bridge/netfilter/nf_tables_bridge.c | 2 +- net/ipv4/netfilter/nf_tables_arp.c | 2 +- net/ipv4/netfilter/nf_tables_ipv4.c | 2 +- net/ipv6/netfilter/nf_tables_ipv6.c | 2 +- net/netfilter/nf_tables_api.c | 47 +++++++++++++++++++++++++++++++-- net/netfilter/nf_tables_inet.c | 2 +- net/netfilter/nf_tables_netdev.c | 2 +- 8 files changed, 52 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b313cda49194..a50f139ce087 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -880,7 +880,7 @@ struct nft_af_info { }; int nft_register_afinfo(struct net *, struct nft_af_info *); -void nft_unregister_afinfo(struct nft_af_info *); +void nft_unregister_afinfo(struct net *, struct nft_af_info *); int nft_register_chain_type(const struct nf_chain_type *); void nft_unregister_chain_type(const struct nf_chain_type *); diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 62f6b1b19589..7fcdd7261d88 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -141,7 +141,7 @@ err: static void nf_tables_bridge_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.bridge); + nft_unregister_afinfo(net, net->nft.bridge); kfree(net->nft.bridge); } diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 9d09d4f59545..cd84d4295a20 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -57,7 +57,7 @@ err: static void nf_tables_arp_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.arp); + nft_unregister_afinfo(net, net->nft.arp); kfree(net->nft.arp); } diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index ca9dc3c46c4f..e44ba3b12fbb 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -78,7 +78,7 @@ err: static void nf_tables_ipv4_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.ipv4); + nft_unregister_afinfo(net, net->nft.ipv4); kfree(net->nft.ipv4); } diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 120ea9131be0..30b22f4dff55 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -77,7 +77,7 @@ err: static void nf_tables_ipv6_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.ipv6); + nft_unregister_afinfo(net, net->nft.ipv6); kfree(net->nft.ipv6); } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4a23f77c363a..852273110275 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -41,6 +41,8 @@ int nft_register_afinfo(struct net *net, struct nft_af_info *afi) } EXPORT_SYMBOL_GPL(nft_register_afinfo); +static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi); + /** * nft_unregister_afinfo - unregister nf_tables address family info * @@ -48,9 +50,10 @@ EXPORT_SYMBOL_GPL(nft_register_afinfo); * * Unregister the address family for use with nf_tables. */ -void nft_unregister_afinfo(struct nft_af_info *afi) +void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi) { nfnl_lock(NFNL_SUBSYS_NFTABLES); + __nft_release_afinfo(net, afi); list_del_rcu(&afi->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } @@ -4579,7 +4582,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, } EXPORT_SYMBOL_GPL(nft_data_dump); -static int nf_tables_init_net(struct net *net) +static int __net_init nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.af_info); INIT_LIST_HEAD(&net->nft.commit_list); @@ -4587,6 +4590,46 @@ static int nf_tables_init_net(struct net *net) return 0; } +/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */ +static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) +{ + struct nft_table *table, *nt; + struct nft_chain *chain, *nc; + struct nft_rule *rule, *nr; + struct nft_set *set, *ns; + struct nft_ctx ctx = { + .net = net, + .afi = afi, + }; + + list_for_each_entry_safe(table, nt, &afi->tables, list) { + list_for_each_entry(chain, &table->chains, list) + nf_tables_unregister_hooks(table, chain, afi->nops); + /* No packets are walking on these chains anymore. */ + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + list_for_each_entry_safe(rule, nr, &chain->rules, list) { + list_del(&rule->list); + chain->use--; + nf_tables_rule_destroy(&ctx, rule); + } + } + list_for_each_entry_safe(set, ns, &table->sets, list) { + list_del(&set->list); + table->use--; + nft_set_destroy(set); + } + list_for_each_entry_safe(chain, nc, &table->chains, list) { + list_del(&chain->list); + table->use--; + nf_tables_chain_destroy(chain); + } + list_del(&table->list); + nf_tables_table_destroy(&ctx); + } +} + static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, }; diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index 9dd2d216cfc1..6b5f76295d3d 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -57,7 +57,7 @@ err: static void __net_exit nf_tables_inet_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.inet); + nft_unregister_afinfo(net, net->nft.inet); kfree(net->nft.inet); } diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 7b9c053ba750..2bfd1fbccec8 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -139,7 +139,7 @@ err: static void nf_tables_netdev_exit_net(struct net *net) { - nft_unregister_afinfo(net->nft.netdev); + nft_unregister_afinfo(net, net->nft.netdev); kfree(net->nft.netdev); } -- cgit v1.2.3-71-gd317 From 5ebe0b0eec9d6f703b137f9b938c52f7b91dd9d6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 19:40:49 +0100 Subject: netfilter: nf_tables: destroy basechain and rules on netdevice removal If the netdevice is destroyed, the resources that are attached should be released too as they belong to the device that is now gone. Suggested-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +---- net/netfilter/nf_tables_api.c | 31 +++++++++++++++++++++------ net/netfilter/nf_tables_netdev.c | 45 ++++++++++++++++----------------------- 3 files changed, 44 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a50f139ce087..0191fbb33a2f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -821,10 +821,7 @@ static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chai return container_of(chain, struct nft_base_chain, chain); } -int nft_register_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops); -void nft_unregister_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops); +int __nft_release_basechain(struct nft_ctx *ctx); unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 852273110275..5729844e1d46 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -131,8 +131,8 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -int nft_register_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) +static int nft_register_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) { struct net *net = read_pnet(&basechain->pnet); @@ -141,10 +141,9 @@ int nft_register_basechain(struct nft_base_chain *basechain, return nf_register_net_hooks(net, basechain->ops, hook_nops); } -EXPORT_SYMBOL_GPL(nft_register_basechain); -void nft_unregister_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) +static void nft_unregister_basechain(struct nft_base_chain *basechain, + unsigned int hook_nops) { struct net *net = read_pnet(&basechain->pnet); @@ -153,7 +152,6 @@ void nft_unregister_basechain(struct nft_base_chain *basechain, nf_unregister_net_hooks(net, basechain->ops, hook_nops); } -EXPORT_SYMBOL_GPL(nft_unregister_basechain); static int nf_tables_register_hooks(const struct nft_table *table, struct nft_chain *chain, @@ -4590,6 +4588,27 @@ static int __net_init nf_tables_init_net(struct net *net) return 0; } +int __nft_release_basechain(struct nft_ctx *ctx) +{ + struct nft_rule *rule, *nr; + + BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN)); + + nf_tables_unregister_hooks(ctx->chain->table, ctx->chain, + ctx->afi->nops); + list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { + list_del(&rule->list); + ctx->chain->use--; + nf_tables_rule_destroy(ctx, rule); + } + list_del(&ctx->chain->list); + ctx->table->use--; + nf_tables_chain_destroy(ctx->chain); + + return 0; +} +EXPORT_SYMBOL_GPL(__nft_release_basechain); + /* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) { diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 2bfd1fbccec8..3e9c87b961ba 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -156,35 +156,17 @@ static const struct nf_chain_type nft_filter_chain_netdev = { .hook_mask = (1 << NF_NETDEV_INGRESS), }; -static void nft_netdev_event(unsigned long event, struct nft_af_info *afi, - struct net_device *dev, struct nft_table *table, - struct nft_base_chain *basechain) +static void nft_netdev_event(unsigned long event, struct net_device *dev, + struct nft_ctx *ctx) { - switch (event) { - case NETDEV_REGISTER: - if (strcmp(basechain->dev_name, dev->name) != 0) - return; + struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - BUG_ON(!(basechain->flags & NFT_BASECHAIN_DISABLED)); - - dev_hold(dev); - basechain->ops[0].dev = dev; - basechain->flags &= ~NFT_BASECHAIN_DISABLED; - if (!(table->flags & NFT_TABLE_F_DORMANT)) - nft_register_basechain(basechain, afi->nops); - break; + switch (event) { case NETDEV_UNREGISTER: if (strcmp(basechain->dev_name, dev->name) != 0) return; - BUG_ON(basechain->flags & NFT_BASECHAIN_DISABLED); - - if (!(table->flags & NFT_TABLE_F_DORMANT)) - nft_unregister_basechain(basechain, afi->nops); - - dev_put(basechain->ops[0].dev); - basechain->ops[0].dev = NULL; - basechain->flags |= NFT_BASECHAIN_DISABLED; + __nft_release_basechain(ctx); break; case NETDEV_CHANGENAME: if (dev->ifindex != basechain->ops[0].dev->ifindex) @@ -201,20 +183,29 @@ static int nf_tables_netdev_event(struct notifier_block *this, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nft_af_info *afi; struct nft_table *table; - struct nft_chain *chain; + struct nft_chain *chain, *nr; + struct nft_ctx ctx = { + .net = dev_net(dev), + }; + + if (event != NETDEV_UNREGISTER && + event != NETDEV_CHANGENAME) + return NOTIFY_DONE; nfnl_lock(NFNL_SUBSYS_NFTABLES); list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) { + ctx.afi = afi; if (afi->family != NFPROTO_NETDEV) continue; list_for_each_entry(table, &afi->tables, list) { - list_for_each_entry(chain, &table->chains, list) { + ctx.table = table; + list_for_each_entry_safe(chain, nr, &table->chains, list) { if (!(chain->flags & NFT_BASE_CHAIN)) continue; - nft_netdev_event(event, afi, dev, table, - nft_base_chain(chain)); + ctx.chain = chain; + nft_netdev_event(event, dev, &ctx); } } } -- cgit v1.2.3-71-gd317 From 9afec6d3866b8451abcf1a7a1a381a3be6c83386 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Wed, 23 Dec 2015 23:45:18 +0100 Subject: nfc: netlink: HCI event connectivity implementation Add support for missing HCI event EVT_CONNECTIVITY and forward it to userspace. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 1 + net/nfc/core.c | 13 +++++++++++++ net/nfc/netlink.c | 37 +++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 1 + 4 files changed, 52 insertions(+) (limited to 'include/net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index dcfcfc9c00bf..1a3de8b34ad2 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -299,6 +299,7 @@ void nfc_driver_failure(struct nfc_dev *dev, int err); int nfc_se_transaction(struct nfc_dev *dev, u8 se_idx, struct nfc_evt_transaction *evt_transaction); +int nfc_se_connectivity(struct nfc_dev *dev, u8 se_idx); int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_remove_se(struct nfc_dev *dev, u32 se_idx); struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx); diff --git a/net/nfc/core.c b/net/nfc/core.c index 1fe3d3b362c0..122bb81da918 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -953,6 +953,19 @@ out: } EXPORT_SYMBOL(nfc_se_transaction); +int nfc_se_connectivity(struct nfc_dev *dev, u8 se_idx) +{ + int rc; + + pr_debug("connectivity: %x\n", se_idx); + + device_lock(&dev->dev); + rc = nfc_genl_se_connectivity(dev, se_idx); + device_unlock(&dev->dev); + return rc; +} +EXPORT_SYMBOL(nfc_se_connectivity); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f58c1fba1026..ea023b35f1c2 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -552,6 +552,43 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx) +{ + struct nfc_se *se; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_CONNECTIVITY); + if (!hdr) + goto free_msg; + + se = nfc_find_se(dev, se_idx); + if (!se) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || + nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index c20b784ad720..6c6f76b370b1 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -105,6 +105,7 @@ int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, struct nfc_evt_transaction *evt_transaction); +int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx); struct nfc_dev *nfc_get_device(unsigned int idx); -- cgit v1.2.3-71-gd317 From 502061f81d3eb4518d2e72178e494a8547788ad0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 3 Jan 2016 21:02:18 +0100 Subject: netfilter: nf_tables: add packet duplication to the netdev family You can use this to duplicate packets and inject them at the egress path of the specified interface. This duplication allows you to inspect traffic from the dummy or any other interface dedicated to this purpose. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_dup_netdev.h | 6 +++ net/netfilter/Kconfig | 16 ++++++ net/netfilter/Makefile | 6 +++ net/netfilter/nf_dup_netdev.c | 40 +++++++++++++++ net/netfilter/nft_dup_netdev.c | 97 +++++++++++++++++++++++++++++++++++ 5 files changed, 165 insertions(+) create mode 100644 include/net/netfilter/nf_dup_netdev.h create mode 100644 net/netfilter/nf_dup_netdev.c create mode 100644 net/netfilter/nft_dup_netdev.c (limited to 'include/net') diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h new file mode 100644 index 000000000000..397dcae349f9 --- /dev/null +++ b/include/net/netfilter/nf_dup_netdev.h @@ -0,0 +1,6 @@ +#ifndef _NF_DUP_NETDEV_H_ +#define _NF_DUP_NETDEV_H_ + +void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); + +#endif diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4692782b5280..8514cc4b22a8 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -563,6 +563,22 @@ config NFT_COMPAT x_tables match/target extensions over the nf_tables framework. +if NF_TABLES_NETDEV + +config NF_DUP_NETDEV + tristate "Netfilter packet duplication support" + help + This option enables the generic packet duplication infrastructure + for Netfilter. + +config NFT_DUP_NETDEV + tristate "Netfilter nf_tables netdev packet duplication support" + select NF_DUP_NETDEV + help + This option enables packet duplication for the "netdev" family. + +endif # NF_TABLES_NETDEV + endif # NF_TABLES config NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 22934846b5d1..5c9913359537 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -66,6 +66,9 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # SYNPROXY obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o +# generic packet duplication from netdev family +obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o + # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o @@ -90,6 +93,9 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o +# nf_tables netdev +obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c new file mode 100644 index 000000000000..8414ee1a0319 --- /dev/null +++ b/net/netfilter/nf_dup_netdev.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) +{ + struct net_device *dev; + struct sk_buff *skb; + + dev = dev_get_by_index_rcu(pkt->net, oif); + if (dev == NULL) + return; + + skb = skb_clone(pkt->skb, GFP_ATOMIC); + if (skb == NULL) + return; + + if (skb_mac_header_was_set(skb)) + skb_push(skb, skb->mac_len); + + skb->dev = dev; + skb_sender_cpu_clear(skb); + dev_queue_xmit(skb); +} +EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c new file mode 100644 index 000000000000..2cc1e0ef56e8 --- /dev/null +++ b/net/netfilter/nft_dup_netdev.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2015 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_dup_netdev { + enum nft_registers sreg_dev:8; +}; + +static void nft_dup_netdev_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_dup_netdev *priv = nft_expr_priv(expr); + int oif = regs->data[priv->sreg_dev]; + + nf_dup_netdev_egress(pkt, oif); +} + +static const struct nla_policy nft_dup_netdev_policy[NFTA_DUP_MAX + 1] = { + [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 }, +}; + +static int nft_dup_netdev_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_dup_netdev *priv = nft_expr_priv(expr); + + if (tb[NFTA_DUP_SREG_DEV] == NULL) + return -EINVAL; + + priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); + return nft_validate_register_load(priv->sreg_dev, sizeof(int)); +} + +static const struct nft_expr_ops nft_dup_netdev_ingress_ops; + +static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_dup_netdev *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_dup_netdev_type; +static const struct nft_expr_ops nft_dup_netdev_ops = { + .type = &nft_dup_netdev_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_netdev)), + .eval = nft_dup_netdev_eval, + .init = nft_dup_netdev_init, + .dump = nft_dup_netdev_dump, +}; + +static struct nft_expr_type nft_dup_netdev_type __read_mostly = { + .family = NFPROTO_NETDEV, + .name = "dup", + .ops = &nft_dup_netdev_ops, + .policy = nft_dup_netdev_policy, + .maxattr = NFTA_DUP_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_dup_netdev_module_init(void) +{ + return nft_register_expr(&nft_dup_netdev_type); +} + +static void __exit nft_dup_netdev_module_exit(void) +{ + nft_unregister_expr(&nft_dup_netdev_type); +} + +module_init(nft_dup_netdev_module_init); +module_exit(nft_dup_netdev_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_AF_EXPR(5, "dup"); -- cgit v1.2.3-71-gd317 From 29663b0cc1d5b9b6e2f6caf41e86c599a0310def Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 23 Dec 2015 22:36:32 +0100 Subject: mac802154: constify ieee802154_llsec_ops structure The ieee802154_llsec_ops structure is never modified, so declare it as const. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Acked-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/ieee802154_netdev.h | 2 +- net/mac802154/mac_cmd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index a62a051a3a2f..c4b31601cd53 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -337,7 +337,7 @@ struct ieee802154_mlme_ops { void (*get_mac_params)(struct net_device *dev, struct ieee802154_mac_params *params); - struct ieee802154_llsec_ops *llsec; + const struct ieee802154_llsec_ops *llsec; }; static inline struct ieee802154_mlme_ops * diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 8606da459ff3..3db16346cab3 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -126,7 +126,7 @@ static void mac802154_get_mac_params(struct net_device *dev, params->lbt = wpan_dev->lbt; } -static struct ieee802154_llsec_ops mac802154_llsec_ops = { +static const struct ieee802154_llsec_ops mac802154_llsec_ops = { .get_params = mac802154_get_params, .set_params = mac802154_set_params, .add_key = mac802154_add_key, -- cgit v1.2.3-71-gd317 From 0efeff2905d0ea14f2ee83e6cefbda35ee8cf6fc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 1 Jan 2016 13:18:48 +0100 Subject: net: make ip6tunnel_xmit definition conditional Moving the caller of iptunnel_xmit_stats causes a build error in randconfig builds that disable CONFIG_INET: In file included from ../net/xfrm/xfrm_input.c:17:0: ../include/net/ip6_tunnel.h: In function 'ip6tunnel_xmit': ../include/net/ip6_tunnel.h:93:2: error: implicit declaration of function 'iptunnel_xmit_stats' [-Werror=implicit-function-declaration] iptunnel_xmit_stats(dev, pkt_len); The reason is that the iptunnel_xmit_stats definition is hidden inside #ifdef CONFIG_INET but the caller is not. We can change one or the other to fix it, and this patch adds a second #ifdef around ip6tunnel_xmit() to avoid seeing the invalid call. Signed-off-by: Arnd Bergmann Fixes: 039f50629b7f ("ip_tunnel: Move stats update to iptunnel_xmit()") Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/ip6_tunnel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index ae07e94778d8..0d0ce0b2d870 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -81,6 +81,7 @@ __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, struct net *ip6_tnl_get_link_net(const struct net_device *dev); int ip6_tnl_get_iflink(const struct net_device *dev); +#ifdef CONFIG_INET static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, struct net_device *dev) { @@ -93,3 +94,4 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, iptunnel_xmit_stats(dev, pkt_len); } #endif +#endif -- cgit v1.2.3-71-gd317 From ef456144da8ef507c8cf504284b6042e9201a05c Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 4 Jan 2016 17:41:45 -0500 Subject: soreuseport: define reuseport groups struct sock_reuseport is an optional shared structure referenced by each socket belonging to a reuseport group. When a socket is bound to an address/port not yet in use and the reuseport flag has been set, the structure will be allocated and attached to the newly bound socket. When subsequent calls to bind are made for the same address/port, the shared structure will be updated to include the new socket and the newly bound socket will reference the group structure. Usually, when an incoming packet was destined for a reuseport group, all sockets in the same group needed to be considered before a dispatching decision was made. With this structure, an appropriate socket can be found after looking up just one socket in the group. This shared structure will also allow for more complicated decisions to be made when selecting a socket (eg a BPF filter). This work is based off a similar implementation written by Ying Cai for implementing policy-based reuseport selection. Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 + include/net/sock_reuseport.h | 20 +++++ net/core/Makefile | 2 +- net/core/sock_reuseport.c | 173 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 include/net/sock_reuseport.h create mode 100644 net/core/sock_reuseport.c (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 3794cdde837a..e830c1006935 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -318,6 +318,7 @@ struct cg_proto; * @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE) * @sk_backlog_rcv: callback to process the backlog * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 + * @sk_reuseport_cb: reuseport group container */ struct sock { /* @@ -453,6 +454,7 @@ struct sock { int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); void (*sk_destruct)(struct sock *sk); + struct sock_reuseport __rcu *sk_reuseport_cb; }; #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h new file mode 100644 index 000000000000..67d1eb8fd7af --- /dev/null +++ b/include/net/sock_reuseport.h @@ -0,0 +1,20 @@ +#ifndef _SOCK_REUSEPORT_H +#define _SOCK_REUSEPORT_H + +#include +#include + +struct sock_reuseport { + struct rcu_head rcu; + + u16 max_socks; /* length of socks */ + u16 num_socks; /* elements in socks */ + struct sock *socks[0]; /* array of sock pointers */ +}; + +extern int reuseport_alloc(struct sock *sk); +extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2); +extern void reuseport_detach_sock(struct sock *sk); +extern struct sock *reuseport_select_sock(struct sock *sk, u32 hash); + +#endif /* _SOCK_REUSEPORT_H */ diff --git a/net/core/Makefile b/net/core/Makefile index 086b01fbe1bd..0b835de04de3 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o dev_ioctl.o tso.o + sock_diag.o dev_ioctl.o tso.o sock_reuseport.o obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c new file mode 100644 index 000000000000..963c8d5f3027 --- /dev/null +++ b/net/core/sock_reuseport.c @@ -0,0 +1,173 @@ +/* + * To speed up listener socket lookup, create an array to store all sockets + * listening on the same port. This allows a decision to be made after finding + * the first socket. + */ + +#include +#include + +#define INIT_SOCKS 128 + +static DEFINE_SPINLOCK(reuseport_lock); + +static struct sock_reuseport *__reuseport_alloc(u16 max_socks) +{ + size_t size = sizeof(struct sock_reuseport) + + sizeof(struct sock *) * max_socks; + struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC); + + if (!reuse) + return NULL; + + reuse->max_socks = max_socks; + + return reuse; +} + +int reuseport_alloc(struct sock *sk) +{ + struct sock_reuseport *reuse; + + /* bh lock used since this function call may precede hlist lock in + * soft irq of receive path or setsockopt from process context + */ + spin_lock_bh(&reuseport_lock); + WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)), + "multiple allocations for the same socket"); + reuse = __reuseport_alloc(INIT_SOCKS); + if (!reuse) { + spin_unlock_bh(&reuseport_lock); + return -ENOMEM; + } + + reuse->socks[0] = sk; + reuse->num_socks = 1; + rcu_assign_pointer(sk->sk_reuseport_cb, reuse); + + spin_unlock_bh(&reuseport_lock); + + return 0; +} +EXPORT_SYMBOL(reuseport_alloc); + +static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) +{ + struct sock_reuseport *more_reuse; + u32 more_socks_size, i; + + more_socks_size = reuse->max_socks * 2U; + if (more_socks_size > U16_MAX) + return NULL; + + more_reuse = __reuseport_alloc(more_socks_size); + if (!more_reuse) + return NULL; + + more_reuse->max_socks = more_socks_size; + more_reuse->num_socks = reuse->num_socks; + + memcpy(more_reuse->socks, reuse->socks, + reuse->num_socks * sizeof(struct sock *)); + + for (i = 0; i < reuse->num_socks; ++i) + rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, + more_reuse); + + kfree_rcu(reuse, rcu); + return more_reuse; +} + +/** + * reuseport_add_sock - Add a socket to the reuseport group of another. + * @sk: New socket to add to the group. + * @sk2: Socket belonging to the existing reuseport group. + * May return ENOMEM and not add socket to group under memory pressure. + */ +int reuseport_add_sock(struct sock *sk, const struct sock *sk2) +{ + struct sock_reuseport *reuse; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)), + WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)), + "socket already in reuseport group"); + + if (reuse->num_socks == reuse->max_socks) { + reuse = reuseport_grow(reuse); + if (!reuse) { + spin_unlock_bh(&reuseport_lock); + return -ENOMEM; + } + } + + reuse->socks[reuse->num_socks] = sk; + /* paired with smp_rmb() in reuseport_select_sock() */ + smp_wmb(); + reuse->num_socks++; + rcu_assign_pointer(sk->sk_reuseport_cb, reuse); + + spin_unlock_bh(&reuseport_lock); + + return 0; +} +EXPORT_SYMBOL(reuseport_add_sock); + +void reuseport_detach_sock(struct sock *sk) +{ + struct sock_reuseport *reuse; + int i; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + rcu_assign_pointer(sk->sk_reuseport_cb, NULL); + + for (i = 0; i < reuse->num_socks; i++) { + if (reuse->socks[i] == sk) { + reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; + reuse->num_socks--; + if (reuse->num_socks == 0) + kfree_rcu(reuse, rcu); + break; + } + } + spin_unlock_bh(&reuseport_lock); +} +EXPORT_SYMBOL(reuseport_detach_sock); + +/** + * reuseport_select_sock - Select a socket from an SO_REUSEPORT group. + * @sk: First socket in the group. + * @hash: Use this hash to select. + * Returns a socket that should receive the packet (or NULL on error). + */ +struct sock *reuseport_select_sock(struct sock *sk, u32 hash) +{ + struct sock_reuseport *reuse; + struct sock *sk2 = NULL; + u16 socks; + + rcu_read_lock(); + reuse = rcu_dereference(sk->sk_reuseport_cb); + + /* if memory allocation failed or add call is not yet complete */ + if (!reuse) + goto out; + + socks = READ_ONCE(reuse->num_socks); + if (likely(socks)) { + /* paired with smp_wmb() in reuseport_add_sock() */ + smp_rmb(); + + sk2 = reuse->socks[reciprocal_scale(hash, socks)]; + } + +out: + rcu_read_unlock(); + return sk2; +} +EXPORT_SYMBOL(reuseport_select_sock); -- cgit v1.2.3-71-gd317 From e32ea7e747271a0abcd37e265005e97cc81d9df5 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 4 Jan 2016 17:41:46 -0500 Subject: soreuseport: fast reuseport UDP socket selection Include a struct sock_reuseport instance when a UDP socket binds to a specific address for the first time with the reuseport flag set. When selecting a socket for an incoming UDP packet, use the information available in sock_reuseport if present. This required adding an additional field to the UDP source address equality function to differentiate between exact and wildcard matches. The original use case allowed wildcard matches when checking for existing port uses during bind. The new use case of adding a socket to a reuseport group requires exact address matching. Performance test (using a machine with 2 CPU sockets and a total of 48 cores): Create reuseport groups of varying size. Use one socket from this group per user thread (pinning each thread to a different core) calling recvmmsg in a tight loop. Record number of messages received per second while saturating a 10G link. 10 sockets: 18% increase (~2.8M -> 3.3M pkts/s) 20 sockets: 14% increase (~2.9M -> 3.3M pkts/s) 40 sockets: 13% increase (~3.0M -> 3.4M pkts/s) This work is based off a similar implementation written by Ying Cai for implementing policy-based reuseport selection. Signed-off-by: Craig Gallek Signed-off-by: David S. Miller --- include/net/addrconf.h | 3 +- include/net/udp.h | 2 +- net/ipv4/udp.c | 119 +++++++++++++++++++++++++++++++-------- net/ipv6/inet6_connection_sock.c | 4 +- net/ipv6/udp.c | 48 +++++++++++++--- 5 files changed, 141 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 78003dfb8539..47f52d3cd8df 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -87,7 +87,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2); +int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); diff --git a/include/net/udp.h b/include/net/udp.h index 6d4ed18e1427..3b5d7f93bc23 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -191,7 +191,7 @@ static inline void udp_lib_close(struct sock *sk, long timeout) } int udp_lib_get_port(struct sock *sk, unsigned short snum, - int (*)(const struct sock *, const struct sock *), + int (*)(const struct sock *, const struct sock *, bool), unsigned int hash2_nulladdr); u32 udp_flow_hashrnd(void); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ac14ae44390d..762b01f55707 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -113,6 +113,7 @@ #include #include #include "udp_impl.h" +#include struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); @@ -137,7 +138,8 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, unsigned long *bitmap, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2), + const struct sock *sk2, + bool match_wildcard), unsigned int log) { struct sock *sk2; @@ -152,8 +154,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && (!sk2->sk_reuseport || !sk->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || !uid_eq(uid, sock_i_uid(sk2))) && - saddr_comp(sk, sk2)) { + saddr_comp(sk, sk2, true)) { if (!bitmap) return 1; __set_bit(udp_sk(sk2)->udp_port_hash >> log, bitmap); @@ -170,7 +173,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, struct udp_hslot *hslot2, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2)) + const struct sock *sk2, + bool match_wildcard)) { struct sock *sk2; struct hlist_nulls_node *node; @@ -186,8 +190,9 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && (!sk2->sk_reuseport || !sk->sk_reuseport || + rcu_access_pointer(sk->sk_reuseport_cb) || !uid_eq(uid, sock_i_uid(sk2))) && - saddr_comp(sk, sk2)) { + saddr_comp(sk, sk2, true)) { res = 1; break; } @@ -196,6 +201,35 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, return res; } +static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot, + int (*saddr_same)(const struct sock *sk1, + const struct sock *sk2, + bool match_wildcard)) +{ + struct net *net = sock_net(sk); + struct hlist_nulls_node *node; + kuid_t uid = sock_i_uid(sk); + struct sock *sk2; + + sk_nulls_for_each(sk2, node, &hslot->head) { + if (net_eq(sock_net(sk2), net) && + sk2 != sk && + sk2->sk_family == sk->sk_family && + ipv6_only_sock(sk2) == ipv6_only_sock(sk) && + (udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) && + (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && + (*saddr_same)(sk, sk2, false)) { + return reuseport_add_sock(sk, sk2); + } + } + + /* Initial allocation may have already happened via setsockopt */ + if (!rcu_access_pointer(sk->sk_reuseport_cb)) + return reuseport_alloc(sk); + return 0; +} + /** * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 * @@ -207,7 +241,8 @@ static int udp_lib_lport_inuse2(struct net *net, __u16 num, */ int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*saddr_comp)(const struct sock *sk1, - const struct sock *sk2), + const struct sock *sk2, + bool match_wildcard), unsigned int hash2_nulladdr) { struct udp_hslot *hslot, *hslot2; @@ -290,6 +325,14 @@ found: udp_sk(sk)->udp_port_hash = snum; udp_sk(sk)->udp_portaddr_hash ^= snum; if (sk_unhashed(sk)) { + if (sk->sk_reuseport && + udp_reuseport_add_sock(sk, hslot, saddr_comp)) { + inet_sk(sk)->inet_num = 0; + udp_sk(sk)->udp_port_hash = 0; + udp_sk(sk)->udp_portaddr_hash ^= snum; + goto fail_unlock; + } + sk_nulls_add_node_rcu(sk, &hslot->head); hslot->count++; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -309,13 +352,22 @@ fail: } EXPORT_SYMBOL(udp_lib_get_port); -static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses + * match_wildcard == false: addresses must be exactly the same, i.e. + * 0.0.0.0 only equals to 0.0.0.0 + */ +static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2, + bool match_wildcard) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); - return (!ipv6_only_sock(sk2) && - (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || - inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); + if (!ipv6_only_sock(sk2)) { + if (inet1->inet_rcv_saddr == inet2->inet_rcv_saddr) + return 1; + if (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr) + return match_wildcard; + } + return 0; } static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr, @@ -459,8 +511,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -478,6 +536,7 @@ begin: if (get_nulls_value(node) != slot2) goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, @@ -540,8 +599,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -560,6 +625,7 @@ begin: goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, @@ -587,7 +653,8 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); + return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, + &udp_table); } EXPORT_SYMBOL_GPL(udp4_lib_lookup); @@ -1398,6 +1465,8 @@ void udp_lib_unhash(struct sock *sk) hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock_bh(&hslot->lock); + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); if (sk_nulls_del_node_init_rcu(sk)) { hslot->count--; inet_sk(sk)->inet_num = 0; @@ -1425,22 +1494,28 @@ void udp_lib_rehash(struct sock *sk, u16 newhash) hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); nhslot2 = udp_hashslot2(udptable, newhash); udp_sk(sk)->udp_portaddr_hash = newhash; - if (hslot2 != nhslot2) { + + if (hslot2 != nhslot2 || + rcu_access_pointer(sk->sk_reuseport_cb)) { hslot = udp_hashslot(udptable, sock_net(sk), udp_sk(sk)->udp_port_hash); /* we must lock primary chain too */ spin_lock_bh(&hslot->lock); - - spin_lock(&hslot2->lock); - hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); - hslot2->count--; - spin_unlock(&hslot2->lock); - - spin_lock(&nhslot2->lock); - hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, - &nhslot2->head); - nhslot2->count++; - spin_unlock(&nhslot2->lock); + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); + + if (hslot2 != nhslot2) { + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &nhslot2->head); + nhslot2->count++; + spin_unlock(&nhslot2->lock); + } spin_unlock_bh(&hslot->lock); } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index a7ca2cde2ecb..36c3f0155010 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -51,12 +51,12 @@ int inet6_csk_bind_conflict(const struct sock *sk, (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid((struct sock *)sk2))))) { - if (ipv6_rcv_saddr_equal(sk, sk2)) + if (ipv6_rcv_saddr_equal(sk, sk2, true)) break; } if (!relax && reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN && - ipv6_rcv_saddr_equal(sk, sk2)) + ipv6_rcv_saddr_equal(sk, sk2, true)) break; } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 00775ee27d86..6204b8992de4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -76,7 +77,14 @@ static u32 udp6_ehashfn(const struct net *net, udp_ipv6_hash_secret + net_hash_mix(net)); } -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) +/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 + * only, and any IPv4 addresses if not IPv6 only + * match_wildcard == false: addresses must be exactly the same, i.e. + * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, + * and 0.0.0.0 equals to 0.0.0.0 only + */ +int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard) { const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); int sk2_ipv6only = inet_v6_ipv6only(sk2); @@ -84,16 +92,24 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; /* if both are mapped, treat as IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) - return (!sk2_ipv6only && - (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr || - sk->sk_rcv_saddr == sk2->sk_rcv_saddr)); + if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { + if (!sk2_ipv6only) { + if (sk->sk_rcv_saddr == sk2->sk_rcv_saddr) + return 1; + if (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr) + return match_wildcard; + } + return 0; + } + + if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) + return 1; - if (addr_type2 == IPV6_ADDR_ANY && + if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) return 1; - if (addr_type == IPV6_ADDR_ANY && + if (addr_type == IPV6_ADDR_ANY && match_wildcard && !(ipv6_only_sock(sk) && addr_type2 == IPV6_ADDR_MAPPED)) return 1; @@ -253,8 +269,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -273,6 +295,7 @@ begin: goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, @@ -332,8 +355,14 @@ begin: badness = score; reuseport = sk->sk_reuseport; if (reuseport) { + struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); + sk2 = reuseport_select_sock(sk, hash); + if (sk2) { + result = sk2; + goto found; + } matches = 1; } } else if (score == badness && reuseport) { @@ -352,6 +381,7 @@ begin: goto begin; if (result) { +found: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, hnum, saddr, sport, @@ -549,8 +579,8 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int err; struct net *net = dev_net(skb->dev); - sk = __udp6_lib_lookup(net, daddr, uh->dest, - saddr, uh->source, inet6_iif(skb), udptable); + sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, + inet6_iif(skb), udptable); if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); -- cgit v1.2.3-71-gd317 From 538950a1b7527a0a52ccd9337e3fcd304f027f13 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 4 Jan 2016 17:41:47 -0500 Subject: soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF Expose socket options for setting a classic or extended BPF program for use when selecting sockets in an SO_REUSEPORT group. These options can be used on the first socket to belong to a group before bind or on any socket in the group after bind. This change includes refactoring of the existing sk_filter code to allow reuse of the existing BPF filter validation checks. Signed-off-by: Craig Gallek Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 3 + arch/avr32/include/uapi/asm/socket.h | 3 + arch/frv/include/uapi/asm/socket.h | 3 + arch/ia64/include/uapi/asm/socket.h | 3 + arch/m32r/include/uapi/asm/socket.h | 3 + arch/mips/include/uapi/asm/socket.h | 3 + arch/mn10300/include/uapi/asm/socket.h | 3 + arch/parisc/include/uapi/asm/socket.h | 3 + arch/powerpc/include/uapi/asm/socket.h | 3 + arch/s390/include/uapi/asm/socket.h | 3 + arch/sparc/include/uapi/asm/socket.h | 3 + arch/xtensa/include/uapi/asm/socket.h | 3 + include/linux/filter.h | 2 + include/net/sock_reuseport.h | 10 ++- include/net/udp.h | 5 +- include/uapi/asm-generic/socket.h | 3 + net/core/filter.c | 121 +++++++++++++++++++++++++++------ net/core/sock.c | 29 ++++++++ net/core/sock_reuseport.c | 88 ++++++++++++++++++++++-- net/ipv4/udp.c | 14 ++-- net/ipv4/udp_diag.c | 4 +- net/ipv6/udp.c | 14 ++-- 22 files changed, 282 insertions(+), 44 deletions(-) (limited to 'include/net') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 9a20821b111c..c5fb9e6bc3a5 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -92,4 +92,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 2b65ed6b277c..9de0796240a0 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index 4823ad125578..f02e4849ae83 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -85,5 +85,8 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 59be3d87f86d..bce29166de1b 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -94,4 +94,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 7bc4cb273856..14aa4a6bccf1 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index dec3c850f36b..5910fe294e93 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -103,4 +103,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index cab7d6d50051..58b1aa01ab9f 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -85,4 +85,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index a5cd40cd8ee1..f9cf1223422c 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -84,4 +84,7 @@ #define SO_ATTACH_BPF 0x402B #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 0x402C +#define SO_ATTACH_REUSEPORT_EBPF 0x402D + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index c046666038f8..dd54f28ecdec 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -92,4 +92,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 296942d56e6a..d02e89d14fef 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -91,4 +91,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index e6a16c40be5f..d270ee91968e 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -81,6 +81,9 @@ #define SO_ATTACH_BPF 0x0034 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 0x0035 +#define SO_ATTACH_REUSEPORT_EBPF 0x0036 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 4120af086160..fd3b96d1153f 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -96,4 +96,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 4165e9ac9e36..294c3cdf07b3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -447,6 +447,8 @@ void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); +int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index 67d1eb8fd7af..7dda3d7adba8 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -1,6 +1,8 @@ #ifndef _SOCK_REUSEPORT_H #define _SOCK_REUSEPORT_H +#include +#include #include #include @@ -9,12 +11,18 @@ struct sock_reuseport { u16 max_socks; /* length of socks */ u16 num_socks; /* elements in socks */ + struct bpf_prog __rcu *prog; /* optional BPF sock selector */ struct sock *socks[0]; /* array of sock pointers */ }; extern int reuseport_alloc(struct sock *sk); extern int reuseport_add_sock(struct sock *sk, const struct sock *sk2); extern void reuseport_detach_sock(struct sock *sk); -extern struct sock *reuseport_select_sock(struct sock *sk, u32 hash); +extern struct sock *reuseport_select_sock(struct sock *sk, + u32 hash, + struct sk_buff *skb, + int hdr_len); +extern struct bpf_prog *reuseport_attach_prog(struct sock *sk, + struct bpf_prog *prog); #endif /* _SOCK_REUSEPORT_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 3b5d7f93bc23..2842541e28e7 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -258,7 +258,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, - struct udp_table *tbl); + struct udp_table *tbl, struct sk_buff *skb); struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, @@ -266,7 +266,8 @@ struct sock *udp6_lib_lookup(struct net *net, struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *tbl); + int dif, struct udp_table *tbl, + struct sk_buff *skb); /* * SNMP statistics for UDP and UDP-Lite diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 5c15c2a5c123..fb8a41668382 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -87,4 +87,7 @@ #define SO_ATTACH_BPF 50 #define SO_DETACH_BPF SO_DETACH_FILTER +#define SO_ATTACH_REUSEPORT_CBPF 51 +#define SO_ATTACH_REUSEPORT_EBPF 52 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/filter.c b/net/core/filter.c index c770196ae8d5..35e6fed28709 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -50,6 +50,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1167,17 +1168,32 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk) return 0; } -/** - * sk_attach_filter - attach a socket filter - * @fprog: the filter program - * @sk: the socket to use - * - * Attach the user's filter code. We first run some sanity checks on - * it to make sure it does not explode on us later. If an error - * occurs or there is insufficient memory for the filter a negative - * errno code is returned. On success the return is zero. - */ -int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk) +{ + struct bpf_prog *old_prog; + int err; + + if (bpf_prog_size(prog->len) > sysctl_optmem_max) + return -ENOMEM; + + if (sk_unhashed(sk)) { + err = reuseport_alloc(sk); + if (err) + return err; + } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) { + /* The socket wasn't bound with SO_REUSEPORT */ + return -EINVAL; + } + + old_prog = reuseport_attach_prog(sk, prog); + if (old_prog) + bpf_prog_destroy(old_prog); + + return 0; +} + +static +struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk) { unsigned int fsize = bpf_classic_proglen(fprog); unsigned int bpf_fsize = bpf_prog_size(fprog->len); @@ -1185,19 +1201,19 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) int err; if (sock_flag(sk, SOCK_FILTER_LOCKED)) - return -EPERM; + return ERR_PTR(-EPERM); /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) - return -EINVAL; + return ERR_PTR(-EINVAL); prog = bpf_prog_alloc(bpf_fsize, 0); if (!prog) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (copy_from_user(prog->insns, fprog->filter, fsize)) { __bpf_prog_free(prog); - return -EFAULT; + return ERR_PTR(-EFAULT); } prog->len = fprog->len; @@ -1205,13 +1221,30 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) err = bpf_prog_store_orig_filter(prog, fprog); if (err) { __bpf_prog_free(prog); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. */ - prog = bpf_prepare_filter(prog, NULL); + return bpf_prepare_filter(prog, NULL); +} + +/** + * sk_attach_filter - attach a socket filter + * @fprog: the filter program + * @sk: the socket to use + * + * Attach the user's filter code. We first run some sanity checks on + * it to make sure it does not explode on us later. If an error + * occurs or there is insufficient memory for the filter a negative + * errno code is returned. On success the return is zero. + */ +int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) +{ + struct bpf_prog *prog = __get_filter(fprog, sk); + int err; + if (IS_ERR(prog)) return PTR_ERR(prog); @@ -1225,23 +1258,50 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) } EXPORT_SYMBOL_GPL(sk_attach_filter); -int sk_attach_bpf(u32 ufd, struct sock *sk) +int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { - struct bpf_prog *prog; + struct bpf_prog *prog = __get_filter(fprog, sk); int err; + if (IS_ERR(prog)) + return PTR_ERR(prog); + + err = __reuseport_attach_prog(prog, sk); + if (err < 0) { + __bpf_prog_release(prog); + return err; + } + + return 0; +} + +static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog; + if (sock_flag(sk, SOCK_FILTER_LOCKED)) - return -EPERM; + return ERR_PTR(-EPERM); prog = bpf_prog_get(ufd); if (IS_ERR(prog)) - return PTR_ERR(prog); + return prog; if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { bpf_prog_put(prog); - return -EINVAL; + return ERR_PTR(-EINVAL); } + return prog; +} + +int sk_attach_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog = __get_bpf(ufd, sk); + int err; + + if (IS_ERR(prog)) + return PTR_ERR(prog); + err = __sk_attach_prog(prog, sk); if (err < 0) { bpf_prog_put(prog); @@ -1251,6 +1311,23 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) return 0; } +int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) +{ + struct bpf_prog *prog = __get_bpf(ufd, sk); + int err; + + if (IS_ERR(prog)) + return PTR_ERR(prog); + + err = __reuseport_attach_prog(prog, sk); + if (err < 0) { + bpf_prog_put(prog); + return err; + } + + return 0; +} + #define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) #define BPF_LDST_LEN 16U diff --git a/net/core/sock.c b/net/core/sock.c index 565bab7baca9..51270238e269 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -134,6 +134,7 @@ #include #include +#include #include @@ -932,6 +933,32 @@ set_rcvbuf: } break; + case SO_ATTACH_REUSEPORT_CBPF: + ret = -EINVAL; + if (optlen == sizeof(struct sock_fprog)) { + struct sock_fprog fprog; + + ret = -EFAULT; + if (copy_from_user(&fprog, optval, sizeof(fprog))) + break; + + ret = sk_reuseport_attach_filter(&fprog, sk); + } + break; + + case SO_ATTACH_REUSEPORT_EBPF: + ret = -EINVAL; + if (optlen == sizeof(u32)) { + u32 ufd; + + ret = -EFAULT; + if (copy_from_user(&ufd, optval, sizeof(ufd))) + break; + + ret = sk_reuseport_attach_bpf(ufd, sk); + } + break; + case SO_DETACH_FILTER: ret = sk_detach_filter(sk); break; @@ -1443,6 +1470,8 @@ void sk_destruct(struct sock *sk) sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); } + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 963c8d5f3027..ae0969c0fc2e 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -1,10 +1,12 @@ /* * To speed up listener socket lookup, create an array to store all sockets * listening on the same port. This allows a decision to be made after finding - * the first socket. + * the first socket. An optional BPF program can also be configured for + * selecting the socket index from the array of available sockets. */ #include +#include #include #define INIT_SOCKS 128 @@ -22,6 +24,7 @@ static struct sock_reuseport *__reuseport_alloc(u16 max_socks) reuse->max_socks = max_socks; + RCU_INIT_POINTER(reuse->prog, NULL); return reuse; } @@ -67,6 +70,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) more_reuse->max_socks = more_socks_size; more_reuse->num_socks = reuse->num_socks; + more_reuse->prog = reuse->prog; memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); @@ -75,6 +79,10 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb, more_reuse); + /* Note: we use kfree_rcu here instead of reuseport_free_rcu so + * that reuse and more_reuse can temporarily share a reference + * to prog. + */ kfree_rcu(reuse, rcu); return more_reuse; } @@ -116,6 +124,16 @@ int reuseport_add_sock(struct sock *sk, const struct sock *sk2) } EXPORT_SYMBOL(reuseport_add_sock); +static void reuseport_free_rcu(struct rcu_head *head) +{ + struct sock_reuseport *reuse; + + reuse = container_of(head, struct sock_reuseport, rcu); + if (reuse->prog) + bpf_prog_destroy(reuse->prog); + kfree(reuse); +} + void reuseport_detach_sock(struct sock *sk) { struct sock_reuseport *reuse; @@ -131,7 +149,7 @@ void reuseport_detach_sock(struct sock *sk) reuse->socks[i] = reuse->socks[reuse->num_socks - 1]; reuse->num_socks--; if (reuse->num_socks == 0) - kfree_rcu(reuse, rcu); + call_rcu(&reuse->rcu, reuseport_free_rcu); break; } } @@ -139,15 +157,53 @@ void reuseport_detach_sock(struct sock *sk) } EXPORT_SYMBOL(reuseport_detach_sock); +static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks, + struct bpf_prog *prog, struct sk_buff *skb, + int hdr_len) +{ + struct sk_buff *nskb = NULL; + u32 index; + + if (skb_shared(skb)) { + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return NULL; + skb = nskb; + } + + /* temporarily advance data past protocol header */ + if (!pskb_pull(skb, hdr_len)) { + consume_skb(nskb); + return NULL; + } + index = bpf_prog_run_save_cb(prog, skb); + __skb_push(skb, hdr_len); + + consume_skb(nskb); + + if (index >= socks) + return NULL; + + return reuse->socks[index]; +} + /** * reuseport_select_sock - Select a socket from an SO_REUSEPORT group. * @sk: First socket in the group. - * @hash: Use this hash to select. + * @hash: When no BPF filter is available, use this hash to select. + * @skb: skb to run through BPF filter. + * @hdr_len: BPF filter expects skb data pointer at payload data. If + * the skb does not yet point at the payload, this parameter represents + * how far the pointer needs to advance to reach the payload. * Returns a socket that should receive the packet (or NULL on error). */ -struct sock *reuseport_select_sock(struct sock *sk, u32 hash) +struct sock *reuseport_select_sock(struct sock *sk, + u32 hash, + struct sk_buff *skb, + int hdr_len) { struct sock_reuseport *reuse; + struct bpf_prog *prog; struct sock *sk2 = NULL; u16 socks; @@ -158,12 +214,16 @@ struct sock *reuseport_select_sock(struct sock *sk, u32 hash) if (!reuse) goto out; + prog = rcu_dereference(reuse->prog); socks = READ_ONCE(reuse->num_socks); if (likely(socks)) { /* paired with smp_wmb() in reuseport_add_sock() */ smp_rmb(); - sk2 = reuse->socks[reciprocal_scale(hash, socks)]; + if (prog && skb) + sk2 = run_bpf(reuse, socks, prog, skb, hdr_len); + else + sk2 = reuse->socks[reciprocal_scale(hash, socks)]; } out: @@ -171,3 +231,21 @@ out: return sk2; } EXPORT_SYMBOL(reuseport_select_sock); + +struct bpf_prog * +reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog) +{ + struct sock_reuseport *reuse; + struct bpf_prog *old_prog; + + spin_lock_bh(&reuseport_lock); + reuse = rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock)); + old_prog = rcu_dereference_protected(reuse->prog, + lockdep_is_held(&reuseport_lock)); + rcu_assign_pointer(reuse->prog, prog); + spin_unlock_bh(&reuseport_lock); + + return old_prog; +} +EXPORT_SYMBOL(reuseport_attach_prog); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 762b01f55707..835378365f25 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -514,7 +514,7 @@ begin: struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, NULL, 0); if (sk2) { result = sk2; goto found; @@ -553,7 +553,7 @@ found: */ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, - int dif, struct udp_table *udptable) + int dif, struct udp_table *udptable, struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -602,7 +602,8 @@ begin: struct sock *sk2; hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -647,14 +648,14 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, iph->daddr, dport, inet_iif(skb), - udptable); + udptable, skb); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, - &udp_table); + &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp4_lib_lookup); @@ -702,7 +703,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) struct net *net = dev_net(skb->dev); sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, - iph->saddr, uh->source, skb->dev->ifindex, udptable); + iph->saddr, uh->source, skb->dev->ifindex, udptable, + NULL); if (!sk) { ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; /* No socket for error */ diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 6116604bf6e8..df1966f3b6ec 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -44,7 +44,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_if, tbl); + req->id.idiag_if, tbl, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) sk = __udp6_lib_lookup(net, @@ -52,7 +52,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_sport, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, - req->id.idiag_if, tbl); + req->id.idiag_if, tbl, NULL); #endif else goto out_nosk; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6204b8992de4..56fcb55fda31 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -272,7 +272,7 @@ begin: struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, NULL, 0); if (sk2) { result = sk2; goto found; @@ -310,7 +310,8 @@ found: struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *udptable) + int dif, struct udp_table *udptable, + struct sk_buff *skb) { struct sock *sk, *result; struct hlist_nulls_node *node; @@ -358,7 +359,8 @@ begin: struct sock *sk2; hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - sk2 = reuseport_select_sock(sk, hash); + sk2 = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); if (sk2) { result = sk2; goto found; @@ -407,13 +409,13 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, return sk; return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - udptable); + udptable, skb); } struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif) { - return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); + return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp6_lib_lookup); @@ -580,7 +582,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), udptable); + inet6_iif(skb), udptable, skb); if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); -- cgit v1.2.3-71-gd317 From 0d3b7f64c84d53658daf28e2f9772e38acb9340d Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Jan 2016 13:19:31 +0200 Subject: Bluetooth: Change eir_has_data_type() to more generic eir_get_data() To make the EIR parsing helper more general purpose, make it return the found data and its length rather than just saying whether the data was present or not. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 30 ++++++++++++++++++++---------- net/bluetooth/hci_event.c | 6 +++--- net/bluetooth/mgmt.c | 3 ++- 3 files changed, 25 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c95e0326c41a..372e2a7c4ada 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1283,31 +1283,41 @@ static inline void hci_role_switch_cfm(struct hci_conn *conn, __u8 status, mutex_unlock(&hci_cb_list_lock); } -static inline bool eir_has_data_type(u8 *data, size_t data_len, u8 type) +static inline void *eir_get_data(u8 *eir, size_t eir_len, u8 type, + size_t *data_len) { size_t parsed = 0; - if (data_len < 2) - return false; + if (eir_len < 2) + return NULL; - while (parsed < data_len - 1) { - u8 field_len = data[0]; + while (parsed < eir_len - 1) { + u8 field_len = eir[0]; if (field_len == 0) break; parsed += field_len + 1; - if (parsed > data_len) + if (parsed > eir_len) break; - if (data[1] == type) - return true; + if (eir[1] != type) { + eir += field_len + 1; + continue; + } + + /* Zero length data */ + if (field_len == 1) + return NULL; - data += field_len + 1; + if (data_len) + *data_len = field_len - 1; + + return &eir[2]; } - return false; + return NULL; } static inline bool hci_bdaddr_is_rpa(bdaddr_t *bdaddr, u8 addr_type) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7554da5b7a8f..c162af5d16bf 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3833,9 +3833,9 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, data.ssp_mode = 0x01; if (hci_dev_test_flag(hdev, HCI_MGMT)) - name_known = eir_has_data_type(info->data, - sizeof(info->data), - EIR_NAME_COMPLETE); + name_known = eir_get_data(info->data, + sizeof(info->data), + EIR_NAME_COMPLETE, NULL); else name_known = true; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 621f6fdd0dd1..3297a4ecc05e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7266,7 +7266,8 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, /* Copy EIR or advertising data into event */ memcpy(ev->eir, eir, eir_len); - if (dev_class && !eir_has_data_type(ev->eir, eir_len, EIR_CLASS_OF_DEV)) + if (dev_class && !eir_get_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, + NULL)) eir_len = eir_append_data(ev->eir, eir_len, EIR_CLASS_OF_DEV, dev_class, 3); -- cgit v1.2.3-71-gd317 From 78b781ca0d35191ebf8d8cad8beec810270f0f2e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 5 Jan 2016 13:19:32 +0200 Subject: Bluetooth: Add support for Start Limited Discovery command This patch implements the mgmt Start Limited Discovery command. Most of existing Start Discovery code is reused since the only difference is the presence of a 'limited' flag as part of the discovery state. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/mgmt.h | 2 ++ net/bluetooth/hci_request.c | 11 ++++++--- net/bluetooth/mgmt.c | 53 +++++++++++++++++++++++++++++++++------- 4 files changed, 55 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 372e2a7c4ada..d4f82edb5cff 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -77,6 +77,7 @@ struct discovery_state { u8 last_adv_data_len; bool report_invalid_rssi; bool result_filtering; + bool limited; s8 rssi; u16 uuid_count; u8 (*uuids)[16]; diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index af17774c9416..ea73e0826aa7 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -584,6 +584,8 @@ struct mgmt_rp_get_adv_size_info { __u8 max_scan_rsp_len; } __packed; +#define MGMT_OP_START_LIMITED_DISCOVERY 0x0041 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 9997c31ef987..41b5f3813f02 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1737,8 +1737,8 @@ static int le_scan_disable(struct hci_request *req, unsigned long opt) static int bredr_inquiry(struct hci_request *req, unsigned long opt) { u8 length = opt; - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; + const u8 giac[3] = { 0x33, 0x8b, 0x9e }; + const u8 liac[3] = { 0x00, 0x8b, 0x9e }; struct hci_cp_inquiry cp; BT_DBG("%s", req->hdev->name); @@ -1748,7 +1748,12 @@ static int bredr_inquiry(struct hci_request *req, unsigned long opt) hci_dev_unlock(req->hdev); memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); + + if (req->hdev->discovery.limited) + memcpy(&cp.lap, liac, sizeof(cp.lap)); + else + memcpy(&cp.lap, giac, sizeof(cp.lap)); + cp.length = length; hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3297a4ecc05e..5a5089cb6570 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -103,6 +103,7 @@ static const u16 mgmt_commands[] = { MGMT_OP_ADD_ADVERTISING, MGMT_OP_REMOVE_ADVERTISING, MGMT_OP_GET_ADV_SIZE_INFO, + MGMT_OP_START_LIMITED_DISCOVERY, }; static const u16 mgmt_events[] = { @@ -3283,6 +3284,9 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status) if (!cmd) cmd = pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev); + if (!cmd) + cmd = pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev); + if (cmd) { cmd->cmd_complete(cmd, mgmt_status(status)); mgmt_pending_remove(cmd); @@ -3318,8 +3322,8 @@ static bool discovery_type_is_valid(struct hci_dev *hdev, uint8_t type, return true; } -static int start_discovery(struct sock *sk, struct hci_dev *hdev, - void *data, u16 len) +static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev, + u16 op, void *data, u16 len) { struct mgmt_cp_start_discovery *cp = data; struct mgmt_pending_cmd *cmd; @@ -3331,7 +3335,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (!hdev_is_powered(hdev)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, + err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_NOT_POWERED, &cp->type, sizeof(cp->type)); goto failed; @@ -3339,15 +3343,14 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, if (hdev->discovery.state != DISCOVERY_STOPPED || hci_dev_test_flag(hdev, HCI_PERIODIC_INQ)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - MGMT_STATUS_BUSY, &cp->type, - sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, op, MGMT_STATUS_BUSY, + &cp->type, sizeof(cp->type)); goto failed; } if (!discovery_type_is_valid(hdev, cp->type, &status)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_DISCOVERY, - status, &cp->type, sizeof(cp->type)); + err = mgmt_cmd_complete(sk, hdev->id, op, status, + &cp->type, sizeof(cp->type)); goto failed; } @@ -3358,8 +3361,12 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hdev->discovery.type = cp->type; hdev->discovery.report_invalid_rssi = false; + if (op == MGMT_OP_START_LIMITED_DISCOVERY) + hdev->discovery.limited = true; + else + hdev->discovery.limited = false; - cmd = mgmt_pending_add(sk, MGMT_OP_START_DISCOVERY, hdev, data, len); + cmd = mgmt_pending_add(sk, op, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -3376,6 +3383,21 @@ failed: return err; } +static int start_discovery(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + return start_discovery_internal(sk, hdev, MGMT_OP_START_DISCOVERY, + data, len); +} + +static int start_limited_discovery(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + return start_discovery_internal(sk, hdev, + MGMT_OP_START_LIMITED_DISCOVERY, + data, len); +} + static int service_discovery_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { @@ -6313,6 +6335,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { HCI_MGMT_VAR_LEN }, { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, + { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) @@ -7237,6 +7260,18 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, return; } + if (hdev->discovery.limited) { + /* Check for limited discoverable bit */ + if (dev_class) { + if (!(dev_class[1] & 0x20)) + return; + } else { + u8 *flags = eir_get_data(eir, eir_len, EIR_FLAGS, NULL); + if (!flags || !(flags[0] & LE_AD_LIMITED)) + return; + } + } + /* Make sure that the buffer is big enough. The 5 extra bytes * are for the potential CoD field. */ -- cgit v1.2.3-71-gd317 From d6c0256a60e685214cc8cc2b886809f11efc0084 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:46 +0800 Subject: sctp: add the rhashtable apis for sctp global transport hashtable tranport hashtbale will replace the association hashtable to do the lookup for transport, and then get association by t->assoc, rhashtable apis will be used because of it's resizable, scalable and using rcu. lport + rport + paddr will be the base hashkey to locate the chain, with net to protect one netns from another, then plus the laddr to compare to get the target. this patch will provider the lookup functions: - sctp_epaddr_lookup_transport - sctp_addrs_lookup_transport hash/unhash functions: - sctp_hash_transport - sctp_unhash_transport init/destroy functions: - sctp_transport_hashtable_init - sctp_transport_hashtable_destroy Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 11 ++++ include/net/sctp/structs.h | 5 ++ net/sctp/input.c | 131 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ce13cf20f625..7bbdfbab2efa 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -143,6 +143,17 @@ void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_transport *t); void sctp_backlog_migrate(struct sctp_association *assoc, struct sock *oldsk, struct sock *newsk); +int sctp_transport_hashtable_init(void); +void sctp_transport_hashtable_destroy(void); +void sctp_hash_transport(struct sctp_transport *t); +void sctp_unhash_transport(struct sctp_transport *t); +struct sctp_transport *sctp_addrs_lookup_transport( + struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr); +struct sctp_transport *sctp_epaddr_lookup_transport( + const struct sctp_endpoint *ep, + const union sctp_addr *paddr); /* * sctp/proc.c diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index eea9bdeecba2..4ab87d08e766 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -48,6 +48,7 @@ #define __sctp_structs_h__ #include +#include #include /* linux/in.h needs this!! */ #include /* We get struct sockaddr_in. */ #include /* We get struct in6_addr */ @@ -123,6 +124,8 @@ extern struct sctp_globals { struct sctp_hashbucket *assoc_hashtable; /* This is the sctp port control hash. */ struct sctp_bind_hashbucket *port_hashtable; + /* This is the hash of all transports. */ + struct rhashtable transport_hashtable; /* Sizes of above hashtables. */ int ep_hashsize; @@ -147,6 +150,7 @@ extern struct sctp_globals { #define sctp_assoc_hashtable (sctp_globals.assoc_hashtable) #define sctp_port_hashsize (sctp_globals.port_hashsize) #define sctp_port_hashtable (sctp_globals.port_hashtable) +#define sctp_transport_hashtable (sctp_globals.transport_hashtable) #define sctp_checksum_disable (sctp_globals.checksum_disable) /* SCTP Socket type: UDP or TCP style. */ @@ -753,6 +757,7 @@ static inline int sctp_packet_empty(struct sctp_packet *packet) struct sctp_transport { /* A list of transports. */ struct list_head transports; + struct rhash_head node; /* Reference counting. */ atomic_t refcnt; diff --git a/net/sctp/input.c b/net/sctp/input.c index b6493b3f11a9..bac8278b176b 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -782,6 +782,137 @@ hit: return ep; } +/* rhashtable for transport */ +struct sctp_hash_cmp_arg { + const union sctp_addr *laddr; + const union sctp_addr *paddr; + const struct net *net; +}; + +static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct sctp_hash_cmp_arg *x = arg->key; + const struct sctp_transport *t = ptr; + struct sctp_association *asoc = t->asoc; + const struct net *net = x->net; + + if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port)) + return 1; + if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr)) + return 1; + if (!net_eq(sock_net(asoc->base.sk), net)) + return 1; + if (!sctp_bind_addr_match(&asoc->base.bind_addr, + x->laddr, sctp_sk(asoc->base.sk))) + return 1; + + return 0; +} + +static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct sctp_transport *t = data; + const union sctp_addr *paddr = &t->ipaddr; + const struct net *net = sock_net(t->asoc->base.sk); + u16 lport = htons(t->asoc->base.bind_addr.port); + u32 addr; + + if (paddr->sa.sa_family == AF_INET6) + addr = jhash(&paddr->v6.sin6_addr, 16, seed); + else + addr = paddr->v4.sin_addr.s_addr; + + return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + (__force __u32)lport, net_hash_mix(net), seed); +} + +static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed) +{ + const struct sctp_hash_cmp_arg *x = data; + const union sctp_addr *paddr = x->paddr; + const struct net *net = x->net; + u16 lport = x->laddr->v4.sin_port; + u32 addr; + + if (paddr->sa.sa_family == AF_INET6) + addr = jhash(&paddr->v6.sin6_addr, 16, seed); + else + addr = paddr->v4.sin_addr.s_addr; + + return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + (__force __u32)lport, net_hash_mix(net), seed); +} + +static const struct rhashtable_params sctp_hash_params = { + .head_offset = offsetof(struct sctp_transport, node), + .hashfn = sctp_hash_key, + .obj_hashfn = sctp_hash_obj, + .obj_cmpfn = sctp_hash_cmp, + .automatic_shrinking = true, +}; + +int sctp_transport_hashtable_init(void) +{ + return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params); +} + +void sctp_transport_hashtable_destroy(void) +{ + rhashtable_destroy(&sctp_transport_hashtable); +} + +void sctp_hash_transport(struct sctp_transport *t) +{ + struct sctp_sockaddr_entry *addr; + struct sctp_hash_cmp_arg arg; + + addr = list_entry(t->asoc->base.bind_addr.address_list.next, + struct sctp_sockaddr_entry, list); + arg.laddr = &addr->a; + arg.paddr = &t->ipaddr; + arg.net = sock_net(t->asoc->base.sk); + +reinsert: + if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg, + &t->node, sctp_hash_params) == -EBUSY) + goto reinsert; +} + +void sctp_unhash_transport(struct sctp_transport *t) +{ + rhashtable_remove_fast(&sctp_transport_hashtable, &t->node, + sctp_hash_params); +} + +struct sctp_transport *sctp_addrs_lookup_transport( + struct net *net, + const union sctp_addr *laddr, + const union sctp_addr *paddr) +{ + struct sctp_hash_cmp_arg arg = { + .laddr = laddr, + .paddr = paddr, + .net = net, + }; + + return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg, + sctp_hash_params); +} + +struct sctp_transport *sctp_epaddr_lookup_transport( + const struct sctp_endpoint *ep, + const union sctp_addr *paddr) +{ + struct sctp_sockaddr_entry *addr; + struct net *net = sock_net(ep->base.sk); + + addr = list_entry(ep->base.bind_addr.address_list.next, + struct sctp_sockaddr_entry, list); + + return sctp_addrs_lookup_transport(net, &addr->a, paddr); +} + /* Insert association into the hash table. */ static void __sctp_hash_established(struct sctp_association *asoc) { -- cgit v1.2.3-71-gd317 From b5eff7128366c4a7a9b502097a968ec9cae2bea2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 30 Dec 2015 23:50:49 +0800 Subject: sctp: drop the old assoc hashtable of sctp transport hashtable will replace the association hashtable, so association hashtable is not used in sctp any more, so drop the codes about that. Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 21 ---------------- include/net/sctp/structs.h | 5 ---- net/sctp/input.c | 61 ---------------------------------------------- net/sctp/protocol.c | 30 ++--------------------- net/sctp/sm_sideeffect.c | 2 -- net/sctp/socket.c | 6 +---- 6 files changed, 3 insertions(+), 122 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 7bbdfbab2efa..835aa2ed9870 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -126,8 +126,6 @@ int sctp_primitive_ASCONF(struct net *, struct sctp_association *, void *arg); */ int sctp_rcv(struct sk_buff *skb); void sctp_v4_err(struct sk_buff *skb, u32 info); -void sctp_hash_established(struct sctp_association *); -void sctp_unhash_established(struct sctp_association *); void sctp_hash_endpoint(struct sctp_endpoint *); void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, @@ -530,25 +528,6 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport) return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1); } -/* This is the hash function for the association hash table. */ -static inline int sctp_assoc_hashfn(struct net *net, __u16 lport, __u16 rport) -{ - int h = (lport << 16) + rport + net_hash_mix(net); - h ^= h>>8; - return h & (sctp_assoc_hashsize - 1); -} - -/* This is the hash function for the association hash table. This is - * not used yet, but could be used as a better hash function when - * we have a vtag. - */ -static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) -{ - int h = (lport << 16) + rport; - h ^= vtag; - return h & (sctp_assoc_hashsize - 1); -} - #define sctp_for_each_hentry(epb, head) \ hlist_for_each_entry(epb, head, node) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 4ab87d08e766..20e72129be1c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -120,8 +120,6 @@ extern struct sctp_globals { /* This is the hash of all endpoints. */ struct sctp_hashbucket *ep_hashtable; - /* This is the hash of all associations. */ - struct sctp_hashbucket *assoc_hashtable; /* This is the sctp port control hash. */ struct sctp_bind_hashbucket *port_hashtable; /* This is the hash of all transports. */ @@ -129,7 +127,6 @@ extern struct sctp_globals { /* Sizes of above hashtables. */ int ep_hashsize; - int assoc_hashsize; int port_hashsize; /* Default initialization values to be applied to new associations. */ @@ -146,8 +143,6 @@ extern struct sctp_globals { #define sctp_address_families (sctp_globals.address_families) #define sctp_ep_hashsize (sctp_globals.ep_hashsize) #define sctp_ep_hashtable (sctp_globals.ep_hashtable) -#define sctp_assoc_hashsize (sctp_globals.assoc_hashsize) -#define sctp_assoc_hashtable (sctp_globals.assoc_hashtable) #define sctp_port_hashsize (sctp_globals.port_hashsize) #define sctp_port_hashtable (sctp_globals.port_hashtable) #define sctp_transport_hashtable (sctp_globals.transport_hashtable) diff --git a/net/sctp/input.c b/net/sctp/input.c index 6f075d835764..d9a6e66c5c8a 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -913,67 +913,6 @@ struct sctp_transport *sctp_epaddr_lookup_transport( return sctp_addrs_lookup_transport(net, &addr->a, paddr); } -/* Insert association into the hash table. */ -static void __sctp_hash_established(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - struct sctp_ep_common *epb; - struct sctp_hashbucket *head; - - epb = &asoc->base; - - /* Calculate which chain this entry will belong to. */ - epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, - asoc->peer.port); - - head = &sctp_assoc_hashtable[epb->hashent]; - - write_lock(&head->lock); - hlist_add_head(&epb->node, &head->chain); - write_unlock(&head->lock); -} - -/* Add an association to the hash. Local BH-safe. */ -void sctp_hash_established(struct sctp_association *asoc) -{ - if (asoc->temp) - return; - - local_bh_disable(); - __sctp_hash_established(asoc); - local_bh_enable(); -} - -/* Remove association from the hash table. */ -static void __sctp_unhash_established(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - struct sctp_hashbucket *head; - struct sctp_ep_common *epb; - - epb = &asoc->base; - - epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port, - asoc->peer.port); - - head = &sctp_assoc_hashtable[epb->hashent]; - - write_lock(&head->lock); - hlist_del_init(&epb->node); - write_unlock(&head->lock); -} - -/* Remove association from the hash table. Local BH-safe. */ -void sctp_unhash_established(struct sctp_association *asoc) -{ - if (asoc->temp) - return; - - local_bh_disable(); - __sctp_unhash_established(asoc); - local_bh_enable(); -} - /* Look up an association. */ static struct sctp_association *__sctp_lookup_association( struct net *net, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 631cfb380535..ab0d538a74ed 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1416,24 +1416,6 @@ static __init int sctp_init(void) for (order = 0; (1UL << order) < goal; order++) ; - do { - sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE / - sizeof(struct sctp_hashbucket); - if ((sctp_assoc_hashsize > (64 * 1024)) && order > 0) - continue; - sctp_assoc_hashtable = (struct sctp_hashbucket *) - __get_free_pages(GFP_KERNEL | __GFP_NOWARN, order); - } while (!sctp_assoc_hashtable && --order > 0); - if (!sctp_assoc_hashtable) { - pr_err("Failed association hash alloc\n"); - status = -ENOMEM; - goto err_ahash_alloc; - } - for (i = 0; i < sctp_assoc_hashsize; i++) { - rwlock_init(&sctp_assoc_hashtable[i].lock); - INIT_HLIST_HEAD(&sctp_assoc_hashtable[i].chain); - } - /* Allocate and initialize the endpoint hash table. */ sctp_ep_hashsize = 64; sctp_ep_hashtable = @@ -1470,8 +1452,7 @@ static __init int sctp_init(void) if (sctp_transport_hashtable_init()) goto err_thash_alloc; - pr_info("Hash tables configured (established %d bind %d)\n", - sctp_assoc_hashsize, sctp_port_hashsize); + pr_info("Hash tables configured (bind %d)\n", sctp_port_hashsize); sctp_sysctl_register(); @@ -1528,10 +1509,6 @@ err_bhash_alloc: err_thash_alloc: kfree(sctp_ep_hashtable); err_ehash_alloc: - free_pages((unsigned long)sctp_assoc_hashtable, - get_order(sctp_assoc_hashsize * - sizeof(struct sctp_hashbucket))); -err_ahash_alloc: percpu_counter_destroy(&sctp_sockets_allocated); err_percpu_counter_init: kmem_cache_destroy(sctp_chunk_cachep); @@ -1565,13 +1542,10 @@ static __exit void sctp_exit(void) sctp_sysctl_unregister(); - free_pages((unsigned long)sctp_assoc_hashtable, - get_order(sctp_assoc_hashsize * - sizeof(struct sctp_hashbucket))); - kfree(sctp_ep_hashtable); free_pages((unsigned long)sctp_port_hashtable, get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); + kfree(sctp_ep_hashtable); sctp_transport_hashtable_destroy(); percpu_counter_destroy(&sctp_sockets_allocated); diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 05cd16400e0b..4f170ad38ff4 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -866,7 +866,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; - sctp_unhash_established(asoc); sctp_association_free(asoc); } @@ -1269,7 +1268,6 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, asoc = cmd->obj.asoc; BUG_ON(asoc->peer.primary_path == NULL); sctp_endpoint_add_asoc(ep, asoc); - sctp_hash_established(asoc); break; case SCTP_CMD_UPDATE_ASSOC: diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b5f4811cea82..9bb80ec4c08f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1228,7 +1228,6 @@ out_free: * To the hash table, try to unhash it, just in case, its a noop * if it wasn't hashed so we're safe */ - sctp_unhash_established(asoc); sctp_association_free(asoc); } return err; @@ -1504,7 +1503,6 @@ static void sctp_close(struct sock *sk, long timeout) * ABORT or SHUTDOWN based on the linger options. */ if (sctp_state(asoc, CLOSED)) { - sctp_unhash_established(asoc); sctp_association_free(asoc); continue; } @@ -1986,10 +1984,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) goto out_unlock; out_free: - if (new_asoc) { - sctp_unhash_established(asoc); + if (new_asoc) sctp_association_free(asoc); - } out_unlock: release_sock(sk); -- cgit v1.2.3-71-gd317 From a72a5e2d34ec2921c0d9a7545093087e4cb90d0a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jan 2016 22:17:55 +0100 Subject: inet: kill unused skb_free op The only user was removed in commit 029f7f3b8701cc7a ("netfilter: ipv6: nf_defrag: avoid/free clone operations"). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 1 - net/ieee802154/6lowpan/reassembly.c | 1 - net/ipv4/inet_fragment.c | 10 +--------- net/ipv4/ip_fragment.c | 1 - net/ipv6/reassembly.c | 1 - 5 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index ac42bbb37b2d..12aac0fd6ee7 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -99,7 +99,6 @@ struct inet_frags { void (*constructor)(struct inet_frag_queue *q, const void *arg); void (*destructor)(struct inet_frag_queue *); - void (*skb_free)(struct sk_buff *); void (*frag_expire)(unsigned long data); struct kmem_cache *frags_cachep; const char *frags_cache_name; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 6b437e8760d3..30d875dff6b5 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -624,7 +624,6 @@ int __init lowpan_net_frag_init(void) lowpan_frags.hashfn = lowpan_hashfn; lowpan_frags.constructor = lowpan_frag_init; lowpan_frags.destructor = NULL; - lowpan_frags.skb_free = NULL; lowpan_frags.qsize = sizeof(struct frag_queue); lowpan_frags.match = lowpan_frag_match; lowpan_frags.frag_expire = lowpan_frag_expire; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index fe144dae7372..3a88b0c73797 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -285,14 +285,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) } EXPORT_SYMBOL(inet_frag_kill); -static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, - struct sk_buff *skb) -{ - if (f->skb_free) - f->skb_free(skb); - kfree_skb(skb); -} - void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) { struct sk_buff *fp; @@ -309,7 +301,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) struct sk_buff *xp = fp->next; sum_truesize += fp->truesize; - frag_kfree_skb(nf, f, fp); + kfree_skb(fp); fp = xp; } sum = sum_truesize + f->qsize; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1fe55ae81781..3f00810b7288 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -891,7 +891,6 @@ void __init ipfrag_init(void) ip4_frags.hashfn = ip4_hashfn; ip4_frags.constructor = ip4_frag_init; ip4_frags.destructor = ip4_frag_free; - ip4_frags.skb_free = NULL; ip4_frags.qsize = sizeof(struct ipq); ip4_frags.match = ip4_frag_match; ip4_frags.frag_expire = ip_expire; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 45f5ae51de65..18f3498a6c80 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -755,7 +755,6 @@ int __init ipv6_frag_init(void) ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; ip6_frags.destructor = NULL; - ip6_frags.skb_free = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.match = ip6_frag_match; ip6_frags.frag_expire = ip6_frag_expire; -- cgit v1.2.3-71-gd317 From 81435c33e062cbd4508da6f64655cb0967eeb65f Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Wed, 6 Jan 2016 13:01:05 +0100 Subject: switchdev: add bridge vlan_filtering attribute Adding vlan_filtering attribute to allow hardware vendor to support vlan-aware bridges. Vlan_filtering is a per-bridge attribute. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 6612946167fe..603ae2f88dbb 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -47,6 +47,7 @@ enum switchdev_attr_id { SWITCHDEV_ATTR_ID_PORT_STP_STATE, SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, + SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING, }; struct switchdev_attr { @@ -58,6 +59,7 @@ struct switchdev_attr { u8 stp_state; /* PORT_STP_STATE */ unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */ u32 ageing_time; /* BRIDGE_AGEING_TIME */ + bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ } u; }; -- cgit v1.2.3-71-gd317 From e6d8ecac9e68265aee9be711c5bd29406129666f Mon Sep 17 00:00:00 2001 From: Carlos Falgueras García Date: Tue, 5 Jan 2016 14:03:32 +0100 Subject: netfilter: nf_tables: Add new attributes into nft_set to store user data. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User data is stored at after 'nft_set_ops' private data into 'data[]' flexible array. The field 'udata' points to user data and 'udlen' stores its length. Add new flag NFTA_SET_USERDATA. Signed-off-by: Carlos Falgueras García Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 21 ++++++++++++++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0191fbb33a2f..f6b1daf2e698 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -291,6 +291,8 @@ void nft_unregister_set(struct nft_set_ops *ops); * @timeout: default timeout value in msecs * @gc_int: garbage collection interval in msecs * @policy: set parameterization (see enum nft_set_policies) + * @udlen: user data length + * @udata: user data * @ops: set ops * @pnet: network namespace * @flags: set flags @@ -310,6 +312,8 @@ struct nft_set { u64 timeout; u32 gc_int; u16 policy; + u16 udlen; + unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; possible_net_t pnet; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 731288a039f6..03c28a402c63 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -291,6 +291,7 @@ enum nft_set_desc_attributes { * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32) * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64) * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) + * @NFTA_SET_USERDATA: user data (NLA_BINARY) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -306,6 +307,7 @@ enum nft_set_attributes { NFTA_SET_ID, NFTA_SET_TIMEOUT, NFTA_SET_GC_INTERVAL, + NFTA_SET_USERDATA, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f5c397158e29..2011977cd79d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2323,6 +2323,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_ID] = { .type = NLA_U32 }, [NFTA_SET_TIMEOUT] = { .type = NLA_U64 }, [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, + [NFTA_SET_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -2482,6 +2484,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; } + if (nla_put(skb, NFTA_SET_USERDATA, set->udlen, set->udata)) + goto nla_put_failure; + desc = nla_nest_start(skb, NFTA_SET_DESC); if (desc == NULL) goto nla_put_failure; @@ -2691,6 +2696,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, u64 timeout; u32 ktype, dtype, flags, policy, gc_int; struct nft_set_desc desc; + unsigned char *udata; + u16 udlen; int err; if (nla[NFTA_SET_TABLE] == NULL || @@ -2803,12 +2810,16 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (IS_ERR(ops)) return PTR_ERR(ops); + udlen = 0; + if (nla[NFTA_SET_USERDATA]) + udlen = nla_len(nla[NFTA_SET_USERDATA]); + size = 0; if (ops->privsize != NULL) size = ops->privsize(nla); err = -ENOMEM; - set = kzalloc(sizeof(*set) + size, GFP_KERNEL); + set = kzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); if (set == NULL) goto err1; @@ -2817,6 +2828,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (err < 0) goto err2; + udata = NULL; + if (udlen) { + udata = set->data + size; + nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); + } + INIT_LIST_HEAD(&set->bindings); write_pnet(&set->pnet, net); set->ops = ops; @@ -2827,6 +2844,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, set->flags = flags; set->size = desc.size; set->policy = policy; + set->udlen = udlen; + set->udata = udata; set->timeout = timeout; set->gc_int = gc_int; -- cgit v1.2.3-71-gd317 From 0797cbd8e2741c69a4d416d8f669639a064db8d1 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 6 Jan 2016 17:22:46 -0500 Subject: ipv4: eliminate endianness warnings in ip_fib.h fib_multipath_hash() computes a hash using __be32 values, force cast these to u32 to pacify sparse. Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- include/net/ip_fib.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9f4df68105ab..7029527725dd 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -325,7 +325,8 @@ extern u32 fib_multipath_secret __read_mostly; static inline int fib_multipath_hash(__be32 saddr, __be32 daddr) { - return jhash_2words(saddr, daddr, fib_multipath_secret) >> 1; + return jhash_2words((__force u32)saddr, (__force u32)daddr, + fib_multipath_secret) >> 1; } void fib_select_multipath(struct fib_result *res, int hash); -- cgit v1.2.3-71-gd317 From 4d41e12593a9a6c4aaf113d44c8c619067b2b0aa Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Sun, 10 Jan 2016 21:06:22 +0100 Subject: switchdev: Adding MDB entry offload Define HW multicast entry: MAC and VID. Using a MAC address simplifies support for both IPV4 and IPv6. Signed-off-by: Elad Raz Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 11 +++++++++++ net/switchdev/switchdev.c | 2 ++ 2 files changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 603ae2f88dbb..d451122e8404 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -68,6 +68,7 @@ enum switchdev_obj_id { SWITCHDEV_OBJ_ID_PORT_VLAN, SWITCHDEV_OBJ_ID_IPV4_FIB, SWITCHDEV_OBJ_ID_PORT_FDB, + SWITCHDEV_OBJ_ID_PORT_MDB, }; struct switchdev_obj { @@ -113,6 +114,16 @@ struct switchdev_obj_port_fdb { #define SWITCHDEV_OBJ_PORT_FDB(obj) \ container_of(obj, struct switchdev_obj_port_fdb, obj) +/* SWITCHDEV_OBJ_ID_PORT_MDB */ +struct switchdev_obj_port_mdb { + struct switchdev_obj obj; + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + +#define SWITCHDEV_OBJ_PORT_MDB(obj) \ + container_of(obj, struct switchdev_obj_port_mdb, obj) + void switchdev_trans_item_enqueue(struct switchdev_trans *trans, void *data, void (*destructor)(void const *), struct switchdev_trans_item *tritem); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index df790d3385a2..ebc661d3b6e3 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -345,6 +345,8 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) return sizeof(struct switchdev_obj_ipv4_fib); case SWITCHDEV_OBJ_ID_PORT_FDB: return sizeof(struct switchdev_obj_port_fdb); + case SWITCHDEV_OBJ_ID_PORT_MDB: + return sizeof(struct switchdev_obj_port_mdb); default: BUG(); } -- cgit v1.2.3-71-gd317 From 787d7ac308ff2279e4b2ea393ad4d990de486ef2 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 7 Jan 2016 14:28:39 +0100 Subject: udp: restrict offloads to one namespace udp tunnel offloads tend to aggregate datagrams based on inner headers. gro engine gets notified by tunnel implementations about possible offloads. The match is solely based on the port number. Imagine a tunnel bound to port 53, the offloading will look into all DNS packets and tries to aggregate them based on the inner data found within. This could lead to data corruption and malformed DNS packets. While this patch minimizes the problem and helps an administrator to find the issue by querying ip tunnel/fou, a better way would be to match on the specific destination ip address so if a user space socket is bound to the same address it will conflict. Cc: Tom Herbert Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- drivers/net/vxlan.c | 2 +- include/net/protocol.h | 2 +- net/ipv4/fou.c | 2 +- net/ipv4/udp_offload.c | 10 +++++++--- 5 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 58efdec12f30..db96b0cbb8ba 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -376,7 +376,7 @@ static void geneve_notify_add_rx_port(struct geneve_sock *gs) int err; if (sa_family == AF_INET) { - err = udp_add_offload(&gs->udp_offloads); + err = udp_add_offload(sock_net(sk), &gs->udp_offloads); if (err) pr_warn("geneve: udp_add_offload failed with status %d\n", err); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 405a7b6cca25..e1e147f2d6ce 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -621,7 +621,7 @@ static void vxlan_notify_add_rx_port(struct vxlan_sock *vs) int err; if (sa_family == AF_INET) { - err = udp_add_offload(&vs->udp_offloads); + err = udp_add_offload(net, &vs->udp_offloads); if (err) pr_warn("vxlan: udp_add_offload failed with status %d\n", err); } diff --git a/include/net/protocol.h b/include/net/protocol.h index d6fcc1fcdb5b..da689f5432de 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -107,7 +107,7 @@ int inet_del_offload(const struct net_offload *prot, unsigned char num); void inet_register_protosw(struct inet_protosw *p); void inet_unregister_protosw(struct inet_protosw *p); -int udp_add_offload(struct udp_offload *prot); +int udp_add_offload(struct net *net, struct udp_offload *prot); void udp_del_offload(struct udp_offload *prot); #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index bd903fe0f750..976f0dcf6991 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -498,7 +498,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_allocation = GFP_ATOMIC; if (cfg->udp_config.family == AF_INET) { - err = udp_add_offload(&fou->udp_offloads); + err = udp_add_offload(net, &fou->udp_offloads); if (err) goto error; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f9386160cbee..5d396b96ae8b 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -21,6 +21,7 @@ static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; struct udp_offload_priv { struct udp_offload *offload; + possible_net_t net; struct rcu_head rcu; struct udp_offload_priv __rcu *next; }; @@ -241,13 +242,14 @@ out: return segs; } -int udp_add_offload(struct udp_offload *uo) +int udp_add_offload(struct net *net, struct udp_offload *uo) { struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC); if (!new_offload) return -ENOMEM; + write_pnet(&new_offload->net, net); new_offload->offload = uo; spin_lock(&udp_offload_lock); @@ -311,7 +313,8 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { - if (uo_priv->offload->port == uh->dest && + if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && + uo_priv->offload->port == uh->dest && uo_priv->offload->callbacks.gro_receive) goto unflush; } @@ -389,7 +392,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) uo_priv = rcu_dereference(udp_offload_base); for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { - if (uo_priv->offload->port == uh->dest && + if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && + uo_priv->offload->port == uh->dest && uo_priv->offload->callbacks.gro_complete) break; } -- cgit v1.2.3-71-gd317 From 13b287e8d1cad951634389f85b8c9b816bd3bb1e Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 7 Jan 2016 16:38:43 +0200 Subject: ipv4: Namespaceify tcp_keepalive_time sysctl knob Different net namespaces might have different requirements as to the keepalive time of tcp sockets. This might be required in cases where different firewall rules are in place which require tcp timeout sockets to be increased/decreased independently of the host. Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 5 +++-- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_timer.c | 1 - 5 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d75be32650ba..9e9bbebaebd1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -94,6 +94,8 @@ struct netns_ipv4 { int sysctl_tcp_probe_threshold; u32 sysctl_tcp_probe_interval; + int sysctl_tcp_keepalive_time; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/include/net/tcp.h b/include/net/tcp.h index f33fecf4e282..cb4d4cf25744 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_keepalive_time; extern int sysctl_tcp_keepalive_probes; extern int sysctl_tcp_keepalive_intvl; extern int sysctl_tcp_syn_retries; @@ -1230,7 +1229,9 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) static inline int keepalive_time_when(const struct tcp_sock *tp) { - return tp->keepalive_time ? : sysctl_tcp_keepalive_time; + struct net *net = sock_net((struct sock *)tp); + + return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time; } static inline int keepalive_probes(const struct tcp_sock *tp) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 41ff1f87dfd7..1886cc842871 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -336,13 +336,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_keepalive_time", - .data = &sysctl_tcp_keepalive_time, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, { .procname = "tcp_keepalive_probes", .data = &sysctl_tcp_keepalive_probes, @@ -961,6 +954,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_keepalive_time", + .data = &init_net.ipv4.sysctl_tcp_keepalive_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fc4f72686705..6e14ff9a8580 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2385,6 +2385,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; + net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; + return 0; fail: tcp_sk_exit(net); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 193ba1fa8a9a..166f27b43cc0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME; int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; -- cgit v1.2.3-71-gd317 From 9bd6861bd4326e3afd3f14a9ec8a723771fb20bb Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 7 Jan 2016 16:38:44 +0200 Subject: ipv4: Namespecify tcp_keepalive_probes sysctl knob This is required to have full tcp keepalive mechanism namespace support. Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 5 +++-- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_timer.c | 1 - 5 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9e9bbebaebd1..6e26ea2d0374 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -95,6 +95,7 @@ struct netns_ipv4 { u32 sysctl_tcp_probe_interval; int sysctl_tcp_keepalive_time; + int sysctl_tcp_keepalive_probes; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index cb4d4cf25744..0646521400bf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_keepalive_probes; extern int sysctl_tcp_keepalive_intvl; extern int sysctl_tcp_syn_retries; extern int sysctl_tcp_synack_retries; @@ -1236,7 +1235,9 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) static inline int keepalive_probes(const struct tcp_sock *tp) { - return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; + struct net *net = sock_net((struct sock *)tp); + + return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes; } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1886cc842871..e99fbb77dba7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -336,13 +336,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_keepalive_probes", - .data = &sysctl_tcp_keepalive_probes, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_keepalive_intvl", .data = &sysctl_tcp_keepalive_intvl, @@ -961,6 +954,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "tcp_keepalive_probes", + .data = &init_net.ipv4.sysctl_tcp_keepalive_probes, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6e14ff9a8580..ed98de85871e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2386,6 +2386,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; + net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; return 0; fail: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 166f27b43cc0..0ccb120d591a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; -- cgit v1.2.3-71-gd317 From b840d15d39128d08ed4486085e5507d2617b9ae1 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 7 Jan 2016 16:38:45 +0200 Subject: ipv4: Namespecify the tcp_keepalive_intvl sysctl knob This is the final part required to namespaceify the tcp keep alive mechanism. Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 5 +++-- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_timer.c | 1 - 5 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 6e26ea2d0374..2b7907a35568 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -96,6 +96,7 @@ struct netns_ipv4 { int sysctl_tcp_keepalive_time; int sysctl_tcp_keepalive_probes; + int sysctl_tcp_keepalive_intvl; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0646521400bf..a80255f4ca33 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_keepalive_intvl; extern int sysctl_tcp_syn_retries; extern int sysctl_tcp_synack_retries; extern int sysctl_tcp_retries1; @@ -1223,7 +1222,9 @@ void tcp_enter_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { - return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; + struct net *net = sock_net((struct sock *)tp); + + return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl; } static inline int keepalive_time_when(const struct tcp_sock *tp) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e99fbb77dba7..46ce410703b1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -336,13 +336,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_keepalive_intvl", - .data = &sysctl_tcp_keepalive_intvl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, { .procname = "tcp_retries1", .data = &sysctl_tcp_retries1, @@ -961,6 +954,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_keepalive_intvl", + .data = &init_net.ipv4.sysctl_tcp_keepalive_intvl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ed98de85871e..65947c1f4733 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2387,6 +2387,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; + net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; return 0; fail: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0ccb120d591a..a4730a28b220 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -24,7 +24,6 @@ int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; -- cgit v1.2.3-71-gd317 From fdc5432a7b44ab7de17141beec19d946b9344e91 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 7 Jan 2016 15:50:22 +0100 Subject: net, sched: add skb_at_tc_ingress helper Add a skb_at_tc_ingress() as this will be needed elsewhere as well and can hide the ugly ifdef. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/net/sch_generic.h | 9 +++++++++ net/sched/cls_bpf.c | 6 +----- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index b2a8e6338576..636a362a0e03 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -407,6 +407,15 @@ bool tcf_destroy(struct tcf_proto *tp, bool force); void tcf_destroy_chain(struct tcf_proto __rcu **fl); int skb_do_redirect(struct sk_buff *); +static inline bool skb_at_tc_ingress(const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_CLS_ACT + return G_TC_AT(skb->tc_verd) & AT_INGRESS; +#else + return false; +#endif +} + /* Reset all TX qdiscs greater then index of a device. */ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5faaa5425f7b..b3c8bb4aeef5 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -79,12 +79,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_bpf_head *head = rcu_dereference_bh(tp->root); + bool at_ingress = skb_at_tc_ingress(skb); struct cls_bpf_prog *prog; -#ifdef CONFIG_NET_CLS_ACT - bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; -#else - bool at_ingress = false; -#endif int ret = -1; if (unlikely(!skb_mac_header_was_set(skb))) -- cgit v1.2.3-71-gd317